mac80211: merge performance improvement patches
author Felix Fietkau <nbd@nbd.name>
Sun, 26 Jul 2020 13:12:32 +0000 (15:12 +0200)
committer Felix Fietkau <nbd@nbd.name>
Thu, 6 Aug 2020 10:43:25 +0000 (12:43 +0200)
Fix fq_codel performance issues
Add a new rx function for batch processing

Signed-off-by: Felix Fietkau <nbd@nbd.name>
package/kernel/mac80211/patches/subsys/307-mac80211-add-a-function-for-running-rx-without-passi.patch [new file with mode: 0644]
package/kernel/mac80211/patches/subsys/308-net-fq_impl-use-skb_get_hash-instead-of-skb_get_hash.patch [new file with mode: 0644]
package/kernel/mac80211/patches/subsys/309-mac80211-calculcate-skb-hash-early-when-using-itxq.patch [new file with mode: 0644]

diff --git a/package/kernel/mac80211/patches/subsys/307-mac80211-add-a-function-for-running-rx-without-passi.patch b/package/kernel/mac80211/patches/subsys/307-mac80211-add-a-function-for-running-rx-without-passi.patch
new file mode 100644 (file)
index 0000000..5837a7b
--- /dev/null
@@ -0,0 +1,186 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sat, 25 Jul 2020 20:53:23 +0200
+Subject: [PATCH] mac80211: add a function for running rx without passing skbs
+ to the stack
+
+This can be used to run mac80211 rx processing on a batch of frames in NAPI
+poll before passing them to the network stack in a large batch.
+This can improve icache footprint, or it can be used to pass frames via
+netif_receive_skb_list.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/mac80211.h
++++ b/include/net/mac80211.h
+@@ -4358,6 +4358,31 @@ void ieee80211_free_hw(struct ieee80211_
+ void ieee80211_restart_hw(struct ieee80211_hw *hw);
+ /**
++ * ieee80211_rx_list - receive frame and store processed skbs in a list
++ *
++ * Use this function to hand received frames to mac80211. The receive
++ * buffer in @skb must start with an IEEE 802.11 header. In case a
++ * paged @skb is used, the driver is recommended to put the ieee80211
++ * header of the frame on the linear part of the @skb to avoid memory
++ * allocation and/or memcpy by the stack.
++ *
++ * This function may not be called in IRQ context. Calls to this function
++ * for a single hardware must be synchronized against each other. Calls to
++ * this function, ieee80211_rx_ni() and ieee80211_rx_irqsafe() may not be
++ * mixed for a single hardware. Must not run concurrently with
++ * ieee80211_tx_status() or ieee80211_tx_status_ni().
++ *
++ * This function must be called with BHs disabled and the RCU read lock held
++ *
++ * @hw: the hardware this frame came in on
++ * @sta: the station the frame was received from, or %NULL
++ * @skb: the buffer to receive, owned by mac80211 after this call
++ * @list: the destination list
++ */
++void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *sta,
++                     struct sk_buff *skb, struct list_head *list);
++
++/**
+  * ieee80211_rx_napi - receive frame from NAPI context
+  *
+  * Use this function to hand received frames to mac80211. The receive
+--- a/net/mac80211/ieee80211_i.h
++++ b/net/mac80211/ieee80211_i.h
+@@ -218,7 +218,7 @@ enum ieee80211_rx_flags {
+ };
+ struct ieee80211_rx_data {
+-      struct napi_struct *napi;
++      struct list_head *list;
+       struct sk_buff *skb;
+       struct ieee80211_local *local;
+       struct ieee80211_sub_if_data *sdata;
+--- a/net/mac80211/rx.c
++++ b/net/mac80211/rx.c
+@@ -2552,8 +2552,8 @@ static void ieee80211_deliver_skb_to_loc
+               memset(skb->cb, 0, sizeof(skb->cb));
+               /* deliver to local stack */
+-              if (rx->napi)
+-                      napi_gro_receive(rx->napi, skb);
++              if (rx->list)
++                      list_add_tail(&skb->list, rx->list);
+               else
+                       netif_receive_skb(skb);
+       }
+@@ -3843,7 +3843,6 @@ void ieee80211_release_reorder_timeout(s
+               /* This is OK -- must be QoS data frame */
+               .security_idx = tid,
+               .seqno_idx = tid,
+-              .napi = NULL, /* must be NULL to not have races */
+       };
+       struct tid_ampdu_rx *tid_agg_rx;
+@@ -4453,8 +4452,8 @@ static bool ieee80211_invoke_fast_rx(str
+       /* deliver to local stack */
+       skb->protocol = eth_type_trans(skb, fast_rx->dev);
+       memset(skb->cb, 0, sizeof(skb->cb));
+-      if (rx->napi)
+-              napi_gro_receive(rx->napi, skb);
++      if (rx->list)
++              list_add_tail(&skb->list, rx->list);
+       else
+               netif_receive_skb(skb);
+@@ -4521,7 +4520,7 @@ static bool ieee80211_prepare_and_rx_han
+ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
+                                        struct ieee80211_sta *pubsta,
+                                        struct sk_buff *skb,
+-                                       struct napi_struct *napi)
++                                       struct list_head *list)
+ {
+       struct ieee80211_local *local = hw_to_local(hw);
+       struct ieee80211_sub_if_data *sdata;
+@@ -4536,7 +4535,7 @@ static void __ieee80211_rx_handle_packet
+       memset(&rx, 0, sizeof(rx));
+       rx.skb = skb;
+       rx.local = local;
+-      rx.napi = napi;
++      rx.list = list;
+       if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc))
+               I802_DEBUG_INC(local->dot11ReceivedFragmentCount);
+@@ -4644,8 +4643,8 @@ static void __ieee80211_rx_handle_packet
+  * This is the receive path handler. It is called by a low level driver when an
+  * 802.11 MPDU is received from the hardware.
+  */
+-void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
+-                     struct sk_buff *skb, struct napi_struct *napi)
++void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
++                     struct sk_buff *skb, struct list_head *list)
+ {
+       struct ieee80211_local *local = hw_to_local(hw);
+       struct ieee80211_rate *rate = NULL;
+@@ -4737,36 +4736,53 @@ void ieee80211_rx_napi(struct ieee80211_
+       status->rx_flags = 0;
+       /*
+-       * key references and virtual interfaces are protected using RCU
+-       * and this requires that we are in a read-side RCU section during
+-       * receive processing
+-       */
+-      rcu_read_lock();
+-
+-      /*
+        * Frames with failed FCS/PLCP checksum are not returned,
+        * all other frames are returned without radiotap header
+        * if it was previously present.
+        * Also, frames with less than 16 bytes are dropped.
+        */
+       skb = ieee80211_rx_monitor(local, skb, rate);
+-      if (!skb) {
+-              rcu_read_unlock();
++      if (!skb)
+               return;
+-      }
+       ieee80211_tpt_led_trig_rx(local,
+                       ((struct ieee80211_hdr *)skb->data)->frame_control,
+                       skb->len);
+-      __ieee80211_rx_handle_packet(hw, pubsta, skb, napi);
+-
+-      rcu_read_unlock();
++      __ieee80211_rx_handle_packet(hw, pubsta, skb, list);
+       return;
+  drop:
+       kfree_skb(skb);
+ }
++EXPORT_SYMBOL(ieee80211_rx_list);
++
++void ieee80211_rx_napi(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
++                     struct sk_buff *skb, struct napi_struct *napi)
++{
++      struct sk_buff *tmp;
++      LIST_HEAD(list);
++
++
++      /*
++       * key references and virtual interfaces are protected using RCU
++       * and this requires that we are in a read-side RCU section during
++       * receive processing
++       */
++      rcu_read_lock();
++      ieee80211_rx_list(hw, pubsta, skb, &list);
++      rcu_read_unlock();
++
++      if (!napi) {
++              netif_receive_skb_list(&list);
++              return;
++      }
++
++      list_for_each_entry_safe(skb, tmp, &list, list) {
++              skb_list_del_init(skb);
++              napi_gro_receive(napi, skb);
++      }
++}
+ EXPORT_SYMBOL(ieee80211_rx_napi);
+ /* This is a version of the rx handler that can be called from hard irq
diff --git a/package/kernel/mac80211/patches/subsys/308-net-fq_impl-use-skb_get_hash-instead-of-skb_get_hash.patch b/package/kernel/mac80211/patches/subsys/308-net-fq_impl-use-skb_get_hash-instead-of-skb_get_hash.patch
new file mode 100644 (file)
index 0000000..77ecc82
--- /dev/null
@@ -0,0 +1,55 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sun, 26 Jul 2020 14:37:02 +0200
+Subject: [PATCH] net/fq_impl: use skb_get_hash instead of
+ skb_get_hash_perturb
+
+This avoids unnecessarily regenerating the skb flow hash.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/fq.h
++++ b/include/net/fq.h
+@@ -69,15 +69,6 @@ struct fq {
+       struct list_head backlogs;
+       spinlock_t lock;
+       u32 flows_cnt;
+-#if LINUX_VERSION_IS_GEQ(5,3,10) || \
+-    LINUX_VERSION_IN_RANGE(4,19,83, 4,20,0) || \
+-    LINUX_VERSION_IN_RANGE(4,14,153, 4,15,0) || \
+-    LINUX_VERSION_IN_RANGE(4,9,200, 4,10,0) || \
+-    LINUX_VERSION_IN_RANGE(4,4,200, 4,5,0)
+-      siphash_key_t   perturbation;
+-#else
+-      u32 perturbation;
+-#endif
+       u32 limit;
+       u32 memory_limit;
+       u32 memory_usage;
+--- a/include/net/fq_impl.h
++++ b/include/net/fq_impl.h
+@@ -108,15 +108,7 @@ begin:
+ static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb)
+ {
+-#if LINUX_VERSION_IS_GEQ(5,3,10) || \
+-    LINUX_VERSION_IN_RANGE(4,19,83, 4,20,0) || \
+-    LINUX_VERSION_IN_RANGE(4,14,153, 4,15,0) || \
+-    LINUX_VERSION_IN_RANGE(4,9,200, 4,10,0) || \
+-    LINUX_VERSION_IN_RANGE(4,4,200, 4,5,0)
+-      u32 hash = skb_get_hash_perturb(skb, &fq->perturbation);
+-#else
+-      u32 hash = skb_get_hash_perturb(skb, fq->perturbation);
+-#endif
++      u32 hash = skb_get_hash(skb);
+       return reciprocal_scale(hash, fq->flows_cnt);
+ }
+@@ -316,7 +308,6 @@ static int fq_init(struct fq *fq, int fl
+       INIT_LIST_HEAD(&fq->backlogs);
+       spin_lock_init(&fq->lock);
+       fq->flows_cnt = max_t(u32, flows_cnt, 1);
+-      get_random_bytes(&fq->perturbation, sizeof(fq->perturbation));
+       fq->quantum = 300;
+       fq->limit = 8192;
+       fq->memory_limit = 16 << 20; /* 16 MBytes */
diff --git a/package/kernel/mac80211/patches/subsys/309-mac80211-calculcate-skb-hash-early-when-using-itxq.patch b/package/kernel/mac80211/patches/subsys/309-mac80211-calculcate-skb-hash-early-when-using-itxq.patch
new file mode 100644 (file)
index 0000000..92b1362
--- /dev/null
@@ -0,0 +1,19 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sun, 26 Jul 2020 14:42:58 +0200
+Subject: [PATCH] mac80211: calculate skb hash early when using itxq
+
+This avoids flow separation issues when using software encryption
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -3937,6 +3937,7 @@ void __ieee80211_subif_start_xmit(struct
+       if (local->ops->wake_tx_queue) {
+               u16 queue = __ieee80211_select_queue(sdata, sta, skb);
+               skb_set_queue_mapping(skb, queue);
++              skb_get_hash(skb);
+       }
+       if (sta) {