mac80211: add a few performance improvement patches
[openwrt/staging/blogic.git] / package / kernel / mac80211 / patches / subsys / 354-mac80211-calculate-hash-for-fq-without-holding-fq-lo.patch
diff --git a/package/kernel/mac80211/patches/subsys/354-mac80211-calculate-hash-for-fq-without-holding-fq-lo.patch b/package/kernel/mac80211/patches/subsys/354-mac80211-calculate-hash-for-fq-without-holding-fq-lo.patch
new file mode 100644 (file)
index 0000000..2b6d8ab
--- /dev/null
@@ -0,0 +1,124 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sat, 16 Mar 2019 17:57:38 +0100
+Subject: [PATCH] mac80211: calculate hash for fq without holding fq->lock
+ in itxq enqueue
+
+Reduces lock contention on enqueue/dequeue of iTXQ packets
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/fq_impl.h
++++ b/include/net/fq_impl.h
+@@ -107,21 +107,23 @@ begin:
+       return skb;
+ }
++static u32 fq_flow_idx(struct fq *fq, struct sk_buff *skb)
++{
++      u32 hash = skb_get_hash_perturb(skb, fq->perturbation);
++
++      return reciprocal_scale(hash, fq->flows_cnt);
++}
++
+ static struct fq_flow *fq_flow_classify(struct fq *fq,
+-                                      struct fq_tin *tin,
++                                      struct fq_tin *tin, u32 idx,
+                                       struct sk_buff *skb,
+                                       fq_flow_get_default_t get_default_func)
+ {
+       struct fq_flow *flow;
+-      u32 hash;
+-      u32 idx;
+       lockdep_assert_held(&fq->lock);
+-      hash = skb_get_hash_perturb(skb, fq->perturbation);
+-      idx = reciprocal_scale(hash, fq->flows_cnt);
+       flow = &fq->flows[idx];
+-
+       if (flow->tin && flow->tin != tin) {
+               flow = get_default_func(fq, tin, idx, skb);
+               tin->collisions++;
+@@ -153,7 +155,7 @@ static void fq_recalc_backlog(struct fq
+ }
+ static void fq_tin_enqueue(struct fq *fq,
+-                         struct fq_tin *tin,
++                         struct fq_tin *tin, u32 idx,
+                          struct sk_buff *skb,
+                          fq_skb_free_t free_func,
+                          fq_flow_get_default_t get_default_func)
+@@ -163,7 +165,7 @@ static void fq_tin_enqueue(struct fq *fq
+       lockdep_assert_held(&fq->lock);
+-      flow = fq_flow_classify(fq, tin, skb, get_default_func);
++      flow = fq_flow_classify(fq, tin, idx, skb, get_default_func);
+       flow->tin = tin;
+       flow->backlog += skb->len;
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -1390,11 +1390,15 @@ static void ieee80211_txq_enqueue(struct
+ {
+       struct fq *fq = &local->fq;
+       struct fq_tin *tin = &txqi->tin;
++      u32 flow_idx = fq_flow_idx(fq, skb);
+       ieee80211_set_skb_enqueue_time(skb);
+-      fq_tin_enqueue(fq, tin, skb,
++
++      spin_lock_bh(&fq->lock);
++      fq_tin_enqueue(fq, tin, flow_idx, skb,
+                      fq_skb_free_func,
+                      fq_flow_get_default_func);
++      spin_unlock_bh(&fq->lock);
+ }
+ static bool fq_vlan_filter_func(struct fq *fq, struct fq_tin *tin,
+@@ -1564,7 +1568,6 @@ static bool ieee80211_queue_skb(struct i
+                               struct sta_info *sta,
+                               struct sk_buff *skb)
+ {
+-      struct fq *fq = &local->fq;
+       struct ieee80211_vif *vif;
+       struct txq_info *txqi;
+@@ -1582,9 +1585,7 @@ static bool ieee80211_queue_skb(struct i
+       if (!txqi)
+               return false;
+-      spin_lock_bh(&fq->lock);
+       ieee80211_txq_enqueue(local, txqi, skb);
+-      spin_unlock_bh(&fq->lock);
+       schedule_and_wake_txq(local, txqi);
+@@ -3198,6 +3199,7 @@ static bool ieee80211_amsdu_aggregate(st
+       u8 max_subframes = sta->sta.max_amsdu_subframes;
+       int max_frags = local->hw.max_tx_fragments;
+       int max_amsdu_len = sta->sta.max_amsdu_len;
++      u32 flow_idx;
+       int orig_truesize;
+       __be16 len;
+       void *data;
+@@ -3220,6 +3222,8 @@ static bool ieee80211_amsdu_aggregate(st
+               max_amsdu_len = min_t(int, max_amsdu_len,
+                                     sta->sta.max_rc_amsdu_len);
++      flow_idx = fq_flow_idx(fq, skb);
++
+       spin_lock_bh(&fq->lock);
+       /* TODO: Ideally aggregation should be done on dequeue to remain
+@@ -3227,7 +3231,8 @@ static bool ieee80211_amsdu_aggregate(st
+        */
+       tin = &txqi->tin;
+-      flow = fq_flow_classify(fq, tin, skb, fq_flow_get_default_func);
++      flow = fq_flow_classify(fq, tin, flow_idx, skb,
++                              fq_flow_get_default_func);
+       head = skb_peek_tail(&flow->queue);
+       if (!head)
+               goto unlock;