mac80211: add fq performace improvements
authorFelix Fietkau <nbd@nbd.name>
Wed, 25 Nov 2020 18:51:31 +0000 (19:51 +0100)
committerFelix Fietkau <nbd@nbd.name>
Sun, 14 Feb 2021 18:41:07 +0000 (19:41 +0100)
Improves performance under load

Signed-off-by: Felix Fietkau <nbd@nbd.name>
package/kernel/mac80211/patches/subsys/310-net-fq_impl-bulk-free-packets-from-a-flow-on-overmem.patch [new file with mode: 0644]
package/kernel/mac80211/patches/subsys/311-net-fq_impl-drop-get_default_func-move-default-flow-.patch [new file with mode: 0644]
package/kernel/mac80211/patches/subsys/312-net-fq_impl-do-not-maintain-a-backlog-sorted-list-of.patch [new file with mode: 0644]
package/kernel/mac80211/patches/subsys/500-mac80211_configure_antenna_gain.patch

diff --git a/package/kernel/mac80211/patches/subsys/310-net-fq_impl-bulk-free-packets-from-a-flow-on-overmem.patch b/package/kernel/mac80211/patches/subsys/310-net-fq_impl-bulk-free-packets-from-a-flow-on-overmem.patch
new file mode 100644 (file)
index 0000000..05a8880
--- /dev/null
@@ -0,0 +1,95 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 25 Nov 2020 18:03:46 +0100
+Subject: [PATCH] net/fq_impl: bulk-free packets from a flow on overmemory
+
+This is similar to what sch_fq_codel does. It also amortizes the worst
+case cost of a follow-up patch that changes the selection of the biggest
+flow for dropping packets
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/fq_impl.h
++++ b/include/net/fq_impl.h
+@@ -11,17 +11,25 @@
+ /* functions that are embedded into includer */
++
++static void
++__fq_adjust_removal(struct fq *fq, struct fq_flow *flow, unsigned int packets,
++                  unsigned int bytes, unsigned int truesize)
++{
++      struct fq_tin *tin = flow->tin;
++
++      tin->backlog_bytes -= bytes;
++      tin->backlog_packets -= packets;
++      flow->backlog -= bytes;
++      fq->backlog -= packets;
++      fq->memory_usage -= truesize;
++}
++
+ static void fq_adjust_removal(struct fq *fq,
+                             struct fq_flow *flow,
+                             struct sk_buff *skb)
+ {
+-      struct fq_tin *tin = flow->tin;
+-
+-      tin->backlog_bytes -= skb->len;
+-      tin->backlog_packets--;
+-      flow->backlog -= skb->len;
+-      fq->backlog--;
+-      fq->memory_usage -= skb->truesize;
++      __fq_adjust_removal(fq, flow, 1, skb->len, skb->truesize);
+ }
+ static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow)
+@@ -59,6 +67,34 @@ static struct sk_buff *fq_flow_dequeue(s
+       return skb;
+ }
++static int fq_flow_drop(struct fq *fq, struct fq_flow *flow,
++                      fq_skb_free_t free_func)
++{
++      unsigned int packets = 0, bytes = 0, truesize = 0;
++      struct fq_tin *tin = flow->tin;
++      struct sk_buff *skb;
++      int pending;
++
++      lockdep_assert_held(&fq->lock);
++
++      pending = min_t(int, 32, skb_queue_len(&flow->queue) / 2);
++      do {
++              skb = __skb_dequeue(&flow->queue);
++              if (!skb)
++                      break;
++
++              packets++;
++              bytes += skb->len;
++              truesize += skb->truesize;
++              free_func(fq, tin, flow, skb);
++      } while (packets < pending);
++
++      __fq_adjust_removal(fq, flow, packets, bytes, truesize);
++      fq_rejigger_backlog(fq, flow);
++
++      return packets;
++}
++
+ static struct sk_buff *fq_tin_dequeue(struct fq *fq,
+                                     struct fq_tin *tin,
+                                     fq_tin_dequeue_t dequeue_func)
+@@ -190,12 +226,9 @@ static void fq_tin_enqueue(struct fq *fq
+               if (!flow)
+                       return;
+-              skb = fq_flow_dequeue(fq, flow);
+-              if (!skb)
++              if (!fq_flow_drop(fq, flow, free_func))
+                       return;
+-              free_func(fq, flow->tin, flow, skb);
+-
+               flow->tin->overlimit++;
+               fq->overlimit++;
+               if (oom) {
diff --git a/package/kernel/mac80211/patches/subsys/311-net-fq_impl-drop-get_default_func-move-default-flow-.patch b/package/kernel/mac80211/patches/subsys/311-net-fq_impl-drop-get_default_func-move-default-flow-.patch
new file mode 100644 (file)
index 0000000..de1f1bb
--- /dev/null
@@ -0,0 +1,144 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 25 Nov 2020 18:09:10 +0100
+Subject: [PATCH] net/fq_impl: drop get_default_func, move default flow to
+ fq_tin
+
+Simplifies the code and prepares for a rework of scanning for flows on
+overmemory drop.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/fq.h
++++ b/include/net/fq.h
+@@ -47,6 +47,7 @@ struct fq_flow {
+ struct fq_tin {
+       struct list_head new_flows;
+       struct list_head old_flows;
++      struct fq_flow default_flow;
+       u32 backlog_bytes;
+       u32 backlog_packets;
+       u32 overlimit;
+--- a/include/net/fq_impl.h
++++ b/include/net/fq_impl.h
+@@ -151,8 +151,7 @@ static u32 fq_flow_idx(struct fq *fq, st
+ static struct fq_flow *fq_flow_classify(struct fq *fq,
+                                       struct fq_tin *tin, u32 idx,
+-                                      struct sk_buff *skb,
+-                                      fq_flow_get_default_t get_default_func)
++                                      struct sk_buff *skb)
+ {
+       struct fq_flow *flow;
+@@ -160,7 +159,7 @@ static struct fq_flow *fq_flow_classify(
+       flow = &fq->flows[idx];
+       if (flow->tin && flow->tin != tin) {
+-              flow = get_default_func(fq, tin, idx, skb);
++              flow = &tin->default_flow;
+               tin->collisions++;
+               fq->collisions++;
+       }
+@@ -192,15 +191,14 @@ static void fq_recalc_backlog(struct fq
+ static void fq_tin_enqueue(struct fq *fq,
+                          struct fq_tin *tin, u32 idx,
+                          struct sk_buff *skb,
+-                         fq_skb_free_t free_func,
+-                         fq_flow_get_default_t get_default_func)
++                         fq_skb_free_t free_func)
+ {
+       struct fq_flow *flow;
+       bool oom;
+       lockdep_assert_held(&fq->lock);
+-      flow = fq_flow_classify(fq, tin, idx, skb, get_default_func);
++      flow = fq_flow_classify(fq, tin, idx, skb);
+       flow->tin = tin;
+       flow->backlog += skb->len;
+@@ -331,6 +329,7 @@ static void fq_tin_init(struct fq_tin *t
+ {
+       INIT_LIST_HEAD(&tin->new_flows);
+       INIT_LIST_HEAD(&tin->old_flows);
++      fq_flow_init(&tin->default_flow);
+ }
+ static int fq_init(struct fq *fq, int flows_cnt)
+--- a/net/mac80211/ieee80211_i.h
++++ b/net/mac80211/ieee80211_i.h
+@@ -855,7 +855,6 @@ enum txq_info_flags {
+  */
+ struct txq_info {
+       struct fq_tin tin;
+-      struct fq_flow def_flow;
+       struct codel_vars def_cvars;
+       struct codel_stats cstats;
+       struct sk_buff_head frags;
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -1322,7 +1322,7 @@ static struct sk_buff *codel_dequeue_fun
+       fq = &local->fq;
+       if (cvars == &txqi->def_cvars)
+-              flow = &txqi->def_flow;
++              flow = &txqi->tin.default_flow;
+       else
+               flow = &fq->flows[cvars - local->cvars];
+@@ -1365,7 +1365,7 @@ static struct sk_buff *fq_tin_dequeue_fu
+               cparams = &local->cparams;
+       }
+-      if (flow == &txqi->def_flow)
++      if (flow == &tin->default_flow)
+               cvars = &txqi->def_cvars;
+       else
+               cvars = &local->cvars[flow - fq->flows];
+@@ -1392,17 +1392,6 @@ static void fq_skb_free_func(struct fq *
+       ieee80211_free_txskb(&local->hw, skb);
+ }
+-static struct fq_flow *fq_flow_get_default_func(struct fq *fq,
+-                                              struct fq_tin *tin,
+-                                              int idx,
+-                                              struct sk_buff *skb)
+-{
+-      struct txq_info *txqi;
+-
+-      txqi = container_of(tin, struct txq_info, tin);
+-      return &txqi->def_flow;
+-}
+-
+ static void ieee80211_txq_enqueue(struct ieee80211_local *local,
+                                 struct txq_info *txqi,
+                                 struct sk_buff *skb)
+@@ -1415,8 +1404,7 @@ static void ieee80211_txq_enqueue(struct
+       spin_lock_bh(&fq->lock);
+       fq_tin_enqueue(fq, tin, flow_idx, skb,
+-                     fq_skb_free_func,
+-                     fq_flow_get_default_func);
++                     fq_skb_free_func);
+       spin_unlock_bh(&fq->lock);
+ }
+@@ -1459,7 +1447,6 @@ void ieee80211_txq_init(struct ieee80211
+                       struct txq_info *txqi, int tid)
+ {
+       fq_tin_init(&txqi->tin);
+-      fq_flow_init(&txqi->def_flow);
+       codel_vars_init(&txqi->def_cvars);
+       codel_stats_init(&txqi->cstats);
+       __skb_queue_head_init(&txqi->frags);
+@@ -3310,8 +3297,7 @@ static bool ieee80211_amsdu_aggregate(st
+        */
+       tin = &txqi->tin;
+-      flow = fq_flow_classify(fq, tin, flow_idx, skb,
+-                              fq_flow_get_default_func);
++      flow = fq_flow_classify(fq, tin, flow_idx, skb);
+       head = skb_peek_tail(&flow->queue);
+       if (!head || skb_is_gso(head))
+               goto out;
diff --git a/package/kernel/mac80211/patches/subsys/312-net-fq_impl-do-not-maintain-a-backlog-sorted-list-of.patch b/package/kernel/mac80211/patches/subsys/312-net-fq_impl-do-not-maintain-a-backlog-sorted-list-of.patch
new file mode 100644 (file)
index 0000000..0e92399
--- /dev/null
@@ -0,0 +1,314 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 25 Nov 2020 18:10:34 +0100
+Subject: [PATCH] net/fq_impl: do not maintain a backlog-sorted list of
+ flows
+
+A sorted flow list is only needed to drop packets in the biggest flow when
+hitting the overmemory condition.
+By scanning flows only when needed, we can avoid paying the cost of
+maintaining the list under normal conditions
+In order to avoid scanning lots of empty flows and touching too many cold
+cache lines, a bitmap of flows with backlog is maintained
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/fq.h
++++ b/include/net/fq.h
+@@ -19,8 +19,6 @@ struct fq_tin;
+  * @flowchain: can be linked to fq_tin's new_flows or old_flows. Used for DRR++
+  *    (deficit round robin) based round robin queuing similar to the one
+  *    found in net/sched/sch_fq_codel.c
+- * @backlogchain: can be linked to other fq_flow and fq. Used to keep track of
+- *    fat flows and efficient head-dropping if packet limit is reached
+  * @queue: sk_buff queue to hold packets
+  * @backlog: number of bytes pending in the queue. The number of packets can be
+  *    found in @queue.qlen
+@@ -29,7 +27,6 @@ struct fq_tin;
+ struct fq_flow {
+       struct fq_tin *tin;
+       struct list_head flowchain;
+-      struct list_head backlogchain;
+       struct sk_buff_head queue;
+       u32 backlog;
+       int deficit;
+@@ -47,6 +44,7 @@ struct fq_flow {
+ struct fq_tin {
+       struct list_head new_flows;
+       struct list_head old_flows;
++      struct list_head tin_list;
+       struct fq_flow default_flow;
+       u32 backlog_bytes;
+       u32 backlog_packets;
+@@ -60,14 +58,14 @@ struct fq_tin {
+ /**
+  * struct fq - main container for fair queuing purposes
+  *
+- * @backlogs: linked to fq_flows. Used to maintain fat flows for efficient
+- *    head-dropping when @backlog reaches @limit
+  * @limit: max number of packets that can be queued across all flows
+  * @backlog: number of packets queued across all flows
+  */
+ struct fq {
+       struct fq_flow *flows;
+-      struct list_head backlogs;
++      unsigned long *flows_bitmap;
++
++      struct list_head tin_backlog;
+       spinlock_t lock;
+       u32 flows_cnt;
+       u32 limit;
+--- a/include/net/fq_impl.h
++++ b/include/net/fq_impl.h
+@@ -17,12 +17,24 @@ __fq_adjust_removal(struct fq *fq, struc
+                   unsigned int bytes, unsigned int truesize)
+ {
+       struct fq_tin *tin = flow->tin;
++      int idx;
+       tin->backlog_bytes -= bytes;
+       tin->backlog_packets -= packets;
+       flow->backlog -= bytes;
+       fq->backlog -= packets;
+       fq->memory_usage -= truesize;
++
++      if (flow->backlog)
++              return;
++
++      if (flow == &tin->default_flow) {
++              list_del_init(&tin->tin_list);
++              return;
++      }
++
++      idx = flow - fq->flows;
++      __clear_bit(idx, fq->flows_bitmap);
+ }
+ static void fq_adjust_removal(struct fq *fq,
+@@ -32,24 +44,6 @@ static void fq_adjust_removal(struct fq
+       __fq_adjust_removal(fq, flow, 1, skb->len, skb->truesize);
+ }
+-static void fq_rejigger_backlog(struct fq *fq, struct fq_flow *flow)
+-{
+-      struct fq_flow *i;
+-
+-      if (flow->backlog == 0) {
+-              list_del_init(&flow->backlogchain);
+-      } else {
+-              i = flow;
+-
+-              list_for_each_entry_continue(i, &fq->backlogs, backlogchain)
+-                      if (i->backlog < flow->backlog)
+-                              break;
+-
+-              list_move_tail(&flow->backlogchain,
+-                             &i->backlogchain);
+-      }
+-}
+-
+ static struct sk_buff *fq_flow_dequeue(struct fq *fq,
+                                      struct fq_flow *flow)
+ {
+@@ -62,7 +56,6 @@ static struct sk_buff *fq_flow_dequeue(s
+               return NULL;
+       fq_adjust_removal(fq, flow, skb);
+-      fq_rejigger_backlog(fq, flow);
+       return skb;
+ }
+@@ -90,7 +83,6 @@ static int fq_flow_drop(struct fq *fq, s
+       } while (packets < pending);
+       __fq_adjust_removal(fq, flow, packets, bytes, truesize);
+-      fq_rejigger_backlog(fq, flow);
+       return packets;
+ }
+@@ -170,22 +162,36 @@ static struct fq_flow *fq_flow_classify(
+       return flow;
+ }
+-static void fq_recalc_backlog(struct fq *fq,
+-                            struct fq_tin *tin,
+-                            struct fq_flow *flow)
++static struct fq_flow *fq_find_fattest_flow(struct fq *fq)
+ {
+-      struct fq_flow *i;
++      struct fq_tin *tin;
++      struct fq_flow *flow = NULL;
++      u32 len = 0;
++      int i;
+-      if (list_empty(&flow->backlogchain))
+-              list_add_tail(&flow->backlogchain, &fq->backlogs);
++      for_each_set_bit(i, fq->flows_bitmap, fq->flows_cnt) {
++              struct fq_flow *cur = &fq->flows[i];
++              unsigned int cur_len;
+-      i = flow;
+-      list_for_each_entry_continue_reverse(i, &fq->backlogs,
+-                                           backlogchain)
+-              if (i->backlog > flow->backlog)
+-                      break;
++              cur_len = cur->backlog;
++              if (cur_len <= len)
++                      continue;
+-      list_move(&flow->backlogchain, &i->backlogchain);
++              flow = cur;
++              len = cur_len;
++      }
++
++      list_for_each_entry(tin, &fq->tin_backlog, tin_list) {
++              unsigned int cur_len = tin->default_flow.backlog;
++
++              if (cur_len <= len)
++                      continue;
++
++              flow = &tin->default_flow;
++              len = cur_len;
++      }
++
++      return flow;
+ }
+ static void fq_tin_enqueue(struct fq *fq,
+@@ -200,6 +206,13 @@ static void fq_tin_enqueue(struct fq *fq
+       flow = fq_flow_classify(fq, tin, idx, skb);
++      if (!flow->backlog) {
++              if (flow != &tin->default_flow)
++                      __set_bit(idx, fq->flows_bitmap);
++              else if (list_empty(&tin->tin_list))
++                      list_add(&tin->tin_list, &fq->tin_backlog);
++      }
++
+       flow->tin = tin;
+       flow->backlog += skb->len;
+       tin->backlog_bytes += skb->len;
+@@ -207,8 +220,6 @@ static void fq_tin_enqueue(struct fq *fq
+       fq->memory_usage += skb->truesize;
+       fq->backlog++;
+-      fq_recalc_backlog(fq, tin, flow);
+-
+       if (list_empty(&flow->flowchain)) {
+               flow->deficit = fq->quantum;
+               list_add_tail(&flow->flowchain,
+@@ -218,9 +229,7 @@ static void fq_tin_enqueue(struct fq *fq
+       __skb_queue_tail(&flow->queue, skb);
+       oom = (fq->memory_usage > fq->memory_limit);
+       while (fq->backlog > fq->limit || oom) {
+-              flow = list_first_entry_or_null(&fq->backlogs,
+-                                              struct fq_flow,
+-                                              backlogchain);
++              flow = fq_find_fattest_flow(fq);
+               if (!flow)
+                       return;
+@@ -255,8 +264,6 @@ static void fq_flow_filter(struct fq *fq
+               fq_adjust_removal(fq, flow, skb);
+               free_func(fq, tin, flow, skb);
+       }
+-
+-      fq_rejigger_backlog(fq, flow);
+ }
+ static void fq_tin_filter(struct fq *fq,
+@@ -279,16 +286,18 @@ static void fq_flow_reset(struct fq *fq,
+                         struct fq_flow *flow,
+                         fq_skb_free_t free_func)
+ {
++      struct fq_tin *tin = flow->tin;
+       struct sk_buff *skb;
+       while ((skb = fq_flow_dequeue(fq, flow)))
+-              free_func(fq, flow->tin, flow, skb);
++              free_func(fq, tin, flow, skb);
+-      if (!list_empty(&flow->flowchain))
++      if (!list_empty(&flow->flowchain)) {
+               list_del_init(&flow->flowchain);
+-
+-      if (!list_empty(&flow->backlogchain))
+-              list_del_init(&flow->backlogchain);
++              if (list_empty(&tin->new_flows) &&
++                  list_empty(&tin->old_flows))
++                      list_del_init(&tin->tin_list);
++      }
+       flow->tin = NULL;
+@@ -314,6 +323,7 @@ static void fq_tin_reset(struct fq *fq,
+               fq_flow_reset(fq, flow, free_func);
+       }
++      WARN_ON_ONCE(!list_empty(&tin->tin_list));
+       WARN_ON_ONCE(tin->backlog_bytes);
+       WARN_ON_ONCE(tin->backlog_packets);
+ }
+@@ -321,7 +331,6 @@ static void fq_tin_reset(struct fq *fq,
+ static void fq_flow_init(struct fq_flow *flow)
+ {
+       INIT_LIST_HEAD(&flow->flowchain);
+-      INIT_LIST_HEAD(&flow->backlogchain);
+       __skb_queue_head_init(&flow->queue);
+ }
+@@ -329,6 +338,7 @@ static void fq_tin_init(struct fq_tin *t
+ {
+       INIT_LIST_HEAD(&tin->new_flows);
+       INIT_LIST_HEAD(&tin->old_flows);
++      INIT_LIST_HEAD(&tin->tin_list);
+       fq_flow_init(&tin->default_flow);
+ }
+@@ -337,8 +347,8 @@ static int fq_init(struct fq *fq, int fl
+       int i;
+       memset(fq, 0, sizeof(fq[0]));
+-      INIT_LIST_HEAD(&fq->backlogs);
+       spin_lock_init(&fq->lock);
++      INIT_LIST_HEAD(&fq->tin_backlog);
+       fq->flows_cnt = max_t(u32, flows_cnt, 1);
+       fq->quantum = 300;
+       fq->limit = 8192;
+@@ -348,6 +358,14 @@ static int fq_init(struct fq *fq, int fl
+       if (!fq->flows)
+               return -ENOMEM;
++      fq->flows_bitmap = kcalloc(BITS_TO_LONGS(fq->flows_cnt), sizeof(long),
++                                 GFP_KERNEL);
++      if (!fq->flows_bitmap) {
++              kvfree(fq->flows);
++              fq->flows = NULL;
++              return -ENOMEM;
++      }
++
+       for (i = 0; i < fq->flows_cnt; i++)
+               fq_flow_init(&fq->flows[i]);
+@@ -364,6 +382,9 @@ static void fq_reset(struct fq *fq,
+       kvfree(fq->flows);
+       fq->flows = NULL;
++
++      kfree(fq->flows_bitmap);
++      fq->flows_bitmap = NULL;
+ }
+ #endif
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -3364,8 +3364,6 @@ out_recalc:
+       if (head->len != orig_len) {
+               flow->backlog += head->len - orig_len;
+               tin->backlog_bytes += head->len - orig_len;
+-
+-              fq_recalc_backlog(fq, tin, flow);
+       }
+ out:
+       spin_unlock_bh(&fq->lock);
index d76fc88..cd2a3b4 100644 (file)
@@ -87,7 +87,7 @@
        CFG80211_TESTMODE_CMD(ieee80211_testmode_cmd)
 --- a/net/mac80211/ieee80211_i.h
 +++ b/net/mac80211/ieee80211_i.h
-@@ -1401,6 +1401,7 @@ struct ieee80211_local {
+@@ -1400,6 +1400,7 @@ struct ieee80211_local {
        int dynamic_ps_forced_timeout;
  
        int user_power_level; /* in dBm, for all interfaces */