kernel: cake: renumber backport patches
author Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Thu, 25 Jun 2020 12:39:54 +0000 (13:39 +0100)
committer Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
Tue, 30 Jun 2020 08:34:06 +0000 (09:34 +0100)
Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
12 files changed:
target/linux/generic/backport-4.19/390-v5.1-sch_cake-Make-the-dual-modes-fairer.patch [new file with mode: 0644]
target/linux/generic/backport-4.19/391-v5.1-sch_cake-Permit-use-of-connmarks-as-tin-classifiers.patch [new file with mode: 0644]
target/linux/generic/backport-4.19/392-v5.1-sch_cake-Interpret-fwmark-parameter-as-a-bitmask.patch [new file with mode: 0644]
target/linux/generic/backport-4.19/392-v5.1-sch_cake-Make-the-dual-modes-fairer.patch [deleted file]
target/linux/generic/backport-4.19/393-v5.1-sch_cake-Permit-use-of-connmarks-as-tin-classifiers.patch [deleted file]
target/linux/generic/backport-4.19/393-v5.4-sch_cake-drop-unused-variable-tin_quantum_prio.patch [new file with mode: 0644]
target/linux/generic/backport-4.19/394-v5.1-sch_cake-Interpret-fwmark-parameter-as-a-bitmask.patch [deleted file]
target/linux/generic/backport-4.19/394-v5.4-sch_cake-Add-missing-NLA-policy-entry-TCA_CAKE_SPLIT.patch [new file with mode: 0644]
target/linux/generic/backport-4.19/395-v5.4-sch_cake-drop-unused-variable-tin_quantum_prio.patch [deleted file]
target/linux/generic/backport-4.19/396-v5.4-sch_cake-Add-missing-NLA-policy-entry-TCA_CAKE_SPLIT.patch [deleted file]
target/linux/generic/backport-5.4/395-v5.8-net-sch_cake-Take-advantage-of-skb-hash-where-appropriate.patch [new file with mode: 0644]
target/linux/generic/backport-5.4/398-v5.9-net-sch_cake-Take-advantage-of-skb-hash-where-appropriate.patch [deleted file]

diff --git a/target/linux/generic/backport-4.19/390-v5.1-sch_cake-Make-the-dual-modes-fairer.patch b/target/linux/generic/backport-4.19/390-v5.1-sch_cake-Make-the-dual-modes-fairer.patch
new file mode 100644 (file)
index 0000000..cd94600
--- /dev/null
@@ -0,0 +1,217 @@
+From 712639929912c5eefb09facccb48d55b3f72c9f8 Mon Sep 17 00:00:00 2001
+From: George Amanakis <gamanakis@gmail.com>
+Date: Fri, 1 Mar 2019 16:04:05 +0100
+Subject: [PATCH] sch_cake: Make the dual modes fairer
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+CAKE host fairness does not work well with TCP flows in dual-srchost and
+dual-dsthost setup. The reason is that ACKs generated by TCP flows are
+classified as sparse flows, and affect flow isolation from other hosts. Fix
+this by calculating host_load based only on the bulk flows a host
+generates. In a hash collision the host_bulk_flow_count values must be
+decremented on the old hosts and incremented on the new ones *if* the queue
+is in the bulk set.
+
+Reported-by: Pete Heist <peteheist@gmail.com>
+Signed-off-by: George Amanakis <gamanakis@gmail.com>
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+---
+ net/sched/sch_cake.c | 92 ++++++++++++++++++++++++++++++--------------
+ 1 file changed, 63 insertions(+), 29 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -138,8 +138,8 @@ struct cake_flow {
+ struct cake_host {
+       u32 srchost_tag;
+       u32 dsthost_tag;
+-      u16 srchost_refcnt;
+-      u16 dsthost_refcnt;
++      u16 srchost_bulk_flow_count;
++      u16 dsthost_bulk_flow_count;
+ };
+ struct cake_heap_entry {
+@@ -746,8 +746,10 @@ skip_hash:
+                * queue, accept the collision, update the host tags.
+                */
+               q->way_collisions++;
+-              q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--;
+-              q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--;
++              if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
++                      q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
++                      q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
++              }
+               allocate_src = cake_dsrc(flow_mode);
+               allocate_dst = cake_ddst(flow_mode);
+ found:
+@@ -767,13 +769,14 @@ found:
+                       }
+                       for (i = 0; i < CAKE_SET_WAYS;
+                               i++, k = (k + 1) % CAKE_SET_WAYS) {
+-                              if (!q->hosts[outer_hash + k].srchost_refcnt)
++                              if (!q->hosts[outer_hash + k].srchost_bulk_flow_count)
+                                       break;
+                       }
+                       q->hosts[outer_hash + k].srchost_tag = srchost_hash;
+ found_src:
+                       srchost_idx = outer_hash + k;
+-                      q->hosts[srchost_idx].srchost_refcnt++;
++                      if (q->flows[reduced_hash].set == CAKE_SET_BULK)
++                              q->hosts[srchost_idx].srchost_bulk_flow_count++;
+                       q->flows[reduced_hash].srchost = srchost_idx;
+               }
+@@ -789,13 +792,14 @@ found_src:
+                       }
+                       for (i = 0; i < CAKE_SET_WAYS;
+                            i++, k = (k + 1) % CAKE_SET_WAYS) {
+-                              if (!q->hosts[outer_hash + k].dsthost_refcnt)
++                              if (!q->hosts[outer_hash + k].dsthost_bulk_flow_count)
+                                       break;
+                       }
+                       q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
+ found_dst:
+                       dsthost_idx = outer_hash + k;
+-                      q->hosts[dsthost_idx].dsthost_refcnt++;
++                      if (q->flows[reduced_hash].set == CAKE_SET_BULK)
++                              q->hosts[dsthost_idx].dsthost_bulk_flow_count++;
+                       q->flows[reduced_hash].dsthost = dsthost_idx;
+               }
+       }
+@@ -1793,20 +1797,30 @@ static s32 cake_enqueue(struct sk_buff *
+               b->sparse_flow_count++;
+               if (cake_dsrc(q->flow_mode))
+-                      host_load = max(host_load, srchost->srchost_refcnt);
++                      host_load = max(host_load, srchost->srchost_bulk_flow_count);
+               if (cake_ddst(q->flow_mode))
+-                      host_load = max(host_load, dsthost->dsthost_refcnt);
++                      host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
+               flow->deficit = (b->flow_quantum *
+                                quantum_div[host_load]) >> 16;
+       } else if (flow->set == CAKE_SET_SPARSE_WAIT) {
++              struct cake_host *srchost = &b->hosts[flow->srchost];
++              struct cake_host *dsthost = &b->hosts[flow->dsthost];
++
+               /* this flow was empty, accounted as a sparse flow, but actually
+                * in the bulk rotation.
+                */
+               flow->set = CAKE_SET_BULK;
+               b->sparse_flow_count--;
+               b->bulk_flow_count++;
++
++              if (cake_dsrc(q->flow_mode))
++                      srchost->srchost_bulk_flow_count++;
++
++              if (cake_ddst(q->flow_mode))
++                      dsthost->dsthost_bulk_flow_count++;
++
+       }
+       if (q->buffer_used > q->buffer_max_used)
+@@ -1974,23 +1988,8 @@ retry:
+       dsthost = &b->hosts[flow->dsthost];
+       host_load = 1;
+-      if (cake_dsrc(q->flow_mode))
+-              host_load = max(host_load, srchost->srchost_refcnt);
+-
+-      if (cake_ddst(q->flow_mode))
+-              host_load = max(host_load, dsthost->dsthost_refcnt);
+-
+-      WARN_ON(host_load > CAKE_QUEUES);
+-
+       /* flow isolation (DRR++) */
+       if (flow->deficit <= 0) {
+-              /* The shifted prandom_u32() is a way to apply dithering to
+-               * avoid accumulating roundoff errors
+-               */
+-              flow->deficit += (b->flow_quantum * quantum_div[host_load] +
+-                                (prandom_u32() >> 16)) >> 16;
+-              list_move_tail(&flow->flowchain, &b->old_flows);
+-
+               /* Keep all flows with deficits out of the sparse and decaying
+                * rotations.  No non-empty flow can go into the decaying
+                * rotation, so they can't get deficits
+@@ -1999,6 +1998,13 @@ retry:
+                       if (flow->head) {
+                               b->sparse_flow_count--;
+                               b->bulk_flow_count++;
++
++                              if (cake_dsrc(q->flow_mode))
++                                      srchost->srchost_bulk_flow_count++;
++
++                              if (cake_ddst(q->flow_mode))
++                                      dsthost->dsthost_bulk_flow_count++;
++
+                               flow->set = CAKE_SET_BULK;
+                       } else {
+                               /* we've moved it to the bulk rotation for
+@@ -2008,6 +2014,22 @@ retry:
+                               flow->set = CAKE_SET_SPARSE_WAIT;
+                       }
+               }
++
++              if (cake_dsrc(q->flow_mode))
++                      host_load = max(host_load, srchost->srchost_bulk_flow_count);
++
++              if (cake_ddst(q->flow_mode))
++                      host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
++
++              WARN_ON(host_load > CAKE_QUEUES);
++
++              /* The shifted prandom_u32() is a way to apply dithering to
++               * avoid accumulating roundoff errors
++               */
++              flow->deficit += (b->flow_quantum * quantum_div[host_load] +
++                                (prandom_u32() >> 16)) >> 16;
++              list_move_tail(&flow->flowchain, &b->old_flows);
++
+               goto retry;
+       }
+@@ -2028,6 +2050,13 @@ retry:
+                                              &b->decaying_flows);
+                               if (flow->set == CAKE_SET_BULK) {
+                                       b->bulk_flow_count--;
++
++                                      if (cake_dsrc(q->flow_mode))
++                                              srchost->srchost_bulk_flow_count--;
++
++                                      if (cake_ddst(q->flow_mode))
++                                              dsthost->dsthost_bulk_flow_count--;
++
+                                       b->decaying_flow_count++;
+                               } else if (flow->set == CAKE_SET_SPARSE ||
+                                          flow->set == CAKE_SET_SPARSE_WAIT) {
+@@ -2041,14 +2070,19 @@ retry:
+                               if (flow->set == CAKE_SET_SPARSE ||
+                                   flow->set == CAKE_SET_SPARSE_WAIT)
+                                       b->sparse_flow_count--;
+-                              else if (flow->set == CAKE_SET_BULK)
++                              else if (flow->set == CAKE_SET_BULK) {
+                                       b->bulk_flow_count--;
+-                              else
++
++                                      if (cake_dsrc(q->flow_mode))
++                                              srchost->srchost_bulk_flow_count--;
++
++                                      if (cake_ddst(q->flow_mode))
++                                              dsthost->dsthost_bulk_flow_count--;
++
++                              } else
+                                       b->decaying_flow_count--;
+                               flow->set = CAKE_SET_NONE;
+-                              srchost->srchost_refcnt--;
+-                              dsthost->dsthost_refcnt--;
+                       }
+                       goto begin;
+               }
diff --git a/target/linux/generic/backport-4.19/391-v5.1-sch_cake-Permit-use-of-connmarks-as-tin-classifiers.patch b/target/linux/generic/backport-4.19/391-v5.1-sch_cake-Permit-use-of-connmarks-as-tin-classifiers.patch
new file mode 100644 (file)
index 0000000..9ac1388
--- /dev/null
@@ -0,0 +1,118 @@
+From 0b5c7efdfc6e389ec6840579fe90bdb6f42b08dc Mon Sep 17 00:00:00 2001
+From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+Date: Fri, 1 Mar 2019 16:04:05 +0100
+Subject: [PATCH] sch_cake: Permit use of connmarks as tin classifiers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Add flag 'FWMARK' to enable use of firewall connmarks as tin selector.
+The connmark (skbuff->mark) needs to be in the range 1->tin_cnt ie.
+for diffserv3 the mark needs to be 1->3.
+
+Background
+
+Typically CAKE uses DSCP as the basis for tin selection.  DSCP values
+are relatively easily changed as part of the egress path, usually with
+iptables & the mangle table, ingress is more challenging.  CAKE is often
+used on the WAN interface of a residential gateway where passthrough of
+DSCP from the ISP is either missing or set to unhelpful values thus use
+of ingress DSCP values for tin selection isn't helpful in that
+environment.
+
+An approach to solving the ingress tin selection problem is to use
+CAKE's understanding of tc filters.  Naive tc filters could match on
+source/destination port numbers and force tin selection that way, but
+multiple filters don't scale particularly well as each filter must be
+traversed whether it matches or not. e.g. a simple example to map 3
+firewall marks to tins:
+
+MAJOR=$( tc qdisc show dev $DEV | head -1 | awk '{print $3}' )
+tc filter add dev $DEV parent $MAJOR protocol all handle 0x01 fw action skbedit priority ${MAJOR}1
+tc filter add dev $DEV parent $MAJOR protocol all handle 0x02 fw action skbedit priority ${MAJOR}2
+tc filter add dev $DEV parent $MAJOR protocol all handle 0x03 fw action skbedit priority ${MAJOR}3
+
+Another option is to use eBPF cls_act with tc filters e.g.
+
+MAJOR=$( tc qdisc show dev $DEV | head -1 | awk '{print $3}' )
+tc filter add dev $DEV parent $MAJOR bpf da obj my-bpf-fwmark-to-class.o
+
+This has the disadvantages of a) needing someone to write & maintain
+the bpf program, b) a bpf toolchain to compile it and c) needing to
+hardcode the major number in the bpf program so it matches the cake
+instance (or forcing the cake instance to a particular major number)
+since the major number cannot be passed to the bpf program via tc
+command line.
+
+As already hinted at by the previous examples, it would be helpful
+to associate tins with something that survives the Internet path and
+ideally allows tin selection on both egress and ingress.  Netfilter's
+conntrack permits setting an identifying mark on a connection which
+can also be restored to an ingress packet with tc action connmark e.g.
+
+tc filter add dev eth0 parent ffff: protocol all prio 10 u32 \
+       match u32 0 0 flowid 1:1 action connmark action mirred egress redirect dev ifb1
+
+Since tc's connmark action has restored any connmark into skb->mark,
+any of the previous solutions are based upon it and in one form or
+another copy that mark to the skb->priority field where again CAKE
+picks this up.
+
+This change cuts out at least one of the (less intuitive &
+non-scalable) middlemen and permits direct access to skb->mark.
+
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ include/uapi/linux/pkt_sched.h |  1 +
+ net/sched/sch_cake.c           | 34 +++++++++++++++++++++++++++-------
+ 2 files changed, 28 insertions(+), 7 deletions(-)
+
+--- a/include/uapi/linux/pkt_sched.h
++++ b/include/uapi/linux/pkt_sched.h
+@@ -991,6 +991,7 @@ enum {
+       TCA_CAKE_INGRESS,
+       TCA_CAKE_ACK_FILTER,
+       TCA_CAKE_SPLIT_GSO,
++      TCA_CAKE_FWMARK,
+       __TCA_CAKE_MAX
+ };
+ #define TCA_CAKE_MAX  (__TCA_CAKE_MAX - 1)
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -258,7 +258,8 @@ enum {
+       CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
+       CAKE_FLAG_INGRESS          = BIT(2),
+       CAKE_FLAG_WASH             = BIT(3),
+-      CAKE_FLAG_SPLIT_GSO        = BIT(4)
++      CAKE_FLAG_SPLIT_GSO        = BIT(4),
++      CAKE_FLAG_FWMARK           = BIT(5)
+ };
+ /* COBALT operates the Codel and BLUE algorithms in parallel, in order to
+@@ -2623,6 +2624,13 @@ static int cake_change(struct Qdisc *sch
+                       q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
+       }
++      if (tb[TCA_CAKE_FWMARK]) {
++              if (!!nla_get_u32(tb[TCA_CAKE_FWMARK]))
++                      q->rate_flags |= CAKE_FLAG_FWMARK;
++              else
++                      q->rate_flags &= ~CAKE_FLAG_FWMARK;
++      }
++
+       if (q->tins) {
+               sch_tree_lock(sch);
+               cake_reconfigure(sch);
+@@ -2782,6 +2790,10 @@ static int cake_dump(struct Qdisc *sch,
+                       !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
+               goto nla_put_failure;
++      if (nla_put_u32(skb, TCA_CAKE_FWMARK,
++                      !!(q->rate_flags & CAKE_FLAG_FWMARK)))
++              goto nla_put_failure;
++
+       return nla_nest_end(skb, opts);
+ nla_put_failure:
diff --git a/target/linux/generic/backport-4.19/392-v5.1-sch_cake-Interpret-fwmark-parameter-as-a-bitmask.patch b/target/linux/generic/backport-4.19/392-v5.1-sch_cake-Interpret-fwmark-parameter-as-a-bitmask.patch
new file mode 100644 (file)
index 0000000..325f571
--- /dev/null
@@ -0,0 +1,102 @@
+From eab2fc822af38f31fd5f4e731b5d10b94904d919 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
+Date: Thu, 14 Mar 2019 23:08:22 +0100
+Subject: [PATCH] sch_cake: Interpret fwmark parameter as a bitmask
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+We initially interpreted the fwmark parameter as a flag that simply turned
+on the feature, using the whole skb->mark field as the index into the CAKE
+tin_order array. However, it is quite common for different applications to
+use different parts of the mark field for their own purposes, each using a
+different mask.
+
+Support this use of subsets of the mark by interpreting the TCA_CAKE_FWMARK
+parameter as a bitmask to apply to the fwmark field when reading it. The
+result will be right-shifted by the number of unset lower bits of the mask
+before looking up the tin.
+
+In the original commit message we also failed to credit Felix Resch with
+originally suggesting the fwmark feature back in 2017; so the Suggested-By
+in this commit covers the whole fwmark feature.
+
+Fixes: 0b5c7efdfc6e ("sch_cake: Permit use of connmarks as tin classifiers")
+Suggested-by: Felix Resch <fuller@beif.de>
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+---
+ net/sched/sch_cake.c | 25 ++++++++++++-------------
+ 1 file changed, 12 insertions(+), 13 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -211,6 +211,9 @@ struct cake_sched_data {
+       u8              ack_filter;
+       u8              atm_mode;
++      u32             fwmark_mask;
++      u16             fwmark_shft;
++
+       /* time_next = time_this + ((len * rate_ns) >> rate_shft) */
+       u16             rate_shft;
+       ktime_t         time_next_packet;
+@@ -258,8 +261,7 @@ enum {
+       CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
+       CAKE_FLAG_INGRESS          = BIT(2),
+       CAKE_FLAG_WASH             = BIT(3),
+-      CAKE_FLAG_SPLIT_GSO        = BIT(4),
+-      CAKE_FLAG_FWMARK           = BIT(5)
++      CAKE_FLAG_SPLIT_GSO        = BIT(4)
+ };
+ /* COBALT operates the Codel and BLUE algorithms in parallel, in order to
+@@ -1554,7 +1556,7 @@ static struct cake_tin_data *cake_select
+                                            struct sk_buff *skb)
+ {
+       struct cake_sched_data *q = qdisc_priv(sch);
+-      u32 tin;
++      u32 tin, mark;
+       u8 dscp;
+       /* Tin selection: Default to diffserv-based selection, allow overriding
+@@ -1562,6 +1564,7 @@ static struct cake_tin_data *cake_select
+        */
+       dscp = cake_handle_diffserv(skb,
+                                   q->rate_flags & CAKE_FLAG_WASH);
++      mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
+       if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
+               tin = 0;
+@@ -2178,6 +2181,7 @@ static const struct nla_policy cake_poli
+       [TCA_CAKE_MPU]           = { .type = NLA_U32 },
+       [TCA_CAKE_INGRESS]       = { .type = NLA_U32 },
+       [TCA_CAKE_ACK_FILTER]    = { .type = NLA_U32 },
++      [TCA_CAKE_FWMARK]        = { .type = NLA_U32 },
+ };
+ static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
+@@ -2625,10 +2629,8 @@ static int cake_change(struct Qdisc *sch
+       }
+       if (tb[TCA_CAKE_FWMARK]) {
+-              if (!!nla_get_u32(tb[TCA_CAKE_FWMARK]))
+-                      q->rate_flags |= CAKE_FLAG_FWMARK;
+-              else
+-                      q->rate_flags &= ~CAKE_FLAG_FWMARK;
++              q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]);
++              q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0;
+       }
+       if (q->tins) {
+@@ -2790,8 +2792,7 @@ static int cake_dump(struct Qdisc *sch,
+                       !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
+               goto nla_put_failure;
+-      if (nla_put_u32(skb, TCA_CAKE_FWMARK,
+-                      !!(q->rate_flags & CAKE_FLAG_FWMARK)))
++      if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask))
+               goto nla_put_failure;
+       return nla_nest_end(skb, opts);
diff --git a/target/linux/generic/backport-4.19/392-v5.1-sch_cake-Make-the-dual-modes-fairer.patch b/target/linux/generic/backport-4.19/392-v5.1-sch_cake-Make-the-dual-modes-fairer.patch
deleted file mode 100644 (file)
index cd94600..0000000
+++ /dev/null
@@ -1,217 +0,0 @@
-From 712639929912c5eefb09facccb48d55b3f72c9f8 Mon Sep 17 00:00:00 2001
-From: George Amanakis <gamanakis@gmail.com>
-Date: Fri, 1 Mar 2019 16:04:05 +0100
-Subject: [PATCH] sch_cake: Make the dual modes fairer
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-CAKE host fairness does not work well with TCP flows in dual-srchost and
-dual-dsthost setup. The reason is that ACKs generated by TCP flows are
-classified as sparse flows, and affect flow isolation from other hosts. Fix
-this by calculating host_load based only on the bulk flows a host
-generates. In a hash collision the host_bulk_flow_count values must be
-decremented on the old hosts and incremented on the new ones *if* the queue
-is in the bulk set.
-
-Reported-by: Pete Heist <peteheist@gmail.com>
-Signed-off-by: George Amanakis <gamanakis@gmail.com>
-Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
----
- net/sched/sch_cake.c | 92 ++++++++++++++++++++++++++++++--------------
- 1 file changed, 63 insertions(+), 29 deletions(-)
-
---- a/net/sched/sch_cake.c
-+++ b/net/sched/sch_cake.c
-@@ -138,8 +138,8 @@ struct cake_flow {
- struct cake_host {
-       u32 srchost_tag;
-       u32 dsthost_tag;
--      u16 srchost_refcnt;
--      u16 dsthost_refcnt;
-+      u16 srchost_bulk_flow_count;
-+      u16 dsthost_bulk_flow_count;
- };
- struct cake_heap_entry {
-@@ -746,8 +746,10 @@ skip_hash:
-                * queue, accept the collision, update the host tags.
-                */
-               q->way_collisions++;
--              q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--;
--              q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--;
-+              if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
-+                      q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
-+                      q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
-+              }
-               allocate_src = cake_dsrc(flow_mode);
-               allocate_dst = cake_ddst(flow_mode);
- found:
-@@ -767,13 +769,14 @@ found:
-                       }
-                       for (i = 0; i < CAKE_SET_WAYS;
-                               i++, k = (k + 1) % CAKE_SET_WAYS) {
--                              if (!q->hosts[outer_hash + k].srchost_refcnt)
-+                              if (!q->hosts[outer_hash + k].srchost_bulk_flow_count)
-                                       break;
-                       }
-                       q->hosts[outer_hash + k].srchost_tag = srchost_hash;
- found_src:
-                       srchost_idx = outer_hash + k;
--                      q->hosts[srchost_idx].srchost_refcnt++;
-+                      if (q->flows[reduced_hash].set == CAKE_SET_BULK)
-+                              q->hosts[srchost_idx].srchost_bulk_flow_count++;
-                       q->flows[reduced_hash].srchost = srchost_idx;
-               }
-@@ -789,13 +792,14 @@ found_src:
-                       }
-                       for (i = 0; i < CAKE_SET_WAYS;
-                            i++, k = (k + 1) % CAKE_SET_WAYS) {
--                              if (!q->hosts[outer_hash + k].dsthost_refcnt)
-+                              if (!q->hosts[outer_hash + k].dsthost_bulk_flow_count)
-                                       break;
-                       }
-                       q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
- found_dst:
-                       dsthost_idx = outer_hash + k;
--                      q->hosts[dsthost_idx].dsthost_refcnt++;
-+                      if (q->flows[reduced_hash].set == CAKE_SET_BULK)
-+                              q->hosts[dsthost_idx].dsthost_bulk_flow_count++;
-                       q->flows[reduced_hash].dsthost = dsthost_idx;
-               }
-       }
-@@ -1793,20 +1797,30 @@ static s32 cake_enqueue(struct sk_buff *
-               b->sparse_flow_count++;
-               if (cake_dsrc(q->flow_mode))
--                      host_load = max(host_load, srchost->srchost_refcnt);
-+                      host_load = max(host_load, srchost->srchost_bulk_flow_count);
-               if (cake_ddst(q->flow_mode))
--                      host_load = max(host_load, dsthost->dsthost_refcnt);
-+                      host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
-               flow->deficit = (b->flow_quantum *
-                                quantum_div[host_load]) >> 16;
-       } else if (flow->set == CAKE_SET_SPARSE_WAIT) {
-+              struct cake_host *srchost = &b->hosts[flow->srchost];
-+              struct cake_host *dsthost = &b->hosts[flow->dsthost];
-+
-               /* this flow was empty, accounted as a sparse flow, but actually
-                * in the bulk rotation.
-                */
-               flow->set = CAKE_SET_BULK;
-               b->sparse_flow_count--;
-               b->bulk_flow_count++;
-+
-+              if (cake_dsrc(q->flow_mode))
-+                      srchost->srchost_bulk_flow_count++;
-+
-+              if (cake_ddst(q->flow_mode))
-+                      dsthost->dsthost_bulk_flow_count++;
-+
-       }
-       if (q->buffer_used > q->buffer_max_used)
-@@ -1974,23 +1988,8 @@ retry:
-       dsthost = &b->hosts[flow->dsthost];
-       host_load = 1;
--      if (cake_dsrc(q->flow_mode))
--              host_load = max(host_load, srchost->srchost_refcnt);
--
--      if (cake_ddst(q->flow_mode))
--              host_load = max(host_load, dsthost->dsthost_refcnt);
--
--      WARN_ON(host_load > CAKE_QUEUES);
--
-       /* flow isolation (DRR++) */
-       if (flow->deficit <= 0) {
--              /* The shifted prandom_u32() is a way to apply dithering to
--               * avoid accumulating roundoff errors
--               */
--              flow->deficit += (b->flow_quantum * quantum_div[host_load] +
--                                (prandom_u32() >> 16)) >> 16;
--              list_move_tail(&flow->flowchain, &b->old_flows);
--
-               /* Keep all flows with deficits out of the sparse and decaying
-                * rotations.  No non-empty flow can go into the decaying
-                * rotation, so they can't get deficits
-@@ -1999,6 +1998,13 @@ retry:
-                       if (flow->head) {
-                               b->sparse_flow_count--;
-                               b->bulk_flow_count++;
-+
-+                              if (cake_dsrc(q->flow_mode))
-+                                      srchost->srchost_bulk_flow_count++;
-+
-+                              if (cake_ddst(q->flow_mode))
-+                                      dsthost->dsthost_bulk_flow_count++;
-+
-                               flow->set = CAKE_SET_BULK;
-                       } else {
-                               /* we've moved it to the bulk rotation for
-@@ -2008,6 +2014,22 @@ retry:
-                               flow->set = CAKE_SET_SPARSE_WAIT;
-                       }
-               }
-+
-+              if (cake_dsrc(q->flow_mode))
-+                      host_load = max(host_load, srchost->srchost_bulk_flow_count);
-+
-+              if (cake_ddst(q->flow_mode))
-+                      host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
-+
-+              WARN_ON(host_load > CAKE_QUEUES);
-+
-+              /* The shifted prandom_u32() is a way to apply dithering to
-+               * avoid accumulating roundoff errors
-+               */
-+              flow->deficit += (b->flow_quantum * quantum_div[host_load] +
-+                                (prandom_u32() >> 16)) >> 16;
-+              list_move_tail(&flow->flowchain, &b->old_flows);
-+
-               goto retry;
-       }
-@@ -2028,6 +2050,13 @@ retry:
-                                              &b->decaying_flows);
-                               if (flow->set == CAKE_SET_BULK) {
-                                       b->bulk_flow_count--;
-+
-+                                      if (cake_dsrc(q->flow_mode))
-+                                              srchost->srchost_bulk_flow_count--;
-+
-+                                      if (cake_ddst(q->flow_mode))
-+                                              dsthost->dsthost_bulk_flow_count--;
-+
-                                       b->decaying_flow_count++;
-                               } else if (flow->set == CAKE_SET_SPARSE ||
-                                          flow->set == CAKE_SET_SPARSE_WAIT) {
-@@ -2041,14 +2070,19 @@ retry:
-                               if (flow->set == CAKE_SET_SPARSE ||
-                                   flow->set == CAKE_SET_SPARSE_WAIT)
-                                       b->sparse_flow_count--;
--                              else if (flow->set == CAKE_SET_BULK)
-+                              else if (flow->set == CAKE_SET_BULK) {
-                                       b->bulk_flow_count--;
--                              else
-+
-+                                      if (cake_dsrc(q->flow_mode))
-+                                              srchost->srchost_bulk_flow_count--;
-+
-+                                      if (cake_ddst(q->flow_mode))
-+                                              dsthost->dsthost_bulk_flow_count--;
-+
-+                              } else
-                                       b->decaying_flow_count--;
-                               flow->set = CAKE_SET_NONE;
--                              srchost->srchost_refcnt--;
--                              dsthost->dsthost_refcnt--;
-                       }
-                       goto begin;
-               }
diff --git a/target/linux/generic/backport-4.19/393-v5.1-sch_cake-Permit-use-of-connmarks-as-tin-classifiers.patch b/target/linux/generic/backport-4.19/393-v5.1-sch_cake-Permit-use-of-connmarks-as-tin-classifiers.patch
deleted file mode 100644 (file)
index 9ac1388..0000000
+++ /dev/null
@@ -1,118 +0,0 @@
-From 0b5c7efdfc6e389ec6840579fe90bdb6f42b08dc Mon Sep 17 00:00:00 2001
-From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
-Date: Fri, 1 Mar 2019 16:04:05 +0100
-Subject: [PATCH] sch_cake: Permit use of connmarks as tin classifiers
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Add flag 'FWMARK' to enable use of firewall connmarks as tin selector.
-The connmark (skbuff->mark) needs to be in the range 1->tin_cnt ie.
-for diffserv3 the mark needs to be 1->3.
-
-Background
-
-Typically CAKE uses DSCP as the basis for tin selection.  DSCP values
-are relatively easily changed as part of the egress path, usually with
-iptables & the mangle table, ingress is more challenging.  CAKE is often
-used on the WAN interface of a residential gateway where passthrough of
-DSCP from the ISP is either missing or set to unhelpful values thus use
-of ingress DSCP values for tin selection isn't helpful in that
-environment.
-
-An approach to solving the ingress tin selection problem is to use
-CAKE's understanding of tc filters.  Naive tc filters could match on
-source/destination port numbers and force tin selection that way, but
-multiple filters don't scale particularly well as each filter must be
-traversed whether it matches or not. e.g. a simple example to map 3
-firewall marks to tins:
-
-MAJOR=$( tc qdisc show dev $DEV | head -1 | awk '{print $3}' )
-tc filter add dev $DEV parent $MAJOR protocol all handle 0x01 fw action skbedit priority ${MAJOR}1
-tc filter add dev $DEV parent $MAJOR protocol all handle 0x02 fw action skbedit priority ${MAJOR}2
-tc filter add dev $DEV parent $MAJOR protocol all handle 0x03 fw action skbedit priority ${MAJOR}3
-
-Another option is to use eBPF cls_act with tc filters e.g.
-
-MAJOR=$( tc qdisc show dev $DEV | head -1 | awk '{print $3}' )
-tc filter add dev $DEV parent $MAJOR bpf da obj my-bpf-fwmark-to-class.o
-
-This has the disadvantages of a) needing someone to write & maintain
-the bpf program, b) a bpf toolchain to compile it and c) needing to
-hardcode the major number in the bpf program so it matches the cake
-instance (or forcing the cake instance to a particular major number)
-since the major number cannot be passed to the bpf program via tc
-command line.
-
-As already hinted at by the previous examples, it would be helpful
-to associate tins with something that survives the Internet path and
-ideally allows tin selection on both egress and ingress.  Netfilter's
-conntrack permits setting an identifying mark on a connection which
-can also be restored to an ingress packet with tc action connmark e.g.
-
-tc filter add dev eth0 parent ffff: protocol all prio 10 u32 \
-       match u32 0 0 flowid 1:1 action connmark action mirred egress redirect dev ifb1
-
-Since tc's connmark action has restored any connmark into skb->mark,
-any of the previous solutions are based upon it and in one form or
-another copy that mark to the skb->priority field where again CAKE
-picks this up.
-
-This change cuts out at least one of the (less intuitive &
-non-scalable) middlemen and permit direct access to skb->mark.
-
-Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
-Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
----
- include/uapi/linux/pkt_sched.h |  1 +
- net/sched/sch_cake.c           | 34 +++++++++++++++++++++++++++-------
- 2 files changed, 28 insertions(+), 7 deletions(-)
-
---- a/include/uapi/linux/pkt_sched.h
-+++ b/include/uapi/linux/pkt_sched.h
-@@ -991,6 +991,7 @@ enum {
-       TCA_CAKE_INGRESS,
-       TCA_CAKE_ACK_FILTER,
-       TCA_CAKE_SPLIT_GSO,
-+      TCA_CAKE_FWMARK,
-       __TCA_CAKE_MAX
- };
- #define TCA_CAKE_MAX  (__TCA_CAKE_MAX - 1)
---- a/net/sched/sch_cake.c
-+++ b/net/sched/sch_cake.c
-@@ -258,7 +258,8 @@ enum {
-       CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
-       CAKE_FLAG_INGRESS          = BIT(2),
-       CAKE_FLAG_WASH             = BIT(3),
--      CAKE_FLAG_SPLIT_GSO        = BIT(4)
-+      CAKE_FLAG_SPLIT_GSO        = BIT(4),
-+      CAKE_FLAG_FWMARK           = BIT(5)
- };
- /* COBALT operates the Codel and BLUE algorithms in parallel, in order to
-@@ -2623,6 +2624,13 @@ static int cake_change(struct Qdisc *sch
-                       q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
-       }
-+      if (tb[TCA_CAKE_FWMARK]) {
-+              if (!!nla_get_u32(tb[TCA_CAKE_FWMARK]))
-+                      q->rate_flags |= CAKE_FLAG_FWMARK;
-+              else
-+                      q->rate_flags &= ~CAKE_FLAG_FWMARK;
-+      }
-+
-       if (q->tins) {
-               sch_tree_lock(sch);
-               cake_reconfigure(sch);
-@@ -2782,6 +2790,10 @@ static int cake_dump(struct Qdisc *sch,
-                       !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
-               goto nla_put_failure;
-+      if (nla_put_u32(skb, TCA_CAKE_FWMARK,
-+                      !!(q->rate_flags & CAKE_FLAG_FWMARK)))
-+              goto nla_put_failure;
-+
-       return nla_nest_end(skb, opts);
- nla_put_failure:
diff --git a/target/linux/generic/backport-4.19/393-v5.4-sch_cake-drop-unused-variable-tin_quantum_prio.patch b/target/linux/generic/backport-4.19/393-v5.4-sch_cake-drop-unused-variable-tin_quantum_prio.patch
new file mode 100644 (file)
index 0000000..33e5c54
--- /dev/null
@@ -0,0 +1,158 @@
+From d7e1738f0a0b0573ac93cf570ba3df9dee61b68e Mon Sep 17 00:00:00 2001
+From: Kevin 'ldir' Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+Date: Wed, 18 Dec 2019 14:05:13 +0000
+Subject: [PATCH 2/2] sch_cake: drop unused variable tin_quantum_prio
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Turns out tin_quantum_prio isn't used anymore and is a leftover from a
+previous implementation of diffserv tins.  Since the variable isn't used
+in any calculations it can be eliminated.
+
+Drop variable and places where it was set.  Rename remaining variable
+and consolidate naming of intermediate variables that set it.
+
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ net/sched/sch_cake.c | 59 ++++++++++++++------------------------------
+ 1 file changed, 18 insertions(+), 41 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -173,8 +173,7 @@ struct cake_tin_data {
+       u64     tin_rate_bps;
+       u16     tin_rate_shft;
+-      u16     tin_quantum_prio;
+-      u16     tin_quantum_band;
++      u16     tin_quantum;
+       s32     tin_deficit;
+       u32     tin_backlog;
+       u32     tin_dropped;
+@@ -1916,7 +1915,7 @@ begin:
+               while (b->tin_deficit < 0 ||
+                      !(b->sparse_flow_count + b->bulk_flow_count)) {
+                       if (b->tin_deficit <= 0)
+-                              b->tin_deficit += b->tin_quantum_band;
++                              b->tin_deficit += b->tin_quantum;
+                       if (b->sparse_flow_count + b->bulk_flow_count)
+                               empty = false;
+@@ -2237,8 +2236,7 @@ static int cake_config_besteffort(struct
+       cake_set_rate(b, rate, mtu,
+                     us_to_ns(q->target), us_to_ns(q->interval));
+-      b->tin_quantum_band = 65535;
+-      b->tin_quantum_prio = 65535;
++      b->tin_quantum = 65535;
+       return 0;
+ }
+@@ -2249,8 +2247,7 @@ static int cake_config_precedence(struct
+       struct cake_sched_data *q = qdisc_priv(sch);
+       u32 mtu = psched_mtu(qdisc_dev(sch));
+       u64 rate = q->rate_bps;
+-      u32 quantum1 = 256;
+-      u32 quantum2 = 256;
++      u32 quantum = 256;
+       u32 i;
+       q->tin_cnt = 8;
+@@ -2263,18 +2260,14 @@ static int cake_config_precedence(struct
+               cake_set_rate(b, rate, mtu, us_to_ns(q->target),
+                             us_to_ns(q->interval));
+-              b->tin_quantum_prio = max_t(u16, 1U, quantum1);
+-              b->tin_quantum_band = max_t(u16, 1U, quantum2);
++              b->tin_quantum = max_t(u16, 1U, quantum);
+               /* calculate next class's parameters */
+               rate  *= 7;
+               rate >>= 3;
+-              quantum1  *= 3;
+-              quantum1 >>= 1;
+-
+-              quantum2  *= 7;
+-              quantum2 >>= 3;
++              quantum  *= 7;
++              quantum >>= 3;
+       }
+       return 0;
+@@ -2343,8 +2336,7 @@ static int cake_config_diffserv8(struct
+       struct cake_sched_data *q = qdisc_priv(sch);
+       u32 mtu = psched_mtu(qdisc_dev(sch));
+       u64 rate = q->rate_bps;
+-      u32 quantum1 = 256;
+-      u32 quantum2 = 256;
++      u32 quantum = 256;
+       u32 i;
+       q->tin_cnt = 8;
+@@ -2360,18 +2352,14 @@ static int cake_config_diffserv8(struct
+               cake_set_rate(b, rate, mtu, us_to_ns(q->target),
+                             us_to_ns(q->interval));
+-              b->tin_quantum_prio = max_t(u16, 1U, quantum1);
+-              b->tin_quantum_band = max_t(u16, 1U, quantum2);
++              b->tin_quantum = max_t(u16, 1U, quantum);
+               /* calculate next class's parameters */
+               rate  *= 7;
+               rate >>= 3;
+-              quantum1  *= 3;
+-              quantum1 >>= 1;
+-
+-              quantum2  *= 7;
+-              quantum2 >>= 3;
++              quantum  *= 7;
++              quantum >>= 3;
+       }
+       return 0;
+@@ -2410,17 +2398,11 @@ static int cake_config_diffserv4(struct
+       cake_set_rate(&q->tins[3], rate >> 2, mtu,
+                     us_to_ns(q->target), us_to_ns(q->interval));
+-      /* priority weights */
+-      q->tins[0].tin_quantum_prio = quantum;
+-      q->tins[1].tin_quantum_prio = quantum >> 4;
+-      q->tins[2].tin_quantum_prio = quantum << 2;
+-      q->tins[3].tin_quantum_prio = quantum << 4;
+-
+       /* bandwidth-sharing weights */
+-      q->tins[0].tin_quantum_band = quantum;
+-      q->tins[1].tin_quantum_band = quantum >> 4;
+-      q->tins[2].tin_quantum_band = quantum >> 1;
+-      q->tins[3].tin_quantum_band = quantum >> 2;
++      q->tins[0].tin_quantum = quantum;
++      q->tins[1].tin_quantum = quantum >> 4;
++      q->tins[2].tin_quantum = quantum >> 1;
++      q->tins[3].tin_quantum = quantum >> 2;
+       return 0;
+ }
+@@ -2451,15 +2433,10 @@ static int cake_config_diffserv3(struct
+       cake_set_rate(&q->tins[2], rate >> 2, mtu,
+                     us_to_ns(q->target), us_to_ns(q->interval));
+-      /* priority weights */
+-      q->tins[0].tin_quantum_prio = quantum;
+-      q->tins[1].tin_quantum_prio = quantum >> 4;
+-      q->tins[2].tin_quantum_prio = quantum << 4;
+-
+       /* bandwidth-sharing weights */
+-      q->tins[0].tin_quantum_band = quantum;
+-      q->tins[1].tin_quantum_band = quantum >> 4;
+-      q->tins[2].tin_quantum_band = quantum >> 2;
++      q->tins[0].tin_quantum = quantum;
++      q->tins[1].tin_quantum = quantum >> 4;
++      q->tins[2].tin_quantum = quantum >> 2;
+       return 0;
+ }
diff --git a/target/linux/generic/backport-4.19/394-v5.1-sch_cake-Interpret-fwmark-parameter-as-a-bitmask.patch b/target/linux/generic/backport-4.19/394-v5.1-sch_cake-Interpret-fwmark-parameter-as-a-bitmask.patch
deleted file mode 100644 (file)
index 325f571..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-From eab2fc822af38f31fd5f4e731b5d10b94904d919 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
-Date: Thu, 14 Mar 2019 23:08:22 +0100
-Subject: [PATCH] sch_cake: Interpret fwmark parameter as a bitmask
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-We initially interpreted the fwmark parameter as a flag that simply turned
-on the feature, using the whole skb->mark field as the index into the CAKE
-tin_order array. However, it is quite common for different applications to
-use different parts of the mask field for their own purposes, each using a
-different mask.
-
-Support this use of subsets of the mark by interpreting the TCA_CAKE_FWMARK
-parameter as a bitmask to apply to the fwmark field when reading it. The
-result will be right-shifted by the number of unset lower bits of the mask
-before looking up the tin.
-
-In the original commit message we also failed to credit Felix Resch with
-originally suggesting the fwmark feature back in 2017; so the Suggested-By
-in this commit covers the whole fwmark feature.
-
-Fixes: 0b5c7efdfc6e ("sch_cake: Permit use of connmarks as tin classifiers")
-Suggested-by: Felix Resch <fuller@beif.de>
-Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
----
- net/sched/sch_cake.c | 25 ++++++++++++-------------
- 1 file changed, 12 insertions(+), 13 deletions(-)
-
---- a/net/sched/sch_cake.c
-+++ b/net/sched/sch_cake.c
-@@ -211,6 +211,9 @@ struct cake_sched_data {
-       u8              ack_filter;
-       u8              atm_mode;
-+      u32             fwmark_mask;
-+      u16             fwmark_shft;
-+
-       /* time_next = time_this + ((len * rate_ns) >> rate_shft) */
-       u16             rate_shft;
-       ktime_t         time_next_packet;
-@@ -258,8 +261,7 @@ enum {
-       CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
-       CAKE_FLAG_INGRESS          = BIT(2),
-       CAKE_FLAG_WASH             = BIT(3),
--      CAKE_FLAG_SPLIT_GSO        = BIT(4),
--      CAKE_FLAG_FWMARK           = BIT(5)
-+      CAKE_FLAG_SPLIT_GSO        = BIT(4)
- };
- /* COBALT operates the Codel and BLUE algorithms in parallel, in order to
-@@ -1554,7 +1556,7 @@ static struct cake_tin_data *cake_select
-                                            struct sk_buff *skb)
- {
-       struct cake_sched_data *q = qdisc_priv(sch);
--      u32 tin;
-+      u32 tin, mark;
-       u8 dscp;
-       /* Tin selection: Default to diffserv-based selection, allow overriding
-@@ -1562,6 +1564,7 @@ static struct cake_tin_data *cake_select
-        */
-       dscp = cake_handle_diffserv(skb,
-                                   q->rate_flags & CAKE_FLAG_WASH);
-+      mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
-       if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
-               tin = 0;
-@@ -2178,6 +2181,7 @@ static const struct nla_policy cake_poli
-       [TCA_CAKE_MPU]           = { .type = NLA_U32 },
-       [TCA_CAKE_INGRESS]       = { .type = NLA_U32 },
-       [TCA_CAKE_ACK_FILTER]    = { .type = NLA_U32 },
-+      [TCA_CAKE_FWMARK]        = { .type = NLA_U32 },
- };
- static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
-@@ -2625,10 +2629,8 @@ static int cake_change(struct Qdisc *sch
-       }
-       if (tb[TCA_CAKE_FWMARK]) {
--              if (!!nla_get_u32(tb[TCA_CAKE_FWMARK]))
--                      q->rate_flags |= CAKE_FLAG_FWMARK;
--              else
--                      q->rate_flags &= ~CAKE_FLAG_FWMARK;
-+              q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]);
-+              q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0;
-       }
-       if (q->tins) {
-@@ -2790,8 +2792,7 @@ static int cake_dump(struct Qdisc *sch,
-                       !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
-               goto nla_put_failure;
--      if (nla_put_u32(skb, TCA_CAKE_FWMARK,
--                      !!(q->rate_flags & CAKE_FLAG_FWMARK)))
-+      if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask))
-               goto nla_put_failure;
-       return nla_nest_end(skb, opts);
diff --git a/target/linux/generic/backport-4.19/394-v5.4-sch_cake-Add-missing-NLA-policy-entry-TCA_CAKE_SPLIT.patch b/target/linux/generic/backport-4.19/394-v5.4-sch_cake-Add-missing-NLA-policy-entry-TCA_CAKE_SPLIT.patch
new file mode 100644 (file)
index 0000000..32f0e1a
--- /dev/null
@@ -0,0 +1,30 @@
+From b3c424eb6a1a3c485de64619418a471dee6ce849 Mon Sep 17 00:00:00 2001
+From: Victorien Molle <victorien.molle@wifirst.fr>
+Date: Mon, 2 Dec 2019 15:11:38 +0100
+Subject: [PATCH] sch_cake: Add missing NLA policy entry TCA_CAKE_SPLIT_GSO
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This field has never been checked since introduction in mainline kernel
+
+Signed-off-by: Victorien Molle <victorien.molle@wifirst.fr>
+Signed-off-by: Florent Fourcot <florent.fourcot@wifirst.fr>
+Fixes: 2db6dc2662ba "sch_cake: Make gso-splitting configurable from userspace"
+Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+---
+ net/sched/sch_cake.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -2180,6 +2180,7 @@ static const struct nla_policy cake_poli
+       [TCA_CAKE_MPU]           = { .type = NLA_U32 },
+       [TCA_CAKE_INGRESS]       = { .type = NLA_U32 },
+       [TCA_CAKE_ACK_FILTER]    = { .type = NLA_U32 },
++      [TCA_CAKE_SPLIT_GSO]     = { .type = NLA_U32 },
+       [TCA_CAKE_FWMARK]        = { .type = NLA_U32 },
+ };
diff --git a/target/linux/generic/backport-4.19/395-v5.4-sch_cake-drop-unused-variable-tin_quantum_prio.patch b/target/linux/generic/backport-4.19/395-v5.4-sch_cake-drop-unused-variable-tin_quantum_prio.patch
deleted file mode 100644 (file)
index 33e5c54..0000000
+++ /dev/null
@@ -1,158 +0,0 @@
-From d7e1738f0a0b0573ac93cf570ba3df9dee61b68e Mon Sep 17 00:00:00 2001
-From: Kevin 'ldir' Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
-Date: Wed, 18 Dec 2019 14:05:13 +0000
-Subject: [PATCH 2/2] sch_cake: drop unused variable tin_quantum_prio
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Turns out tin_quantum_prio isn't used anymore and is a leftover from a
-previous implementation of diffserv tins.  Since the variable isn't used
-in any calculations it can be eliminated.
-
-Drop variable and places where it was set.  Rename remaining variable
-and consolidate naming of intermediate variables that set it.
-
-Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
-Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
----
- net/sched/sch_cake.c | 59 ++++++++++++++------------------------------
- 1 file changed, 18 insertions(+), 41 deletions(-)
-
---- a/net/sched/sch_cake.c
-+++ b/net/sched/sch_cake.c
-@@ -173,8 +173,7 @@ struct cake_tin_data {
-       u64     tin_rate_bps;
-       u16     tin_rate_shft;
--      u16     tin_quantum_prio;
--      u16     tin_quantum_band;
-+      u16     tin_quantum;
-       s32     tin_deficit;
-       u32     tin_backlog;
-       u32     tin_dropped;
-@@ -1916,7 +1915,7 @@ begin:
-               while (b->tin_deficit < 0 ||
-                      !(b->sparse_flow_count + b->bulk_flow_count)) {
-                       if (b->tin_deficit <= 0)
--                              b->tin_deficit += b->tin_quantum_band;
-+                              b->tin_deficit += b->tin_quantum;
-                       if (b->sparse_flow_count + b->bulk_flow_count)
-                               empty = false;
-@@ -2237,8 +2236,7 @@ static int cake_config_besteffort(struct
-       cake_set_rate(b, rate, mtu,
-                     us_to_ns(q->target), us_to_ns(q->interval));
--      b->tin_quantum_band = 65535;
--      b->tin_quantum_prio = 65535;
-+      b->tin_quantum = 65535;
-       return 0;
- }
-@@ -2249,8 +2247,7 @@ static int cake_config_precedence(struct
-       struct cake_sched_data *q = qdisc_priv(sch);
-       u32 mtu = psched_mtu(qdisc_dev(sch));
-       u64 rate = q->rate_bps;
--      u32 quantum1 = 256;
--      u32 quantum2 = 256;
-+      u32 quantum = 256;
-       u32 i;
-       q->tin_cnt = 8;
-@@ -2263,18 +2260,14 @@ static int cake_config_precedence(struct
-               cake_set_rate(b, rate, mtu, us_to_ns(q->target),
-                             us_to_ns(q->interval));
--              b->tin_quantum_prio = max_t(u16, 1U, quantum1);
--              b->tin_quantum_band = max_t(u16, 1U, quantum2);
-+              b->tin_quantum = max_t(u16, 1U, quantum);
-               /* calculate next class's parameters */
-               rate  *= 7;
-               rate >>= 3;
--              quantum1  *= 3;
--              quantum1 >>= 1;
--
--              quantum2  *= 7;
--              quantum2 >>= 3;
-+              quantum  *= 7;
-+              quantum >>= 3;
-       }
-       return 0;
-@@ -2343,8 +2336,7 @@ static int cake_config_diffserv8(struct
-       struct cake_sched_data *q = qdisc_priv(sch);
-       u32 mtu = psched_mtu(qdisc_dev(sch));
-       u64 rate = q->rate_bps;
--      u32 quantum1 = 256;
--      u32 quantum2 = 256;
-+      u32 quantum = 256;
-       u32 i;
-       q->tin_cnt = 8;
-@@ -2360,18 +2352,14 @@ static int cake_config_diffserv8(struct
-               cake_set_rate(b, rate, mtu, us_to_ns(q->target),
-                             us_to_ns(q->interval));
--              b->tin_quantum_prio = max_t(u16, 1U, quantum1);
--              b->tin_quantum_band = max_t(u16, 1U, quantum2);
-+              b->tin_quantum = max_t(u16, 1U, quantum);
-               /* calculate next class's parameters */
-               rate  *= 7;
-               rate >>= 3;
--              quantum1  *= 3;
--              quantum1 >>= 1;
--
--              quantum2  *= 7;
--              quantum2 >>= 3;
-+              quantum  *= 7;
-+              quantum >>= 3;
-       }
-       return 0;
-@@ -2410,17 +2398,11 @@ static int cake_config_diffserv4(struct
-       cake_set_rate(&q->tins[3], rate >> 2, mtu,
-                     us_to_ns(q->target), us_to_ns(q->interval));
--      /* priority weights */
--      q->tins[0].tin_quantum_prio = quantum;
--      q->tins[1].tin_quantum_prio = quantum >> 4;
--      q->tins[2].tin_quantum_prio = quantum << 2;
--      q->tins[3].tin_quantum_prio = quantum << 4;
--
-       /* bandwidth-sharing weights */
--      q->tins[0].tin_quantum_band = quantum;
--      q->tins[1].tin_quantum_band = quantum >> 4;
--      q->tins[2].tin_quantum_band = quantum >> 1;
--      q->tins[3].tin_quantum_band = quantum >> 2;
-+      q->tins[0].tin_quantum = quantum;
-+      q->tins[1].tin_quantum = quantum >> 4;
-+      q->tins[2].tin_quantum = quantum >> 1;
-+      q->tins[3].tin_quantum = quantum >> 2;
-       return 0;
- }
-@@ -2451,15 +2433,10 @@ static int cake_config_diffserv3(struct
-       cake_set_rate(&q->tins[2], rate >> 2, mtu,
-                     us_to_ns(q->target), us_to_ns(q->interval));
--      /* priority weights */
--      q->tins[0].tin_quantum_prio = quantum;
--      q->tins[1].tin_quantum_prio = quantum >> 4;
--      q->tins[2].tin_quantum_prio = quantum << 4;
--
-       /* bandwidth-sharing weights */
--      q->tins[0].tin_quantum_band = quantum;
--      q->tins[1].tin_quantum_band = quantum >> 4;
--      q->tins[2].tin_quantum_band = quantum >> 2;
-+      q->tins[0].tin_quantum = quantum;
-+      q->tins[1].tin_quantum = quantum >> 4;
-+      q->tins[2].tin_quantum = quantum >> 2;
-       return 0;
- }
diff --git a/target/linux/generic/backport-4.19/396-v5.4-sch_cake-Add-missing-NLA-policy-entry-TCA_CAKE_SPLIT.patch b/target/linux/generic/backport-4.19/396-v5.4-sch_cake-Add-missing-NLA-policy-entry-TCA_CAKE_SPLIT.patch
deleted file mode 100644 (file)
index 32f0e1a..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-From b3c424eb6a1a3c485de64619418a471dee6ce849 Mon Sep 17 00:00:00 2001
-From: Victorien Molle <victorien.molle@wifirst.fr>
-Date: Mon, 2 Dec 2019 15:11:38 +0100
-Subject: [PATCH] sch_cake: Add missing NLA policy entry TCA_CAKE_SPLIT_GSO
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This field has never been checked since introduction in mainline kernel
-
-Signed-off-by: Victorien Molle <victorien.molle@wifirst.fr>
-Signed-off-by: Florent Fourcot <florent.fourcot@wifirst.fr>
-Fixes: 2db6dc2662ba "sch_cake: Make gso-splitting configurable from userspace"
-Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
----
- net/sched/sch_cake.c | 1 +
- 1 file changed, 1 insertion(+)
-
---- a/net/sched/sch_cake.c
-+++ b/net/sched/sch_cake.c
-@@ -2180,6 +2180,7 @@ static const struct nla_policy cake_poli
-       [TCA_CAKE_MPU]           = { .type = NLA_U32 },
-       [TCA_CAKE_INGRESS]       = { .type = NLA_U32 },
-       [TCA_CAKE_ACK_FILTER]    = { .type = NLA_U32 },
-+      [TCA_CAKE_SPLIT_GSO]     = { .type = NLA_U32 },
-       [TCA_CAKE_FWMARK]        = { .type = NLA_U32 },
- };
diff --git a/target/linux/generic/backport-5.4/395-v5.8-net-sch_cake-Take-advantage-of-skb-hash-where-appropriate.patch b/target/linux/generic/backport-5.4/395-v5.8-net-sch_cake-Take-advantage-of-skb-hash-where-appropriate.patch
new file mode 100644 (file)
index 0000000..7b3396c
--- /dev/null
@@ -0,0 +1,170 @@
+From b0c19ed6088ab41dd2a727b60594b7297c15d6ce Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
+Date: Fri, 29 May 2020 14:43:44 +0200
+Subject: [PATCH] sch_cake: Take advantage of skb->hash where appropriate
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+While the other fq-based qdiscs take advantage of skb->hash and don't
+recompute it if it is already set, sch_cake does not.
+
+This was a deliberate choice because sch_cake hashes various parts of the
+packet header to support its advanced flow isolation modes. However,
+foregoing the use of skb->hash entirely loses a few important benefits:
+
+- When skb->hash is set by hardware, a few CPU cycles can be saved by not
+  hashing again in software.
+
+- Tunnel encapsulations will generally preserve the value of skb->hash from
+  before the encapsulation, which allows flow-based qdiscs to distinguish
+  between flows even though the outer packet header no longer has flow
+  information.
+
+It turns out that we can preserve these desirable properties in many cases,
+while still supporting the advanced flow isolation properties of sch_cake.
+This patch does so by reusing the skb->hash value as the flow_hash part of
+the hashing procedure in cake_hash() only in the following conditions:
+
+- If the skb->hash is marked as covering the flow headers (skb->l4_hash is
+  set)
+
+AND
+
+- NAT header rewriting is either disabled, or did not change any values
+  used for hashing. The latter is important to match local-origin packets
+  such as those of a tunnel endpoint.
+
+The immediate motivation for fixing this was the recent patch to WireGuard
+to preserve the skb->hash on encapsulation. As such, this is also what I
+tested against; with this patch, added latency under load for competing
+flows drops from ~8 ms to sub-1ms on an RRUL test over a WireGuard tunnel
+going through a virtual link shaped to 1Gbps using sch_cake. This matches
+the results we saw with a similar setup using sch_fq_codel when testing the
+WireGuard patch.
+
+Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc")
+Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
+---
+ net/sched/sch_cake.c | 65 ++++++++++++++++++++++++++++++++++----------
+ 1 file changed, 51 insertions(+), 14 deletions(-)
+
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -585,26 +585,48 @@ static bool cobalt_should_drop(struct co
+       return drop;
+ }
+-static void cake_update_flowkeys(struct flow_keys *keys,
++static bool cake_update_flowkeys(struct flow_keys *keys,
+                                const struct sk_buff *skb)
+ {
+ #if IS_ENABLED(CONFIG_NF_CONNTRACK)
+       struct nf_conntrack_tuple tuple = {};
+-      bool rev = !skb->_nfct;
++      bool rev = !skb->_nfct, upd = false;
++      __be32 ip;
+       if (tc_skb_protocol(skb) != htons(ETH_P_IP))
+-              return;
++              return false;
+       if (!nf_ct_get_tuple_skb(&tuple, skb))
+-              return;
++              return false;
+-      keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
+-      keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
++      ip = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
++      if (ip != keys->addrs.v4addrs.src) {
++              keys->addrs.v4addrs.src = ip;
++              upd = true;
++      }
++      ip = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
++      if (ip != keys->addrs.v4addrs.dst) {
++              keys->addrs.v4addrs.dst = ip;
++              upd = true;
++      }
+       if (keys->ports.ports) {
+-              keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all;
+-              keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all;
++              __be16 port;
++
++              port = rev ? tuple.dst.u.all : tuple.src.u.all;
++              if (port != keys->ports.src) {
++                      keys->ports.src = port;
++                      upd = true;
++              }
++              port = rev ? tuple.src.u.all : tuple.dst.u.all;
++              if (port != keys->ports.dst) {
++                      keys->ports.dst = port;
++                      upd = true;
++              }
+       }
++      return upd;
++#else
++      return false;
+ #endif
+ }
+@@ -625,23 +647,36 @@ static bool cake_ddst(int flow_mode)
+ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
+                    int flow_mode, u16 flow_override, u16 host_override)
+ {
++      bool hash_flows = (!flow_override && !!(flow_mode & CAKE_FLOW_FLOWS));
++      bool hash_hosts = (!host_override && !!(flow_mode & CAKE_FLOW_HOSTS));
++      bool nat_enabled = !!(flow_mode & CAKE_FLOW_NAT_FLAG);
+       u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0;
+       u16 reduced_hash, srchost_idx, dsthost_idx;
+       struct flow_keys keys, host_keys;
++      bool use_skbhash = skb->l4_hash;
+       if (unlikely(flow_mode == CAKE_FLOW_NONE))
+               return 0;
+-      /* If both overrides are set we can skip packet dissection entirely */
+-      if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) &&
+-          (host_override || !(flow_mode & CAKE_FLOW_HOSTS)))
++      /* If both overrides are set, or we can use the SKB hash and nat mode is
++       * disabled, we can skip packet dissection entirely. If nat mode is
++       * enabled there's another check below after doing the conntrack lookup.
++       */
++      if ((!hash_flows || (use_skbhash && !nat_enabled)) && !hash_hosts)
+               goto skip_hash;
+       skb_flow_dissect_flow_keys(skb, &keys,
+                                  FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
+-      if (flow_mode & CAKE_FLOW_NAT_FLAG)
+-              cake_update_flowkeys(&keys, skb);
++      /* Don't use the SKB hash if we change the lookup keys from conntrack */
++      if (nat_enabled && cake_update_flowkeys(&keys, skb))
++              use_skbhash = false;
++
++      /* If we can still use the SKB hash and don't need the host hash, we can
++       * skip the rest of the hashing procedure
++       */
++      if (use_skbhash && !hash_hosts)
++              goto skip_hash;
+       /* flow_hash_from_keys() sorts the addresses by value, so we have
+        * to preserve their order in a separate data structure to treat
+@@ -680,12 +715,14 @@ static u32 cake_hash(struct cake_tin_dat
+       /* This *must* be after the above switch, since as a
+        * side-effect it sorts the src and dst addresses.
+        */
+-      if (flow_mode & CAKE_FLOW_FLOWS)
++      if (hash_flows && !use_skbhash)
+               flow_hash = flow_hash_from_keys(&keys);
+ skip_hash:
+       if (flow_override)
+               flow_hash = flow_override - 1;
++      else if (use_skbhash)
++              flow_hash = skb->hash;
+       if (host_override) {
+               dsthost_hash = host_override - 1;
+               srchost_hash = host_override - 1;
diff --git a/target/linux/generic/backport-5.4/398-v5.9-net-sch_cake-Take-advantage-of-skb-hash-where-appropriate.patch b/target/linux/generic/backport-5.4/398-v5.9-net-sch_cake-Take-advantage-of-skb-hash-where-appropriate.patch
deleted file mode 100644 (file)
index 7b3396c..0000000
+++ /dev/null
@@ -1,170 +0,0 @@
-From b0c19ed6088ab41dd2a727b60594b7297c15d6ce Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
-Date: Fri, 29 May 2020 14:43:44 +0200
-Subject: [PATCH] sch_cake: Take advantage of skb->hash where appropriate
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-While the other fq-based qdiscs take advantage of skb->hash and don't
-recompute it if it is already set, sch_cake does not.
-
-This was a deliberate choice because sch_cake hashes various parts of the
-packet header to support its advanced flow isolation modes. However,
-forgoing the use of skb->hash entirely loses a few important benefits:
-
-- When skb->hash is set by hardware, a few CPU cycles can be saved by not
-  hashing again in software.
-
-- Tunnel encapsulations will generally preserve the value of skb->hash from
-  before the encapsulation, which allows flow-based qdiscs to distinguish
-  between flows even though the outer packet header no longer has flow
-  information.
-
-It turns out that we can preserve these desirable properties in many cases,
-while still supporting the advanced flow isolation properties of sch_cake.
-This patch does so by reusing the skb->hash value as the flow_hash part of
-the hashing procedure in cake_hash() only in the following conditions:
-
-- If the skb->hash is marked as covering the flow headers (skb->l4_hash is
-  set)
-
-AND
-
-- NAT header rewriting is either disabled, or did not change any values
-  used for hashing. The latter is important to match local-origin packets
-  such as those of a tunnel endpoint.
-
-The immediate motivation for fixing this was the recent patch to WireGuard
-to preserve the skb->hash on encapsulation. As such, this is also what I
-tested against; with this patch, added latency under load for competing
-flows drops from ~8 ms to sub-1ms on an RRUL test over a WireGuard tunnel
-going through a virtual link shaped to 1Gbps using sch_cake. This matches
-the results we saw with a similar setup using sch_fq_codel when testing the
-WireGuard patch.
-
-Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc")
-Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
----
- net/sched/sch_cake.c | 65 ++++++++++++++++++++++++++++++++++----------
- 1 file changed, 51 insertions(+), 14 deletions(-)
-
---- a/net/sched/sch_cake.c
-+++ b/net/sched/sch_cake.c
-@@ -585,26 +585,48 @@ static bool cobalt_should_drop(struct co
-       return drop;
- }
--static void cake_update_flowkeys(struct flow_keys *keys,
-+static bool cake_update_flowkeys(struct flow_keys *keys,
-                                const struct sk_buff *skb)
- {
- #if IS_ENABLED(CONFIG_NF_CONNTRACK)
-       struct nf_conntrack_tuple tuple = {};
--      bool rev = !skb->_nfct;
-+      bool rev = !skb->_nfct, upd = false;
-+      __be32 ip;
-       if (tc_skb_protocol(skb) != htons(ETH_P_IP))
--              return;
-+              return false;
-       if (!nf_ct_get_tuple_skb(&tuple, skb))
--              return;
-+              return false;
--      keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
--      keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
-+      ip = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
-+      if (ip != keys->addrs.v4addrs.src) {
-+              keys->addrs.v4addrs.src = ip;
-+              upd = true;
-+      }
-+      ip = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
-+      if (ip != keys->addrs.v4addrs.dst) {
-+              keys->addrs.v4addrs.dst = ip;
-+              upd = true;
-+      }
-       if (keys->ports.ports) {
--              keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all;
--              keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all;
-+              __be16 port;
-+
-+              port = rev ? tuple.dst.u.all : tuple.src.u.all;
-+              if (port != keys->ports.src) {
-+                      keys->ports.src = port;
-+                      upd = true;
-+              }
-+              port = rev ? tuple.src.u.all : tuple.dst.u.all;
-+              if (port != keys->ports.dst) {
-+                      keys->ports.dst = port;
-+                      upd = true;
-+              }
-       }
-+      return upd;
-+#else
-+      return false;
- #endif
- }
-@@ -625,23 +647,36 @@ static bool cake_ddst(int flow_mode)
- static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
-                    int flow_mode, u16 flow_override, u16 host_override)
- {
-+      bool hash_flows = (!flow_override && !!(flow_mode & CAKE_FLOW_FLOWS));
-+      bool hash_hosts = (!host_override && !!(flow_mode & CAKE_FLOW_HOSTS));
-+      bool nat_enabled = !!(flow_mode & CAKE_FLOW_NAT_FLAG);
-       u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0;
-       u16 reduced_hash, srchost_idx, dsthost_idx;
-       struct flow_keys keys, host_keys;
-+      bool use_skbhash = skb->l4_hash;
-       if (unlikely(flow_mode == CAKE_FLOW_NONE))
-               return 0;
--      /* If both overrides are set we can skip packet dissection entirely */
--      if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) &&
--          (host_override || !(flow_mode & CAKE_FLOW_HOSTS)))
-+      /* If both overrides are set, or we can use the SKB hash and nat mode is
-+       * disabled, we can skip packet dissection entirely. If nat mode is
-+       * enabled there's another check below after doing the conntrack lookup.
-+       */
-+      if ((!hash_flows || (use_skbhash && !nat_enabled)) && !hash_hosts)
-               goto skip_hash;
-       skb_flow_dissect_flow_keys(skb, &keys,
-                                  FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
--      if (flow_mode & CAKE_FLOW_NAT_FLAG)
--              cake_update_flowkeys(&keys, skb);
-+      /* Don't use the SKB hash if we change the lookup keys from conntrack */
-+      if (nat_enabled && cake_update_flowkeys(&keys, skb))
-+              use_skbhash = false;
-+
-+      /* If we can still use the SKB hash and don't need the host hash, we can
-+       * skip the rest of the hashing procedure
-+       */
-+      if (use_skbhash && !hash_hosts)
-+              goto skip_hash;
-       /* flow_hash_from_keys() sorts the addresses by value, so we have
-        * to preserve their order in a separate data structure to treat
-@@ -680,12 +715,14 @@ static u32 cake_hash(struct cake_tin_dat
-       /* This *must* be after the above switch, since as a
-        * side-effect it sorts the src and dst addresses.
-        */
--      if (flow_mode & CAKE_FLOW_FLOWS)
-+      if (hash_flows && !use_skbhash)
-               flow_hash = flow_hash_from_keys(&keys);
- skip_hash:
-       if (flow_override)
-               flow_hash = flow_override - 1;
-+      else if (use_skbhash)
-+              flow_hash = skb->hash;
-       if (host_override) {
-               dsthost_hash = host_override - 1;
-               srchost_hash = host_override - 1;