kernel: cake: skb hash backport to 419
[openwrt/staging/dedeckeh.git] / target / linux / generic / backport-4.19 / 395-v5.8-net-sch_cake-Take-advantage-of-skb-hash-where-appropriate.patch
1 From b0c19ed6088ab41dd2a727b60594b7297c15d6ce Mon Sep 17 00:00:00 2001
2 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
3 Date: Fri, 29 May 2020 14:43:44 +0200
4 Subject: [PATCH] sch_cake: Take advantage of skb->hash where appropriate
5 MIME-Version: 1.0
6 Content-Type: text/plain; charset=UTF-8
7 Content-Transfer-Encoding: 8bit
8
9 While the other fq-based qdiscs take advantage of skb->hash and don't
10 recompute it if it is already set, sch_cake does not.
11
12 This was a deliberate choice because sch_cake hashes various parts of the
13 packet header to support its advanced flow isolation modes. However,
14 foregoing the use of skb->hash entirely loses a few important benefits:
15
16 - When skb->hash is set by hardware, a few CPU cycles can be saved by not
17 hashing again in software.
18
19 - Tunnel encapsulations will generally preserve the value of skb->hash from
20 before the encapsulation, which allows flow-based qdiscs to distinguish
21 between flows even though the outer packet header no longer has flow
22 information.
23
24 It turns out that we can preserve these desirable properties in many cases,
25 while still supporting the advanced flow isolation properties of sch_cake.
26 This patch does so by reusing the skb->hash value as the flow_hash part of
27 the hashing procedure in cake_hash() only in the following conditions:
28
29 - If the skb->hash is marked as covering the flow headers (skb->l4_hash is
30 set)
31
32 AND
33
34 - NAT header rewriting is either disabled, or did not change any values
35 used for hashing. The latter is important to match local-origin packets
36 such as those of a tunnel endpoint.
37
38 The immediate motivation for fixing this was the recent patch to WireGuard
39 to preserve the skb->hash on encapsulation. As such, this is also what I
40 tested against; with this patch, added latency under load for competing
41 flows drops from ~8 ms to sub-1ms on an RRUL test over a WireGuard tunnel
42 going through a virtual link shaped to 1Gbps using sch_cake. This matches
43 the results we saw with a similar setup using sch_fq_codel when testing the
44 WireGuard patch.
45
46 Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc")
47 Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
48 Signed-off-by: David S. Miller <davem@davemloft.net>
49 Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
50 ---
51 net/sched/sch_cake.c | 65 ++++++++++++++++++++++++++++++++++----------
52 1 file changed, 51 insertions(+), 14 deletions(-)
53
54 --- a/net/sched/sch_cake.c
55 +++ b/net/sched/sch_cake.c
56 @@ -585,26 +585,48 @@ static bool cobalt_should_drop(struct co
57 return drop;
58 }
59
60 -static void cake_update_flowkeys(struct flow_keys *keys,
61 +static bool cake_update_flowkeys(struct flow_keys *keys,
62 const struct sk_buff *skb)
63 {
64 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
65 struct nf_conntrack_tuple tuple = {};
66 - bool rev = !skb->_nfct;
67 + bool rev = !skb->_nfct, upd = false;
68 + __be32 ip;
69
70 if (tc_skb_protocol(skb) != htons(ETH_P_IP))
71 - return;
72 + return false;
73
74 if (!nf_ct_get_tuple_skb(&tuple, skb))
75 - return;
76 + return false;
77
78 - keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
79 - keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
80 + ip = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
81 + if (ip != keys->addrs.v4addrs.src) {
82 + keys->addrs.v4addrs.src = ip;
83 + upd = true;
84 + }
85 + ip = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
86 + if (ip != keys->addrs.v4addrs.dst) {
87 + keys->addrs.v4addrs.dst = ip;
88 + upd = true;
89 + }
90
91 if (keys->ports.ports) {
92 - keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all;
93 - keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all;
94 + __be16 port;
95 +
96 + port = rev ? tuple.dst.u.all : tuple.src.u.all;
97 + if (port != keys->ports.src) {
98 + keys->ports.src = port;
99 + upd = true;
100 + }
101 + port = rev ? tuple.src.u.all : tuple.dst.u.all;
102 + if (port != keys->ports.dst) {
103 + keys->ports.dst = port;
104 + upd = true;
105 + }
106 }
107 + return upd;
108 +#else
109 + return false;
110 #endif
111 }
112
113 @@ -625,23 +647,36 @@ static bool cake_ddst(int flow_mode)
114 static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
115 int flow_mode, u16 flow_override, u16 host_override)
116 {
117 + bool hash_flows = (!flow_override && !!(flow_mode & CAKE_FLOW_FLOWS));
118 + bool hash_hosts = (!host_override && !!(flow_mode & CAKE_FLOW_HOSTS));
119 + bool nat_enabled = !!(flow_mode & CAKE_FLOW_NAT_FLAG);
120 u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0;
121 u16 reduced_hash, srchost_idx, dsthost_idx;
122 struct flow_keys keys, host_keys;
123 + bool use_skbhash = skb->l4_hash;
124
125 if (unlikely(flow_mode == CAKE_FLOW_NONE))
126 return 0;
127
128 - /* If both overrides are set we can skip packet dissection entirely */
129 - if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) &&
130 - (host_override || !(flow_mode & CAKE_FLOW_HOSTS)))
131 + /* If both overrides are set, or we can use the SKB hash and nat mode is
132 + * disabled, we can skip packet dissection entirely. If nat mode is
133 + * enabled there's another check below after doing the conntrack lookup.
134 + */
135 + if ((!hash_flows || (use_skbhash && !nat_enabled)) && !hash_hosts)
136 goto skip_hash;
137
138 skb_flow_dissect_flow_keys(skb, &keys,
139 FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
140
141 - if (flow_mode & CAKE_FLOW_NAT_FLAG)
142 - cake_update_flowkeys(&keys, skb);
143 + /* Don't use the SKB hash if we change the lookup keys from conntrack */
144 + if (nat_enabled && cake_update_flowkeys(&keys, skb))
145 + use_skbhash = false;
146 +
147 + /* If we can still use the SKB hash and don't need the host hash, we can
148 + * skip the rest of the hashing procedure
149 + */
150 + if (use_skbhash && !hash_hosts)
151 + goto skip_hash;
152
153 /* flow_hash_from_keys() sorts the addresses by value, so we have
154 * to preserve their order in a separate data structure to treat
155 @@ -680,12 +715,14 @@ static u32 cake_hash(struct cake_tin_dat
156 /* This *must* be after the above switch, since as a
157 * side-effect it sorts the src and dst addresses.
158 */
159 - if (flow_mode & CAKE_FLOW_FLOWS)
160 + if (hash_flows && !use_skbhash)
161 flow_hash = flow_hash_from_keys(&keys);
162
163 skip_hash:
164 if (flow_override)
165 flow_hash = flow_override - 1;
166 + else if (use_skbhash)
167 + flow_hash = skb->hash;
168 if (host_override) {
169 dsthost_hash = host_override - 1;
170 srchost_hash = host_override - 1;