kernel: cake: backport upstream tweaks & fixes
[openwrt/openwrt.git] / target / linux / generic / backport-4.19 / 396-5.8-sch_cake-don-t-try-to-reallocate-or-unshare-skb-unco.patch
1 From 9208d2863ac689a563b92f2161d8d1e7127d0add Mon Sep 17 00:00:00 2001
2 From: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
3 Date: Thu, 25 Jun 2020 22:12:07 +0200
4 Subject: [PATCH] sch_cake: don't try to reallocate or unshare skb
5 unconditionally
6 MIME-Version: 1.0
7 Content-Type: text/plain; charset=UTF-8
8 Content-Transfer-Encoding: 8bit
9
10 cake_handle_diffserv() tries to linearize mac and network header parts of
11 skb and to make it writable unconditionally. In some cases it leads to full
12 skb reallocation, which reduces throughput and increases CPU load. Some
13 measurements of IPv4 forward + NAPT on MIPS router with 580 MHz single-core
14 CPU were conducted. It appears that on kernel 4.9 skb_try_make_writable()
15 reallocates the skb if it was allocated in the ethernet driver via the
16 so-called 'build skb' method from the page cache (this was first noticed as
17 an unexplained growth of the kmalloc-2048 slab).
18
19 Obtain DSCP value via read-only skb_header_pointer() call, and leave
20 linearization only for DSCP bleaching or ECN CE setting. And, as an
21 additional optimisation, skip diffserv parsing entirely if it is not needed
22 by the current configuration.
23
24 Fixes: c87b4ecdbe8d ("sch_cake: Make sure we can write the IP header before changing DSCP bits")
25 Signed-off-by: Ilya Ponetayev <i.ponetaev@ndmsystems.com>
26 [ fix a few style issues, reflow commit message ]
27 Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
28 Signed-off-by: David S. Miller <davem@davemloft.net>
29 Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
30 ---
31 net/sched/sch_cake.c | 41 ++++++++++++++++++++++++++++++-----------
32 1 file changed, 30 insertions(+), 11 deletions(-)
33
34 --- a/net/sched/sch_cake.c
35 +++ b/net/sched/sch_cake.c
36 @@ -1553,30 +1553,49 @@ static unsigned int cake_drop(struct Qdi
37
38 static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
39 {
40 - int wlen = skb_network_offset(skb);
41 + const int offset = skb_network_offset(skb);
42 + u16 *buf, buf_;
43 u8 dscp;
44
45 switch (tc_skb_protocol(skb)) {
46 case htons(ETH_P_IP):
47 - wlen += sizeof(struct iphdr);
48 - if (!pskb_may_pull(skb, wlen) ||
49 - skb_try_make_writable(skb, wlen))
50 + buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
51 + if (unlikely(!buf))
52 return 0;
53
54 - dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
55 - if (wash && dscp)
56 + /* ToS is in the second byte of iphdr */
57 + dscp = ipv4_get_dsfield((struct iphdr *)buf) >> 2;
58 +
59 + if (wash && dscp) {
60 + const int wlen = offset + sizeof(struct iphdr);
61 +
62 + if (!pskb_may_pull(skb, wlen) ||
63 + skb_try_make_writable(skb, wlen))
64 + return 0;
65 +
66 ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
67 + }
68 +
69 return dscp;
70
71 case htons(ETH_P_IPV6):
72 - wlen += sizeof(struct ipv6hdr);
73 - if (!pskb_may_pull(skb, wlen) ||
74 - skb_try_make_writable(skb, wlen))
75 + buf = skb_header_pointer(skb, offset, sizeof(buf_), &buf_);
76 + if (unlikely(!buf))
77 return 0;
78
79 - dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
80 - if (wash && dscp)
81 + /* Traffic class is in the first and second bytes of ipv6hdr */
82 + dscp = ipv6_get_dsfield((struct ipv6hdr *)buf) >> 2;
83 +
84 + if (wash && dscp) {
85 + const int wlen = offset + sizeof(struct ipv6hdr);
86 +
87 + if (!pskb_may_pull(skb, wlen) ||
88 + skb_try_make_writable(skb, wlen))
89 + return 0;
90 +
91 ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
92 + }
93 +
94 return dscp;
95
96 case htons(ETH_P_ARP):