From e015593573b3e3f74bd8a63c05fa92902194a354 Mon Sep 17 00:00:00 2001
From: Aleksander Jan Bajkowski <olek2@wp.pl>
Date: Tue, 4 Jan 2022 16:11:44 +0100
Subject: [PATCH 715/715] net: lantiq_xrx200: convert to build_skb

We can increase the efficiency of the rx path by receiving packets
into plain buffers and building SKBs around them just before passing
them into the network stack. In contrast, preallocating SKBs too
early reduces CPU cache efficiency.

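Roughly, the rx flow changes to the following pattern (an
illustrative sketch only; wrap_rx_buf() is a hypothetical helper,
not code from this patch, and the real changes are in the diff
below):

  static struct sk_buff *wrap_rx_buf(struct xrx200_chan *ch,
                                     void *buf, int len)
  {
          /* The fragment was handed to the hardware earlier via
           * napi_alloc_frag()/netdev_alloc_frag(); an skb is built
           * around the buffer only once it holds a received frame.
           */
          struct sk_buff *skb = build_skb(buf, ch->priv->rx_skb_size);

          if (!skb) {
                  /* build_skb() can fail; drop the fragment */
                  skb_free_frag(buf);
                  return NULL;
          }

          /* Skip the headroom the DMA engine was programmed to skip.
           * The driver applies NET_IP_ALIGN only to the head skb of
           * a frag_list chain; it is folded in here for brevity.
           */
          skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
          skb_put(skb, len);
          return skb;
  }
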
NAT Performance results on BT Home Hub 5A (kernel 5.10.89, mtu 1500):

         Down       Up
Before   577 Mbps   648 Mbps
After    624 Mbps   695 Mbps

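For reference, the size handed to the frag allocator and to
build_skb() must cover more than the DMA buffer itself, since
build_skb() places a struct skb_shared_info at the end of the
fragment; the new xrx200_skb_size() helper accounts for that:

  rx_skb_size = SKB_DATA_ALIGN(rx_buf_size + NET_SKB_PAD + NET_IP_ALIGN) +
                SKB_DATA_ALIGN(sizeof(struct skb_shared_info));

The DMA mapping still covers only rx_buf_size bytes, with the
descriptor address offset by NET_SKB_PAD + NET_IP_ALIGN into the
fragment, so the hardware never touches the shared-info area.
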
Signed-off-by: Aleksander Jan Bajkowski <olek2@wp.pl>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/lantiq_xrx200.c | 56 ++++++++++++++++++----------
 1 file changed, 36 insertions(+), 20 deletions(-)

--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -63,7 +63,11 @@ struct xrx200_chan {

         struct napi_struct napi;
         struct ltq_dma_channel dma;
-        struct sk_buff *skb[LTQ_DESC_NUM];
+
+        union {
+                struct sk_buff *skb[LTQ_DESC_NUM];
+                void *rx_buff[LTQ_DESC_NUM];
+        };

         struct sk_buff *skb_head;
         struct sk_buff *skb_tail;
@@ -78,6 +82,7 @@ struct xrx200_priv {
         struct xrx200_chan chan_rx;

         u16 rx_buf_size;
+        u16 rx_skb_size;

         struct net_device *net_dev;
         struct device *dev;
@@ -115,6 +120,12 @@ static int xrx200_buffer_size(int mtu)
         return round_up(xrx200_max_frame_len(mtu), 4 * XRX200_DMA_BURST_LEN);
 }

+static int xrx200_skb_size(u16 buf_size)
+{
+        return SKB_DATA_ALIGN(buf_size + NET_SKB_PAD + NET_IP_ALIGN) +
+               SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+}
+
 /* drop all the packets from the DMA ring */
 static void xrx200_flush_dma(struct xrx200_chan *ch)
 {
@@ -173,30 +184,29 @@ static int xrx200_close(struct net_devic
         return 0;
 }

-static int xrx200_alloc_skb(struct xrx200_chan *ch)
+static int xrx200_alloc_buf(struct xrx200_chan *ch, void *(*alloc)(unsigned int size))
 {
-        struct sk_buff *skb = ch->skb[ch->dma.desc];
+        void *buf = ch->rx_buff[ch->dma.desc];
         struct xrx200_priv *priv = ch->priv;
         dma_addr_t mapping;
         int ret = 0;

-        ch->skb[ch->dma.desc] = netdev_alloc_skb_ip_align(priv->net_dev,
-                                                          priv->rx_buf_size);
-        if (!ch->skb[ch->dma.desc]) {
+        ch->rx_buff[ch->dma.desc] = alloc(priv->rx_skb_size);
+        if (!ch->rx_buff[ch->dma.desc]) {
                 ret = -ENOMEM;
                 goto skip;
         }

-        mapping = dma_map_single(priv->dev, ch->skb[ch->dma.desc]->data,
+        mapping = dma_map_single(priv->dev, ch->rx_buff[ch->dma.desc],
                                  priv->rx_buf_size, DMA_FROM_DEVICE);
         if (unlikely(dma_mapping_error(priv->dev, mapping))) {
-                dev_kfree_skb_any(ch->skb[ch->dma.desc]);
-                ch->skb[ch->dma.desc] = skb;
+                skb_free_frag(ch->rx_buff[ch->dma.desc]);
+                ch->rx_buff[ch->dma.desc] = buf;
                 ret = -ENOMEM;
                 goto skip;
         }

-        ch->dma.desc_base[ch->dma.desc].addr = mapping;
+        ch->dma.desc_base[ch->dma.desc].addr = mapping + NET_SKB_PAD + NET_IP_ALIGN;
         /* Make sure the address is written before we give it to HW */
         wmb();
 skip:
@@ -210,13 +220,14 @@ static int xrx200_hw_receive(struct xrx2
 {
         struct xrx200_priv *priv = ch->priv;
         struct ltq_dma_desc *desc = &ch->dma.desc_base[ch->dma.desc];
-        struct sk_buff *skb = ch->skb[ch->dma.desc];
+        void *buf = ch->rx_buff[ch->dma.desc];
         u32 ctl = desc->ctl;
         int len = (ctl & LTQ_DMA_SIZE_MASK);
         struct net_device *net_dev = priv->net_dev;
+        struct sk_buff *skb;
         int ret;

-        ret = xrx200_alloc_skb(ch);
+        ret = xrx200_alloc_buf(ch, napi_alloc_frag);

         ch->dma.desc++;
         ch->dma.desc %= LTQ_DESC_NUM;
@@ -227,19 +238,21 @@ static int xrx200_hw_receive(struct xrx2
                 return ret;
         }

+        skb = build_skb(buf, priv->rx_skb_size);
+        skb_reserve(skb, NET_SKB_PAD);
         skb_put(skb, len);

         /* add buffers to skb via skb->frag_list */
         if (ctl & LTQ_DMA_SOP) {
                 ch->skb_head = skb;
                 ch->skb_tail = skb;
+                skb_reserve(skb, NET_IP_ALIGN);
         } else if (ch->skb_head) {
                 if (ch->skb_head == ch->skb_tail)
                         skb_shinfo(ch->skb_tail)->frag_list = skb;
                 else
                         ch->skb_tail->next = skb;
                 ch->skb_tail = skb;
-                skb_reserve(ch->skb_tail, -NET_IP_ALIGN);
                 ch->skb_head->len += skb->len;
                 ch->skb_head->data_len += skb->len;
                 ch->skb_head->truesize += skb->truesize;
@@ -395,12 +408,13 @@ xrx200_change_mtu(struct net_device *net
         struct xrx200_chan *ch_rx = &priv->chan_rx;
         int old_mtu = net_dev->mtu;
         bool running = false;
-        struct sk_buff *skb;
+        void *buff;
         int curr_desc;
         int ret = 0;

         net_dev->mtu = new_mtu;
         priv->rx_buf_size = xrx200_buffer_size(new_mtu);
+        priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size);

         if (new_mtu <= old_mtu)
                 return ret;
@@ -416,14 +430,15 @@ xrx200_change_mtu(struct net_device *net

         for (ch_rx->dma.desc = 0; ch_rx->dma.desc < LTQ_DESC_NUM;
              ch_rx->dma.desc++) {
-                skb = ch_rx->skb[ch_rx->dma.desc];
-                ret = xrx200_alloc_skb(ch_rx);
+                buff = ch_rx->rx_buff[ch_rx->dma.desc];
+                ret = xrx200_alloc_buf(ch_rx, netdev_alloc_frag);
                 if (ret) {
                         net_dev->mtu = old_mtu;
                         priv->rx_buf_size = xrx200_buffer_size(old_mtu);
+                        priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size);
                         break;
                 }
-                dev_kfree_skb_any(skb);
+                skb_free_frag(buff);
         }

         ch_rx->dma.desc = curr_desc;
@@ -476,7 +491,7 @@ static int xrx200_dma_init(struct xrx200
         ltq_dma_alloc_rx(&ch_rx->dma);
         for (ch_rx->dma.desc = 0; ch_rx->dma.desc < LTQ_DESC_NUM;
              ch_rx->dma.desc++) {
-                ret = xrx200_alloc_skb(ch_rx);
+                ret = xrx200_alloc_buf(ch_rx, netdev_alloc_frag);
                 if (ret)
                         goto rx_free;
         }
@@ -511,7 +526,7 @@ rx_ring_free:
         /* free the allocated RX ring */
         for (i = 0; i < LTQ_DESC_NUM; i++) {
                 if (priv->chan_rx.skb[i])
-                        dev_kfree_skb_any(priv->chan_rx.skb[i]);
+                        skb_free_frag(priv->chan_rx.rx_buff[i]);
         }

 rx_free:
@@ -528,7 +543,7 @@ static void xrx200_hw_cleanup(struct xrx

         /* free the allocated RX ring */
         for (i = 0; i < LTQ_DESC_NUM; i++)
-                dev_kfree_skb_any(priv->chan_rx.skb[i]);
+                skb_free_frag(priv->chan_rx.rx_buff[i]);
 }

 static int xrx200_probe(struct platform_device *pdev)
@@ -554,6 +569,7 @@ static int xrx200_probe(struct platform_
         net_dev->min_mtu = ETH_ZLEN;
         net_dev->max_mtu = XRX200_DMA_DATA_LEN - xrx200_max_frame_len(0);
         priv->rx_buf_size = xrx200_buffer_size(ETH_DATA_LEN);
+        priv->rx_skb_size = xrx200_skb_size(priv->rx_buf_size);

         /* load the memory ranges */
         res = platform_get_resource(pdev, IORESOURCE_MEM, 0);