target/linux/bcm63xx/patches-5.4/045-v5.12-bcm63xx_enet-convert-to-build_skb.patch
From d27de0ef5ef995df2cc5f5c006c0efcf0a62b6af Mon Sep 17 00:00:00 2001
From: Sieng Piaw Liew <liew.s.piaw@gmail.com>
Date: Wed, 6 Jan 2021 22:42:07 +0800
Subject: [PATCH 6/7] bcm63xx_enet: convert to build_skb

We can increase the efficiency of the rx path by using buffers to receive
packets and then building SKBs around them just before passing them into
the network stack. In contrast, preallocating SKBs too early reduces CPU
cache efficiency.
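
As a rough illustration of the pattern (a sketch only, not the exact hunks
below; rx_wrap_buf, rx_frag_size and rx_headroom are stand-ins for the
driver's own fields), the completion path now wraps the DMA buffer in an
SKB only after the hardware has filled it:

	/* assumes <linux/skbuff.h>; buf is the page fragment handed to the
	 * hardware at refill time, len is the received payload length with
	 * the FCS already stripped by the caller */
	static struct sk_buff *rx_wrap_buf(void *buf,
					   unsigned int rx_frag_size,
					   unsigned int rx_headroom,
					   unsigned int len)
	{
		struct sk_buff *skb;

		skb = build_skb(buf, rx_frag_size);	/* no early alloc, no copy */
		if (unlikely(!skb)) {
			skb_free_frag(buf);		/* return the fragment */
			return NULL;			/* caller counts rx_dropped */
		}
		skb_reserve(skb, rx_headroom);		/* NET_SKB_PAD (+ NET_IP_ALIGN) */
		skb_put(skb, len);
		return skb;
	}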

Check whether we're in NAPI context when refilling RX; we're almost always
running in NAPI context. Dispatch to napi_alloc_frag directly instead of
relying on netdev_alloc_frag, which does the same thing but with the
overhead of local_bh_disable/enable.
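
A minimal sketch of that dispatch (illustrative only; the real change
threads a napi_mode flag through bcm_enet_refill_rx, as in the diff below):

	/* pick the per-CPU frag allocator matching the calling context:
	 * napi_alloc_frag() skips the local_bh_disable/enable pair */
	static void *rx_alloc_frag(unsigned int rx_frag_size, bool napi_mode)
	{
		if (likely(napi_mode))
			return napi_alloc_frag(rx_frag_size);
		return netdev_alloc_frag(rx_frag_size);	/* e.g. open/timer path */
	}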

Tested on a BCM6328 at 320 MHz with iperf3 -M 512 to measure packet/sec
performance. The results include the netif_receive_skb_list and
NET_IP_ALIGN optimizations.

Before:
[ ID] Interval           Transfer     Bandwidth       Retr
[  4]   0.00-10.00  sec  49.9 MBytes  41.9 Mbits/sec  197         sender
[  4]   0.00-10.00  sec  49.3 MBytes  41.3 Mbits/sec              receiver

After:
[ ID] Interval           Transfer     Bandwidth       Retr
[  4]   0.00-30.00  sec   171 MBytes  47.8 Mbits/sec  272         sender
[  4]   0.00-30.00  sec   170 MBytes  47.6 Mbits/sec              receiver

Signed-off-by: Sieng Piaw Liew <liew.s.piaw@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bcm63xx_enet.c | 111 ++++++++++---------
 drivers/net/ethernet/broadcom/bcm63xx_enet.h |  14 ++-
 2 files changed, 71 insertions(+), 54 deletions(-)

--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -221,7 +221,7 @@ static void bcm_enet_mdio_write_mii(stru
 /*
  * refill rx queue
  */
-static int bcm_enet_refill_rx(struct net_device *dev)
+static int bcm_enet_refill_rx(struct net_device *dev, bool napi_mode)
 {
 	struct bcm_enet_priv *priv;
 
@@ -229,29 +229,29 @@ static int bcm_enet_refill_rx(struct net
 
 	while (priv->rx_desc_count < priv->rx_ring_size) {
 		struct bcm_enet_desc *desc;
-		struct sk_buff *skb;
-		dma_addr_t p;
 		int desc_idx;
 		u32 len_stat;
 
 		desc_idx = priv->rx_dirty_desc;
 		desc = &priv->rx_desc_cpu[desc_idx];
 
-		if (!priv->rx_skb[desc_idx]) {
-			if (priv->enet_is_sw)
-				skb = netdev_alloc_skb_ip_align(dev, priv->rx_skb_size);
+		if (!priv->rx_buf[desc_idx]) {
+			void *buf;
+
+			if (likely(napi_mode))
+				buf = napi_alloc_frag(priv->rx_frag_size);
 			else
-				skb = netdev_alloc_skb(dev, priv->rx_skb_size);
-			if (!skb)
+				buf = netdev_alloc_frag(priv->rx_frag_size);
+			if (unlikely(!buf))
 				break;
-			priv->rx_skb[desc_idx] = skb;
-			p = dma_map_single(&priv->pdev->dev, skb->data,
-					   priv->rx_skb_size,
-					   DMA_FROM_DEVICE);
-			desc->address = p;
+			priv->rx_buf[desc_idx] = buf;
+			desc->address = dma_map_single(&priv->pdev->dev,
+						       buf + priv->rx_buf_offset,
+						       priv->rx_buf_size,
+						       DMA_FROM_DEVICE);
 		}
 
-		len_stat = priv->rx_skb_size << DMADESC_LENGTH_SHIFT;
+		len_stat = priv->rx_buf_size << DMADESC_LENGTH_SHIFT;
 		len_stat |= DMADESC_OWNER_MASK;
 		if (priv->rx_dirty_desc == priv->rx_ring_size - 1) {
 			len_stat |= (DMADESC_WRAP_MASK >> priv->dma_desc_shift);
@@ -291,7 +291,7 @@ static void bcm_enet_refill_rx_timer(str
 	struct net_device *dev = priv->net_dev;
 
 	spin_lock(&priv->rx_lock);
-	bcm_enet_refill_rx(dev);
+	bcm_enet_refill_rx(dev, false);
 	spin_unlock(&priv->rx_lock);
 }
 
@@ -321,6 +321,7 @@ static int bcm_enet_receive_queue(struct
 		int desc_idx;
 		u32 len_stat;
 		unsigned int len;
+		void *buf;
 
 		desc_idx = priv->rx_curr_desc;
 		desc = &priv->rx_desc_cpu[desc_idx];
@@ -366,16 +367,14 @@ static int bcm_enet_receive_queue(struct
 		}
 
 		/* valid packet */
-		skb = priv->rx_skb[desc_idx];
+		buf = priv->rx_buf[desc_idx];
 		len = (len_stat & DMADESC_LENGTH_MASK) >> DMADESC_LENGTH_SHIFT;
 		/* don't include FCS */
 		len -= 4;
 
 		if (len < copybreak) {
-			struct sk_buff *nskb;
-
-			nskb = napi_alloc_skb(&priv->napi, len);
-			if (!nskb) {
+			skb = napi_alloc_skb(&priv->napi, len);
+			if (unlikely(!skb)) {
 				/* forget packet, just rearm desc */
 				dev->stats.rx_dropped++;
 				continue;
@@ -383,14 +382,21 @@ static int bcm_enet_receive_queue(struct
 
 			dma_sync_single_for_cpu(kdev, desc->address,
 						len, DMA_FROM_DEVICE);
-			memcpy(nskb->data, skb->data, len);
+			memcpy(skb->data, buf + priv->rx_buf_offset, len);
 			dma_sync_single_for_device(kdev, desc->address,
 						   len, DMA_FROM_DEVICE);
-			skb = nskb;
 		} else {
-			dma_unmap_single(&priv->pdev->dev, desc->address,
-					 priv->rx_skb_size, DMA_FROM_DEVICE);
-			priv->rx_skb[desc_idx] = NULL;
+			dma_unmap_single(kdev, desc->address,
+					 priv->rx_buf_size, DMA_FROM_DEVICE);
+			priv->rx_buf[desc_idx] = NULL;
+
+			skb = build_skb(buf, priv->rx_frag_size);
+			if (unlikely(!skb)) {
+				skb_free_frag(buf);
+				dev->stats.rx_dropped++;
+				continue;
+			}
+			skb_reserve(skb, priv->rx_buf_offset);
 		}
 
 		skb_put(skb, len);
@@ -404,7 +410,7 @@ static int bcm_enet_receive_queue(struct
 	netif_receive_skb_list(&rx_list);
 
 	if (processed || !priv->rx_desc_count) {
-		bcm_enet_refill_rx(dev);
+		bcm_enet_refill_rx(dev, true);
 
 		/* kick rx dma */
 		enet_dmac_writel(priv, priv->dma_chan_en_mask,
@@ -861,22 +867,22 @@ static void bcm_enet_adjust_link(struct
 		priv->pause_tx ? "tx" : "off");
 }
 
-static void bcm_enet_free_rx_skb_ring(struct device *kdev, struct bcm_enet_priv *priv)
+static void bcm_enet_free_rx_buf_ring(struct device *kdev, struct bcm_enet_priv *priv)
 {
 	int i;
 
 	for (i = 0; i < priv->rx_ring_size; i++) {
 		struct bcm_enet_desc *desc;
 
-		if (!priv->rx_skb[i])
+		if (!priv->rx_buf[i])
 			continue;
 
 		desc = &priv->rx_desc_cpu[i];
-		dma_unmap_single(kdev, desc->address, priv->rx_skb_size,
+		dma_unmap_single(kdev, desc->address, priv->rx_buf_size,
 				 DMA_FROM_DEVICE);
-		kfree_skb(priv->rx_skb[i]);
+		skb_free_frag(priv->rx_buf[i]);
 	}
-	kfree(priv->rx_skb);
+	kfree(priv->rx_buf);
 }
 
 /*
@@ -988,10 +994,10 @@ static int bcm_enet_open(struct net_devi
 	priv->tx_curr_desc = 0;
 	spin_lock_init(&priv->tx_lock);
 
-	/* init & fill rx ring with skbs */
-	priv->rx_skb = kcalloc(priv->rx_ring_size, sizeof(struct sk_buff *),
+	/* init & fill rx ring with buffers */
+	priv->rx_buf = kcalloc(priv->rx_ring_size, sizeof(void *),
 			       GFP_KERNEL);
-	if (!priv->rx_skb) {
+	if (!priv->rx_buf) {
 		ret = -ENOMEM;
 		goto out_free_tx_skb;
 	}
@@ -1008,8 +1014,8 @@ static int bcm_enet_open(struct net_devi
 	enet_dmac_writel(priv, ENETDMA_BUFALLOC_FORCE_MASK | 0,
 			 ENETDMAC_BUFALLOC, priv->rx_chan);
 
-	if (bcm_enet_refill_rx(dev)) {
-		dev_err(kdev, "cannot allocate rx skb queue\n");
+	if (bcm_enet_refill_rx(dev, false)) {
+		dev_err(kdev, "cannot allocate rx buffer queue\n");
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -1103,7 +1109,7 @@ static int bcm_enet_open(struct net_devi
 	return 0;
 
 out:
-	bcm_enet_free_rx_skb_ring(kdev, priv);
+	bcm_enet_free_rx_buf_ring(kdev, priv);
 
 out_free_tx_skb:
 	kfree(priv->tx_skb);
@@ -1209,8 +1215,8 @@ static int bcm_enet_stop(struct net_devi
 	/* force reclaim of all tx buffers */
 	bcm_enet_tx_reclaim(dev, 1);
 
-	/* free the rx skb ring */
-	bcm_enet_free_rx_skb_ring(kdev, priv);
+	/* free the rx buffer ring */
+	bcm_enet_free_rx_buf_ring(kdev, priv);
 
 	/* free remaining allocated memory */
 	kfree(priv->tx_skb);
@@ -1637,9 +1643,12 @@ static int bcm_enet_change_mtu(struct ne
 	 * align rx buffer size to dma burst len, account FCS since
 	 * it's appended
 	 */
-	priv->rx_skb_size = ALIGN(actual_mtu + ETH_FCS_LEN,
+	priv->rx_buf_size = ALIGN(actual_mtu + ETH_FCS_LEN,
 				  priv->dma_maxburst * 4);
 
+	priv->rx_frag_size = SKB_DATA_ALIGN(priv->rx_buf_offset + priv->rx_buf_size) +
+			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
 	dev->mtu = new_mtu;
 	return 0;
 }
@@ -1725,6 +1734,7 @@ static int bcm_enet_probe(struct platfor
 
 	priv->enet_is_sw = false;
 	priv->dma_maxburst = BCMENET_DMA_MAXBURST;
+	priv->rx_buf_offset = NET_SKB_PAD;
 
 	ret = bcm_enet_change_mtu(dev, dev->mtu);
 	if (ret)
@@ -2142,7 +2152,7 @@ static int bcm_enetsw_open(struct net_de
 	priv->tx_skb = kcalloc(priv->tx_ring_size, sizeof(struct sk_buff *),
 			       GFP_KERNEL);
 	if (!priv->tx_skb) {
-		dev_err(kdev, "cannot allocate rx skb queue\n");
+		dev_err(kdev, "cannot allocate tx skb queue\n");
 		ret = -ENOMEM;
 		goto out_free_tx_ring;
 	}
@@ -2152,11 +2162,11 @@ static int bcm_enetsw_open(struct net_de
 	priv->tx_curr_desc = 0;
 	spin_lock_init(&priv->tx_lock);
 
-	/* init & fill rx ring with skbs */
-	priv->rx_skb = kcalloc(priv->rx_ring_size, sizeof(struct sk_buff *),
+	/* init & fill rx ring with buffers */
+	priv->rx_buf = kcalloc(priv->rx_ring_size, sizeof(void *),
 			       GFP_KERNEL);
-	if (!priv->rx_skb) {
-		dev_err(kdev, "cannot allocate rx skb queue\n");
+	if (!priv->rx_buf) {
+		dev_err(kdev, "cannot allocate rx buffer queue\n");
 		ret = -ENOMEM;
 		goto out_free_tx_skb;
 	}
@@ -2203,8 +2213,8 @@ static int bcm_enetsw_open(struct net_de
 	enet_dma_writel(priv, ENETDMA_BUFALLOC_FORCE_MASK | 0,
 			ENETDMA_BUFALLOC_REG(priv->rx_chan));
 
-	if (bcm_enet_refill_rx(dev)) {
-		dev_err(kdev, "cannot allocate rx skb queue\n");
+	if (bcm_enet_refill_rx(dev, false)) {
+		dev_err(kdev, "cannot allocate rx buffer queue\n");
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -2303,7 +2313,7 @@ static int bcm_enetsw_open(struct net_de
 	return 0;
 
 out:
-	bcm_enet_free_rx_skb_ring(kdev, priv);
+	bcm_enet_free_rx_buf_ring(kdev, priv);
 
 out_free_tx_skb:
 	kfree(priv->tx_skb);
@@ -2353,8 +2363,8 @@ static int bcm_enetsw_stop(struct net_de
 	/* force reclaim of all tx buffers */
 	bcm_enet_tx_reclaim(dev, 1);
 
-	/* free the rx skb ring */
-	bcm_enet_free_rx_skb_ring(kdev, priv);
+	/* free the rx buffer ring */
+	bcm_enet_free_rx_buf_ring(kdev, priv);
 
 	/* free remaining allocated memory */
 	kfree(priv->tx_skb);
@@ -2655,6 +2665,7 @@ static int bcm_enetsw_probe(struct platf
 	priv->rx_ring_size = BCMENET_DEF_RX_DESC;
 	priv->tx_ring_size = BCMENET_DEF_TX_DESC;
 	priv->dma_maxburst = BCMENETSW_DMA_MAXBURST;
+	priv->rx_buf_offset = NET_SKB_PAD + NET_IP_ALIGN;
 
 	pd = dev_get_platdata(&pdev->dev);
 	if (pd) {
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.h
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.h
@@ -230,11 +230,17 @@ struct bcm_enet_priv {
 	/* next dirty rx descriptor to refill */
 	int rx_dirty_desc;
 
-	/* size of allocated rx skbs */
-	unsigned int rx_skb_size;
+	/* size of allocated rx buffers */
+	unsigned int rx_buf_size;
 
-	/* list of skb given to hw for rx */
-	struct sk_buff **rx_skb;
+	/* allocated rx buffer offset */
+	unsigned int rx_buf_offset;
+
+	/* size of allocated rx frag */
+	unsigned int rx_frag_size;
+
+	/* list of buffer given to hw for rx */
+	void **rx_buf;
 
 	/* used when rx skb allocation failed, so we defer rx queue
 	 * refill */