From 3a1cc23a75abcd9cea585eb84846507363d58397 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Tue, 25 Oct 2022 15:22:45 +0200
Subject: [PATCH] net: broadcom: bcm4908_enet: use build_skb()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

RX code can be more efficient with build_skb(). Allocating the actual
SKB around the ethernet packet buffer - right before passing it up -
results in better cache usage.
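
For illustration, the resulting RX pattern looks roughly like this
(generic sketch only, not this driver's exact code; the my_rx_wrap()
helper and its arguments are made up, and buf is assumed to come from
napi_alloc_frag() with frag_size covering the skb_shared_info tail):

	/* Wrap an already-DMA-filled page frag in an SKB without copying. */
	static struct sk_buff *my_rx_wrap(struct net_device *ndev, void *buf,
					  unsigned int frag_size,
					  unsigned int pkt_len)
	{
		struct sk_buff *skb;

		/* frag_size covers headroom + frame + struct skb_shared_info */
		skb = build_skb(buf, frag_size);
		if (unlikely(!skb)) {
			skb_free_frag(buf);
			return NULL;
		}
		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);	/* skip headroom */
		skb_put(skb, pkt_len);				/* frame length */
		skb->protocol = eth_type_trans(skb, ndev);

		return skb;
	}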

Without RPS (echo 0 > rps_cpus) BCM4908 NAT masq performance "jumps"
between two speeds: ~900 Mbps and 940 Mbps (it's a 4-CPU SoC). This
change bumps the lower speed from 905 Mb/s to 918 Mb/s (tested using
single-stream iperf 2.0.5 traffic).

There are more optimizations to consider. One obvious thing to try is
GRO; however, as BCM4908 doesn't do hw csum, it may actually lower
performance. Sometimes. Some early testing:

┌─────────────────────────────────┬─────────────────────┬────────────────────┐
│                                 │ netif_receive_skb() │ napi_gro_receive() │
├─────────────────────────────────┼─────────────────────┼────────────────────┤
│ netdev_alloc_skb()              │ 905 Mb/s            │ 892 Mb/s           │
│ napi_alloc_frag() + build_skb() │ 918 Mb/s            │ 917 Mb/s           │
└─────────────────────────────────┴─────────────────────┴────────────────────┘
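
For reference, the napi_gro_receive() column above only swaps the
delivery call (sketch; "napi" here is the RX ring's struct napi_struct):

	napi_gro_receive(napi, skb);	/* instead of netif_receive_skb(skb) */

Without hw csum, GRO has to verify checksums in software before
coalescing, which is the likely reason it can end up slower here.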

Other ideas to test (a sketch of idea 2 follows below):
1. napi_build_skb()
2. skb_copy_from_linear_data() for small packets

Those need proper testing first though. That can be done later.
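
Idea 1 should be a drop-in swap: napi_build_skb() takes the same
arguments as build_skb() but allocates the SKB head from a per-CPU NAPI
cache. Idea 2 is the classic copy-break technique; roughly (hypothetical
helper and threshold, not code from this driver):

	/* Copy-break: for frames shorter than some threshold (e.g. 256 B),
	 * copy into a small NAPI-cached SKB so the large DMA frag can be
	 * reused instead of travelling up the stack. */
	static struct sk_buff *my_rx_copybreak(struct napi_struct *napi,
					       const struct sk_buff *src,
					       unsigned int len)
	{
		struct sk_buff *skb;

		skb = napi_alloc_skb(napi, len);
		if (!skb)
			return NULL;

		skb_copy_from_linear_data(src, skb_put(skb, len), len);

		return skb;
	}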

Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Link: https://lore.kernel.org/r/20221025132245.22871-1-zajec5@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/broadcom/bcm4908_enet.c | 53 +++++++++++++-------
 1 file changed, 36 insertions(+), 17 deletions(-)

--- a/drivers/net/ethernet/broadcom/bcm4908_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c
 #define ENET_MAX_ETH_OVERHEAD			(ETH_HLEN + BRCM_MAX_TAG_LEN + VLAN_HLEN + \
 						 ETH_FCS_LEN + 4) /* 32 */
 
+#define ENET_RX_SKB_BUF_SIZE			(NET_SKB_PAD + NET_IP_ALIGN + \
+						 ETH_HLEN + BRCM_MAX_TAG_LEN + VLAN_HLEN + \
+						 ENET_MTU_MAX + ETH_FCS_LEN + 4)
+#define ENET_RX_SKB_BUF_ALLOC_SIZE		(SKB_DATA_ALIGN(ENET_RX_SKB_BUF_SIZE) + \
+						 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+#define ENET_RX_BUF_DMA_OFFSET			(NET_SKB_PAD + NET_IP_ALIGN)
+#define ENET_RX_BUF_DMA_SIZE			(ENET_RX_SKB_BUF_SIZE - ENET_RX_BUF_DMA_OFFSET)
+
 struct bcm4908_enet_dma_ring_bd {
 	__le32 ctl;
 	__le32 addr;
 } __packed;
 
 struct bcm4908_enet_dma_ring_slot {
-	struct sk_buff *skb;
+	union {
+		void *buf;			/* RX */
+		struct sk_buff *skb;		/* TX */
+	};
 	unsigned int len;
 	dma_addr_t dma_addr;
 };
@@ -260,22 +271,21 @@ static int bcm4908_enet_dma_alloc_rx_buf
 	u32 tmp;
 	int err;
 
-	slot->len = ENET_MTU_MAX + ENET_MAX_ETH_OVERHEAD;
-
-	slot->skb = netdev_alloc_skb(enet->netdev, slot->len);
-	if (!slot->skb)
+	slot->buf = napi_alloc_frag(ENET_RX_SKB_BUF_ALLOC_SIZE);
+	if (!slot->buf)
 		return -ENOMEM;
 
-	slot->dma_addr = dma_map_single(dev, slot->skb->data, slot->len, DMA_FROM_DEVICE);
+	slot->dma_addr = dma_map_single(dev, slot->buf + ENET_RX_BUF_DMA_OFFSET,
+					ENET_RX_BUF_DMA_SIZE, DMA_FROM_DEVICE);
 	err = dma_mapping_error(dev, slot->dma_addr);
 	if (err) {
 		dev_err(dev, "Failed to map DMA buffer: %d\n", err);
-		kfree_skb(slot->skb);
-		slot->skb = NULL;
+		skb_free_frag(slot->buf);
+		slot->buf = NULL;
 		return err;
 	}
 
-	tmp = slot->len << DMA_CTL_LEN_DESC_BUFLENGTH_SHIFT;
+	tmp = ENET_RX_BUF_DMA_SIZE << DMA_CTL_LEN_DESC_BUFLENGTH_SHIFT;
 	tmp |= DMA_CTL_STATUS_OWN;
 	if (idx == enet->rx_ring.length - 1)
 		tmp |= DMA_CTL_STATUS_WRAP;
@@ -315,11 +325,11 @@ static void bcm4908_enet_dma_uninit(stru
 	for (i = rx_ring->length - 1; i >= 0; i--) {
 		slot = &rx_ring->slots[i];
 
-		if (!slot->skb)
+		if (!slot->buf)
 			continue;
 
-		dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_FROM_DEVICE);
-		kfree_skb(slot->skb);
+		dma_unmap_single(dev, slot->dma_addr, ENET_RX_BUF_DMA_SIZE, DMA_FROM_DEVICE);
+		skb_free_frag(slot->buf);
 	}
@@ -575,6 +585,7 @@ static int bcm4908_enet_poll_rx(struct n
 	while (handled < weight) {
 		struct bcm4908_enet_dma_ring_bd *buf_desc;
 		struct bcm4908_enet_dma_ring_slot slot;
+		struct sk_buff *skb;
 		u32 ctl;
 		u32 len;
 		int err;
@@ -598,16 +609,24 @@ static int bcm4908_enet_poll_rx(struct n
 
 		if (len < ETH_ZLEN ||
 		    (ctl & (DMA_CTL_STATUS_SOP | DMA_CTL_STATUS_EOP)) != (DMA_CTL_STATUS_SOP | DMA_CTL_STATUS_EOP)) {
-			kfree_skb(slot.skb);
+			skb_free_frag(slot.buf);
 			enet->netdev->stats.rx_dropped++;
 			break;
 		}
 
-		dma_unmap_single(dev, slot.dma_addr, slot.len, DMA_FROM_DEVICE);
+		dma_unmap_single(dev, slot.dma_addr, ENET_RX_BUF_DMA_SIZE, DMA_FROM_DEVICE);
+
+		skb = build_skb(slot.buf, ENET_RX_SKB_BUF_ALLOC_SIZE);
+		if (unlikely(!skb)) {
+			skb_free_frag(slot.buf);
+			enet->netdev->stats.rx_dropped++;
+			break;
+		}
+		skb_reserve(skb, ENET_RX_BUF_DMA_OFFSET);
+		skb_put(skb, len - ETH_FCS_LEN);
+		skb->protocol = eth_type_trans(skb, enet->netdev);
 
-		skb_put(slot.skb, len - ETH_FCS_LEN);
-		slot.skb->protocol = eth_type_trans(slot.skb, enet->netdev);
-		netif_receive_skb(slot.skb);
+		netif_receive_skb(skb);
 
 		enet->netdev->stats.rx_packets++;
 		enet->netdev->stats.rx_bytes += len;
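
The new sizing macros partition the page frag like this (illustrative
comment only; NET_SKB_PAD and NET_IP_ALIGN values are arch/config
dependent):

	/*
	 * slot->buf: ENET_RX_SKB_BUF_ALLOC_SIZE bytes from napi_alloc_frag()
	 *
	 * [ ENET_RX_BUF_DMA_OFFSET ][ ENET_RX_BUF_DMA_SIZE ][ shared_info ]
	 *   NET_SKB_PAD+NET_IP_ALIGN  DMA-mapped RX area      tailroom that
	 *   headroom, not mapped      the MAC writes into     build_skb()
	 *                                                     turns into
	 *                                                     skb_shared_info
	 */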