ar71xx: improve rx performance of the ethernet driver by using build_skb to deliver...
author Felix Fietkau <nbd@openwrt.org>
Mon, 28 May 2012 02:55:59 +0000 (02:55 +0000)
committer Felix Fietkau <nbd@openwrt.org>
Mon, 28 May 2012 02:55:59 +0000 (02:55 +0000)
SVN-Revision: 31934

target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx.h
target/linux/ar71xx/files/drivers/net/ethernet/atheros/ag71xx/ag71xx_main.c

index 881741660bbbcec7d4a5070f63bc35308094174a..b9d95adaf62fc6bcb3a05ad64ba73a04232a65fe 100644 (file)
@@ -53,6 +53,7 @@
 #define AG71XX_TX_MTU_LEN      1540
 #define AG71XX_RX_PKT_SIZE     \
        (ETH_FRAME_LEN + ETH_FCS_LEN + VLAN_HLEN)
+#define AG71XX_RX_BUF_SIZE (AG71XX_RX_PKT_SIZE + NET_SKB_PAD + NET_IP_ALIGN)
 
 #define AG71XX_TX_RING_SIZE_DEFAULT    64
 #define AG71XX_RX_RING_SIZE_DEFAULT    128
@@ -85,7 +86,10 @@ struct ag71xx_desc {
 } __attribute__((aligned(4)));
 
 struct ag71xx_buf {
-       struct sk_buff          *skb;
+       union {
+               struct sk_buff  *skb;
+               void            *rx_buf;
+       };
        struct ag71xx_desc      *desc;
        dma_addr_t              dma_addr;
        unsigned long           timestamp;
index 6d1aff7f7e367ae2bb7711099bb97e9eb633f03a..fb99d272816c7eaa169a5b9ca21cdebec1b92f49 100644 (file)
@@ -189,15 +189,17 @@ static void ag71xx_ring_rx_clean(struct ag71xx *ag)
                return;
 
        for (i = 0; i < ring->size; i++)
-               if (ring->buf[i].skb) {
+               if (ring->buf[i].rx_buf) {
                        dma_unmap_single(&ag->dev->dev, ring->buf[i].dma_addr,
-                                        AG71XX_RX_PKT_SIZE, DMA_FROM_DEVICE);
-                       kfree_skb(ring->buf[i].skb);
+                                        AG71XX_RX_BUF_SIZE, DMA_FROM_DEVICE);
+                       kfree(ring->buf[i].rx_buf);
                }
 }
 
-struct sk_buff *ag71xx_rx_alloc(struct ag71xx *ag)
+static int ag71xx_buffer_offset(struct ag71xx *ag)
 {
+       int offset = NET_SKB_PAD;
+
        /*
         * On AR71xx/AR91xx packets must be 4-byte aligned.
         *
@@ -205,17 +207,35 @@ struct sk_buff *ag71xx_rx_alloc(struct ag71xx *ag)
         * so we don't need any extra alignment in that case.
         */
        if (!ag71xx_get_pdata(ag)->is_ar724x || ag71xx_has_ar8216(ag))
-               return netdev_alloc_skb(ag->dev, AG71XX_RX_PKT_SIZE);
+               return offset;
 
-       return netdev_alloc_skb_ip_align(ag->dev, AG71XX_RX_PKT_SIZE);
+       return offset + NET_IP_ALIGN;
 }
 
+static bool ag71xx_fill_rx_buf(struct ag71xx *ag, struct ag71xx_buf *buf,
+                              int offset)
+{
+       void *data;
+
+       data = kmalloc(AG71XX_RX_BUF_SIZE +
+                      SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
+                      GFP_ATOMIC);
+       if (!data)
+               return false;
+
+       buf->rx_buf = data;
+       buf->dma_addr = dma_map_single(&ag->dev->dev, data,
+                                      AG71XX_RX_BUF_SIZE, DMA_FROM_DEVICE);
+       buf->desc->data = (u32) buf->dma_addr + offset;
+       return true;
+}
 
 static int ag71xx_ring_rx_init(struct ag71xx *ag)
 {
        struct ag71xx_ring *ring = &ag->rx_ring;
        unsigned int i;
        int ret;
+       int offset = ag71xx_buffer_offset(ag);
 
        ret = 0;
        for (i = 0; i < ring->size; i++) {
@@ -228,22 +248,11 @@ static int ag71xx_ring_rx_init(struct ag71xx *ag)
        }
 
        for (i = 0; i < ring->size; i++) {
-               struct sk_buff *skb;
-               dma_addr_t dma_addr;
-
-               skb = ag71xx_rx_alloc(ag);
-               if (!skb) {
+               if (!ag71xx_fill_rx_buf(ag, &ring->buf[i], offset)) {
                        ret = -ENOMEM;
                        break;
                }
 
-               skb->dev = ag->dev;
-               dma_addr = dma_map_single(&ag->dev->dev, skb->data,
-                                         AG71XX_RX_PKT_SIZE,
-                                         DMA_FROM_DEVICE);
-               ring->buf[i].skb = skb;
-               ring->buf[i].dma_addr = dma_addr;
-               ring->buf[i].desc->data = (u32) dma_addr;
                ring->buf[i].desc->ctrl = DESC_EMPTY;
        }
 
@@ -260,6 +269,7 @@ static int ag71xx_ring_rx_refill(struct ag71xx *ag)
 {
        struct ag71xx_ring *ring = &ag->rx_ring;
        unsigned int count;
+       int offset = ag71xx_buffer_offset(ag);
 
        count = 0;
        for (; ring->curr - ring->dirty > 0; ring->dirty++) {
@@ -267,24 +277,9 @@ static int ag71xx_ring_rx_refill(struct ag71xx *ag)
 
                i = ring->dirty % ring->size;
 
-               if (ring->buf[i].skb == NULL) {
-                       dma_addr_t dma_addr;
-                       struct sk_buff *skb;
-
-                       skb = ag71xx_rx_alloc(ag);
-                       if (skb == NULL)
-                               break;
-
-                       skb->dev = ag->dev;
-
-                       dma_addr = dma_map_single(&ag->dev->dev, skb->data,
-                                                 AG71XX_RX_PKT_SIZE,
-                                                 DMA_FROM_DEVICE);
-
-                       ring->buf[i].skb = skb;
-                       ring->buf[i].dma_addr = dma_addr;
-                       ring->buf[i].desc->data = (u32) dma_addr;
-               }
+               if (!ring->buf[i].rx_buf &&
+                   !ag71xx_fill_rx_buf(ag, &ring->buf[i], offset))
+                       break;
 
                ring->buf[i].desc->ctrl = DESC_EMPTY;
                count++;
@@ -863,6 +858,7 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit)
 {
        struct net_device *dev = ag->dev;
        struct ag71xx_ring *ring = &ag->rx_ring;
+       int offset = ag71xx_buffer_offset(ag);
        int done = 0;
 
        DBG("%s: rx packets, limit=%d, curr=%u, dirty=%u\n",
@@ -885,18 +881,25 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit)
 
                ag71xx_wr(ag, AG71XX_REG_RX_STATUS, RX_STATUS_PR);
 
-               skb = ring->buf[i].skb;
                pktlen = ag71xx_desc_pktlen(desc);
                pktlen -= ETH_FCS_LEN;
 
                dma_unmap_single(&dev->dev, ring->buf[i].dma_addr,
-                                AG71XX_RX_PKT_SIZE, DMA_FROM_DEVICE);
+                                AG71XX_RX_BUF_SIZE, DMA_FROM_DEVICE);
 
                dev->last_rx = jiffies;
                dev->stats.rx_packets++;
                dev->stats.rx_bytes += pktlen;
 
+               skb = build_skb(ring->buf[i].rx_buf);
+               if (!skb) {
+                       kfree(ring->buf[i].rx_buf);
+                       goto next;
+               }
+
+               skb_reserve(skb, offset);
                skb_put(skb, pktlen);
+
                if (ag71xx_has_ar8216(ag))
                        err = ag71xx_remove_ar8216_header(ag, skb, pktlen);
 
@@ -910,7 +913,8 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit)
                        netif_receive_skb(skb);
                }
 
-               ring->buf[i].skb = NULL;
+next:
+               ring->buf[i].rx_buf = NULL;
                done++;
 
                ring->curr++;
@@ -944,7 +948,7 @@ static int ag71xx_poll(struct napi_struct *napi, int limit)
        ag71xx_debugfs_update_napi_stats(ag, rx_done, tx_done);
 
        rx_ring = &ag->rx_ring;
-       if (rx_ring->buf[rx_ring->dirty % rx_ring->size].skb == NULL)
+       if (rx_ring->buf[rx_ring->dirty % rx_ring->size].rx_buf == NULL)
                goto oom;
 
        status = ag71xx_rr(ag, AG71XX_REG_RX_STATUS);