ar71xx: improve ethernet driver cache footprint by removing an unnecessary pointer
[openwrt/staging/jow.git] / target / linux / ar71xx / files / drivers / net / ethernet / atheros / ag71xx / ag71xx_main.c
index 715c1cef1be6e4ce90c2e4a65d397ddb7bcc22b0..5ef324e8533e15ff61e3db8d32addede266b4231 100644 (file)
@@ -28,9 +28,11 @@ static int ag71xx_msg_level = -1;
 module_param_named(msg_level, ag71xx_msg_level, int, 0);
 MODULE_PARM_DESC(msg_level, "Message level (-1=defaults,0=none,...,16=all)");
 
+#define ETH_SWITCH_HEADER_LEN  2
+
 static inline unsigned int ag71xx_max_frame_len(unsigned int mtu)
 {
-       return ETH_HLEN + VLAN_HLEN + mtu + ETH_FCS_LEN;
+       return ETH_SWITCH_HEADER_LEN + ETH_HLEN + VLAN_HLEN + mtu + ETH_FCS_LEN;
 }
 
 static void ag71xx_dump_dma_regs(struct ag71xx *ag)
@@ -98,7 +100,6 @@ static void ag71xx_ring_free(struct ag71xx_ring *ring)
 static int ag71xx_ring_alloc(struct ag71xx_ring *ring)
 {
        int err;
-       int i;
 
        ring->desc_size = sizeof(struct ag71xx_desc);
        if (ring->desc_size % cache_line_size()) {
@@ -122,13 +123,6 @@ static int ag71xx_ring_alloc(struct ag71xx_ring *ring)
                goto err;
        }
 
-       for (i = 0; i < ring->size; i++) {
-               int idx = i * ring->desc_size;
-               ring->buf[i].desc = (struct ag71xx_desc *)&ring->descs_cpu[idx];
-               DBG("ag71xx: ring %p, desc %d at %p\n",
-                       ring, i, ring->buf[i].desc);
-       }
-
        return 0;
 
 err:
@@ -142,10 +136,12 @@ static void ag71xx_ring_tx_clean(struct ag71xx *ag)
        u32 bytes_compl = 0, pkts_compl = 0;
 
        while (ring->curr != ring->dirty) {
+               struct ag71xx_desc *desc;
                u32 i = ring->dirty % ring->size;
 
-               if (!ag71xx_desc_empty(ring->buf[i].desc)) {
-                       ring->buf[i].desc->ctrl = 0;
+               desc = ag71xx_ring_desc(ring, i);
+               if (!ag71xx_desc_empty(desc)) {
+                       desc->ctrl = 0;
                        dev->stats.tx_errors++;
                }
 
@@ -170,10 +166,12 @@ static void ag71xx_ring_tx_init(struct ag71xx *ag)
        int i;
 
        for (i = 0; i < ring->size; i++) {
-               ring->buf[i].desc->next = (u32) (ring->descs_dma +
+               struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
+
+               desc->next = (u32) (ring->descs_dma +
                        ring->desc_size * ((i + 1) % ring->size));
 
-               ring->buf[i].desc->ctrl = DESC_EMPTY;
+               desc->ctrl = DESC_EMPTY;
                ring->buf[i].skb = NULL;
        }
 
@@ -220,6 +218,8 @@ static int ag71xx_buffer_offset(struct ag71xx *ag)
 static bool ag71xx_fill_rx_buf(struct ag71xx *ag, struct ag71xx_buf *buf,
                               int offset)
 {
+       struct ag71xx_ring *ring = &ag->rx_ring;
+       struct ag71xx_desc *desc = ag71xx_ring_desc(ring, buf - &ring->buf[0]);
        void *data;
 
        data = kmalloc(ag->rx_buf_size +
@@ -231,7 +231,7 @@ static bool ag71xx_fill_rx_buf(struct ag71xx *ag, struct ag71xx_buf *buf,
        buf->rx_buf = data;
        buf->dma_addr = dma_map_single(&ag->dev->dev, data, ag->rx_buf_size,
                                       DMA_FROM_DEVICE);
-       buf->desc->data = (u32) buf->dma_addr + offset;
+       desc->data = (u32) buf->dma_addr + offset;
        return true;
 }
 
@@ -244,21 +244,24 @@ static int ag71xx_ring_rx_init(struct ag71xx *ag)
 
        ret = 0;
        for (i = 0; i < ring->size; i++) {
-               ring->buf[i].desc->next = (u32) (ring->descs_dma +
+               struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
+
+               desc->next = (u32) (ring->descs_dma +
                        ring->desc_size * ((i + 1) % ring->size));
 
                DBG("ag71xx: RX desc at %p, next is %08x\n",
-                       ring->buf[i].desc,
-                       ring->buf[i].desc->next);
+                       desc, desc->next);
        }
 
        for (i = 0; i < ring->size; i++) {
+               struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
+
                if (!ag71xx_fill_rx_buf(ag, &ring->buf[i], offset)) {
                        ret = -ENOMEM;
                        break;
                }
 
-               ring->buf[i].desc->ctrl = DESC_EMPTY;
+               desc->ctrl = DESC_EMPTY;
        }
 
        /* flush descriptors */
@@ -278,15 +281,17 @@ static int ag71xx_ring_rx_refill(struct ag71xx *ag)
 
        count = 0;
        for (; ring->curr - ring->dirty > 0; ring->dirty++) {
+               struct ag71xx_desc *desc;
                unsigned int i;
 
                i = ring->dirty % ring->size;
+               desc = ag71xx_ring_desc(ring, i);
 
                if (!ring->buf[i].rx_buf &&
                    !ag71xx_fill_rx_buf(ag, &ring->buf[i], offset))
                        break;
 
-               ring->buf[i].desc->ctrl = DESC_EMPTY;
+               desc->ctrl = DESC_EMPTY;
                count++;
        }
 
@@ -468,18 +473,18 @@ static void ag71xx_hw_init(struct ag71xx *ag)
                reset_mask &= ~(AR71XX_RESET_GE0_PHY | AR71XX_RESET_GE1_PHY);
 
                ath79_device_reset_set(reset_phy);
-               mdelay(50);
+               msleep(50);
                ath79_device_reset_clear(reset_phy);
-               mdelay(200);
+               msleep(200);
        }
 
        ag71xx_sb(ag, AG71XX_REG_MAC_CFG1, MAC_CFG1_SR);
        udelay(20);
 
        ath79_device_reset_set(reset_mask);
-       mdelay(100);
+       msleep(100);
        ath79_device_reset_clear(reset_mask);
-       mdelay(200);
+       msleep(200);
 
        ag71xx_hw_setup(ag);
 
@@ -534,6 +539,7 @@ void ag71xx_link_adjust(struct ag71xx *ag)
        u32 cfg2;
        u32 ifctl;
        u32 fifo5;
+       u32 fifo3;
 
        if (!ag->link) {
                ag71xx_hw_stop(ag);
@@ -574,11 +580,18 @@ void ag71xx_link_adjust(struct ag71xx *ag)
        }
 
        if (pdata->is_ar91xx)
-               ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, 0x00780fff);
+               fifo3 = 0x00780fff;
        else if (pdata->is_ar724x)
-               ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, pdata->fifo_cfg3);
+               fifo3 = pdata->fifo_cfg3;
        else
-               ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, 0x008001ff);
+               fifo3 = 0x008001ff;
+
+       if (ag->tx_ring.desc_split) {
+               fifo3 &= 0xffff;
+               fifo3 |= ((2048 - ag->tx_ring.desc_split) / 4) << 16;
+       }
+
+       ag71xx_wr(ag, AG71XX_REG_FIFO_CFG3, fifo3);
 
        if (pdata->set_speed)
                pdata->set_speed(ag->speed);
@@ -673,6 +686,54 @@ static int ag71xx_stop(struct net_device *dev)
        return 0;
 }
 
+static int ag71xx_fill_dma_desc(struct ag71xx_ring *ring, u32 addr, int len)
+{
+       int i;
+       struct ag71xx_desc *desc;
+       int ndesc = 0;
+       int split = ring->desc_split;
+
+       if (!split)
+               split = len;
+
+       while (len > 0) {
+               unsigned int cur_len = len;
+
+               i = (ring->curr + ndesc) % ring->size;
+               desc = ag71xx_ring_desc(ring, i);
+
+               if (!ag71xx_desc_empty(desc))
+                       return -1;
+
+               if (cur_len > split) {
+                       cur_len = split;
+
+                       /*
+                        * TX will hang if DMA transfers <= 4 bytes,
+                        * make sure next segment is more than 4 bytes long.
+                        */
+                       if (len <= split + 4)
+                               cur_len -= 4;
+               }
+
+               desc->data = addr;
+               addr += cur_len;
+               len -= cur_len;
+
+               if (len > 0)
+                       cur_len |= DESC_MORE;
+
+               /* prevent early tx attempt of this descriptor */
+               if (!ndesc)
+                       cur_len |= DESC_EMPTY;
+
+               desc->ctrl = cur_len;
+               ndesc++;
+       }
+
+       return ndesc;
+}
+
 static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb,
                                          struct net_device *dev)
 {
@@ -680,18 +741,12 @@ static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb,
        struct ag71xx_ring *ring = &ag->tx_ring;
        struct ag71xx_desc *desc;
        dma_addr_t dma_addr;
-       int i;
-
-       i = ring->curr % ring->size;
-       desc = ring->buf[i].desc;
-
-       if (!ag71xx_desc_empty(desc))
-               goto err_drop;
+       int i, n, ring_min;
 
        if (ag71xx_has_ar8216(ag))
                ag71xx_add_ar8216_header(ag, skb);
 
-       if (skb->len <= 0) {
+       if (skb->len <= 4) {
                DBG("%s: packet len is too small\n", ag->dev->name);
                goto err_drop;
        }
@@ -699,21 +754,33 @@ static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb,
        dma_addr = dma_map_single(&dev->dev, skb->data, skb->len,
                                  DMA_TO_DEVICE);
 
-       netdev_sent_queue(dev, skb->len);
+       i = ring->curr % ring->size;
+       desc = ag71xx_ring_desc(ring, i);
+
+       /* setup descriptor fields */
+       n = ag71xx_fill_dma_desc(ring, (u32) dma_addr, skb->len & ag->desc_pktlen_mask);
+       if (n < 0)
+               goto err_drop_unmap;
+
+       i = (ring->curr + n - 1) % ring->size;
        ring->buf[i].len = skb->len;
        ring->buf[i].skb = skb;
        ring->buf[i].timestamp = jiffies;
 
-       /* setup descriptor fields */
-       desc->data = (u32) dma_addr;
-       desc->ctrl = skb->len & ag->desc_pktlen_mask;
+       netdev_sent_queue(dev, skb->len);
+
+       desc->ctrl &= ~DESC_EMPTY;
+       ring->curr += n;
 
        /* flush descriptor */
        wmb();
 
-       ring->curr++;
-       if (ring->curr == (ring->dirty + ring->size)) {
-               DBG("%s: tx queue full\n", ag->dev->name);
+       ring_min = 2;
+       if (ring->desc_split)
+           ring_min *= AG71XX_TX_RING_DS_PER_PKT;
+
+       if (ring->curr - ring->dirty >= ring->size - ring_min) {
+               DBG("%s: tx queue full\n", dev->name);
                netif_stop_queue(dev);
        }
 
@@ -724,6 +791,9 @@ static netdev_tx_t ag71xx_hard_start_xmit(struct sk_buff *skb,
 
        return NETDEV_TX_OK;
 
+err_drop_unmap:
+       dma_unmap_single(&dev->dev, dma_addr, skb->len, DMA_TO_DEVICE);
+
 err_drop:
        dev->stats.tx_dropped++;
 
@@ -834,14 +904,14 @@ static int ag71xx_tx_packets(struct ag71xx *ag)
        struct ag71xx_platform_data *pdata = ag71xx_get_pdata(ag);
        int sent = 0;
        int bytes_compl = 0;
+       int n = 0;
 
        DBG("%s: processing TX ring\n", ag->dev->name);
 
-       while (ring->dirty != ring->curr) {
-               unsigned int i = ring->dirty % ring->size;
-               struct ag71xx_desc *desc = ring->buf[i].desc;
+       while (ring->dirty + n != ring->curr) {
+               unsigned int i = (ring->dirty + n) % ring->size;
+               struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
                struct sk_buff *skb = ring->buf[i].skb;
-               int len = ring->buf[i].len;
 
                if (!ag71xx_desc_empty(desc)) {
                        if (pdata->is_ar7240 &&
@@ -850,21 +920,29 @@ static int ag71xx_tx_packets(struct ag71xx *ag)
                        break;
                }
 
-               ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_PS);
-
-               bytes_compl += len;
-               ag->dev->stats.tx_bytes += len;
-               ag->dev->stats.tx_packets++;
+               n++;
+               if (!skb)
+                       continue;
 
                dev_kfree_skb_any(skb);
                ring->buf[i].skb = NULL;
 
-               ring->dirty++;
+               bytes_compl += ring->buf[i].len;
+
                sent++;
+               ring->dirty += n;
+
+               while (n > 0) {
+                       ag71xx_wr(ag, AG71XX_REG_TX_STATUS, TX_STATUS_PS);
+                       n--;
+               }
        }
 
        DBG("%s: %d packets sent out\n", ag->dev->name, sent);
 
+       ag->dev->stats.tx_bytes += bytes_compl;
+       ag->dev->stats.tx_packets += sent;
+
        if (!sent)
                return 0;
 
@@ -888,7 +966,7 @@ static int ag71xx_rx_packets(struct ag71xx *ag, int limit)
 
        while (done < limit) {
                unsigned int i = ring->curr % ring->size;
-               struct ag71xx_desc *desc = ring->buf[i].desc;
+               struct ag71xx_desc *desc = ag71xx_ring_desc(ring, i);
                struct sk_buff *skb;
                int pktlen;
                int err = 0;
@@ -1193,6 +1271,11 @@ static int ag71xx_probe(struct platform_device *pdev)
        ag->max_frame_len = pdata->max_frame_len;
        ag->desc_pktlen_mask = pdata->desc_pktlen_mask;
 
+       if (!pdata->is_ar724x && !pdata->is_ar91xx) {
+               ag->tx_ring.desc_split = AG71XX_TX_RING_SPLIT;
+               ag->tx_ring.size *= AG71XX_TX_RING_DS_PER_PKT;
+       }
+
        ag->stop_desc = dma_alloc_coherent(NULL,
                sizeof(struct ag71xx_desc), &ag->stop_desc_dma, GFP_KERNEL);