ralink: add reworked ethernet driver
[openwrt/staging/chunkeey.git] / target / linux / ramips / files / drivers / net / ethernet / ralink / ralink_soc_eth.c
index f98af293c83cc7342c2750a2e4f09fcf77efdcc1..5bc9b58336962a14b820c9f25e32e9ea1dd0fd76 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/reset.h>
 #include <linux/tcp.h>
 #include <linux/io.h>
+#include <linux/bug.h>
 
 #include <asm/mach-ralink/ralink_regs.h>
 
@@ -41,8 +42,8 @@
 #include "ralink_ethtool.h"
 
 #define        MAX_RX_LENGTH           1536
-#define FE_RX_HLEN             (NET_SKB_PAD + VLAN_ETH_HLEN + VLAN_HLEN + \
-               + NET_IP_ALIGN + ETH_FCS_LEN)
+#define FE_RX_ETH_HLEN         (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
+#define FE_RX_HLEN             (NET_SKB_PAD + FE_RX_ETH_HLEN + NET_IP_ALIGN)
 #define DMA_DUMMY_DESC         0xffffffff
 #define FE_DEFAULT_MSG_ENABLE    \
         (NETIF_MSG_DRV      | \
@@ -56,8 +57,8 @@
 
 #define TX_DMA_DESP2_DEF       (TX_DMA_LS0 | TX_DMA_DONE)
 #define TX_DMA_DESP4_DEF       (TX_DMA_QN(3) | TX_DMA_PN(1))
-#define NEXT_TX_DESP_IDX(X)    (((X) + 1) & (priv->tx_ring_size - 1))
-#define NEXT_RX_DESP_IDX(X)    (((X) + 1) & (priv->rx_ring_size - 1))
+#define NEXT_TX_DESP_IDX(X)    (((X) + 1) & (ring->tx_ring_size - 1))
+#define NEXT_RX_DESP_IDX(X)    (((X) + 1) & (ring->rx_ring_size - 1))
 
 #define SYSC_REG_RSTCTRL       0x34
 
@@ -65,7 +66,7 @@ static int fe_msg_level = -1;
 module_param_named(msg_level, fe_msg_level, int, 0);
 MODULE_PARM_DESC(msg_level, "Message level (-1=defaults,0=none,...,16=all)");
 
-static const u32 fe_reg_table_default[FE_REG_COUNT] = {
+static const u16 fe_reg_table_default[FE_REG_COUNT] = {
        [FE_REG_PDMA_GLO_CFG] = FE_PDMA_GLO_CFG,
        [FE_REG_PDMA_RST_CFG] = FE_PDMA_RST_CFG,
        [FE_REG_DLY_INT_CFG] = FE_DLY_INT_CFG,
@@ -84,7 +85,7 @@ static const u32 fe_reg_table_default[FE_REG_COUNT] = {
        [FE_REG_FE_RST_GL] = FE_FE_RST_GL,
 };
 
-static const u32 *fe_reg_table = fe_reg_table_default;
+static const u16 *fe_reg_table = fe_reg_table_default;
 
 struct fe_work_t {
        int bitnr;
@@ -172,14 +173,21 @@ static int fe_set_mac_address(struct net_device *dev, void *p)
 
 static inline int fe_max_frag_size(int mtu)
 {
+       /* make sure buf_size will be at least MAX_RX_LENGTH */
+       if (mtu + FE_RX_ETH_HLEN < MAX_RX_LENGTH)
+               mtu = MAX_RX_LENGTH - FE_RX_ETH_HLEN;
+
        return SKB_DATA_ALIGN(FE_RX_HLEN + mtu) +
                SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 }
 
 static inline int fe_max_buf_size(int frag_size)
 {
-       return frag_size - NET_SKB_PAD - NET_IP_ALIGN -
-               SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+       int buf_size = frag_size - NET_SKB_PAD - NET_IP_ALIGN -
+                      SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+       BUG_ON(buf_size < MAX_RX_LENGTH);
+       return buf_size;
 }
 
 static inline void fe_get_rxd(struct fe_rx_dma *rxd, struct fe_rx_dma *dma_rxd)
@@ -202,77 +210,80 @@ static inline void fe_set_txd(struct fe_tx_dma *txd, struct fe_tx_dma *dma_txd)
 static void fe_clean_rx(struct fe_priv *priv)
 {
        int i;
+       struct fe_rx_ring *ring = &priv->rx_ring;
 
-       if (priv->rx_data) {
-               for (i = 0; i < priv->rx_ring_size; i++)
-                       if (priv->rx_data[i]) {
-                               if (priv->rx_dma && priv->rx_dma[i].rxd1)
+       if (ring->rx_data) {
+               for (i = 0; i < ring->rx_ring_size; i++)
+                       if (ring->rx_data[i]) {
+                               if (ring->rx_dma && ring->rx_dma[i].rxd1)
                                        dma_unmap_single(&priv->netdev->dev,
-                                                       priv->rx_dma[i].rxd1,
-                                                       priv->rx_buf_size,
+                                                       ring->rx_dma[i].rxd1,
+                                                       ring->rx_buf_size,
                                                        DMA_FROM_DEVICE);
-                               put_page(virt_to_head_page(priv->rx_data[i]));
+                               put_page(virt_to_head_page(ring->rx_data[i]));
                        }
 
-               kfree(priv->rx_data);
-               priv->rx_data = NULL;
+               kfree(ring->rx_data);
+               ring->rx_data = NULL;
        }
 
-       if (priv->rx_dma) {
+       if (ring->rx_dma) {
                dma_free_coherent(&priv->netdev->dev,
-                               priv->rx_ring_size * sizeof(*priv->rx_dma),
-                               priv->rx_dma,
-                               priv->rx_phys);
-               priv->rx_dma = NULL;
+                               ring->rx_ring_size * sizeof(*ring->rx_dma),
+                               ring->rx_dma,
+                               ring->rx_phys);
+               ring->rx_dma = NULL;
        }
 }
 
 static int fe_alloc_rx(struct fe_priv *priv)
 {
        struct net_device *netdev = priv->netdev;
+       struct fe_rx_ring *ring = &priv->rx_ring;
        int i, pad;
 
-       priv->rx_data = kcalloc(priv->rx_ring_size, sizeof(*priv->rx_data),
+       ring->rx_data = kcalloc(ring->rx_ring_size, sizeof(*ring->rx_data),
                        GFP_KERNEL);
-       if (!priv->rx_data)
+       if (!ring->rx_data)
                goto no_rx_mem;
 
-       for (i = 0; i < priv->rx_ring_size; i++) {
-               priv->rx_data[i] = netdev_alloc_frag(priv->frag_size);
-               if (!priv->rx_data[i])
+       for (i = 0; i < ring->rx_ring_size; i++) {
+               ring->rx_data[i] = netdev_alloc_frag(ring->frag_size);
+               if (!ring->rx_data[i])
                        goto no_rx_mem;
        }
 
-       priv->rx_dma = dma_alloc_coherent(&netdev->dev,
-                       priv->rx_ring_size * sizeof(*priv->rx_dma),
-                       &priv->rx_phys,
+       ring->rx_dma = dma_alloc_coherent(&netdev->dev,
+                       ring->rx_ring_size * sizeof(*ring->rx_dma),
+                       &ring->rx_phys,
                        GFP_ATOMIC | __GFP_ZERO);
-       if (!priv->rx_dma)
+       if (!ring->rx_dma)
                goto no_rx_mem;
 
        if (priv->flags & FE_FLAG_RX_2B_OFFSET)
                pad = 0;
        else
                pad = NET_IP_ALIGN;
-       for (i = 0; i < priv->rx_ring_size; i++) {
+       for (i = 0; i < ring->rx_ring_size; i++) {
                dma_addr_t dma_addr = dma_map_single(&netdev->dev,
-                               priv->rx_data[i] + NET_SKB_PAD + pad,
-                               priv->rx_buf_size,
+                               ring->rx_data[i] + NET_SKB_PAD + pad,
+                               ring->rx_buf_size,
                                DMA_FROM_DEVICE);
                if (unlikely(dma_mapping_error(&netdev->dev, dma_addr)))
                        goto no_rx_mem;
-               priv->rx_dma[i].rxd1 = (unsigned int) dma_addr;
+               ring->rx_dma[i].rxd1 = (unsigned int) dma_addr;
 
                if (priv->flags & FE_FLAG_RX_SG_DMA)
-                       priv->rx_dma[i].rxd2 = RX_DMA_PLEN0(priv->rx_buf_size);
+                       ring->rx_dma[i].rxd2 = RX_DMA_PLEN0(ring->rx_buf_size);
                else
-                       priv->rx_dma[i].rxd2 = RX_DMA_LSO;
+                       ring->rx_dma[i].rxd2 = RX_DMA_LSO;
        }
+       ring->rx_calc_idx = ring->rx_ring_size - 1;
        wmb();
 
-       fe_reg_w32(priv->rx_phys, FE_REG_RX_BASE_PTR0);
-       fe_reg_w32(priv->rx_ring_size, FE_REG_RX_MAX_CNT0);
-       fe_reg_w32((priv->rx_ring_size - 1), FE_REG_RX_CALC_IDX0);
+       fe_reg_w32(ring->rx_phys, FE_REG_RX_BASE_PTR0);
+       fe_reg_w32(ring->rx_ring_size, FE_REG_RX_MAX_CNT0);
+       fe_reg_w32(ring->rx_calc_idx, FE_REG_RX_CALC_IDX0);
        fe_reg_w32(FE_PST_DRX_IDX0, FE_REG_PDMA_RST_CFG);
 
        return 0;
@@ -310,51 +321,58 @@ static void fe_txd_unmap(struct device *dev, struct fe_tx_buf *tx_buf)
 static void fe_clean_tx(struct fe_priv *priv)
 {
        int i;
-
-       if (priv->tx_buf) {
-               for (i = 0; i < priv->tx_ring_size; i++)
-                       fe_txd_unmap(&priv->netdev->dev, &priv->tx_buf[i]);
-               kfree(priv->tx_buf);
-               priv->tx_buf = NULL;
+       struct device *dev = &priv->netdev->dev;
+       struct fe_tx_ring *ring = &priv->tx_ring;
+
+       if (ring->tx_buf) {
+               for (i = 0; i < ring->tx_ring_size; i++)
+                       fe_txd_unmap(dev, &ring->tx_buf[i]);
+               kfree(ring->tx_buf);
+               ring->tx_buf = NULL;
        }
 
-       if (priv->tx_dma) {
-               dma_free_coherent(&priv->netdev->dev,
-                               priv->tx_ring_size * sizeof(*priv->tx_dma),
-                               priv->tx_dma,
-                               priv->tx_phys);
-               priv->tx_dma = NULL;
+       if (ring->tx_dma) {
+               dma_free_coherent(dev,
+                               ring->tx_ring_size * sizeof(*ring->tx_dma),
+                               ring->tx_dma,
+                               ring->tx_phys);
+               ring->tx_dma = NULL;
        }
+
+       netdev_reset_queue(priv->netdev);
 }
 
 static int fe_alloc_tx(struct fe_priv *priv)
 {
        int i;
+       struct fe_tx_ring *ring = &priv->tx_ring;
 
-       priv->tx_free_idx = 0;
+       ring->tx_free_idx = 0;
+       ring->tx_next_idx = 0;
+       ring->tx_thresh = max((unsigned long)ring->tx_ring_size >> 2, MAX_SKB_FRAGS);
 
-       priv->tx_buf = kcalloc(priv->tx_ring_size, sizeof(*priv->tx_buf),
+       ring->tx_buf = kcalloc(ring->tx_ring_size, sizeof(*ring->tx_buf),
                        GFP_KERNEL);
-       if (!priv->tx_buf)
+       if (!ring->tx_buf)
                goto no_tx_mem;
 
-       priv->tx_dma = dma_alloc_coherent(&priv->netdev->dev,
-                       priv->tx_ring_size * sizeof(*priv->tx_dma),
-                       &priv->tx_phys,
+       ring->tx_dma = dma_alloc_coherent(&priv->netdev->dev,
+                       ring->tx_ring_size * sizeof(*ring->tx_dma),
+                       &ring->tx_phys,
                        GFP_ATOMIC | __GFP_ZERO);
-       if (!priv->tx_dma)
+       if (!ring->tx_dma)
                goto no_tx_mem;
 
-       for (i = 0; i < priv->tx_ring_size; i++) {
+       for (i = 0; i < ring->tx_ring_size; i++) {
                if (priv->soc->tx_dma) {
-                       priv->soc->tx_dma(&priv->tx_dma[i]);
+                       priv->soc->tx_dma(&ring->tx_dma[i]);
                }
-               priv->tx_dma[i].txd2 = TX_DMA_DESP2_DEF;
+               ring->tx_dma[i].txd2 = TX_DMA_DESP2_DEF;
        }
        wmb();
 
-       fe_reg_w32(priv->tx_phys, FE_REG_TX_BASE_PTR0);
-       fe_reg_w32(priv->tx_ring_size, FE_REG_TX_MAX_CNT0);
+       fe_reg_w32(ring->tx_phys, FE_REG_TX_BASE_PTR0);
+       fe_reg_w32(ring->tx_ring_size, FE_REG_TX_MAX_CNT0);
        fe_reg_w32(0, FE_REG_TX_CTX_IDX0);
        fe_reg_w32(FE_PST_DTX_IDX0, FE_REG_PDMA_RST_CFG);
 
@@ -383,8 +401,6 @@ static void fe_free_dma(struct fe_priv *priv)
 {
        fe_clean_tx(priv);
        fe_clean_rx(priv);
-
-       netdev_reset_queue(priv->netdev);
 }
 
 void fe_stats_update(struct fe_priv *priv)
@@ -514,7 +530,7 @@ static int fe_vlan_rx_kill_vid(struct net_device *dev,
        u32 idx = (vid & 0xf);
 
        if (!((fe_reg_table[FE_REG_FE_DMA_VID_BASE]) &&
-                               (dev->features | NETIF_F_HW_VLAN_CTAG_TX)))
+                               (dev->features & NETIF_F_HW_VLAN_CTAG_TX)))
                return 0;
 
        clear_bit(idx, &priv->vlan_map);
@@ -522,8 +538,16 @@ static int fe_vlan_rx_kill_vid(struct net_device *dev,
        return 0;
 }
 
+static inline u32 fe_empty_txd(struct fe_tx_ring *ring)
+{
+       barrier();
+       return (u32)(ring->tx_ring_size -
+                       ((ring->tx_next_idx - ring->tx_free_idx) &
+                        (ring->tx_ring_size - 1)));
+}
+
 static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev,
-               int idx, int tx_num)
+               int tx_num, struct fe_tx_ring *ring)
 {
        struct fe_priv *priv = netdev_priv(dev);
        struct skb_frag_struct *frag;
@@ -534,7 +558,7 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev,
        u32 def_txd4;
        int i, j, k, frag_size, frag_map_size, offset;
 
-       tx_buf = &priv->tx_buf[idx];
+       tx_buf = &ring->tx_buf[ring->tx_next_idx];
        memset(tx_buf, 0, sizeof(*tx_buf));
        memset(&txd, 0, sizeof(txd));
        nr_frags = skb_shinfo(skb)->nr_frags;
@@ -551,13 +575,15 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev,
                txd.txd4 |= TX_DMA_CHKSUM;
 
        /* VLAN header offload */
-       if (vlan_tx_tag_present(skb)) {
+       if (skb_vlan_tag_present(skb)) {
+               u16 tag = skb_vlan_tag_get(skb);
+
                if (IS_ENABLED(CONFIG_SOC_MT7621))
-                       txd.txd4 |= TX_DMA_INS_VLAN_MT7621 | vlan_tx_tag_get(skb);
+                       txd.txd4 |= TX_DMA_INS_VLAN_MT7621 | tag;
                else
                        txd.txd4 |= TX_DMA_INS_VLAN |
-                               ((vlan_tx_tag_get(skb) >> VLAN_PRIO_SHIFT) << 4) |
-                               (vlan_tx_tag_get(skb) & 0xF);
+                               ((tag >> VLAN_PRIO_SHIFT) << 4) |
+                               (tag & 0xF);
        }
 
        /* TSO: fill MSS info in tcp checksum field */
@@ -586,7 +612,7 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev,
        dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb));
 
        /* TX SG offload */
-       j = idx;
+       j = ring->tx_next_idx;
        k = 0;
        for (i = 0; i < nr_frags; i++) {
                offset = 0;
@@ -606,7 +632,7 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev,
                                txd.txd2 = TX_DMA_PLEN0(frag_map_size);
                                txd.txd4 = def_txd4;
 
-                               tx_buf = &priv->tx_buf[j];
+                               tx_buf = &ring->tx_buf[j];
                                memset(tx_buf, 0, sizeof(*tx_buf));
 
                                tx_buf->flags |= FE_TX_FLAGS_PAGE0;
@@ -623,7 +649,7 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev,
 
                                if (!((i == (nr_frags -1)) &&
                                                        (frag_map_size == frag_size))) {
-                                       fe_set_txd(&txd, &priv->tx_dma[j]);
+                                       fe_set_txd(&txd, &ring->tx_dma[j]);
                                        memset(&txd, 0, sizeof(txd));
                                }
                        }
@@ -638,7 +664,7 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev,
                txd.txd2 |= TX_DMA_LS1;
        else
                txd.txd2 |= TX_DMA_LS0;
-       fe_set_txd(&txd, &priv->tx_dma[j]);
+       fe_set_txd(&txd, &ring->tx_dma[j]);
 
        /* store skb to cleanup */
        tx_buf->skb = skb;
@@ -646,17 +672,25 @@ static int fe_tx_map_dma(struct sk_buff *skb, struct net_device *dev,
        netdev_sent_queue(dev, skb->len);
        skb_tx_timestamp(skb);
 
-       j = NEXT_TX_DESP_IDX(j);
+       ring->tx_next_idx = NEXT_TX_DESP_IDX(j);
        wmb();
-       fe_reg_w32(j, FE_REG_TX_CTX_IDX0);
+       if (unlikely(fe_empty_txd(ring) <= ring->tx_thresh)) {
+               netif_stop_queue(dev);
+               smp_mb();
+               if (unlikely(fe_empty_txd(ring) > ring->tx_thresh))
+                       netif_wake_queue(dev);
+       }
+
+       if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) || !skb->xmit_more)
+               fe_reg_w32(ring->tx_next_idx, FE_REG_TX_CTX_IDX0);
 
        return 0;
 
 err_dma:
-       j = idx;
+       j = ring->tx_next_idx;
        for (i = 0; i < tx_num; i++) {
-               ptxd = &priv->tx_dma[j];
-               tx_buf = &priv->tx_buf[j];
+               ptxd = &ring->tx_dma[j];
+               tx_buf = &ring->tx_buf[j];
 
                /* unmap dma */
                fe_txd_unmap(&dev->dev, tx_buf);
@@ -679,8 +713,7 @@ static inline int fe_skb_padto(struct sk_buff *skb, struct fe_priv *priv) {
                if ((priv->flags & FE_FLAG_PADDING_64B) &&
                                !(priv->flags & FE_FLAG_PADDING_BUG))
                        return ret;
-
-               if (vlan_tx_tag_present(skb))
+               if (skb_vlan_tag_present(skb))
                        len = ETH_ZLEN;
                else if (skb->protocol == cpu_to_be16(ETH_P_8021Q))
                        len = VLAN_ETH_ZLEN;
@@ -700,12 +733,6 @@ static inline int fe_skb_padto(struct sk_buff *skb, struct fe_priv *priv) {
        return ret;
 }
 
-static inline u32 fe_empty_txd(struct fe_priv *priv, u32 tx_fill_idx)
-{
-       return (u32)(priv->tx_ring_size - ((tx_fill_idx - priv->tx_free_idx) &
-                               (priv->tx_ring_size - 1)));
-}
-
 static inline int fe_cal_txd_req(struct sk_buff *skb)
 {
        int i, nfrags;
@@ -727,8 +754,8 @@ static inline int fe_cal_txd_req(struct sk_buff *skb)
 static int fe_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct fe_priv *priv = netdev_priv(dev);
+       struct fe_tx_ring *ring = &priv->tx_ring;
        struct net_device_stats *stats = &dev->stats;
-       u32 tx;
        int tx_num;
        int len = skb->len;
 
@@ -738,8 +765,7 @@ static int fe_start_xmit(struct sk_buff *skb, struct net_device *dev)
        }
 
        tx_num = fe_cal_txd_req(skb);
-       tx = fe_reg_r32(FE_REG_TX_CTX_IDX0);
-       if (unlikely(fe_empty_txd(priv, tx) <= tx_num))
+       if (unlikely(fe_empty_txd(ring) <= tx_num))
        {
                netif_stop_queue(dev);
                netif_err(priv, tx_queued,dev,
@@ -747,7 +773,7 @@ static int fe_start_xmit(struct sk_buff *skb, struct net_device *dev)
                return NETDEV_TX_BUSY;
        }
 
-       if (fe_tx_map_dma(skb, dev, tx, tx_num) < 0) {
+       if (fe_tx_map_dma(skb, dev, tx_num, ring) < 0) {
                stats->tx_dropped++;
        } else {
                stats->tx_packets++;
@@ -777,8 +803,9 @@ static int fe_poll_rx(struct napi_struct *napi, int budget,
        struct net_device *netdev = priv->netdev;
        struct net_device_stats *stats = &netdev->stats;
        struct fe_soc_data *soc = priv->soc;
+       struct fe_rx_ring *ring = &priv->rx_ring;
+       int idx = ring->rx_calc_idx;
        u32 checksum_bit;
-       int idx = fe_reg_r32(FE_REG_RX_CALC_IDX0);
        struct sk_buff *skb;
        u8 *data, *new_data;
        struct fe_rx_dma *rxd, trxd;
@@ -799,22 +826,22 @@ static int fe_poll_rx(struct napi_struct *napi, int budget,
                unsigned int pktlen;
                dma_addr_t dma_addr;
                idx = NEXT_RX_DESP_IDX(idx);
-               rxd = &priv->rx_dma[idx];
-               data = priv->rx_data[idx];
+               rxd = &ring->rx_dma[idx];
+               data = ring->rx_data[idx];
 
                fe_get_rxd(&trxd, rxd);
                if (!(trxd.rxd2 & RX_DMA_DONE))
                        break;
 
                /* alloc new buffer */
-               new_data = netdev_alloc_frag(priv->frag_size);
+               new_data = netdev_alloc_frag(ring->frag_size);
                if (unlikely(!new_data)) {
                        stats->rx_dropped++;
                        goto release_desc;
                }
                dma_addr = dma_map_single(&netdev->dev,
                                new_data + NET_SKB_PAD + pad,
-                               priv->rx_buf_size,
+                               ring->rx_buf_size,
                                DMA_FROM_DEVICE);
                if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) {
                        put_page(virt_to_head_page(new_data));
@@ -822,7 +849,7 @@ static int fe_poll_rx(struct napi_struct *napi, int budget,
                }
 
                /* receive data */
-               skb = build_skb(data, priv->frag_size);
+               skb = build_skb(data, ring->frag_size);
                if (unlikely(!skb)) {
                        put_page(virt_to_head_page(new_data));
                        goto release_desc;
@@ -830,8 +857,8 @@ static int fe_poll_rx(struct napi_struct *napi, int budget,
                skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
 
                dma_unmap_single(&netdev->dev, trxd.rxd1,
-                               priv->rx_buf_size, DMA_FROM_DEVICE);
-               pktlen = RX_DMA_PLEN0(trxd.rxd2);
+                               ring->rx_buf_size, DMA_FROM_DEVICE);
+               pktlen = RX_DMA_GET_PLEN0(trxd.rxd2);
                skb->dev = netdev;
                skb_put(skb, pktlen);
                if (trxd.rxd4 & checksum_bit) {
@@ -848,17 +875,18 @@ static int fe_poll_rx(struct napi_struct *napi, int budget,
 
                napi_gro_receive(napi, skb);
 
-               priv->rx_data[idx] = new_data;
+               ring->rx_data[idx] = new_data;
                rxd->rxd1 = (unsigned int) dma_addr;
 
 release_desc:
                if (priv->flags & FE_FLAG_RX_SG_DMA)
-                       rxd->rxd2 = RX_DMA_PLEN0(priv->rx_buf_size);
+                       rxd->rxd2 = RX_DMA_PLEN0(ring->rx_buf_size);
                else
                        rxd->rxd2 = RX_DMA_LSO;
 
+               ring->rx_calc_idx = idx;
                wmb();
-               fe_reg_w32(idx, FE_REG_RX_CALC_IDX0);
+               fe_reg_w32(ring->rx_calc_idx, FE_REG_RX_CALC_IDX0);
                done++;
        }
 
@@ -868,7 +896,8 @@ release_desc:
        return done;
 }
 
-static int fe_poll_tx(struct fe_priv *priv, int budget, u32 tx_intr)
+static int fe_poll_tx(struct fe_priv *priv, int budget, u32 tx_intr,
+               int *tx_again)
 {
        struct net_device *netdev = priv->netdev;
        struct device *dev = &netdev->dev;
@@ -877,13 +906,13 @@ static int fe_poll_tx(struct fe_priv *priv, int budget, u32 tx_intr)
        struct fe_tx_buf *tx_buf;
        int done = 0;
        u32 idx, hwidx;
+       struct fe_tx_ring *ring = &priv->tx_ring;
 
+       idx = ring->tx_free_idx;
        hwidx = fe_reg_r32(FE_REG_TX_DTX_IDX0);
-       idx = priv->tx_free_idx;
 
-txpoll_again:
        while ((idx != hwidx) && budget) {
-               tx_buf = &priv->tx_buf[idx];
+               tx_buf = &ring->tx_buf[idx];
                skb = tx_buf->skb;
 
                if (!skb)
@@ -897,22 +926,24 @@ txpoll_again:
                fe_txd_unmap(dev, tx_buf);
                idx = NEXT_TX_DESP_IDX(idx);
        }
-       priv->tx_free_idx = idx;
+       ring->tx_free_idx = idx;
 
-       if (budget) {
-               fe_reg_w32(tx_intr, FE_REG_FE_INT_STATUS);
+       if (idx == hwidx) {
+               /* read hw index again to make sure there is no new tx packet */
                hwidx = fe_reg_r32(FE_REG_TX_DTX_IDX0);
-               if (idx != hwidx)
-                       goto txpoll_again;
-       }
-
-       if (!done)
-               return 0;
-
-       netdev_completed_queue(netdev, done, bytes_compl);
-       if (unlikely(netif_queue_stopped(netdev) &&
-                               netif_carrier_ok(netdev))) {
-               netif_wake_queue(netdev);
+               if (idx == hwidx)
+                       fe_reg_w32(tx_intr, FE_REG_FE_INT_STATUS);
+               else
+                       *tx_again = 1;
+       } else
+               *tx_again = 1;
+
+       if (done) {
+               netdev_completed_queue(netdev, done, bytes_compl);
+               smp_mb();
+               if (unlikely(netif_queue_stopped(netdev) &&
+                                       (fe_empty_txd(ring) > ring->tx_thresh)))
+                       netif_wake_queue(netdev);
        }
 
        return done;
@@ -922,7 +953,7 @@ static int fe_poll(struct napi_struct *napi, int budget)
 {
        struct fe_priv *priv = container_of(napi, struct fe_priv, rx_napi);
        struct fe_hw_stats *hwstat = priv->hw_stats;
-       int tx_done, rx_done;
+       int tx_done, rx_done, tx_again;
        u32 status, fe_status, status_reg, mask;
        u32 tx_intr, rx_intr, status_intr;
 
@@ -930,7 +961,7 @@ static int fe_poll(struct napi_struct *napi, int budget)
        tx_intr = priv->soc->tx_int;
        rx_intr = priv->soc->rx_int;
        status_intr = priv->soc->status_int;
-       tx_done = rx_done = 0;
+       tx_done = rx_done = tx_again = 0;
 
        if (fe_reg_table[FE_REG_FE_INT_STATUS2]) {
                fe_status = fe_reg_r32(FE_REG_FE_INT_STATUS2);
@@ -939,7 +970,7 @@ static int fe_poll(struct napi_struct *napi, int budget)
                status_reg = FE_REG_FE_INT_STATUS;
 
        if (status & tx_intr)
-               tx_done = fe_poll_tx(priv, budget, tx_intr);
+               tx_done = fe_poll_tx(priv, budget, tx_intr, &tx_again);
 
        if (status & rx_intr)
                rx_done = fe_poll_rx(napi, budget, priv, rx_intr);
@@ -959,13 +990,18 @@ static int fe_poll(struct napi_struct *napi, int budget)
                                tx_done, rx_done, status, mask);
        }
 
-       if ((tx_done < budget) && (rx_done < budget)) {
+       if (!tx_again && (rx_done < budget)) {
                status = fe_reg_r32(FE_REG_FE_INT_STATUS);
-               if (status & (tx_intr | rx_intr ))
+               if (status & (tx_intr | rx_intr)) {
+                       /* let napi poll again */
+                       rx_done = budget;
                        goto poll_again;
+               }
 
                napi_complete(napi);
                fe_int_enable(tx_intr | rx_intr);
+       } else {
+                       rx_done = budget;
        }
 
 poll_again:
@@ -975,6 +1011,7 @@ poll_again:
 static void fe_tx_timeout(struct net_device *dev)
 {
        struct fe_priv *priv = netdev_priv(dev);
+       struct fe_tx_ring *ring = &priv->tx_ring;
 
        priv->netdev->stats.tx_errors++;
        netif_err(priv, tx_err, dev,
@@ -982,12 +1019,13 @@ static void fe_tx_timeout(struct net_device *dev)
        netif_info(priv, drv, dev, "dma_cfg:%08x\n",
                        fe_reg_r32(FE_REG_PDMA_GLO_CFG));
        netif_info(priv, drv, dev, "tx_ring=%d, " \
-                       "base=%08x, max=%u, ctx=%u, dtx=%u, fdx=%d\n", 0,
+                       "base=%08x, max=%u, ctx=%u, dtx=%u, fdx=%hu, next=%hu\n", 0,
                        fe_reg_r32(FE_REG_TX_BASE_PTR0),
                        fe_reg_r32(FE_REG_TX_MAX_CNT0),
                        fe_reg_r32(FE_REG_TX_CTX_IDX0),
                        fe_reg_r32(FE_REG_TX_DTX_IDX0),
-                       priv->tx_free_idx
+                       ring->tx_free_idx,
+                       ring->tx_next_idx
                  );
        netif_info(priv, drv, dev, "rx_ring=%d, " \
                        "base=%08x, max=%u, calc=%u, drx=%u\n", 0,
@@ -1153,7 +1191,6 @@ static int fe_open(struct net_device *dev)
                goto err_out;
 
        spin_lock_irqsave(&priv->page_lock, flags);
-       napi_enable(&priv->rx_napi);
 
        val = FE_TX_WB_DDONE | FE_RX_DMA_EN | FE_TX_DMA_EN;
        if (priv->flags & FE_FLAG_RX_2B_OFFSET)
@@ -1169,8 +1206,9 @@ static int fe_open(struct net_device *dev)
        if (priv->soc->has_carrier && priv->soc->has_carrier(priv))
                netif_carrier_on(dev);
 
-       netif_start_queue(dev);
+       napi_enable(&priv->rx_napi);
        fe_int_enable(priv->soc->tx_int | priv->soc->rx_int);
+       netif_start_queue(dev);
 
        return 0;
 
@@ -1185,15 +1223,14 @@ static int fe_stop(struct net_device *dev)
        unsigned long flags;
        int i;
 
-       fe_int_disable(priv->soc->tx_int | priv->soc->rx_int);
-
        netif_tx_disable(dev);
+       fe_int_disable(priv->soc->tx_int | priv->soc->rx_int);
+       napi_disable(&priv->rx_napi);
 
        if (priv->phy)
                priv->phy->stop(priv);
 
        spin_lock_irqsave(&priv->page_lock, flags);
-       napi_disable(&priv->rx_napi);
 
        fe_reg_w32(fe_reg_r32(FE_REG_PDMA_GLO_CFG) &
                     ~(FE_TX_WB_DDONE | FE_RX_DMA_EN | FE_TX_DMA_EN),
@@ -1324,10 +1361,10 @@ static int fe_change_mtu(struct net_device *dev, int new_mtu)
                return 0;
 
        if (new_mtu <= ETH_DATA_LEN)
-               priv->frag_size = fe_max_frag_size(ETH_DATA_LEN);
+               priv->rx_ring.frag_size = fe_max_frag_size(ETH_DATA_LEN);
        else
-               priv->frag_size = PAGE_SIZE;
-       priv->rx_buf_size = fe_max_buf_size(priv->frag_size);
+               priv->rx_ring.frag_size = PAGE_SIZE;
+       priv->rx_ring.rx_buf_size = fe_max_buf_size(priv->rx_ring.frag_size);
 
        if (!netif_running(dev))
                return 0;
@@ -1479,16 +1516,16 @@ static int fe_probe(struct platform_device *pdev)
        priv->device = &pdev->dev;
        priv->soc = soc;
        priv->msg_enable = netif_msg_init(fe_msg_level, FE_DEFAULT_MSG_ENABLE);
-       priv->frag_size = fe_max_frag_size(ETH_DATA_LEN);
-       priv->rx_buf_size = fe_max_buf_size(priv->frag_size);
-       priv->tx_ring_size = priv->rx_ring_size = NUM_DMA_DESC;
+       priv->rx_ring.frag_size = fe_max_frag_size(ETH_DATA_LEN);
+       priv->rx_ring.rx_buf_size = fe_max_buf_size(priv->rx_ring.frag_size);
+       priv->tx_ring.tx_ring_size = priv->rx_ring.rx_ring_size = NUM_DMA_DESC;
        INIT_WORK(&priv->pending_work, fe_pending_work);
 
        napi_weight = 32;
        if (priv->flags & FE_FLAG_NAPI_WEIGHT) {
-               napi_weight *= 2;
-               priv->tx_ring_size *= 2;
-               priv->rx_ring_size *= 2;
+               napi_weight *= 4;
+               priv->tx_ring.tx_ring_size *= 4;
+               priv->rx_ring.rx_ring_size *= 4;
        }
        netif_napi_add(netdev, &priv->rx_napi, fe_poll, napi_weight);
        fe_set_ethtool_ops(netdev);