kernel: add bgmac fixes for various issues
author Felix Fietkau <nbd@openwrt.org>
Sun, 12 Apr 2015 10:35:21 +0000 (10:35 +0000)
committer Felix Fietkau <nbd@openwrt.org>
Sun, 12 Apr 2015 10:35:21 +0000 (10:35 +0000)
Signed-off-by: Felix Fietkau <nbd@openwrt.org>
SVN-Revision: 45388

target/linux/generic/patches-3.18/077-04-bgmac-simplify-tx-ring-index-handling.patch [new file with mode: 0644]
target/linux/generic/patches-3.18/077-05-bgmac-leave-interrupts-disabled-as-long-as-there-is-.patch [new file with mode: 0644]
target/linux/generic/patches-3.18/077-06-bgmac-set-received-skb-headroom-to-NET_SKB_PAD.patch [new file with mode: 0644]
target/linux/generic/patches-3.18/077-07-bgmac-fix-DMA-rx-corruption.patch [new file with mode: 0644]

diff --git a/target/linux/generic/patches-3.18/077-04-bgmac-simplify-tx-ring-index-handling.patch b/target/linux/generic/patches-3.18/077-04-bgmac-simplify-tx-ring-index-handling.patch
new file mode 100644 (file)
index 0000000..cf62a50
--- /dev/null
@@ -0,0 +1,123 @@
+From: Felix Fietkau <nbd@openwrt.org>
+Date: Sun, 12 Apr 2015 09:58:56 +0200
+Subject: [PATCH] bgmac: simplify tx ring index handling
+
+Keep incrementing ring->start and ring->end instead of wrapping them to
+the actual ring slot index. This simplifies the calculation of the
+number of free slots.
+
+Signed-off-by: Felix Fietkau <nbd@openwrt.org>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -142,11 +142,10 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+ {
+       struct device *dma_dev = bgmac->core->dma_dev;
+       struct net_device *net_dev = bgmac->net_dev;
+-      struct bgmac_slot_info *slot = &ring->slots[ring->end];
+-      int free_slots;
++      int index = ring->end % BGMAC_TX_RING_SLOTS;
++      struct bgmac_slot_info *slot = &ring->slots[index];
+       int nr_frags;
+       u32 flags;
+-      int index = ring->end;
+       int i;
+       if (skb->len > BGMAC_DESC_CTL1_LEN) {
+@@ -158,13 +157,7 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+               skb_checksum_help(skb);
+       nr_frags = skb_shinfo(skb)->nr_frags;
+-
+-      if (ring->start <= ring->end)
+-              free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
+-      else
+-              free_slots = ring->start - ring->end;
+-
+-      if (free_slots <= nr_frags + 1) {
++      if (ring->end - ring->start + nr_frags + 1 >= BGMAC_TX_RING_SLOTS) {
+               bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
+               netif_stop_queue(net_dev);
+               return NETDEV_TX_BUSY;
+@@ -200,7 +193,7 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+       }
+       slot->skb = skb;
+-
++      ring->end += nr_frags + 1;
+       netdev_sent_queue(net_dev, skb->len);
+       wmb();
+@@ -208,13 +201,12 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+       /* Increase ring->end to point empty slot. We tell hardware the first
+        * slot it should *not* read.
+        */
+-      ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
+       bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
+                   ring->index_base +
+-                  ring->end * sizeof(struct bgmac_dma_desc));
++                  (ring->end % BGMAC_TX_RING_SLOTS) *
++                  sizeof(struct bgmac_dma_desc));
+-      free_slots -= nr_frags + 1;
+-      if (free_slots < 8)
++      if (ring->end - ring->start >= BGMAC_TX_RING_SLOTS - 8)
+               netif_stop_queue(net_dev);
+       return NETDEV_TX_OK;
+@@ -256,17 +248,17 @@ static void bgmac_dma_tx_free(struct bgm
+       empty_slot &= BGMAC_DMA_TX_STATDPTR;
+       empty_slot /= sizeof(struct bgmac_dma_desc);
+-      while (ring->start != empty_slot) {
+-              struct bgmac_slot_info *slot = &ring->slots[ring->start];
+-              u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
+-              int len = ctl1 & BGMAC_DESC_CTL1_LEN;
++      while (ring->start != ring->end) {
++              int slot_idx = ring->start % BGMAC_TX_RING_SLOTS;
++              struct bgmac_slot_info *slot = &ring->slots[slot_idx];
++              u32 ctl1;
++              int len;
+-              if (!slot->dma_addr) {
+-                      bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+-                                ring->start, ring->end);
+-                      goto next;
+-              }
++              if (slot_idx == empty_slot)
++                      break;
++              ctl1 = le32_to_cpu(ring->cpu_base[slot_idx].ctl1);
++              len = ctl1 & BGMAC_DESC_CTL1_LEN;
+               if (ctl1 & BGMAC_DESC_CTL0_SOF)
+                       /* Unmap no longer used buffer */
+                       dma_unmap_single(dma_dev, slot->dma_addr, len,
+@@ -284,10 +276,8 @@ static void bgmac_dma_tx_free(struct bgm
+                       slot->skb = NULL;
+               }
+-next:
+               slot->dma_addr = 0;
+-              if (++ring->start >= BGMAC_TX_RING_SLOTS)
+-                      ring->start = 0;
++              ring->start++;
+               freed = true;
+       }
+--- a/drivers/net/ethernet/broadcom/bgmac.h
++++ b/drivers/net/ethernet/broadcom/bgmac.h
+@@ -414,10 +414,10 @@ enum bgmac_dma_ring_type {
+  * empty.
+  */
+ struct bgmac_dma_ring {
+-      u16 num_slots;
+-      u16 start;
+-      u16 end;
++      u32 start;
++      u32 end;
++      u16 num_slots;
+       u16 mmio_base;
+       struct bgmac_dma_desc *cpu_base;
+       dma_addr_t dma_base;
diff --git a/target/linux/generic/patches-3.18/077-05-bgmac-leave-interrupts-disabled-as-long-as-there-is-.patch b/target/linux/generic/patches-3.18/077-05-bgmac-leave-interrupts-disabled-as-long-as-there-is-.patch
new file mode 100644 (file)
index 0000000..7974654
--- /dev/null
@@ -0,0 +1,87 @@
+From: Felix Fietkau <nbd@openwrt.org>
+Date: Sun, 12 Apr 2015 10:08:04 +0200
+Subject: [PATCH] bgmac: leave interrupts disabled as long as there is work
+ to do
+
+Always poll rx and tx during NAPI poll instead of relying on the status
+of the first interrupt. This prevents bgmac_poll from leaving unfinished
+work around until the next IRQ.
+In my tests this makes bridging/routing throughput under heavy load more
+stable and ensures that no new IRQs arrive as long as bgmac_poll uses up
+the entire budget.
+
+Signed-off-by: Felix Fietkau <nbd@openwrt.org>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -1105,8 +1105,6 @@ static void bgmac_chip_reset(struct bgma
+       bgmac_phy_init(bgmac);
+       netdev_reset_queue(bgmac->net_dev);
+-
+-      bgmac->int_status = 0;
+ }
+ static void bgmac_chip_intrs_on(struct bgmac *bgmac)
+@@ -1221,14 +1219,13 @@ static irqreturn_t bgmac_interrupt(int i
+       if (!int_status)
+               return IRQ_NONE;
+-      /* Ack */
+-      bgmac_write(bgmac, BGMAC_INT_STATUS, int_status);
++      int_status &= ~(BGMAC_IS_TX0 | BGMAC_IS_RX);
++      if (int_status)
++              bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", int_status);
+       /* Disable new interrupts until handling existing ones */
+       bgmac_chip_intrs_off(bgmac);
+-      bgmac->int_status = int_status;
+-
+       napi_schedule(&bgmac->napi);
+       return IRQ_HANDLED;
+@@ -1237,25 +1234,17 @@ static irqreturn_t bgmac_interrupt(int i
+ static int bgmac_poll(struct napi_struct *napi, int weight)
+ {
+       struct bgmac *bgmac = container_of(napi, struct bgmac, napi);
+-      struct bgmac_dma_ring *ring;
+       int handled = 0;
+-      if (bgmac->int_status & BGMAC_IS_TX0) {
+-              ring = &bgmac->tx_ring[0];
+-              bgmac_dma_tx_free(bgmac, ring);
+-              bgmac->int_status &= ~BGMAC_IS_TX0;
+-      }
++      /* Ack */
++      bgmac_write(bgmac, BGMAC_INT_STATUS, ~0);
+-      if (bgmac->int_status & BGMAC_IS_RX) {
+-              ring = &bgmac->rx_ring[0];
+-              handled += bgmac_dma_rx_read(bgmac, ring, weight);
+-              bgmac->int_status &= ~BGMAC_IS_RX;
+-      }
++      bgmac_dma_tx_free(bgmac, &bgmac->tx_ring[0]);
++      handled += bgmac_dma_rx_read(bgmac, &bgmac->rx_ring[0], weight);
+-      if (bgmac->int_status) {
+-              bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", bgmac->int_status);
+-              bgmac->int_status = 0;
+-      }
++      /* poll again if more events arrived in the mean time */
++      if (bgmac_read(bgmac, BGMAC_INT_STATUS) & (BGMAC_IS_TX0 | BGMAC_IS_RX))
++              return handled;
+       if (handled < weight) {
+               napi_complete(napi);
+--- a/drivers/net/ethernet/broadcom/bgmac.h
++++ b/drivers/net/ethernet/broadcom/bgmac.h
+@@ -452,7 +452,6 @@ struct bgmac {
+       /* Int */
+       u32 int_mask;
+-      u32 int_status;
+       /* Current MAC state */
+       int mac_speed;
diff --git a/target/linux/generic/patches-3.18/077-06-bgmac-set-received-skb-headroom-to-NET_SKB_PAD.patch b/target/linux/generic/patches-3.18/077-06-bgmac-set-received-skb-headroom-to-NET_SKB_PAD.patch
new file mode 100644 (file)
index 0000000..e7fde16
--- /dev/null
@@ -0,0 +1,66 @@
+From: Felix Fietkau <nbd@openwrt.org>
+Date: Sun, 12 Apr 2015 10:13:28 +0200
+Subject: [PATCH] bgmac: set received skb headroom to NET_SKB_PAD
+
+A packet buffer offset of 30 bytes is inefficient, because the first 2
+bytes end up in a different cacheline.
+
+Signed-off-by: Felix Fietkau <nbd@openwrt.org>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -342,13 +342,13 @@ static int bgmac_dma_rx_skb_for_slot(str
+               return -ENOMEM;
+       /* Poison - if everything goes fine, hardware will overwrite it */
+-      rx = buf;
++      rx = buf + BGMAC_RX_BUF_OFFSET;
+       rx->len = cpu_to_le16(0xdead);
+       rx->flags = cpu_to_le16(0xbeef);
+       /* Map skb for the DMA */
+-      dma_addr = dma_map_single(dma_dev, buf, BGMAC_RX_BUF_SIZE,
+-                                DMA_FROM_DEVICE);
++      dma_addr = dma_map_single(dma_dev, buf + BGMAC_RX_BUF_OFFSET,
++                                BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+       if (dma_mapping_error(dma_dev, dma_addr)) {
+               bgmac_err(bgmac, "DMA mapping error\n");
+               put_page(virt_to_head_page(buf));
+@@ -399,7 +399,7 @@ static int bgmac_dma_rx_read(struct bgma
+       while (ring->start != ring->end) {
+               struct device *dma_dev = bgmac->core->dma_dev;
+               struct bgmac_slot_info *slot = &ring->slots[ring->start];
+-              struct bgmac_rx_header *rx = slot->buf;
++              struct bgmac_rx_header *rx = slot->buf + BGMAC_RX_BUF_OFFSET;
+               struct sk_buff *skb;
+               void *buf = slot->buf;
+               u16 len, flags;
+@@ -450,8 +450,10 @@ static int bgmac_dma_rx_read(struct bgma
+                                        BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
+                       skb = build_skb(buf, BGMAC_RX_ALLOC_SIZE);
+-                      skb_put(skb, BGMAC_RX_FRAME_OFFSET + len);
+-                      skb_pull(skb, BGMAC_RX_FRAME_OFFSET);
++                      skb_put(skb, BGMAC_RX_FRAME_OFFSET +
++                              BGMAC_RX_BUF_OFFSET + len);
++                      skb_pull(skb, BGMAC_RX_FRAME_OFFSET +
++                               BGMAC_RX_BUF_OFFSET);
+                       skb_checksum_none_assert(skb);
+                       skb->protocol = eth_type_trans(skb, bgmac->net_dev);
+--- a/drivers/net/ethernet/broadcom/bgmac.h
++++ b/drivers/net/ethernet/broadcom/bgmac.h
+@@ -360,9 +360,11 @@
+ #define BGMAC_RX_HEADER_LEN                   28              /* Last 24 bytes are unused. Well... */
+ #define BGMAC_RX_FRAME_OFFSET                 30              /* There are 2 unused bytes between header and real data */
++#define BGMAC_RX_BUF_OFFSET                   (NET_SKB_PAD + NET_IP_ALIGN - \
++                                               BGMAC_RX_FRAME_OFFSET)
+ #define BGMAC_RX_MAX_FRAME_SIZE                       1536            /* Copied from b44/tg3 */
+ #define BGMAC_RX_BUF_SIZE                     (BGMAC_RX_FRAME_OFFSET + BGMAC_RX_MAX_FRAME_SIZE)
+-#define BGMAC_RX_ALLOC_SIZE                   (SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE) + \
++#define BGMAC_RX_ALLOC_SIZE                   (SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE + BGMAC_RX_BUF_OFFSET) + \
+                                                SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+ #define BGMAC_BFL_ENETROBO                    0x0010          /* has ephy roboswitch spi */
diff --git a/target/linux/generic/patches-3.18/077-07-bgmac-fix-DMA-rx-corruption.patch b/target/linux/generic/patches-3.18/077-07-bgmac-fix-DMA-rx-corruption.patch
new file mode 100644 (file)
index 0000000..55e8033
--- /dev/null
@@ -0,0 +1,54 @@
+From: Felix Fietkau <nbd@openwrt.org>
+Date: Sun, 12 Apr 2015 11:59:47 +0200
+Subject: [PATCH] bgmac: fix DMA rx corruption
+
+The driver needs to inform the hardware about the first invalid (not yet
+filled) rx slot, by writing its DMA descriptor pointer offset to the
+BGMAC_DMA_RX_INDEX register.
+
+This register was set to a value exceeding the rx ring size, effectively
+allowing the hardware constant access to the full ring, regardless of
+which slots are initialized.
+
+Fix this by updating the register in bgmac_dma_rx_setup_desc.
+
+Signed-off-by: Felix Fietkau <nbd@openwrt.org>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -380,6 +380,12 @@ static void bgmac_dma_rx_setup_desc(stru
+       dma_desc->addr_high = cpu_to_le32(upper_32_bits(ring->slots[desc_idx].dma_addr));
+       dma_desc->ctl0 = cpu_to_le32(ctl0);
+       dma_desc->ctl1 = cpu_to_le32(ctl1);
++
++      desc_idx = (desc_idx + 1) % BGMAC_RX_RING_SLOTS;
++
++      bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
++                      ring->index_base +
++                      desc_idx * sizeof(struct bgmac_dma_desc));
+ }
+ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
+@@ -394,9 +400,7 @@ static int bgmac_dma_rx_read(struct bgma
+       end_slot &= BGMAC_DMA_RX_STATDPTR;
+       end_slot /= sizeof(struct bgmac_dma_desc);
+-      ring->end = end_slot;
+-
+-      while (ring->start != ring->end) {
++      while (ring->start != end_slot) {
+               struct device *dma_dev = bgmac->core->dma_dev;
+               struct bgmac_slot_info *slot = &ring->slots[ring->start];
+               struct bgmac_rx_header *rx = slot->buf + BGMAC_RX_BUF_OFFSET;
+@@ -693,10 +697,6 @@ static void bgmac_dma_init(struct bgmac 
+               for (j = 0; j < ring->num_slots; j++)
+                       bgmac_dma_rx_setup_desc(bgmac, ring, j);
+-              bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
+-                          ring->index_base +
+-                          ring->num_slots * sizeof(struct bgmac_dma_desc));
+-
+               ring->start = 0;
+               ring->end = 0;
+       }