kernel: split patches folder up into backport, pending and hack folders
[openwrt/staging/mkresin.git] / target / linux / generic / pending-3.18 / 077-03-bgmac-implement-scatter-gather-support.patch
diff --git a/target/linux/generic/pending-3.18/077-03-bgmac-implement-scatter-gather-support.patch b/target/linux/generic/pending-3.18/077-03-bgmac-implement-scatter-gather-support.patch
new file mode 100644 (file)
index 0000000..ceb25e8
--- /dev/null
@@ -0,0 +1,267 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Mon, 23 Mar 2015 02:42:26 +0100
+Subject: [PATCH] bgmac: implement scatter/gather support
+
+Always use software checksumming, since the hardware does not have any
+checksum offload support.
+This significantly improves local TCP tx performance.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -115,53 +115,91 @@ static void bgmac_dma_tx_enable(struct b
+       bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
+ }
++/* Write TX descriptor i: DMA address from slots[i], flags ctl0, length len. */
++static void
++bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
++                   int i, int len, u32 ctl0)
++{
++      struct bgmac_slot_info *slot = &ring->slots[i];
++      struct bgmac_dma_desc *desc = &ring->cpu_base[i];
++
++      /* The descriptor in the last ring slot additionally carries EOT. */
++      if (i == ring->num_slots - 1)
++              ctl0 |= BGMAC_DESC_CTL0_EOT;
++
++      /* The caller must have stored the mapping in slots[i] beforehand. */
++      desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
++      desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
++      desc->ctl0 = cpu_to_le32(ctl0);
++
++      /* Only the bits covered by BGMAC_DESC_CTL1_LEN are kept from len. */
++      desc->ctl1 = cpu_to_le32(len & BGMAC_DESC_CTL1_LEN);
++}
++
+ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
+                                   struct bgmac_dma_ring *ring,
+                                   struct sk_buff *skb)
+ {
+       struct device *dma_dev = bgmac->core->dma_dev;
+       struct net_device *net_dev = bgmac->net_dev;
+-      struct bgmac_dma_desc *dma_desc;
+-      struct bgmac_slot_info *slot;
+-      u32 ctl0, ctl1;
++      struct bgmac_slot_info *slot = &ring->slots[ring->end]; /* head slot */
+       int free_slots;
++      int nr_frags;
++      u32 flags;
++      int index = ring->end;
++      int i;
+       if (skb->len > BGMAC_DESC_CTL1_LEN) {
+               bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
+-              goto err_stop_drop;
++              goto err_drop;
+       }
++      if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
++              goto err_drop; /* no HW offload and SW checksum failed */
++
++      nr_frags = skb_shinfo(skb)->nr_frags;
++
+       if (ring->start <= ring->end)
+               free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
+       else
+               free_slots = ring->start - ring->end;
+-      if (free_slots == 1) {
++
++      if (free_slots <= nr_frags + 1) { /* head + frags + 1 spare slot */
+               bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
+               netif_stop_queue(net_dev);
+               return NETDEV_TX_BUSY;
+       }
+-      slot = &ring->slots[ring->end];
+-      slot->skb = skb;
+-      slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
++      slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
+                                       DMA_TO_DEVICE);
+-      if (dma_mapping_error(dma_dev, slot->dma_addr)) {
+-              bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
+-                        ring->mmio_base);
+-              goto err_stop_drop;
+-      }
++      if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
++              goto err_dma_head;
+-      ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
+-      if (ring->end == ring->num_slots - 1)
+-              ctl0 |= BGMAC_DESC_CTL0_EOT;
+-      ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
++      flags = BGMAC_DESC_CTL0_SOF;
++      if (!nr_frags) /* linear skb: the head is also the last descriptor */
++              flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
++
++      bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
++      flags = 0; /* SOF belongs to the head descriptor only */
++
++      for (i = 0; i < nr_frags; i++) {
++              struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
++              int len = skb_frag_size(frag);
++
++              index = (index + 1) % BGMAC_TX_RING_SLOTS;
++              slot = &ring->slots[index];
++              slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
++                                                len, DMA_TO_DEVICE);
++              if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
++                      goto err_dma;
+-      dma_desc = ring->cpu_base;
+-      dma_desc += ring->end;
+-      dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
+-      dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
+-      dma_desc->ctl0 = cpu_to_le32(ctl0);
+-      dma_desc->ctl1 = cpu_to_le32(ctl1);
++              if (i == nr_frags - 1)
++                      flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
++
++              bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
++      }
++
++      slot->skb = skb; /* kept in the frame's last slot */
+       netdev_sent_queue(net_dev, skb->len);
+@@ -170,20 +208,35 @@ static netdev_tx_t bgmac_dma_tx_add(stru
+       /* Increase ring->end to point empty slot. We tell hardware the first
+        * slot it should *not* read.
+        */
+-      if (++ring->end >= BGMAC_TX_RING_SLOTS)
+-              ring->end = 0;
++      ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
+       bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
+                   ring->index_base +
+                   ring->end * sizeof(struct bgmac_dma_desc));
+-      /* Always keep one slot free to allow detecting bugged calls. */
+-      if (--free_slots == 1)
++      free_slots -= nr_frags + 1;
++      if (free_slots < 8)
+               netif_stop_queue(net_dev);
+       return NETDEV_TX_OK;
+-err_stop_drop:
+-      netif_stop_queue(net_dev);
++err_dma:
++      /* slot is the failed fragment here; the head buffer is at ring->end */
++      dma_unmap_single(dma_dev, ring->slots[ring->end].dma_addr,
++                       skb_headlen(skb), DMA_TO_DEVICE);
++
++      /* Fragments 0..i-1 were mapped into slots end+1..end+i */
++      for (; i > 0; i--) {
++              index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
++              dma_unmap_page(dma_dev, ring->slots[index].dma_addr,
++                             skb_frag_size(&skb_shinfo(skb)->frags[i - 1]),
++                             DMA_TO_DEVICE);
++      }
++
++err_dma_head:
++      bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
++                ring->mmio_base);
++
++err_drop:
+       dev_kfree_skb(skb);
+       return NETDEV_TX_OK;
+ }
+@@ -205,32 +258,45 @@ static void bgmac_dma_tx_free(struct bgm
+       while (ring->start != empty_slot) {
+               struct bgmac_slot_info *slot = &ring->slots[ring->start];
++              struct bgmac_dma_desc *desc = &ring->cpu_base[ring->start];
++              int len = le32_to_cpu(desc->ctl1) & BGMAC_DESC_CTL1_LEN;
+-              if (slot->skb) {
++              if (!slot->dma_addr) {
++                      bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
++                                ring->start, ring->end);
++                      goto next;
++              }
++
++              if (le32_to_cpu(desc->ctl0) & BGMAC_DESC_CTL0_SOF) /* head */
+                       /* Unmap no longer used buffer */
+-                      dma_unmap_single(dma_dev, slot->dma_addr,
+-                                       slot->skb->len, DMA_TO_DEVICE);
+-                      slot->dma_addr = 0;
++                      dma_unmap_single(dma_dev, slot->dma_addr, len,
++                                       DMA_TO_DEVICE);
++              else /* fragment, mapped with skb_frag_dma_map() */
++                      dma_unmap_page(dma_dev, slot->dma_addr, len,
++                                     DMA_TO_DEVICE);
++              if (slot->skb) { /* set on the last slot of a frame only */
+                       bytes_compl += slot->skb->len;
+                       pkts_compl++;
+                       /* Free memory! :) */
+                       dev_kfree_skb(slot->skb);
+                       slot->skb = NULL;
+-              } else {
+-                      bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
+-                                ring->start, ring->end);
+               }
++next:
++              slot->dma_addr = 0;
+               if (++ring->start >= BGMAC_TX_RING_SLOTS)
+                       ring->start = 0;
+               freed = true;
+       }
++      if (!pkts_compl) /* no frame was completely reclaimed */
++              return;
++
+       netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);
+-      if (freed && netif_queue_stopped(bgmac->net_dev))
++      if (netif_queue_stopped(bgmac->net_dev))
+               netif_wake_queue(bgmac->net_dev);
+ }
+@@ -439,17 +505,25 @@ static void bgmac_dma_tx_ring_free(struc
+                                  struct bgmac_dma_ring *ring)
+ {
+       struct device *dma_dev = bgmac->core->dma_dev;
++      struct bgmac_dma_desc *dma_desc = ring->cpu_base;
+       struct bgmac_slot_info *slot;
+       int i;
+       for (i = 0; i < ring->num_slots; i++) {
++              int len = le32_to_cpu(dma_desc[i].ctl1) & BGMAC_DESC_CTL1_LEN;
++
+               slot = &ring->slots[i];
+-              if (slot->skb) {
+-                      if (slot->dma_addr)
+-                              dma_unmap_single(dma_dev, slot->dma_addr,
+-                                               slot->skb->len, DMA_TO_DEVICE);
+-                      dev_kfree_skb(slot->skb);
+-              }
++              dev_kfree_skb(slot->skb); /* set on a frame's last slot only */
++
++              if (!slot->dma_addr) /* nothing mapped in this slot */
++                      continue;
++
++              if (le32_to_cpu(dma_desc[i].ctl0) & BGMAC_DESC_CTL0_SOF)
++                      dma_unmap_single(dma_dev, slot->dma_addr,
++                                       len, DMA_TO_DEVICE);
++              else /* fragment, mapped with skb_frag_dma_map() */
++                      dma_unmap_page(dma_dev, slot->dma_addr,
++                                     len, DMA_TO_DEVICE);
+       }
+ }
+@@ -1583,6 +1657,10 @@ static int bgmac_probe(struct bcma_devic
+               goto err_dma_free;
+       }
++      net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
++      net_dev->hw_features = net_dev->features; /* csum done in software */
++      net_dev->vlan_features = net_dev->features;
++
+       err = register_netdev(bgmac->net_dev);
+       if (err) {
+               bgmac_err(bgmac, "Cannot register net device\n");