kernel: split patches folder up into backport, pending and hack folders
[openwrt/staging/wigyori.git] / target / linux / generic / pending-3.18 / 077-03-bgmac-implement-scatter-gather-support.patch
1 From: Felix Fietkau <nbd@nbd.name>
2 Date: Mon, 23 Mar 2015 02:42:26 +0100
3 Subject: [PATCH] bgmac: implement scatter/gather support
4
5 Always use software checksumming, since the hardware does not have any
6 checksum offload support.
7 This significantly improves local TCP tx performance.
8
9 Signed-off-by: Felix Fietkau <nbd@nbd.name>
10 ---
11
12 --- a/drivers/net/ethernet/broadcom/bgmac.c
13 +++ b/drivers/net/ethernet/broadcom/bgmac.c
14 @@ -115,53 +115,91 @@ static void bgmac_dma_tx_enable(struct b
15 bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_CTL, ctl);
16 }
17
18 +static void
19 +bgmac_dma_tx_add_buf(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
20 + int i, int len, u32 ctl0)
21 +{ /* Fill TX descriptor i of the ring from ring->slots[i].dma_addr, len and ctl0 */
22 + struct bgmac_slot_info *slot;
23 + struct bgmac_dma_desc *dma_desc;
24 + u32 ctl1;
25 +
26 + if (i == ring->num_slots - 1) /* the final slot of the ring also gets the EOT flag */
27 + ctl0 |= BGMAC_DESC_CTL0_EOT;
28 +
29 + ctl1 = len & BGMAC_DESC_CTL1_LEN; /* keep only the bits covered by the length mask */
30 +
31 + slot = &ring->slots[i];
32 + dma_desc = &ring->cpu_base[i];
33 + dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr)); /* descriptor fields are stored little-endian */
34 + dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
35 + dma_desc->ctl0 = cpu_to_le32(ctl0);
36 + dma_desc->ctl1 = cpu_to_le32(ctl1);
37 +}
38 +
39 static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
40 struct bgmac_dma_ring *ring,
41 struct sk_buff *skb)
42 {
43 struct device *dma_dev = bgmac->core->dma_dev;
44 struct net_device *net_dev = bgmac->net_dev;
45 - struct bgmac_dma_desc *dma_desc;
46 - struct bgmac_slot_info *slot;
47 - u32 ctl0, ctl1;
48 + struct bgmac_slot_info *slot = &ring->slots[ring->end];
49 int free_slots;
50 + int nr_frags;
51 + u32 flags;
52 + int index = ring->end;
53 + int i;
54
55 if (skb->len > BGMAC_DESC_CTL1_LEN) {
56 bgmac_err(bgmac, "Too long skb (%d)\n", skb->len);
57 - goto err_stop_drop;
58 + goto err_drop;
59 }
60
61 + if (skb->ip_summed == CHECKSUM_PARTIAL) /* finish the checksum in software before mapping */
62 + skb_checksum_help(skb);
63 +
64 + nr_frags = skb_shinfo(skb)->nr_frags;
65 +
66 if (ring->start <= ring->end)
67 free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
68 else
69 free_slots = ring->start - ring->end;
70 - if (free_slots == 1) {
71 +
72 + if (free_slots <= nr_frags + 1) { /* packet uses nr_frags + 1 slots; require strictly more to be free */
73 bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
74 netif_stop_queue(net_dev);
75 return NETDEV_TX_BUSY;
76 }
77
78 - slot = &ring->slots[ring->end];
79 - slot->skb = skb;
80 - slot->dma_addr = dma_map_single(dma_dev, skb->data, skb->len,
81 + slot->dma_addr = dma_map_single(dma_dev, skb->data, skb_headlen(skb),
82 DMA_TO_DEVICE);
83 - if (dma_mapping_error(dma_dev, slot->dma_addr)) {
84 - bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
85 - ring->mmio_base);
86 - goto err_stop_drop;
87 - }
88 + if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
89 + goto err_dma_head;
90
91 - ctl0 = BGMAC_DESC_CTL0_IOC | BGMAC_DESC_CTL0_SOF | BGMAC_DESC_CTL0_EOF;
92 - if (ring->end == ring->num_slots - 1)
93 - ctl0 |= BGMAC_DESC_CTL0_EOT;
94 - ctl1 = skb->len & BGMAC_DESC_CTL1_LEN;
95 + flags = BGMAC_DESC_CTL0_SOF;
96 + if (!nr_frags)
97 + flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
98 +
99 + bgmac_dma_tx_add_buf(bgmac, ring, index, skb_headlen(skb), flags);
100 + flags = 0;
101 +
102 + for (i = 0; i < nr_frags; i++) {
103 + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
104 + int len = skb_frag_size(frag);
105 +
106 + index = (index + 1) % BGMAC_TX_RING_SLOTS;
107 + slot = &ring->slots[index];
108 + slot->dma_addr = skb_frag_dma_map(dma_dev, frag, 0,
109 + len, DMA_TO_DEVICE);
110 + if (unlikely(dma_mapping_error(dma_dev, slot->dma_addr)))
111 + goto err_dma;
112
113 - dma_desc = ring->cpu_base;
114 - dma_desc += ring->end;
115 - dma_desc->addr_low = cpu_to_le32(lower_32_bits(slot->dma_addr));
116 - dma_desc->addr_high = cpu_to_le32(upper_32_bits(slot->dma_addr));
117 - dma_desc->ctl0 = cpu_to_le32(ctl0);
118 - dma_desc->ctl1 = cpu_to_le32(ctl1);
119 + if (i == nr_frags - 1)
120 + flags |= BGMAC_DESC_CTL0_EOF | BGMAC_DESC_CTL0_IOC;
121 +
122 + bgmac_dma_tx_add_buf(bgmac, ring, index, len, flags);
123 + }
124 +
125 + slot->skb = skb; /* the skb is recorded only on the packet's last slot */
126
127 netdev_sent_queue(net_dev, skb->len);
128
129 @@ -170,20 +208,35 @@ static netdev_tx_t bgmac_dma_tx_add(stru
130 /* Increase ring->end to point empty slot. We tell hardware the first
131 * slot it should *not* read.
132 */
133 - if (++ring->end >= BGMAC_TX_RING_SLOTS)
134 - ring->end = 0;
135 + ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
136 bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
137 ring->index_base +
138 ring->end * sizeof(struct bgmac_dma_desc));
139
140 - /* Always keep one slot free to allow detecting bugged calls. */
141 - if (--free_slots == 1)
142 + free_slots -= nr_frags + 1;
143 + if (free_slots < 8)
144 netif_stop_queue(net_dev);
145
146 return NETDEV_TX_OK;
147
148 -err_stop_drop:
149 - netif_stop_queue(net_dev);
150 +err_dma: /* 'slot' is the fragment slot that failed to map; the head lives at ring->end */
151 + dma_unmap_single(dma_dev, ring->slots[ring->end].dma_addr,
152 + skb_headlen(skb), DMA_TO_DEVICE);
153 +
154 + for (; i > 0; i--) { /* fragment i - 1 was mapped into slot ring->end + i */
155 + int index = (ring->end + i) % BGMAC_TX_RING_SLOTS;
156 + struct bgmac_slot_info *slot = &ring->slots[index];
157 + u32 ctl1 = le32_to_cpu(ring->cpu_base[index].ctl1);
158 + int len = ctl1 & BGMAC_DESC_CTL1_LEN;
159 +
160 + dma_unmap_page(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE);
161 + }
162 +
163 +err_dma_head:
164 + bgmac_err(bgmac, "Mapping error of skb on ring 0x%X\n",
165 + ring->mmio_base);
166 +
167 +err_drop:
168 dev_kfree_skb(skb);
169 return NETDEV_TX_OK;
170 }
171 @@ -205,32 +258,45 @@ static void bgmac_dma_tx_free(struct bgm
172
173 while (ring->start != empty_slot) {
174 struct bgmac_slot_info *slot = &ring->slots[ring->start];
175 + u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
176 + int len = ctl1 & BGMAC_DESC_CTL1_LEN;
177
178 - if (slot->skb) {
179 + if (!slot->dma_addr) {
180 + bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
181 + ring->start, ring->end);
182 + goto next;
183 + }
184 +
185 + if (ctl1 & BGMAC_DESC_CTL0_SOF)
186 /* Unmap no longer used buffer */
187 - dma_unmap_single(dma_dev, slot->dma_addr,
188 - slot->skb->len, DMA_TO_DEVICE);
189 - slot->dma_addr = 0;
190 + dma_unmap_single(dma_dev, slot->dma_addr, len,
191 + DMA_TO_DEVICE);
192 + else
193 + dma_unmap_page(dma_dev, slot->dma_addr, len,
194 + DMA_TO_DEVICE);
195
196 + if (slot->skb) {
197 bytes_compl += slot->skb->len;
198 pkts_compl++;
199
200 /* Free memory! :) */
201 dev_kfree_skb(slot->skb);
202 slot->skb = NULL;
203 - } else {
204 - bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
205 - ring->start, ring->end);
206 }
207
208 +next:
209 + slot->dma_addr = 0;
210 if (++ring->start >= BGMAC_TX_RING_SLOTS)
211 ring->start = 0;
212 freed = true;
213 }
214
215 + if (!pkts_compl)
216 + return;
217 +
218 netdev_completed_queue(bgmac->net_dev, pkts_compl, bytes_compl);
219
220 - if (freed && netif_queue_stopped(bgmac->net_dev))
221 + if (netif_queue_stopped(bgmac->net_dev))
222 netif_wake_queue(bgmac->net_dev);
223 }
224
225 @@ -439,17 +505,25 @@ static void bgmac_dma_tx_ring_free(struc
226 struct bgmac_dma_ring *ring)
227 {
228 struct device *dma_dev = bgmac->core->dma_dev;
229 + struct bgmac_dma_desc *dma_desc = ring->cpu_base;
230 struct bgmac_slot_info *slot;
231 int i;
232
233 for (i = 0; i < ring->num_slots; i++) {
234 + int len = dma_desc[i].ctl1 & BGMAC_DESC_CTL1_LEN;
235 +
236 slot = &ring->slots[i];
237 - if (slot->skb) {
238 - if (slot->dma_addr)
239 - dma_unmap_single(dma_dev, slot->dma_addr,
240 - slot->skb->len, DMA_TO_DEVICE);
241 - dev_kfree_skb(slot->skb);
242 - }
243 + dev_kfree_skb(slot->skb);
244 +
245 + if (!slot->dma_addr)
246 + continue;
247 +
248 + if (slot->skb)
249 + dma_unmap_single(dma_dev, slot->dma_addr,
250 + len, DMA_TO_DEVICE);
251 + else
252 + dma_unmap_page(dma_dev, slot->dma_addr,
253 + len, DMA_TO_DEVICE);
254 }
255 }
256
257 @@ -1583,6 +1657,10 @@ static int bgmac_probe(struct bcma_devic
258 goto err_dma_free;
259 }
260
261 + net_dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
262 + net_dev->hw_features = net_dev->features;
263 + net_dev->vlan_features = net_dev->features;
264 +
265 err = register_netdev(bgmac->net_dev);
266 if (err) {
267 bgmac_err(bgmac, "Cannot register net device\n");