improve the stability of via-rhine with large packet sizes and high network load
target/linux/rb532-2.6/patches/240-via_rhine_performance.patch
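
This patch converts via-rhine to NAPI. Instead of bounding interrupt work
with max_interrupt_work, the interrupt handler now acknowledges and masks
the rx/tx interrupt sources and defers the actual work to the 2.6 NAPI
poll hook (dev->poll / dev->weight). The tx/rx rings grow from 16 to 64
entries, the current interrupt mask and status are cached in rhine_private
(imask/istat), received frames are shifted up by two bytes in place so the
IP header is 4-byte aligned (see the note after the diff), and the chip is
put into PCI power state D3 whenever the interface is not in use.

For reference, a minimal sketch of the 2.6-era NAPI contract that
rhine_poll() below follows; the my_* names are hypothetical placeholders,
not part of this patch:

        static int my_poll(struct net_device *dev, int *budget)
        {
                /* Consume no more than the device quota or the global
                   budget allows in one pass. */
                int work_to_do = min(*budget, dev->quota);
                int work_done = my_rx(dev, work_to_do); /* reap the rx ring */

                *budget -= work_done;
                dev->quota -= work_done;

                if (work_done < work_to_do) {
                        /* Out of work: leave polled mode and unmask the
                           rx/tx interrupt sources again. */
                        netif_rx_complete(dev);
                        my_intr_enable(dev);
                        return 0;
                }
                return 1; /* ring not drained yet, poll again */
        }
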
diff -ur linux.old/drivers/net/via-rhine.c linux.dev/drivers/net/via-rhine.c
--- linux.old/drivers/net/via-rhine.c 2006-12-07 05:53:39.000000000 +0100
+++ linux.dev/drivers/net/via-rhine.c 2006-12-07 07:06:52.000000000 +0100
@@ -131,6 +131,10 @@
         - Fix Tx engine race for good
         - Craig Brind: Zero padded aligned buffers for short packets.
 
+        OpenWrt Version (Felix Fietkau <nbd@openwrt.org>)
+        - Performance improvements
+        - NAPI polling
+
 */
 
 #define DRV_NAME "via-rhine"
@@ -142,7 +146,6 @@
    These may be modified when a driver module is loaded. */
 
 static int debug = 1; /* 1 normal messages, 0 quiet .. 7 verbose. */
-static int max_interrupt_work = 20;
 
 /* Set the copy breakpoint for the copy-only-tiny-frames scheme.
    Setting to > 1518 effectively disables this feature. */
@@ -165,9 +168,9 @@
    Making the Tx ring too large decreases the effectiveness of channel
    bonding and packet priority.
    There are no ill effects from too-large receive rings. */
-#define TX_RING_SIZE 16
-#define TX_QUEUE_LEN 10 /* Limit ring entries actually used. */
-#define RX_RING_SIZE 16
+#define TX_RING_SIZE 64
+#define TX_QUEUE_LEN 60 /* Limit ring entries actually used. */
+#define RX_RING_SIZE 64
 
 
 /* Operational parameters that usually are not changed. */
@@ -201,6 +204,7 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 #include <asm/uaccess.h>
+#include <asm/unaligned.h>
 
 /* These identify the driver base version and may not be removed. */
 static char version[] __devinitdata =
@@ -217,10 +221,8 @@
 MODULE_DESCRIPTION("VIA Rhine PCI Fast Ethernet driver");
 MODULE_LICENSE("GPL");
 
-module_param(max_interrupt_work, int, 0);
 module_param(debug, int, 0);
 module_param(rx_copybreak, int, 0);
-MODULE_PARM_DESC(max_interrupt_work, "VIA Rhine maximum events handled per interrupt");
 MODULE_PARM_DESC(debug, "VIA Rhine debug level (0-7)");
 MODULE_PARM_DESC(rx_copybreak, "VIA Rhine copy breakpoint for copy-only-tiny-frames");
 
@@ -461,6 +463,8 @@
         struct tx_desc *tx_ring;
         dma_addr_t rx_ring_dma;
         dma_addr_t tx_ring_dma;
+        u32 istat;
+        u32 imask;
 
         /* The addresses of receive-in-place skbuffs. */
         struct sk_buff *rx_skbuff[RX_RING_SIZE];
@@ -500,9 +504,10 @@
 static void rhine_tx_timeout(struct net_device *dev);
 static int rhine_start_tx(struct sk_buff *skb, struct net_device *dev);
 static irqreturn_t rhine_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
-static void rhine_tx(struct net_device *dev);
-static void rhine_rx(struct net_device *dev);
-static void rhine_error(struct net_device *dev, int intr_status);
+static int rhine_poll(struct net_device *dev, int *budget);
+static int rhine_tx(struct net_device *dev);
+static int rhine_rx(struct net_device *dev, int max_work);
+static void rhine_error(struct net_device *dev);
 static void rhine_set_rx_mode(struct net_device *dev);
 static struct net_device_stats *rhine_get_stats(struct net_device *dev);
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
@@ -597,6 +602,7 @@
         struct rhine_private *rp = netdev_priv(dev);
         void __iomem *ioaddr = rp->base;
 
+        pci_enable_device(rp->pdev);
         iowrite8(Cmd1Reset, ioaddr + ChipCmd1);
         IOSYNC;
 
@@ -618,6 +624,28 @@
                 "failed" : "succeeded");
 }
 
+static inline void rhine_intr_enable(struct net_device *dev)
+{
+        struct rhine_private *rp = netdev_priv(dev);
+        void __iomem *ioaddr = rp->base;
+
+        iowrite16(rp->imask = (IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow |
+                IntrRxDropped | IntrRxNoBuf | IntrTxAborted |
+                IntrTxDone | IntrTxError | IntrTxUnderrun |
+                IntrPCIErr | IntrStatsMax | IntrLinkChange),
+                ioaddr + IntrEnable);
+}
+
+static inline void rhine_intr_disable(struct net_device *dev)
+{
+        struct rhine_private *rp = netdev_priv(dev);
+        void __iomem *ioaddr = rp->base;
+
+        iowrite16(rp->imask = (IntrRxOverflow | IntrRxNoBuf | IntrTxAborted |
+                IntrTxError | IntrTxUnderrun | IntrPCIErr | IntrStatsMax | IntrLinkChange),
+                ioaddr + IntrEnable);
+}
+
 #ifdef USE_MMIO
 static void enable_mmio(long pioaddr, u32 quirks)
 {
@@ -660,14 +688,26 @@
 
 }
 
-#ifdef CONFIG_NET_POLL_CONTROLLER
-static void rhine_poll(struct net_device *dev)
+static int rhine_poll(struct net_device *dev, int *budget)
 {
-        disable_irq(dev->irq);
-        rhine_interrupt(dev->irq, (void *)dev, NULL);
-        enable_irq(dev->irq);
+        unsigned int work_done, work_to_do = min(*budget, dev->quota);
+        struct rhine_private *rp = netdev_priv(dev);
+
+        work_done = rhine_rx(dev, (*budget < dev->quota ? *budget : dev->quota));
+
+        if (rp->istat & (IntrTxErrSummary | IntrTxDone))
+                rhine_tx(dev);
+
+        *budget -= work_done;
+        dev->quota -= work_done;
+
+        if (work_done < work_to_do) {
+                netif_rx_complete(dev);
+                rhine_intr_enable(dev);
+        }
+
+        return (work_done >= work_to_do);
 }
-#endif
 
 static void rhine_hw_init(struct net_device *dev, long pioaddr)
 {
@@ -846,11 +886,10 @@
         dev->ethtool_ops = &netdev_ethtool_ops;
         dev->tx_timeout = rhine_tx_timeout;
         dev->watchdog_timeo = TX_TIMEOUT;
-#ifdef CONFIG_NET_POLL_CONTROLLER
-        dev->poll_controller = rhine_poll;
-#endif
-        if (rp->quirks & rqRhineI)
-                dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM;
+        dev->poll = rhine_poll;
+        dev->weight = 64;
+
+        dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
 
         /* dev->name not defined before register_netdev()! */
         rc = register_netdev(dev);
@@ -894,6 +933,10 @@
                 }
         }
         rp->mii_if.phy_id = phy_id;
+
+        // shut down until somebody really needs it
+        iowrite8(0x80, ioaddr + 0xa1);
+        pci_set_power_state(rp->pdev, 3);
 
         return 0;
 
@@ -985,7 +1028,7 @@
 
         /* Fill in the Rx buffers. Handle allocation failure gracefully. */
         for (i = 0; i < RX_RING_SIZE; i++) {
-                struct sk_buff *skb = dev_alloc_skb(rp->rx_buf_sz);
+                struct sk_buff *skb = dev_alloc_skb(rp->rx_buf_sz + 4);
                 rp->rx_skbuff[i] = skb;
                 if (skb == NULL)
                         break;
@@ -1120,11 +1163,7 @@
         rhine_set_rx_mode(dev);
 
         /* Enable interrupts by setting the interrupt mask. */
-        iowrite16(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow |
-                IntrRxDropped | IntrRxNoBuf | IntrTxAborted |
-                IntrTxDone | IntrTxError | IntrTxUnderrun |
-                IntrPCIErr | IntrStatsMax | IntrLinkChange,
-                ioaddr + IntrEnable);
+        rhine_intr_enable(dev);
 
         iowrite16(CmdStart | CmdTxOn | CmdRxOn | (Cmd1NoTxPoll << 8),
                 ioaddr + ChipCmd);
@@ -1235,6 +1274,7 @@
                 mdio_read(dev, rp->mii_if.phy_id, MII_BMSR));
 
         netif_start_queue(dev);
+        netif_poll_enable(dev);
 
         return 0;
 }
@@ -1263,8 +1303,8 @@
         /* Reinitialize the hardware. */
         rhine_chip_reset(dev);
         init_registers(dev);
-
         spin_unlock(&rp->lock);
+
         enable_irq(rp->pdev->irq);
 
         dev->trans_start = jiffies;
@@ -1358,77 +1398,66 @@
         struct net_device *dev = dev_instance;
         struct rhine_private *rp = netdev_priv(dev);
         void __iomem *ioaddr = rp->base;
-        u32 intr_status;
-        int boguscnt = max_interrupt_work;
         int handled = 0;
 
-        while ((intr_status = get_intr_status(dev))) {
+        if ((rp->istat = (get_intr_status(dev) & rp->imask))) {
                 handled = 1;
 
                 /* Acknowledge all of the current interrupt sources ASAP. */
-                if (intr_status & IntrTxDescRace)
+                if (rp->istat & IntrTxDescRace)
                         iowrite8(0x08, ioaddr + IntrStatus2);
-                iowrite16(intr_status & 0xffff, ioaddr + IntrStatus);
+                iowrite16(rp->istat & 0xffff, ioaddr + IntrStatus);
                 IOSYNC;
 
-                if (debug > 4)
-                        printk(KERN_DEBUG "%s: Interrupt, status %8.8x.\n",
-                                dev->name, intr_status);
+                if (likely(rp->istat & ((IntrRxDone | IntrRxErr | IntrRxDropped |
+                        IntrRxWakeUp | IntrRxEmpty | IntrRxNoBuf |
                        IntrTxErrSummary | IntrTxDone)))) {
+
+                        rhine_intr_disable(dev);
 
-                if (intr_status & (IntrRxDone | IntrRxErr | IntrRxDropped |
-                        IntrRxWakeUp | IntrRxEmpty | IntrRxNoBuf))
-                        rhine_rx(dev);
-
-                if (intr_status & (IntrTxErrSummary | IntrTxDone)) {
-                        if (intr_status & IntrTxErrSummary) {
-                                /* Avoid scavenging before Tx engine turned off */
-                                RHINE_WAIT_FOR(!(ioread8(ioaddr+ChipCmd) & CmdTxOn));
-                                if (debug > 2 &&
-                                ioread8(ioaddr+ChipCmd) & CmdTxOn)
-                                        printk(KERN_WARNING "%s: "
-                                                "rhine_interrupt() Tx engine"
-                                                "still on.\n", dev->name);
-                        }
-                        rhine_tx(dev);
+                        if (likely(netif_rx_schedule_prep(dev)))
+                                __netif_rx_schedule(dev);
                 }
 
                 /* Abnormal error summary/uncommon events handlers. */
-                if (intr_status & (IntrPCIErr | IntrLinkChange |
+                if (unlikely(rp->istat & (IntrPCIErr | IntrLinkChange |
                         IntrStatsMax | IntrTxError | IntrTxAborted |
-                        IntrTxUnderrun | IntrTxDescRace))
-                        rhine_error(dev, intr_status);
-
-                if (--boguscnt < 0) {
-                        printk(KERN_WARNING "%s: Too much work at interrupt, "
-                                "status=%#8.8x.\n",
-                                dev->name, intr_status);
-                        break;
-                }
+                        IntrTxUnderrun | IntrTxDescRace)))
                        rhine_error(dev);
         }
 
-        if (debug > 3)
-                printk(KERN_DEBUG "%s: exiting interrupt, status=%8.8x.\n",
-                        dev->name, ioread16(ioaddr + IntrStatus));
         return IRQ_RETVAL(handled);
 }
 
 /* This routine is logically part of the interrupt handler, but isolated
    for clarity. */
-static void rhine_tx(struct net_device *dev)
+static int rhine_tx(struct net_device *dev)
 {
         struct rhine_private *rp = netdev_priv(dev);
         int txstatus = 0, entry = rp->dirty_tx % TX_RING_SIZE;
+        void __iomem *ioaddr = rp->base;
+        int done = 0;
+
+        /* Avoid scavenging before Tx engine turned off */
+        RHINE_WAIT_FOR(!(ioread8(ioaddr+ChipCmd) & CmdTxOn));
+        if (debug > 2 &&
                ioread8(ioaddr+ChipCmd) & CmdTxOn)
+                printk(KERN_WARNING "%s: "
+                        "rhine_tx() Tx engine "
+                        "still on.\n", dev->name);
 
-        spin_lock(&rp->lock);
 
         /* find and cleanup dirty tx descriptors */
         while (rp->dirty_tx != rp->cur_tx) {
+                spin_lock(&rp->lock);
                 txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status);
                 if (debug > 6)
                         printk(KERN_DEBUG "Tx scavenge %d status %8.8x.\n",
                                 entry, txstatus);
-                if (txstatus & DescOwn)
+                if (txstatus & DescOwn) {
+                        spin_unlock(&rp->lock);
                         break;
+                }
                 if (txstatus & 0x8000) {
                         if (debug > 1)
                                 printk(KERN_DEBUG "%s: Transmit error, "
@@ -1443,6 +1472,7 @@
                            (txstatus & 0x0800) || (txstatus & 0x1000)) {
                                rp->stats.tx_fifo_errors++;
                                rp->tx_ring[entry].tx_status = cpu_to_le32(DescOwn);
+                                spin_unlock(&rp->lock);
                                break; /* Keep the skb - we try again */
                        }
                        /* Transmitter restarted in 'abnormal' handler. */
@@ -1457,6 +1487,7 @@
                                txstatus & 0xF);
                        rp->stats.tx_bytes += rp->tx_skbuff[entry]->len;
                        rp->stats.tx_packets++;
+                        done++;
                }
                /* Free the original skb. */
                if (rp->tx_skbuff_dma[entry]) {
@@ -1465,23 +1496,25 @@
                                rp->tx_skbuff[entry]->len,
                                PCI_DMA_TODEVICE);
                }
-                dev_kfree_skb_irq(rp->tx_skbuff[entry]);
+                dev_kfree_skb_any(rp->tx_skbuff[entry]);
                rp->tx_skbuff[entry] = NULL;
                entry = (++rp->dirty_tx) % TX_RING_SIZE;
+                spin_unlock(&rp->lock);
        }
+
        if ((rp->cur_tx - rp->dirty_tx) < TX_QUEUE_LEN - 4)
                netif_wake_queue(dev);
 
-        spin_unlock(&rp->lock);
+        return done;
 }
 
 /* This routine is logically part of the interrupt handler, but isolated
    for clarity and better register allocation. */
-static void rhine_rx(struct net_device *dev)
+static int rhine_rx(struct net_device *dev, int max_work)
 {
         struct rhine_private *rp = netdev_priv(dev);
         int entry = rp->cur_rx % RX_RING_SIZE;
-        int boguscnt = rp->dirty_rx + RX_RING_SIZE - rp->cur_rx;
+        int done = 0;
 
         if (debug > 4) {
                 printk(KERN_DEBUG "%s: rhine_rx(), entry %d status %8.8x.\n",
@@ -1498,7 +1531,7 @@
                 if (debug > 4)
                         printk(KERN_DEBUG "rhine_rx() status is %8.8x.\n",
                                 desc_status);
-                if (--boguscnt < 0)
+                if (--max_work < 0)
                         break;
                 if ((desc_status & (RxWholePkt | RxErr)) != RxWholePkt) {
                         if ((desc_status & RxWholePkt) != RxWholePkt) {
@@ -1523,9 +1556,7 @@
                                if (desc_status & 0x0004) rp->stats.rx_frame_errors++;
                                if (desc_status & 0x0002) {
                                        /* this can also be updated outside the interrupt handler */
-                                        spin_lock(&rp->lock);
                                        rp->stats.rx_crc_errors++;
-                                        spin_unlock(&rp->lock);
                                }
                        }
                } else {
@@ -1553,6 +1584,7 @@
                                        rp->rx_buf_sz,
                                        PCI_DMA_FROMDEVICE);
                        } else {
+                                int i;
                                skb = rp->rx_skbuff[entry];
                                if (skb == NULL) {
                                        printk(KERN_ERR "%s: Inconsistent Rx "
@@ -1561,6 +1593,14 @@
                                        break;
                                }
                                rp->rx_skbuff[entry] = NULL;
+
+                                /* align the data to the ip header - should be faster than using rx_copybreak */
+                                for (i = pkt_len - (pkt_len % 4); i >= 0; i -= 4) {
+                                        put_unaligned(*((u32 *) (skb->data + i)), (u32 *) (skb->data + i + 2));
+                                }
+                                skb->data += 2;
+                                skb->tail += 2;
+
                                skb_put(skb, pkt_len);
                                pci_unmap_single(rp->pdev,
                                        rp->rx_skbuff_dma[entry],
@@ -1568,10 +1608,11 @@
                                        PCI_DMA_FROMDEVICE);
                        }
                        skb->protocol = eth_type_trans(skb, dev);
-                        netif_rx(skb);
+                        netif_receive_skb(skb);
                        dev->last_rx = jiffies;
                        rp->stats.rx_bytes += pkt_len;
                        rp->stats.rx_packets++;
+                        done++;
                }
                entry = (++rp->cur_rx) % RX_RING_SIZE;
                rp->rx_head_desc = &rp->rx_ring[entry];
@@ -1582,7 +1623,7 @@
                struct sk_buff *skb;
                entry = rp->dirty_rx % RX_RING_SIZE;
                if (rp->rx_skbuff[entry] == NULL) {
-                        skb = dev_alloc_skb(rp->rx_buf_sz);
+                        skb = dev_alloc_skb(rp->rx_buf_sz + 4);
                        rp->rx_skbuff[entry] = skb;
                        if (skb == NULL)
                                break; /* Better luck next round. */
@@ -1595,6 +1636,8 @@
                }
                rp->rx_ring[entry].rx_status = cpu_to_le32(DescOwn);
        }
+
+        return done;
 }
 
 /*
@@ -1644,11 +1687,11 @@
 
 }
 
-static void rhine_error(struct net_device *dev, int intr_status)
+static void rhine_error(struct net_device *dev)
 {
         struct rhine_private *rp = netdev_priv(dev);
         void __iomem *ioaddr = rp->base;
-
+        u32 intr_status = rp->istat;
         spin_lock(&rp->lock);
 
         if (intr_status & IntrLinkChange)
@@ -1895,6 +1938,7 @@
 
         /* Disable interrupts by clearing the interrupt mask. */
         iowrite16(0x0000, ioaddr + IntrEnable);
+        rp->imask = 0;
 
         /* Stop the chip's Tx and Rx processes. */
         iowrite16(CmdStop, ioaddr + ChipCmd);
@@ -1906,6 +1950,9 @@
         free_tbufs(dev);
         free_ring(dev);
 
+        iowrite8(0x80, ioaddr + 0xa1);
+        pci_set_power_state(rp->pdev, 3);
+
         return 0;
 }
 
@@ -1935,6 +1982,7 @@
                 return; /* Nothing to do for non-WOL adapters */
 
         rhine_power_init(dev);
+        netif_poll_disable(dev);
 
         /* Make sure we use pattern 0, 1 and not 4, 5 */
         if (rp->quirks & rq6patterns)
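
Note on the rx realignment above: the Rhine DMAs into 4-byte-aligned
buffers, so the IP header behind the 14-byte Ethernet header lands on a
2-byte boundary. Rather than copying every large frame (the rx_copybreak
copy path already produces aligned copies), the patch shifts the frame up
by two bytes in place, walking the buffer back to front one 32-bit word at
a time so no word is overwritten before it has been read, then advances
skb->data and skb->tail; the rx skbs are allocated with extra slack so the
shifted tail stays inside the buffer. A standalone illustration of the
same move, with memcpy() standing in for the kernel's put_unaligned():

        #include <stdint.h>
        #include <string.h>

        /* Shift len bytes of buf up by two bytes. The last store can
           reach up to 6 bytes past len when len is a multiple of 4, so
           buf needs at least that much slack past len (the patch adds 4
           bytes to its rx allocations and leans on the skb's tailroom). */
        static void shift_up_two(uint8_t *buf, int len)
        {
                int i;

                /* Walk from the last word-aligned offset back to 0 so
                   every source word is read before it is clobbered. */
                for (i = len - (len % 4); i >= 0; i -= 4) {
                        uint32_t w;
                        memcpy(&w, buf + i, 4);     /* unaligned-safe load */
                        memcpy(buf + i + 2, &w, 4); /* unaligned-safe store */
                }
                /* The frame now starts at buf + 2 with its IP header
                   4-byte aligned. */
        }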