finally move buildroot-ng to trunk
[openwrt/staging/dedeckeh.git] / target / linux / rb532-2.6 / patches / 240-via_rhine_performance.patch
1 diff -urN linux.old/drivers/net/via-rhine.c linux.dev/drivers/net/via-rhine.c
2 --- linux.old/drivers/net/via-rhine.c 2006-06-08 20:21:20.000000000 +0200
3 +++ linux.dev/drivers/net/via-rhine.c 2006-06-08 20:19:40.000000000 +0200
4 @@ -131,6 +131,10 @@
5 - Fix Tx engine race for good
6 - Craig Brind: Zero padded aligned buffers for short packets.
7
8 + OpenWrt Version (Felix Fietkau <nbd@openwrt.org>)
9 + - Performance improvements
10 + - NAPI polling
11 +
12 */
13
14 #define DRV_NAME "via-rhine"
15 @@ -142,7 +146,6 @@
16 These may be modified when a driver module is loaded. */
17
18 static int debug = 1; /* 1 normal messages, 0 quiet .. 7 verbose. */
19 -static int max_interrupt_work = 20;
20
21 /* Set the copy breakpoint for the copy-only-tiny-frames scheme.
22 Setting to > 1518 effectively disables this feature. */
23 @@ -165,9 +168,9 @@
24 Making the Tx ring too large decreases the effectiveness of channel
25 bonding and packet priority.
26 There are no ill effects from too-large receive rings. */
27 -#define TX_RING_SIZE 16
28 -#define TX_QUEUE_LEN 10 /* Limit ring entries actually used. */
29 -#define RX_RING_SIZE 16
30 +#define TX_RING_SIZE 128
31 +#define TX_QUEUE_LEN 120 /* Limit ring entries actually used. */
32 +#define RX_RING_SIZE 128
33
34
35 /* Operational parameters that usually are not changed. */
36 @@ -201,6 +204,7 @@
37 #include <asm/io.h>
38 #include <asm/irq.h>
39 #include <asm/uaccess.h>
40 +#include <asm/unaligned.h>
41
42 /* These identify the driver base version and may not be removed. */
43 static char version[] __devinitdata =
44 @@ -217,10 +221,8 @@
45 MODULE_DESCRIPTION("VIA Rhine PCI Fast Ethernet driver");
46 MODULE_LICENSE("GPL");
47
48 -module_param(max_interrupt_work, int, 0);
49 module_param(debug, int, 0);
50 module_param(rx_copybreak, int, 0);
51 -MODULE_PARM_DESC(max_interrupt_work, "VIA Rhine maximum events handled per interrupt");
52 MODULE_PARM_DESC(debug, "VIA Rhine debug level (0-7)");
53 MODULE_PARM_DESC(rx_copybreak, "VIA Rhine copy breakpoint for copy-only-tiny-frames");
54
55 @@ -461,6 +463,8 @@
56 struct tx_desc *tx_ring;
57 dma_addr_t rx_ring_dma;
58 dma_addr_t tx_ring_dma;
59 + u32 istat;
60 + u32 imask;
61
62 /* The addresses of receive-in-place skbuffs. */
63 struct sk_buff *rx_skbuff[RX_RING_SIZE];
64 @@ -504,9 +508,10 @@
65 static void rhine_check_media_task(struct net_device *dev);
66 static int rhine_start_tx(struct sk_buff *skb, struct net_device *dev);
67 static irqreturn_t rhine_interrupt(int irq, void *dev_instance, struct pt_regs *regs);
68 -static void rhine_tx(struct net_device *dev);
69 -static void rhine_rx(struct net_device *dev);
70 -static void rhine_error(struct net_device *dev, int intr_status);
71 +static int rhine_poll(struct net_device *dev, int *budget);
72 +static int rhine_tx(struct net_device *dev);
73 +static int rhine_rx(struct net_device *dev);
74 +static void rhine_error(struct net_device *dev);
75 static void rhine_set_rx_mode(struct net_device *dev);
76 static struct net_device_stats *rhine_get_stats(struct net_device *dev);
77 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
78 @@ -601,6 +606,8 @@
79 struct rhine_private *rp = netdev_priv(dev);
80 void __iomem *ioaddr = rp->base;
81
82 + pci_enable_device(rp->pdev);
83 +
84 iowrite8(Cmd1Reset, ioaddr + ChipCmd1);
85 IOSYNC;
86
87 @@ -622,6 +629,28 @@
88 "failed" : "succeeded");
89 }
90
91 +static inline void rhine_intr_enable(struct net_device *dev)
92 +{
93 + struct rhine_private *rp = netdev_priv(dev);
94 + void __iomem *ioaddr = rp->base;
95 +
96 + iowrite16(rp->imask = (IntrRxDone | IntrRxErr | IntrRxEmpty | IntrRxOverflow |
97 + IntrRxDropped | IntrRxNoBuf | IntrTxAborted |
98 + IntrTxDone | IntrTxError | IntrTxUnderrun |
99 + IntrPCIErr | IntrStatsMax | IntrLinkChange),
100 + ioaddr + IntrEnable);
101 +}
102 +
103 +static inline void rhine_intr_disable(struct net_device *dev)
104 +{
105 + struct rhine_private *rp = netdev_priv(dev);
106 + void __iomem *ioaddr = rp->base;
107 +
108 + iowrite16(rp->imask = (IntrRxOverflow | IntrRxNoBuf | IntrTxAborted |
109 + IntrTxError | IntrTxUnderrun | IntrPCIErr | IntrStatsMax | IntrLinkChange),
110 + ioaddr + IntrEnable);
111 +}
112 +
113 #ifdef USE_MMIO
114 static void enable_mmio(long pioaddr, u32 quirks)
115 {
116 @@ -664,14 +693,26 @@
117
118 }
119
120 -#ifdef CONFIG_NET_POLL_CONTROLLER
121 -static void rhine_poll(struct net_device *dev)
122 +static int rhine_poll(struct net_device *dev, int *budget)
123 {
124 - disable_irq(dev->irq);
125 - rhine_interrupt(dev->irq, (void *)dev, NULL);
126 - enable_irq(dev->irq);
127 + unsigned int work_done, work_to_do = min(*budget, dev->quota);
128 + struct rhine_private *rp = netdev_priv(dev);
129 +
130 + work_done = rhine_rx(dev);
131 +
132 + if (rp->istat & (IntrTxErrSummary | IntrTxDone))
133 + rhine_tx(dev);
134 +
135 + *budget -= work_done;
136 + dev->quota -= work_done;
137 +
138 + if (work_done < work_to_do) {
139 + netif_rx_complete(dev);
140 + rhine_intr_enable(dev);
141 + }
142 +
143 + return (work_done >= work_to_do);
144 }
145 -#endif
146
147 static void rhine_hw_init(struct net_device *dev, long pioaddr)
148 {
149 @@ -850,11 +891,10 @@
150 dev->ethtool_ops = &netdev_ethtool_ops;
151 dev->tx_timeout = rhine_tx_timeout;
152 dev->watchdog_timeo = TX_TIMEOUT;
153 -#ifdef CONFIG_NET_POLL_CONTROLLER
154 - dev->poll_controller = rhine_poll;
155 -#endif
156 - if (rp->quirks & rqRhineI)
157 - dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM;
158 + dev->poll = rhine_poll;
159 + dev->weight = 64;
160 +
161 + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
162
163 INIT_WORK(&rp->tx_timeout_task,
164 (void (*)(void *))rhine_tx_timeout_task, dev);
165 @@ -904,6 +944,10 @@
166 }
167 }
168 rp->mii_if.phy_id = phy_id;
169 +
170 + /* shut down until somebody really needs it */
171 + iowrite8(0x80, ioaddr + 0xa1);
172 + pci_set_power_state(rp->pdev, PCI_D3hot);
173
174 return 0;
175
176 @@ -995,7 +1039,7 @@
177
178 /* Fill in the Rx buffers. Handle allocation failure gracefully. */
179 for (i = 0; i < RX_RING_SIZE; i++) {
180 - struct sk_buff *skb = dev_alloc_skb(rp->rx_buf_sz);
181 + struct sk_buff *skb = dev_alloc_skb(rp->rx_buf_sz + 4);
182 rp->rx_skbuff[i] = skb;
183 if (skb == NULL)
184 break;
185 @@ -1115,11 +1159,7 @@
186 rhine_set_rx_mode(dev);
187
188 /* Enable interrupts by setting the interrupt mask. */
189 - iowrite16(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow |
190 - IntrRxDropped | IntrRxNoBuf | IntrTxAborted |
191 - IntrTxDone | IntrTxError | IntrTxUnderrun |
192 - IntrPCIErr | IntrStatsMax | IntrLinkChange,
193 - ioaddr + IntrEnable);
194 + rhine_intr_enable(dev);
195
196 iowrite16(CmdStart | CmdTxOn | CmdRxOn | (Cmd1NoTxPoll << 8),
197 ioaddr + ChipCmd);
198 @@ -1230,6 +1270,7 @@
199 mdio_read(dev, rp->mii_if.phy_id, MII_BMSR));
200
201 netif_start_queue(dev);
202 + netif_poll_enable(dev);
203
204 return 0;
205 }
206 @@ -1268,8 +1309,8 @@
207 /* Reinitialize the hardware. */
208 rhine_chip_reset(dev);
209 init_registers(dev);
210 -
211 spin_unlock(&rp->lock);
212 +
213 enable_irq(rp->pdev->irq);
214
215 dev->trans_start = jiffies;
216 @@ -1363,69 +1404,56 @@
217 struct net_device *dev = dev_instance;
218 struct rhine_private *rp = netdev_priv(dev);
219 void __iomem *ioaddr = rp->base;
220 - u32 intr_status;
221 - int boguscnt = max_interrupt_work;
222 int handled = 0;
223
224 - while ((intr_status = get_intr_status(dev))) {
225 + if ((rp->istat = (get_intr_status(dev) & rp->imask))) {
226 handled = 1;
227
228 /* Acknowledge all of the current interrupt sources ASAP. */
229 - if (intr_status & IntrTxDescRace)
230 + if (rp->istat & IntrTxDescRace)
231 iowrite8(0x08, ioaddr + IntrStatus2);
232 - iowrite16(intr_status & 0xffff, ioaddr + IntrStatus);
233 + iowrite16(rp->istat & 0xffff, ioaddr + IntrStatus);
234 IOSYNC;
235
236 - if (debug > 4)
237 - printk(KERN_DEBUG "%s: Interrupt, status %8.8x.\n",
238 - dev->name, intr_status);
239 + if (likely(rp->istat & ((IntrRxDone | IntrRxErr | IntrRxDropped |
240 + IntrRxWakeUp | IntrRxEmpty | IntrRxNoBuf |
241 + IntrTxErrSummary | IntrTxDone)))) {
242 +
243 + rhine_intr_disable(dev);
244
245 - if (intr_status & (IntrRxDone | IntrRxErr | IntrRxDropped |
246 - IntrRxWakeUp | IntrRxEmpty | IntrRxNoBuf))
247 - rhine_rx(dev);
248 -
249 - if (intr_status & (IntrTxErrSummary | IntrTxDone)) {
250 - if (intr_status & IntrTxErrSummary) {
251 - /* Avoid scavenging before Tx engine turned off */
252 - RHINE_WAIT_FOR(!(ioread8(ioaddr+ChipCmd) & CmdTxOn));
253 - if (debug > 2 &&
254 - ioread8(ioaddr+ChipCmd) & CmdTxOn)
255 - printk(KERN_WARNING "%s: "
256 - "rhine_interrupt() Tx engine"
257 - "still on.\n", dev->name);
258 - }
259 - rhine_tx(dev);
260 + if (likely(netif_rx_schedule_prep(dev)))
261 + __netif_rx_schedule(dev);
262 }
263
264 /* Abnormal error summary/uncommon events handlers. */
265 - if (intr_status & (IntrPCIErr | IntrLinkChange |
266 + if (unlikely(rp->istat & (IntrPCIErr | IntrLinkChange |
267 IntrStatsMax | IntrTxError | IntrTxAborted |
268 - IntrTxUnderrun | IntrTxDescRace))
269 - rhine_error(dev, intr_status);
270 -
271 - if (--boguscnt < 0) {
272 - printk(KERN_WARNING "%s: Too much work at interrupt, "
273 - "status=%#8.8x.\n",
274 - dev->name, intr_status);
275 - break;
276 - }
277 + IntrTxUnderrun | IntrTxDescRace)))
278 + rhine_error(dev);
279 }
280
281 - if (debug > 3)
282 - printk(KERN_DEBUG "%s: exiting interrupt, status=%8.8x.\n",
283 - dev->name, ioread16(ioaddr + IntrStatus));
284 return IRQ_RETVAL(handled);
285 }
286
287 /* This routine is logically part of the interrupt handler, but isolated
288 for clarity. */
289 -static void rhine_tx(struct net_device *dev)
290 +static int rhine_tx(struct net_device *dev)
291 {
292 struct rhine_private *rp = netdev_priv(dev);
293 int txstatus = 0, entry = rp->dirty_tx % TX_RING_SIZE;
294 + void __iomem *ioaddr = rp->base;
295 + int done = 0;
296
297 - spin_lock(&rp->lock);
298 + /* Avoid scavenging before Tx engine turned off */
299 + RHINE_WAIT_FOR(!(ioread8(ioaddr+ChipCmd) & CmdTxOn));
300 + if (debug > 2 &&
301 + ioread8(ioaddr+ChipCmd) & CmdTxOn)
302 + printk(KERN_WARNING "%s: "
303 + "rhine_interrupt() Tx engine "
304 + "still on.\n", dev->name);
305
306 +
307 + spin_lock_irq(&rp->lock);
308 /* find and cleanup dirty tx descriptors */
309 while (rp->dirty_tx != rp->cur_tx) {
310 txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status);
311 @@ -1462,6 +1490,7 @@
312 txstatus & 0xF);
313 rp->stats.tx_bytes += rp->tx_skbuff[entry]->len;
314 rp->stats.tx_packets++;
315 + done++;
316 }
317 /* Free the original skb. */
318 if (rp->tx_skbuff_dma[entry]) {
319 @@ -1470,23 +1499,25 @@
320 rp->tx_skbuff[entry]->len,
321 PCI_DMA_TODEVICE);
322 }
323 - dev_kfree_skb_irq(rp->tx_skbuff[entry]);
324 + dev_kfree_skb_any(rp->tx_skbuff[entry]);
325 rp->tx_skbuff[entry] = NULL;
326 entry = (++rp->dirty_tx) % TX_RING_SIZE;
327 }
328 + spin_unlock_irq(&rp->lock);
329 +
330 if ((rp->cur_tx - rp->dirty_tx) < TX_QUEUE_LEN - 4)
331 netif_wake_queue(dev);
332
333 - spin_unlock(&rp->lock);
334 + return done;
335 }
336
337 /* This routine is logically part of the interrupt handler, but isolated
338 for clarity and better register allocation. */
339 -static void rhine_rx(struct net_device *dev)
340 +static int rhine_rx(struct net_device *dev)
341 {
342 struct rhine_private *rp = netdev_priv(dev);
343 int entry = rp->cur_rx % RX_RING_SIZE;
344 - int boguscnt = rp->dirty_rx + RX_RING_SIZE - rp->cur_rx;
345 + int done = 0;
346
347 if (debug > 4) {
348 printk(KERN_DEBUG "%s: rhine_rx(), entry %d status %8.8x.\n",
349 @@ -1503,8 +1534,6 @@
350 if (debug > 4)
351 printk(KERN_DEBUG "rhine_rx() status is %8.8x.\n",
352 desc_status);
353 - if (--boguscnt < 0)
354 - break;
355 if ((desc_status & (RxWholePkt | RxErr)) != RxWholePkt) {
356 if ((desc_status & RxWholePkt) != RxWholePkt) {
357 printk(KERN_WARNING "%s: Oversized Ethernet "
358 @@ -1528,9 +1557,7 @@
359 if (desc_status & 0x0004) rp->stats.rx_frame_errors++;
360 if (desc_status & 0x0002) {
361 /* this can also be updated outside the interrupt handler */
362 - spin_lock(&rp->lock);
363 rp->stats.rx_crc_errors++;
364 - spin_unlock(&rp->lock);
365 }
366 }
367 } else {
368 @@ -1558,6 +1585,7 @@
369 rp->rx_buf_sz,
370 PCI_DMA_FROMDEVICE);
371 } else {
372 + int i;
373 skb = rp->rx_skbuff[entry];
374 if (skb == NULL) {
375 printk(KERN_ERR "%s: Inconsistent Rx "
376 @@ -1566,6 +1594,14 @@
377 break;
378 }
379 rp->rx_skbuff[entry] = NULL;
380 +
381 + /* align the data to the ip header - should be faster than using rx_copybreak */
382 + for (i = pkt_len - (pkt_len % 4); i >= 0; i -= 4) {
383 + put_unaligned(*((u32 *) (skb->data + i)), (u32 *) (skb->data + i + 2));
384 + }
385 + skb->data += 2;
386 + skb->tail += 2;
387 +
388 skb_put(skb, pkt_len);
389 pci_unmap_single(rp->pdev,
390 rp->rx_skbuff_dma[entry],
391 @@ -1573,10 +1609,11 @@
392 PCI_DMA_FROMDEVICE);
393 }
394 skb->protocol = eth_type_trans(skb, dev);
395 - netif_rx(skb);
396 + netif_receive_skb(skb);
397 dev->last_rx = jiffies;
398 rp->stats.rx_bytes += pkt_len;
399 rp->stats.rx_packets++;
400 + done++;
401 }
402 entry = (++rp->cur_rx) % RX_RING_SIZE;
403 rp->rx_head_desc = &rp->rx_ring[entry];
404 @@ -1587,7 +1624,7 @@
405 struct sk_buff *skb;
406 entry = rp->dirty_rx % RX_RING_SIZE;
407 if (rp->rx_skbuff[entry] == NULL) {
408 - skb = dev_alloc_skb(rp->rx_buf_sz);
409 + skb = dev_alloc_skb(rp->rx_buf_sz + 4);
410 rp->rx_skbuff[entry] = skb;
411 if (skb == NULL)
412 break; /* Better luck next round. */
413 @@ -1600,6 +1637,8 @@
414 }
415 rp->rx_ring[entry].rx_status = cpu_to_le32(DescOwn);
416 }
417 +
418 + return done;
419 }
420
421 /*
422 @@ -1649,11 +1688,11 @@
423
424 }
425
426 -static void rhine_error(struct net_device *dev, int intr_status)
427 +static void rhine_error(struct net_device *dev)
428 {
429 struct rhine_private *rp = netdev_priv(dev);
430 void __iomem *ioaddr = rp->base;
431 -
432 + u32 intr_status = rp->istat;
433 spin_lock(&rp->lock);
434
435 if (intr_status & IntrLinkChange)
436 @@ -1898,6 +1937,7 @@
437
438 /* Disable interrupts by clearing the interrupt mask. */
439 iowrite16(0x0000, ioaddr + IntrEnable);
440 + rp->imask = 0;
441
442 /* Stop the chip's Tx and Rx processes. */
443 iowrite16(CmdStop, ioaddr + ChipCmd);
444 @@ -1912,6 +1952,9 @@
445 free_tbufs(dev);
446 free_ring(dev);
447
448 + iowrite8(0x80, ioaddr + 0xa1);
449 + pci_set_power_state(rp->pdev, PCI_D3hot);
450 +
451 return 0;
452 }
453
454 @@ -1941,6 +1984,7 @@
455 return; /* Nothing to do for non-WOL adapters */
456
457 rhine_power_init(dev);
458 + netif_poll_disable(dev);
459
460 /* Make sure we use pattern 0, 1 and not 4, 5 */
461 if (rp->quirks & rq6patterns)