[kernel] update imq patches for 2.6.26, 2.6.27, 2.6.28, 2.6.29 and 2.6.30
[openwrt/svn-archive/archive.git] / target / linux / generic-2.6 / patches-2.6.26 / 150-netfilter_imq.patch
1 --- /dev/null
2 +++ b/drivers/net/imq.c
3 @@ -0,0 +1,565 @@
4 +/*
5 + * Pseudo-driver for the intermediate queue device.
6 + *
7 + * This program is free software; you can redistribute it and/or
8 + * modify it under the terms of the GNU General Public License
9 + * as published by the Free Software Foundation; either version
10 + * 2 of the License, or (at your option) any later version.
11 + *
12 + * Authors: Patrick McHardy, <kaber@trash.net>
13 + *
14 + * The first version was written by Martin Devera, <devik@cdi.cz>
15 + *
16 + * Credits: Jan Rafaj <imq2t@cedric.vabo.cz>
17 + * - Update patch to 2.4.21
18 + * Sebastian Strollo <sstrollo@nortelnetworks.com>
19 + * - Fix "Dead-loop on netdevice imq"-issue
20 + * Marcel Sebek <sebek64@post.cz>
21 + * - Update to 2.6.2-rc1
22 + *
23 + * After some time of inactivity there is a group taking care
24 + * of IMQ again: http://www.linuximq.net
25 + *
26 + *
27 + * 2004/06/30 - New version of IMQ patch to kernels <=2.6.7
28 + * including the following changes:
29 + *
30 + * - Correction of ipv6 support "+"s issue (Hasso Tepper)
31 + * - Correction of imq_init_devs() issue that resulted in
32 + * kernel OOPS unloading IMQ as module (Norbert Buchmuller)
33 + * - Addition of functionality to choose number of IMQ devices
34 + * during kernel config (Andre Correa)
35 + * - Addition of functionality to choose how IMQ hooks on
36 + * PRE and POSTROUTING (after or before NAT) (Andre Correa)
37 + * - Cosmetic corrections (Norbert Buchmuller) (Andre Correa)
38 + *
39 + *
40 + * 2005/12/16 - IMQ versions between 2.6.7 and 2.6.13 were
41 + * released with almost no problems. 2.6.14-x was released
42 + * with some important changes: nfcache was removed; After
43 + * some weeks of trouble we figured out that some IMQ fields
44 + * in skb were missing in skbuff.c - skb_clone and copy_skb_header.
45 + * These functions are correctly patched by this new patch version.
46 + *
47 + * Thanks for all who helped to figure out all the problems with
48 + * 2.6.14.x: Patrick McHardy, Rune Kock, VeNoMouS, Max CtRiX,
49 + * Kevin Shanahan, Richard Lucassen, Valery Dachev (hopefully
50 + * I didn't forget anybody). I apologize again for my lack of time.
51 + *
52 + *
53 + * 2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead
54 + * of qdisc_restart() and moved qdisc_run() to tasklet to avoid
55 + * recursive locking. New initialization routines to fix 'rmmod' not
56 + * working anymore. Used code from ifb.c. (Jussi Kivilinna)
57 + *
58 + * 2008/08/06 - 2.6.26 - (JK)
59 + * - Replaced tasklet with 'netif_schedule()'.
60 + * - Cleaned up and added comments for imq_nf_queue().
61 + *
62 + * 2009/05/02 - Backported 2.6.27 fixes to 2.6.26 (Jussi Kivilinna)
63 + * - Add skb_save_cb/skb_restore_cb helper functions for backing up the
64 + * control buffer. This is needed because some networking layers
65 + * on kernels before 2.6.27 overwrite the control buffer when they
66 + * should not. These erroneous uses (wireless for example) of cb
67 + * were found when qdisc-layer started using cb in 2.6.27. As we
68 + * don't want to break up any code, even if it's buggy, use
69 + * same backup-cb trick as used with 2.6.27-patch.
70 + * - Add better locking for IMQ device by using spin_lock_bh
71 + * instead of spin_lock. There was problem where NIC-interrupt
72 + * would happen while IMQ-spin_lock was held which could lead to
73 + * deadlock. Hopefully this will solve the SMP issues.
74 + * - Fix rmmod not working.
75 + * - Use netdevice feature flags to avoid extra packet handling
76 + * by core networking layer and possibly increase performance.
77 + *
78 + * Also, many thanks to pablo Sebastian Greco for making the initial
79 + * patch and to those who helped the testing.
80 + *
81 + * More info at: http://www.linuximq.net/ (Andre Correa)
82 + */
83 +
84 +#include <linux/module.h>
85 +#include <linux/kernel.h>
86 +#include <linux/moduleparam.h>
87 +#include <linux/list.h>
88 +#include <linux/skbuff.h>
89 +#include <linux/netdevice.h>
90 +#include <linux/etherdevice.h>
91 +#include <linux/rtnetlink.h>
92 +#include <linux/if_arp.h>
93 +#include <linux/netfilter.h>
94 +#include <linux/netfilter_ipv4.h>
95 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
96 + #include <linux/netfilter_ipv6.h>
97 +#endif
98 +#include <linux/imq.h>
99 +#include <net/pkt_sched.h>
100 +#include <net/netfilter/nf_queue.h>
101 +
102 +static nf_hookfn imq_nf_hook;
103 +
104 +static struct nf_hook_ops imq_ingress_ipv4 = {
105 + .hook = imq_nf_hook,
106 + .owner = THIS_MODULE,
107 + .pf = PF_INET,
108 + .hooknum = NF_INET_PRE_ROUTING,
109 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
110 + .priority = NF_IP_PRI_MANGLE + 1
111 +#else
112 + .priority = NF_IP_PRI_NAT_DST + 1
113 +#endif
114 +};
115 +
116 +static struct nf_hook_ops imq_egress_ipv4 = {
117 + .hook = imq_nf_hook,
118 + .owner = THIS_MODULE,
119 + .pf = PF_INET,
120 + .hooknum = NF_INET_POST_ROUTING,
121 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
122 + .priority = NF_IP_PRI_LAST
123 +#else
124 + .priority = NF_IP_PRI_NAT_SRC - 1
125 +#endif
126 +};
127 +
128 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
129 +static struct nf_hook_ops imq_ingress_ipv6 = {
130 + .hook = imq_nf_hook,
131 + .owner = THIS_MODULE,
132 + .pf = PF_INET6,
133 + .hooknum = NF_INET_PRE_ROUTING,
134 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
135 + .priority = NF_IP6_PRI_MANGLE + 1
136 +#else
137 + .priority = NF_IP6_PRI_NAT_DST + 1
138 +#endif
139 +};
140 +
141 +static struct nf_hook_ops imq_egress_ipv6 = {
142 + .hook = imq_nf_hook,
143 + .owner = THIS_MODULE,
144 + .pf = PF_INET6,
145 + .hooknum = NF_INET_POST_ROUTING,
146 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
147 + .priority = NF_IP6_PRI_LAST
148 +#else
149 + .priority = NF_IP6_PRI_NAT_SRC - 1
150 +#endif
151 +};
152 +#endif
153 +
154 +#if defined(CONFIG_IMQ_NUM_DEVS)
155 +static unsigned int numdevs = CONFIG_IMQ_NUM_DEVS;
156 +#else
157 +static unsigned int numdevs = IMQ_MAX_DEVS;
158 +#endif
159 +
160 +static DEFINE_SPINLOCK(imq_nf_queue_lock);
161 +
162 +static struct net_device *imq_devs_cache[IMQ_MAX_DEVS];
163 +
164 +static struct net_device_stats *imq_get_stats(struct net_device *dev)
165 +{
166 + return &dev->stats;
167 +}
168 +
169 +/* called for packets kfree'd in qdiscs at places other than enqueue */
170 +static void imq_skb_destructor(struct sk_buff *skb)
171 +{
172 + struct nf_queue_entry *entry = skb->nf_queue_entry;
173 +
174 + if (entry) {
175 + nf_queue_entry_release_refs(entry);
176 + kfree(entry);
177 + }
178 +
179 + skb_restore_cb(skb); /* kfree backup */
180 +}
181 +
182 +static void imq_nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
183 +{
184 + int status;
185 +
186 + if (!entry->next_outfn) {
187 + spin_lock_bh(&imq_nf_queue_lock);
188 + nf_reinject(entry, verdict);
189 + spin_unlock_bh(&imq_nf_queue_lock);
190 + return;
191 + }
192 +
193 + rcu_read_lock();
194 + local_bh_disable();
195 + status = entry->next_outfn(entry, entry->next_queuenum);
196 + local_bh_enable();
197 + if (status < 0) {
198 + nf_queue_entry_release_refs(entry);
199 + kfree_skb(entry->skb);
200 + kfree(entry);
201 + }
202 +
203 + rcu_read_unlock();
204 +}
205 +
206 +static int imq_dev_xmit(struct sk_buff *skb, struct net_device *dev)
207 +{
208 + dev->stats.tx_bytes += skb->len;
209 + dev->stats.tx_packets++;
210 +
211 + skb->imq_flags = 0;
212 + skb->destructor = NULL;
213 +
214 + skb_restore_cb(skb); /* restore skb->cb */
215 +
216 + dev->trans_start = jiffies;
217 + imq_nf_reinject(skb->nf_queue_entry, NF_ACCEPT);
218 + return 0;
219 +}
220 +
221 +static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num)
222 +{
223 + struct net_device *dev;
224 + struct sk_buff *skb_orig, *skb, *skb_shared;
225 + struct Qdisc *q;
226 + /*spinlock_t *root_lock;*/
227 + int users, index;
228 + int retval = -EINVAL;
229 +
230 + index = entry->skb->imq_flags & IMQ_F_IFMASK;
231 + if (unlikely(index > numdevs - 1)) {
232 + if (net_ratelimit())
233 + printk(KERN_WARNING
234 + "IMQ: invalid device specified, highest is %u\n",
235 + numdevs - 1);
236 + retval = -EINVAL;
237 + goto out;
238 + }
239 +
240 + /* check for imq device by index from cache */
241 + dev = imq_devs_cache[index];
242 + if (unlikely(!dev)) {
243 + char buf[8];
244 +
245 + /* get device by name and cache result */
246 + snprintf(buf, sizeof(buf), "imq%d", index);
247 + dev = dev_get_by_name(&init_net, buf);
248 + if (!dev) {
249 + /* not found ?!*/
250 + BUG();
251 + retval = -ENODEV;
252 + goto out;
253 + }
254 +
255 + imq_devs_cache[index] = dev;
256 + dev_put(dev);
257 + }
258 +
259 + if (unlikely(!(dev->flags & IFF_UP))) {
260 + entry->skb->imq_flags = 0;
261 + imq_nf_reinject(entry, NF_ACCEPT);
262 + retval = 0;
263 + goto out;
264 + }
265 + dev->last_rx = jiffies;
266 +
267 + skb = entry->skb;
268 + skb_orig = NULL;
269 +
270 + /* skb has owner? => make clone */
271 + if (unlikely(skb->destructor)) {
272 + skb_orig = skb;
273 + skb = skb_clone(skb, GFP_ATOMIC);
274 + if (!skb) {
275 + retval = -ENOMEM;
276 + goto out;
277 + }
278 + entry->skb = skb;
279 + }
280 +
281 + skb->nf_queue_entry = entry;
282 +
283 + dev->stats.rx_bytes += skb->len;
284 + dev->stats.rx_packets++;
285 +
286 + q = rcu_dereference(dev->qdisc);
287 + if (unlikely(!q->enqueue))
288 + goto packet_not_eaten_by_imq_dev;
289 +
290 + spin_lock_bh(&dev->queue_lock);
291 +
292 + users = atomic_read(&skb->users);
293 +
294 + skb_shared = skb_get(skb); /* increase reference count by one */
295 + skb_save_cb(skb_shared); /* backup skb->cb, as qdisc layer will
296 + overwrite it */
297 + q->enqueue(skb_shared, q); /* might kfree_skb */
298 +
299 + if (likely(atomic_read(&skb_shared->users) == users + 1)) {
300 + kfree_skb(skb_shared); /* decrease reference count by one */
301 +
302 + skb->destructor = &imq_skb_destructor;
303 +
304 + /* cloned? */
305 + if (skb_orig)
306 + kfree_skb(skb_orig); /* free original */
307 +
308 + /* schedule qdisc dequeue */
309 + netif_schedule(dev);
310 +
311 + spin_unlock_bh(&dev->queue_lock);
312 + retval = 0;
313 + goto out;
314 + } else {
315 + skb_restore_cb(skb_shared); /* restore skb->cb */
316 + /* qdisc dropped packet and decreased skb reference count of
317 + * skb, so we don't really want to and try refree as that would
318 + * actually destroy the skb. */
319 + spin_unlock_bh(&dev->queue_lock);
320 + goto packet_not_eaten_by_imq_dev;
321 + }
322 +
323 +packet_not_eaten_by_imq_dev:
324 + /* cloned? restore original */
325 + if (skb_orig) {
326 + kfree_skb(skb);
327 + entry->skb = skb_orig;
328 + }
329 + retval = -1;
330 +out:
331 + return retval;
332 +}
333 +
334 +static struct nf_queue_handler nfqh = {
335 + .name = "imq",
336 + .outfn = imq_nf_queue,
337 +};
338 +
339 +static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff *pskb,
340 + const struct net_device *indev,
341 + const struct net_device *outdev,
342 + int (*okfn)(struct sk_buff *))
343 +{
344 + if (pskb->imq_flags & IMQ_F_ENQUEUE)
345 + return NF_QUEUE;
346 +
347 + return NF_ACCEPT;
348 +}
349 +
350 +static int imq_close(struct net_device *dev)
351 +{
352 + netif_stop_queue(dev);
353 + return 0;
354 +}
355 +
356 +static int imq_open(struct net_device *dev)
357 +{
358 + netif_start_queue(dev);
359 + return 0;
360 +}
361 +
362 +static void imq_setup(struct net_device *dev)
363 +{
364 + dev->hard_start_xmit = imq_dev_xmit;
365 + dev->open = imq_open;
366 + dev->get_stats = imq_get_stats;
367 + dev->stop = imq_close;
368 + dev->type = ARPHRD_VOID;
369 + dev->mtu = 16000;
370 + dev->tx_queue_len = 11000;
371 + dev->flags = IFF_NOARP;
372 + dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
373 + NETIF_F_GSO | NETIF_F_HW_CSUM |
374 + NETIF_F_HIGHDMA;
375 +}
376 +
377 +static int imq_validate(struct nlattr *tb[], struct nlattr *data[])
378 +{
379 + int ret = 0;
380 +
381 + if (tb[IFLA_ADDRESS]) {
382 + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
383 + ret = -EINVAL;
384 + goto end;
385 + }
386 + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
387 + ret = -EADDRNOTAVAIL;
388 + goto end;
389 + }
390 + }
391 + return 0;
392 +end:
393 + printk(KERN_WARNING "IMQ: imq_validate failed (%d)\n", ret);
394 + return ret;
395 +}
396 +
397 +static struct rtnl_link_ops imq_link_ops __read_mostly = {
398 + .kind = "imq",
399 + .priv_size = 0,
400 + .setup = imq_setup,
401 + .validate = imq_validate,
402 +};
403 +
404 +static int __init imq_init_hooks(void)
405 +{
406 + int err;
407 +
408 + nf_register_queue_imq_handler(&nfqh);
409 +
410 + err = nf_register_hook(&imq_ingress_ipv4);
411 + if (err)
412 + goto err1;
413 +
414 + err = nf_register_hook(&imq_egress_ipv4);
415 + if (err)
416 + goto err2;
417 +
418 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
419 + err = nf_register_hook(&imq_ingress_ipv6);
420 + if (err)
421 + goto err3;
422 +
423 + err = nf_register_hook(&imq_egress_ipv6);
424 + if (err)
425 + goto err4;
426 +#endif
427 +
428 + return 0;
429 +
430 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
431 +err4:
432 + nf_unregister_hook(&imq_ingress_ipv6);
433 +err3:
434 + nf_unregister_hook(&imq_egress_ipv4);
435 +#endif
436 +err2:
437 + nf_unregister_hook(&imq_ingress_ipv4);
438 +err1:
439 + nf_unregister_queue_imq_handler();
440 + return err;
441 +}
442 +
443 +static int __init imq_init_one(int index)
444 +{
445 + struct net_device *dev;
446 + int ret;
447 +
448 + dev = alloc_netdev(0, "imq%d", imq_setup);
449 + if (!dev)
450 + return -ENOMEM;
451 +
452 + ret = dev_alloc_name(dev, dev->name);
453 + if (ret < 0)
454 + goto fail;
455 +
456 + dev->rtnl_link_ops = &imq_link_ops;
457 + ret = register_netdevice(dev);
458 + if (ret < 0)
459 + goto fail;
460 +
461 + return 0;
462 +fail:
463 + free_netdev(dev);
464 + return ret;
465 +}
466 +
467 +static int __init imq_init_devs(void)
468 +{
469 + int err, i;
470 +
471 + if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) {
472 + printk(KERN_ERR "IMQ: numdevs has to be between 1 and %u\n",
473 + IMQ_MAX_DEVS);
474 + return -EINVAL;
475 + }
476 +
477 + rtnl_lock();
478 + err = __rtnl_link_register(&imq_link_ops);
479 + /* create the imq devices one by one; stop at the first failure */
480 + for (i = 0; i < numdevs && !err; i++)
481 + err = imq_init_one(i);
482 +
483 + if (err) {
484 + __rtnl_link_unregister(&imq_link_ops);
485 + memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
486 + }
487 + rtnl_unlock();
488 +
489 + return err;
490 +}
491 +
492 +static int __init imq_init_module(void)
493 +{
494 + int err;
495 +
496 +#if defined(CONFIG_IMQ_NUM_DEVS)
497 + BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16);
498 + BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2);
499 + BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK);
500 +#endif
501 +
502 + err = imq_init_devs();
503 + if (err) {
504 + printk(KERN_ERR "IMQ: Error trying imq_init_devs(net)\n");
505 + return err;
506 + }
507 +
508 + err = imq_init_hooks();
509 + if (err) {
510 + printk(KERN_ERR "IMQ: Error trying imq_init_hooks()\n");
511 + rtnl_link_unregister(&imq_link_ops);
512 + memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
513 + return err;
514 + }
515 +
516 + printk(KERN_INFO "IMQ driver loaded successfully.\n");
517 +
518 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
519 + printk(KERN_INFO "\tHooking IMQ before NAT on PREROUTING.\n");
520 +#else
521 + printk(KERN_INFO "\tHooking IMQ after NAT on PREROUTING.\n");
522 +#endif
523 +#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB)
524 + printk(KERN_INFO "\tHooking IMQ before NAT on POSTROUTING.\n");
525 +#else
526 + printk(KERN_INFO "\tHooking IMQ after NAT on POSTROUTING.\n");
527 +#endif
528 +
529 + return 0;
530 +}
531 +
532 +static void __exit imq_unhook(void)
533 +{
534 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
535 + nf_unregister_hook(&imq_ingress_ipv6);
536 + nf_unregister_hook(&imq_egress_ipv6);
537 +#endif
538 + nf_unregister_hook(&imq_ingress_ipv4);
539 + nf_unregister_hook(&imq_egress_ipv4);
540 +
541 + nf_unregister_queue_imq_handler();
542 +}
543 +
544 +static void __exit imq_cleanup_devs(void)
545 +{
546 + rtnl_link_unregister(&imq_link_ops);
547 + memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
548 +}
549 +
550 +static void __exit imq_exit_module(void)
551 +{
552 + imq_unhook();
553 + imq_cleanup_devs();
554 + printk(KERN_INFO "IMQ driver unloaded successfully.\n");
555 +}
556 +
557 +module_init(imq_init_module);
558 +module_exit(imq_exit_module);
559 +
560 +module_param(numdevs, uint, 0); /* numdevs is declared unsigned int */
561 +MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will "
562 + "be created)");
563 +MODULE_AUTHOR("http://www.linuximq.net");
564 +MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See "
565 + "http://www.linuximq.net/ for more information.");
566 +MODULE_LICENSE("GPL");
567 +MODULE_ALIAS_RTNL_LINK("imq");
568 +
569 --- a/drivers/net/Kconfig
570 +++ b/drivers/net/Kconfig
571 @@ -117,6 +117,129 @@
572 To compile this driver as a module, choose M here: the module
573 will be called eql. If unsure, say N.
574
575 +config IMQ
576 + tristate "IMQ (intermediate queueing device) support"
577 + depends on NETDEVICES && NETFILTER
578 + ---help---
579 + The IMQ device(s) is used as placeholder for QoS queueing
580 + disciplines. Every packet entering/leaving the IP stack can be
581 + directed through the IMQ device where it's enqueued/dequeued to the
582 + attached qdisc. This allows you to treat network devices as classes
583 + and distribute bandwidth among them. Iptables is used to specify
584 + through which IMQ device, if any, packets travel.
585 +
586 + More information at: http://www.linuximq.net/
587 +
588 + To compile this driver as a module, choose M here: the module
589 + will be called imq. If unsure, say N.
590 +
591 +choice
592 + prompt "IMQ behavior (PRE/POSTROUTING)"
593 + depends on IMQ
594 + default IMQ_BEHAVIOR_AB
595 + help
596 +
597 + This setting defines how IMQ behaves with respect to its
598 + hooking in PREROUTING and POSTROUTING.
599 +
600 + IMQ can work in any of the following ways:
601 +
602 + PREROUTING | POSTROUTING
603 + -----------------|-------------------
604 + #1 After NAT | After NAT
605 + #2 After NAT | Before NAT
606 + #3 Before NAT | After NAT
607 + #4 Before NAT | Before NAT
608 +
609 + The default behavior is to hook after NAT on PREROUTING
610 + and before NAT on POSTROUTING (#2).
611 +
612 + These settings are especially useful when trying to use IMQ
613 + to shape NATed clients.
614 +
615 + More information can be found at: www.linuximq.net
616 +
617 + If not sure leave the default settings alone.
618 +
619 +config IMQ_BEHAVIOR_AA
620 + bool "IMQ AA"
621 + help
622 + This setting defines how IMQ behaves with respect to its
623 + hooking in PREROUTING and POSTROUTING.
624 +
625 + Choosing this option will make IMQ hook like this:
626 +
627 + PREROUTING: After NAT
628 + POSTROUTING: After NAT
629 +
630 + More information can be found at: www.linuximq.net
631 +
632 + If not sure leave the default settings alone.
633 +
634 +config IMQ_BEHAVIOR_AB
635 + bool "IMQ AB"
636 + help
637 + This setting defines how IMQ behaves with respect to its
638 + hooking in PREROUTING and POSTROUTING.
639 +
640 + Choosing this option will make IMQ hook like this:
641 +
642 + PREROUTING: After NAT
643 + POSTROUTING: Before NAT
644 +
645 + More information can be found at: www.linuximq.net
646 +
647 + If not sure leave the default settings alone.
648 +
649 +config IMQ_BEHAVIOR_BA
650 + bool "IMQ BA"
651 + help
652 + This setting defines how IMQ behaves with respect to its
653 + hooking in PREROUTING and POSTROUTING.
654 +
655 + Choosing this option will make IMQ hook like this:
656 +
657 + PREROUTING: Before NAT
658 + POSTROUTING: After NAT
659 +
660 + More information can be found at: www.linuximq.net
661 +
662 + If not sure leave the default settings alone.
663 +
664 +config IMQ_BEHAVIOR_BB
665 + bool "IMQ BB"
666 + help
667 + This setting defines how IMQ behaves with respect to its
668 + hooking in PREROUTING and POSTROUTING.
669 +
670 + Choosing this option will make IMQ hook like this:
671 +
672 + PREROUTING: Before NAT
673 + POSTROUTING: Before NAT
674 +
675 + More information can be found at: www.linuximq.net
676 +
677 + If not sure leave the default settings alone.
678 +
679 +endchoice
680 +
681 +config IMQ_NUM_DEVS
682 +
683 + int "Number of IMQ devices"
684 + range 2 16
685 + depends on IMQ
686 + default "16"
687 + help
688 +
689 + This setting defines how many IMQ devices will be
690 + created.
691 +
692 + The default value is 16.
693 +
694 + More information can be found at: www.linuximq.net
695 +
696 + If not sure leave the default settings alone.
697 +
698 config TUN
699 tristate "Universal TUN/TAP device driver support"
700 select CRC32
701 --- a/drivers/net/Makefile
702 +++ b/drivers/net/Makefile
703 @@ -142,6 +142,7 @@
704 obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
705
706 obj-$(CONFIG_DUMMY) += dummy.o
707 +obj-$(CONFIG_IMQ) += imq.o
708 obj-$(CONFIG_IFB) += ifb.o
709 obj-$(CONFIG_MACVLAN) += macvlan.o
710 obj-$(CONFIG_DE600) += de600.o
711 --- /dev/null
712 +++ b/include/linux/imq.h
713 @@ -0,0 +1,13 @@
714 +#ifndef _IMQ_H
715 +#define _IMQ_H
716 +
717 +/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */
718 +#define IMQ_F_BITS 5
719 +
720 +#define IMQ_F_IFMASK 0x0f
721 +#define IMQ_F_ENQUEUE 0x10
722 +
723 +#define IMQ_MAX_DEVS (IMQ_F_IFMASK + 1)
724 +
725 +#endif /* _IMQ_H */
726 +
727 --- /dev/null
728 +++ b/include/linux/netfilter_ipv4/ipt_IMQ.h
729 @@ -0,0 +1,10 @@
730 +#ifndef _IPT_IMQ_H
731 +#define _IPT_IMQ_H
732 +
733 +/* Backwards compatibility for old userspace */
734 +#include <linux/netfilter/xt_IMQ.h>
735 +
736 +#define ipt_imq_info xt_imq_info
737 +
738 +#endif /* _IPT_IMQ_H */
739 +
740 --- /dev/null
741 +++ b/include/linux/netfilter_ipv6/ip6t_IMQ.h
742 @@ -0,0 +1,10 @@
743 +#ifndef _IP6T_IMQ_H
744 +#define _IP6T_IMQ_H
745 +
746 +/* Backwards compatibility for old userspace */
747 +#include <linux/netfilter/xt_IMQ.h>
748 +
749 +#define ip6t_imq_info xt_imq_info
750 +
751 +#endif /* _IP6T_IMQ_H */
752 +
753 --- a/include/linux/skbuff.h
754 +++ b/include/linux/skbuff.h
755 @@ -28,6 +28,9 @@
756 #include <linux/rcupdate.h>
757 #include <linux/dmaengine.h>
758 #include <linux/hrtimer.h>
759 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
760 +#include <linux/imq.h>
761 +#endif
762
763 #define HAVE_ALLOC_SKB /* For the drivers to know */
764 #define HAVE_ALIGNABLE_SKB /* Ditto 8) */
765 @@ -270,6 +273,9 @@
766 * first. This is owned by whoever has the skb queued ATM.
767 */
768 char cb[48];
769 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
770 + void *cb_next;
771 +#endif
772
773 unsigned int len,
774 data_len;
775 @@ -300,6 +306,9 @@
776 struct nf_conntrack *nfct;
777 struct sk_buff *nfct_reasm;
778 #endif
779 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
780 + struct nf_queue_entry *nf_queue_entry;
781 +#endif
782 #ifdef CONFIG_BRIDGE_NETFILTER
783 struct nf_bridge_info *nf_bridge;
784 #endif
785 @@ -318,6 +327,9 @@
786 __u8 ndisc_nodetype:2;
787 #endif
788 /* 14 bit hole */
789 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
790 + __u8 imq_flags:IMQ_F_BITS;
791 +#endif
792
793 #ifdef CONFIG_NET_DMA
794 dma_cookie_t dma_cookie;
795 @@ -348,6 +360,12 @@
796
797 #include <asm/system.h>
798
799 +
800 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
801 +extern int skb_save_cb(struct sk_buff *skb);
802 +extern int skb_restore_cb(struct sk_buff *skb);
803 +#endif
804 +
805 extern void kfree_skb(struct sk_buff *skb);
806 extern void __kfree_skb(struct sk_buff *skb);
807 extern struct sk_buff *__alloc_skb(unsigned int size,
808 @@ -1633,6 +1651,10 @@
809 dst->nfct_reasm = src->nfct_reasm;
810 nf_conntrack_get_reasm(src->nfct_reasm);
811 #endif
812 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
813 + dst->imq_flags = src->imq_flags;
814 + dst->nf_queue_entry = src->nf_queue_entry;
815 +#endif
816 #ifdef CONFIG_BRIDGE_NETFILTER
817 dst->nf_bridge = src->nf_bridge;
818 nf_bridge_get(src->nf_bridge);
819 --- a/net/core/dev.c
820 +++ b/net/core/dev.c
821 @@ -95,6 +95,9 @@
822 #include <net/net_namespace.h>
823 #include <net/sock.h>
824 #include <linux/rtnetlink.h>
825 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
826 +#include <linux/imq.h>
827 +#endif
828 #include <linux/proc_fs.h>
829 #include <linux/seq_file.h>
830 #include <linux/stat.h>
831 @@ -1569,7 +1572,11 @@
832 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
833 {
834 if (likely(!skb->next)) {
835 - if (!list_empty(&ptype_all))
836 + if (!list_empty(&ptype_all)
837 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
838 + && !(skb->imq_flags & IMQ_F_ENQUEUE)
839 +#endif
840 + )
841 dev_queue_xmit_nit(skb, dev);
842
843 if (netif_needs_gso(dev, skb)) {
844 --- /dev/null
845 +++ b/include/linux/netfilter/xt_IMQ.h
846 @@ -0,0 +1,9 @@
847 +#ifndef _XT_IMQ_H
848 +#define _XT_IMQ_H
849 +
850 +struct xt_imq_info {
851 + unsigned int todev; /* target imq device */
852 +};
853 +
854 +#endif /* _XT_IMQ_H */
855 +
856 --- a/include/net/netfilter/nf_queue.h
857 +++ b/include/net/netfilter/nf_queue.h
858 @@ -13,6 +13,12 @@
859 struct net_device *indev;
860 struct net_device *outdev;
861 int (*okfn)(struct sk_buff *);
862 +
863 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
864 + int (*next_outfn)(struct nf_queue_entry *entry,
865 + unsigned int queuenum);
866 + unsigned int next_queuenum;
867 +#endif
868 };
869
870 #define nf_queue_entry_reroute(x) ((void *)x + sizeof(struct nf_queue_entry))
871 @@ -30,5 +36,11 @@
872 const struct nf_queue_handler *qh);
873 extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh);
874 extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
875 +extern void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
876 +
877 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
878 +extern void nf_register_queue_imq_handler(const struct nf_queue_handler *qh);
879 +extern void nf_unregister_queue_imq_handler(void);
880 +#endif
881
882 #endif /* _NF_QUEUE_H */
883 --- a/net/core/skbuff.c
884 +++ b/net/core/skbuff.c
885 @@ -71,6 +71,9 @@
886
887 static struct kmem_cache *skbuff_head_cache __read_mostly;
888 static struct kmem_cache *skbuff_fclone_cache __read_mostly;
889 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
890 +static struct kmem_cache *skbuff_cb_store_cache __read_mostly;
891 +#endif
892
893 static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
894 struct pipe_buffer *buf)
895 @@ -94,6 +97,81 @@
896 return 1;
897 }
898
899 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
900 +/* Control buffer save/restore for IMQ devices */
901 +struct skb_cb_table {
902 + void *cb_next;
903 + atomic_t refcnt;
904 + char cb[48];
905 +};
906 +
907 +static DEFINE_SPINLOCK(skb_cb_store_lock);
908 +
909 +int skb_save_cb(struct sk_buff *skb)
910 +{
911 + struct skb_cb_table *next;
912 +
913 + next = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC);
914 + if (!next)
915 + return -ENOMEM;
916 +
917 + BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
918 + /* snapshot the current cb and link it in front of earlier backups */
919 + memcpy(next->cb, skb->cb, sizeof(skb->cb));
920 + next->cb_next = skb->cb_next;
921 +
922 + atomic_set(&next->refcnt, 1);
923 +
924 + /* the new backup becomes the head of this skb's backup chain */
925 + skb->cb_next = next;
926 + return 0;
927 +}
928 +EXPORT_SYMBOL(skb_save_cb);
929 +
930 +int skb_restore_cb(struct sk_buff *skb)
931 +{
932 + struct skb_cb_table *next;
933 +
934 + if (!skb->cb_next)
935 + return 0;
936 +
937 + next = skb->cb_next;
938 +
939 + BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
940 +
941 + memcpy(skb->cb, next->cb, sizeof(skb->cb));
942 + skb->cb_next = next->cb_next;
943 +
944 + spin_lock(&skb_cb_store_lock);
945 +
946 + if (atomic_dec_and_test(&next->refcnt)) {
947 + kmem_cache_free(skbuff_cb_store_cache, next);
948 + }
949 +
950 + spin_unlock(&skb_cb_store_lock);
951 +
952 + return 0;
953 +}
954 +EXPORT_SYMBOL(skb_restore_cb);
955 +
956 +static void skb_copy_stored_cb(struct sk_buff *new, struct sk_buff *old)
957 +{
958 + struct skb_cb_table *next;
959 +
960 + if (!old->cb_next) {
961 + new->cb_next = NULL;
962 + return;
963 + }
964 +
965 + spin_lock(&skb_cb_store_lock);
966 + /* new shares old's backup chain head and takes its own reference */
967 + next = old->cb_next;
968 + atomic_inc(&next->refcnt);
969 + new->cb_next = next;
970 +
971 + spin_unlock(&skb_cb_store_lock);
972 +}
973 +#endif
974
975 /* Pipe buffer operations for a socket. */
976 static struct pipe_buf_operations sock_pipe_buf_ops = {
977 @@ -376,6 +454,15 @@
978 WARN_ON(in_irq());
979 skb->destructor(skb);
980 }
981 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
982 + /* This should not happen. When it does, avoid a memleak by popping
983 + every remaining cb backup; skb_restore_cb() drops one per call. */
984 + while(skb->cb_next != NULL) {
985 + printk(KERN_WARNING "kfree_skb: skb->cb_next: %p\n",
986 + skb->cb_next);
987 + skb_restore_cb(skb);
988 + }
989 +#endif
990 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
991 nf_conntrack_put(skb->nfct);
992 nf_conntrack_put_reasm(skb->nfct_reasm);
993 @@ -438,6 +525,9 @@
994 new->sp = secpath_get(old->sp);
995 #endif
996 memcpy(new->cb, old->cb, sizeof(old->cb));
997 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
998 + skb_copy_stored_cb(new, old);
999 +#endif
1000 new->csum_start = old->csum_start;
1001 new->csum_offset = old->csum_offset;
1002 new->local_df = old->local_df;
1003 @@ -2290,6 +2380,7 @@
1004 nskb->protocol = skb->protocol;
1005 nskb->dst = dst_clone(skb->dst);
1006 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
1007 + skb_copy_stored_cb(nskb, skb);
1008 nskb->pkt_type = skb->pkt_type;
1009 nskb->mac_len = skb->mac_len;
1010
1011 @@ -2371,6 +2462,13 @@
1012 0,
1013 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1014 NULL);
1015 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1016 + skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache",
1017 + sizeof(struct skb_cb_table),
1018 + 0,
1019 + SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1020 + NULL);
1021 +#endif
1022 }
1023
1024 /**
1025 --- a/net/netfilter/Kconfig
1026 +++ b/net/netfilter/Kconfig
1027 @@ -334,6 +334,18 @@
1028
1029 To compile it as a module, choose M here. If unsure, say N.
1030
1031 +config NETFILTER_XT_TARGET_IMQ
1032 + tristate '"IMQ" target support'
1033 + depends on NETFILTER_XTABLES
1034 + depends on IP_NF_MANGLE || IP6_NF_MANGLE
1035 + select IMQ
1036 + default m if NETFILTER_ADVANCED=n
1037 + help
1038 + This option adds an `IMQ' target which is used to specify if and
1039 + to which imq device packets should get enqueued/dequeued.
1040 +
1041 + To compile it as a module, choose M here. If unsure, say N.
1042 +
1043 config NETFILTER_XT_TARGET_MARK
1044 tristate '"MARK" target support'
1045 depends on NETFILTER_XTABLES
1046 --- a/net/netfilter/Makefile
1047 +++ b/net/netfilter/Makefile
1048 @@ -42,6 +42,7 @@
1049 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
1050 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
1051 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
1052 +obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o
1053 obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
1054 obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
1055 obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
1056 --- a/net/netfilter/nf_queue.c
1057 +++ b/net/netfilter/nf_queue.c
1058 @@ -20,6 +20,26 @@
1059
1060 static DEFINE_MUTEX(queue_handler_mutex);
1061
1062 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1063 +static const struct nf_queue_handler *queue_imq_handler;
1064 +
1065 +void nf_register_queue_imq_handler(const struct nf_queue_handler *qh)
1066 +{
1067 + mutex_lock(&queue_handler_mutex);
1068 + rcu_assign_pointer(queue_imq_handler, qh);
1069 + mutex_unlock(&queue_handler_mutex);
1070 +}
1071 +EXPORT_SYMBOL(nf_register_queue_imq_handler);
1072 +
1073 +void nf_unregister_queue_imq_handler(void) /* clear the IMQ queue handler */
1074 +{
1075 + mutex_lock(&queue_handler_mutex); /* same mutex as the register path */
1076 + rcu_assign_pointer(queue_imq_handler, NULL); /* later rcu_dereference() readers see no IMQ handler */
1077 + mutex_unlock(&queue_handler_mutex); /* NOTE(review): no synchronize_rcu() follows; confirm the caller waits for in-flight readers before freeing the handler */
1078 +}
1079 +EXPORT_SYMBOL(nf_unregister_queue_imq_handler);
1080 +#endif
1081 +
1082 /* return EBUSY when somebody else is registered, return EEXIST if the
1083 * same handler is registered, return 0 in case of success. */
1084 int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh)
1085 @@ -80,7 +100,7 @@
1086 }
1087 EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
1088
1089 -static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
1090 +void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
1091 {
1092 /* Release those devices we held, or Alexey will kill me. */
1093 if (entry->indev)
1094 @@ -100,6 +120,7 @@
1095 /* Drop reference to owner of hook which queued us. */
1096 module_put(entry->elem->owner);
1097 }
1098 +EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
1099
1100 /*
1101 * Any packet that leaves via this function must come back
1102 @@ -121,12 +142,26 @@
1103 #endif
1104 const struct nf_afinfo *afinfo;
1105 const struct nf_queue_handler *qh;
1106 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1107 + const struct nf_queue_handler *qih = NULL;
1108 +#endif
1109
1110 /* QUEUE == DROP if noone is waiting, to be safe. */
1111 rcu_read_lock();
1112
1113 qh = rcu_dereference(queue_handler[pf]);
1114 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1115 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1116 + if (pf == PF_INET || pf == PF_INET6)
1117 +#else
1118 + if (pf == PF_INET)
1119 +#endif
1120 + qih = rcu_dereference(queue_imq_handler);
1121 +
1122 + if (!qh && !qih)
1123 +#else /* !IMQ */
1124 if (!qh)
1125 +#endif
1126 goto err_unlock;
1127
1128 afinfo = nf_get_afinfo(pf);
1129 @@ -145,6 +180,10 @@
1130 .indev = indev,
1131 .outdev = outdev,
1132 .okfn = okfn,
1133 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1134 + .next_outfn = qh ? qh->outfn : NULL,
1135 + .next_queuenum = queuenum,
1136 +#endif
1137 };
1138
1139 /* If it's going away, ignore hook. */
1140 @@ -170,8 +209,19 @@
1141 }
1142 #endif
1143 afinfo->saveroute(skb, entry);
1144 +
1145 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1146 + if (qih) {
1147 + status = qih->outfn(entry, queuenum);
1148 + goto imq_skip_queue;
1149 + }
1150 +#endif
1151 +
1152 status = qh->outfn(entry, queuenum);
1153
1154 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1155 +imq_skip_queue:
1156 +#endif
1157 rcu_read_unlock();
1158
1159 if (status < 0) {
1160 --- /dev/null
1161 +++ b/net/netfilter/xt_IMQ.c
1162 @@ -0,0 +1,81 @@
1163 +/*
1164 + * This target marks packets to be enqueued to an imq device
1165 + */
1166 +#include <linux/module.h>
1167 +#include <linux/skbuff.h>
1168 +#include <linux/netfilter/x_tables.h>
1169 +#include <linux/netfilter/xt_IMQ.h>
1170 +#include <linux/imq.h>
1171 +
1172 +static unsigned int imq_target(struct sk_buff *pskb, /* packet to tag */
1173 + const struct net_device *in, /* unused */
1174 + const struct net_device *out, /* unused */
1175 + unsigned int hooknum, /* unused */
1176 + const struct xt_target *target, /* unused */
1177 + const void *targinfo) /* read as struct xt_imq_info */
1178 +{
1179 + const struct xt_imq_info *mr = targinfo;
1180 +
1181 + pskb->imq_flags = (mr->todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE; /* masked device index plus the enqueue flag */
1182 +
1183 + return XT_CONTINUE; /* always XT_CONTINUE: this target only tags the skb */
1184 +}
1185 +
1186 +static bool imq_checkentry(const char *tablename, /* unused */
1187 + const void *entry, /* unused */
1188 + const struct xt_target *target, /* unused */
1189 + void *targinfo, /* read as struct xt_imq_info */
1190 + unsigned int hook_mask) /* unused */
1191 +{
1192 + const struct xt_imq_info *mr = targinfo; /* only read here, so view it as const */
1193 +
1194 + if (mr->todev > IMQ_MAX_DEVS - 1) { /* device index beyond the highest allowed */
1195 + printk(KERN_WARNING
1196 + "IMQ: invalid device specified, highest is %u\n",
1197 + IMQ_MAX_DEVS - 1);
1198 + return false; /* validation failed */
1199 + }
1200 +
1201 + return true; /* todev is within 0..IMQ_MAX_DEVS-1 */
1202 +}
1203 +
1204 +static struct xt_target xt_imq_reg[] __read_mostly = { /* one "IMQ" target entry per address family */
1205 + {
1206 + .name = "IMQ",
1207 + .family = AF_INET, /* IPv4 entry */
1208 + .target = imq_target,
1209 + .targetsize = sizeof(struct xt_imq_info),
1210 + .table = "mangle",
1211 + .checkentry = imq_checkentry,
1212 + .me = THIS_MODULE
1213 + },
1214 + {
1215 + .name = "IMQ",
1216 + .family = AF_INET6, /* IPv6 entry; otherwise identical to the one above */
1217 + .target = imq_target,
1218 + .targetsize = sizeof(struct xt_imq_info),
1219 + .table = "mangle",
1220 + .checkentry = imq_checkentry,
1221 + .me = THIS_MODULE
1222 + },
1223 +};
1224 +
1225 +static int __init imq_init(void) /* module entry point */
1226 +{
1227 + return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg)); /* registers both family entries; result is passed straight back */
1228 +}
1229 +
1230 +static void __exit imq_fini(void) /* module exit point */
1231 +{
1232 + xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg)); /* removes the same entries imq_init() registered */
1233 +}
1234 +
1235 +module_init(imq_init);
1236 +module_exit(imq_fini);
1237 +
1238 +MODULE_AUTHOR("http://www.linuximq.net");
1239 +MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See http://www.linuximq.net/ for more information."); /* NOTE(review): wording describes the imq device driver, while this file implements the IMQ xt target; confirm it is intended */
1240 +MODULE_LICENSE("GPL");
1241 +MODULE_ALIAS("ipt_IMQ"); /* presumably the name used to autoload for IPv4 rules; confirm */
1242 +MODULE_ALIAS("ip6t_IMQ"); /* presumably the IPv6 counterpart; confirm */
1243 +