2 +++ b/drivers/net/imq.c
5 + * Pseudo-driver for the intermediate queue device.
7 + * This program is free software; you can redistribute it and/or
8 + * modify it under the terms of the GNU General Public License
9 + * as published by the Free Software Foundation; either version
10 + * 2 of the License, or (at your option) any later version.
12 + * Authors: Patrick McHardy, <kaber@trash.net>
14 + * The first version was written by Martin Devera, <devik@cdi.cz>
16 + * Credits: Jan Rafaj <imq2t@cedric.vabo.cz>
17 + * - Update patch to 2.4.21
18 + * Sebastian Strollo <sstrollo@nortelnetworks.com>
19 + * - Fix "Dead-loop on netdevice imq"-issue
20 + * Marcel Sebek <sebek64@post.cz>
21 + * - Update to 2.6.2-rc1
23 + * After some time of inactivity there is a group taking care
24 + * of IMQ again: http://www.linuximq.net
27 + * 2004/06/30 - New version of IMQ patch to kernels <=2.6.7
28 + * including the following changes:
30 + * - Correction of ipv6 support "+"s issue (Hasso Tepper)
31 + * - Correction of imq_init_devs() issue that resulted in
32 + * kernel OOPS unloading IMQ as module (Norbert Buchmuller)
33 + * - Addition of functionality to choose number of IMQ devices
34 + * during kernel config (Andre Correa)
35 + * - Addition of functionality to choose how IMQ hooks on
36 + * PRE and POSTROUTING (after or before NAT) (Andre Correa)
37 + * - Cosmetic corrections (Norbert Buchmuller) (Andre Correa)
40 + * 2005/12/16 - IMQ versions between 2.6.7 and 2.6.13 were
41 + * released with almost no problems. 2.6.14-x was released
42 + * with some important changes: nfcache was removed; After
43 + * some weeks of trouble we figured out that some IMQ fields
44 + * in skb were missing in skbuff.c - skb_clone and copy_skb_header.
45 + * These functions are correctly patched by this new patch version.
47 + * Thanks to all who helped to figure out all the problems with
48 + * 2.6.14.x: Patrick McHardy, Rune Kock, VeNoMouS, Max CtRiX,
49 + * Kevin Shanahan, Richard Lucassen, Valery Dachev (hopefully
50 + * I didn't forget anybody). I apologize again for my lack of time.
53 + * 2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead
54 + * of qdisc_restart() and moved qdisc_run() to tasklet to avoid
55 + * recursive locking. New initialization routines to fix 'rmmod' not
56 + * working anymore. Used code from ifb.c. (Jussi Kivilinna)
58 + * 2008/08/06 - 2.6.26 - (JK)
59 + * - Replaced tasklet with 'netif_schedule()'.
60 + * - Cleaned up and added comments for imq_nf_queue().
62 + * 2009/05/02 - Backported 2.6.27 fixes to 2.6.26 (Jussi Kivilinna)
63 + * - Add skb_save_cb/skb_restore_cb helper functions for backing up
64 + * control buffer. This is needed because some networking layers
65 + * on kernels before 2.6.27 overwrite control buffer when they
66 + * should not. These erroneous uses (wireless for example) of cb
67 + * were found when qdisc-layer started using cb in 2.6.27. As we
68 + * don't want to break up any code, even if it's buggy, use
69 + * same backup-cb trick as used with 2.6.27-patch.
70 + * - Add better locking for IMQ device by using spin_lock_bh
71 + * instead of spin_lock. There was a problem where a NIC interrupt
72 + * would happen while IMQ-spin_lock was held which could lead to
73 + * deadlock. Hopefully this will solve the SMP issues.
74 + * - Fix rmmod not working.
75 + * - Use netdevice feature flags to avoid extra packet handling
76 + * by core networking layer and possibly increase performance.
78 + * Also, many thanks to Pablo Sebastian Greco for making the initial
79 + * patch and to those who helped the testing.
81 + * More info at: http://www.linuximq.net/ (Andre Correa)
84 +#include <linux/module.h>
85 +#include <linux/kernel.h>
86 +#include <linux/moduleparam.h>
87 +#include <linux/list.h>
88 +#include <linux/skbuff.h>
89 +#include <linux/netdevice.h>
90 +#include <linux/etherdevice.h>
91 +#include <linux/rtnetlink.h>
92 +#include <linux/if_arp.h>
93 +#include <linux/netfilter.h>
94 +#include <linux/netfilter_ipv4.h>
95 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
96 + #include <linux/netfilter_ipv6.h>
98 +#include <linux/imq.h>
99 +#include <net/pkt_sched.h>
100 +#include <net/netfilter/nf_queue.h>
102 +static nf_hookfn imq_nf_hook;
104 +static struct nf_hook_ops imq_ingress_ipv4 = {
105 + .hook = imq_nf_hook,
106 + .owner = THIS_MODULE,
108 + .hooknum = NF_INET_PRE_ROUTING,
109 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
110 + .priority = NF_IP_PRI_MANGLE + 1
112 + .priority = NF_IP_PRI_NAT_DST + 1
116 +static struct nf_hook_ops imq_egress_ipv4 = {
117 + .hook = imq_nf_hook,
118 + .owner = THIS_MODULE,
120 + .hooknum = NF_INET_POST_ROUTING,
121 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
122 + .priority = NF_IP_PRI_LAST
124 + .priority = NF_IP_PRI_NAT_SRC - 1
128 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
129 +static struct nf_hook_ops imq_ingress_ipv6 = {
130 + .hook = imq_nf_hook,
131 + .owner = THIS_MODULE,
133 + .hooknum = NF_INET_PRE_ROUTING,
134 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
135 + .priority = NF_IP6_PRI_MANGLE + 1
137 + .priority = NF_IP6_PRI_NAT_DST + 1
141 +static struct nf_hook_ops imq_egress_ipv6 = {
142 + .hook = imq_nf_hook,
143 + .owner = THIS_MODULE,
145 + .hooknum = NF_INET_POST_ROUTING,
146 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
147 + .priority = NF_IP6_PRI_LAST
149 + .priority = NF_IP6_PRI_NAT_SRC - 1
154 +#if defined(CONFIG_IMQ_NUM_DEVS)
155 +static unsigned int numdevs = CONFIG_IMQ_NUM_DEVS;
157 +static unsigned int numdevs = IMQ_MAX_DEVS;
160 +static DEFINE_SPINLOCK(imq_nf_queue_lock);
162 +static struct net_device *imq_devs_cache[IMQ_MAX_DEVS];
164 +static struct net_device_stats *imq_get_stats(struct net_device *dev)
166 + return &dev->stats;
169 +/* called for packets kfree'd in qdiscs at places other than enqueue */
170 +static void imq_skb_destructor(struct sk_buff *skb)
172 + struct nf_queue_entry *entry = skb->nf_queue_entry;
175 + nf_queue_entry_release_refs(entry);
179 + skb_restore_cb(skb); /* kfree backup */
182 +static void imq_nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
186 + if (!entry->next_outfn) {
187 + spin_lock_bh(&imq_nf_queue_lock);
188 + nf_reinject(entry, verdict);
189 + spin_unlock_bh(&imq_nf_queue_lock);
194 + local_bh_disable();
195 + status = entry->next_outfn(entry, entry->next_queuenum);
198 + nf_queue_entry_release_refs(entry);
199 + kfree_skb(entry->skb);
206 +static int imq_dev_xmit(struct sk_buff *skb, struct net_device *dev)
208 + dev->stats.tx_bytes += skb->len;
209 + dev->stats.tx_packets++;
211 + skb->imq_flags = 0;
212 + skb->destructor = NULL;
214 + skb_restore_cb(skb); /* restore skb->cb */
216 + dev->trans_start = jiffies;
217 + imq_nf_reinject(skb->nf_queue_entry, NF_ACCEPT);
221 +static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num)
223 + struct net_device *dev;
224 + struct sk_buff *skb_orig, *skb, *skb_shared;
226 + /*spinlock_t *root_lock;*/
228 + int retval = -EINVAL;
230 + index = entry->skb->imq_flags & IMQ_F_IFMASK;
231 + if (unlikely(index > numdevs - 1)) {
232 + if (net_ratelimit())
233 + printk(KERN_WARNING
234 + "IMQ: invalid device specified, highest is %u\n",
240 + /* check for imq device by index from cache */
241 + dev = imq_devs_cache[index];
242 + if (unlikely(!dev)) {
245 + /* get device by name and cache result */
246 + snprintf(buf, sizeof(buf), "imq%d", index);
247 + dev = dev_get_by_name(&init_net, buf);
255 + imq_devs_cache[index] = dev;
259 + if (unlikely(!(dev->flags & IFF_UP))) {
260 + entry->skb->imq_flags = 0;
261 + imq_nf_reinject(entry, NF_ACCEPT);
265 + dev->last_rx = jiffies;
270 + /* skb has owner? => make clone */
271 + if (unlikely(skb->destructor)) {
273 + skb = skb_clone(skb, GFP_ATOMIC);
281 + skb->nf_queue_entry = entry;
283 + dev->stats.rx_bytes += skb->len;
284 + dev->stats.rx_packets++;
286 + q = rcu_dereference(dev->qdisc);
287 + if (unlikely(!q->enqueue))
288 + goto packet_not_eaten_by_imq_dev;
290 + spin_lock_bh(&dev->queue_lock);
292 + users = atomic_read(&skb->users);
294 + skb_shared = skb_get(skb); /* increase reference count by one */
295 + skb_save_cb(skb_shared); /* backup skb->cb, as qdisc layer will
297 + q->enqueue(skb_shared, q); /* might kfree_skb */
299 + if (likely(atomic_read(&skb_shared->users) == users + 1)) {
300 + kfree_skb(skb_shared); /* decrease reference count by one */
302 + skb->destructor = &imq_skb_destructor;
306 + kfree_skb(skb_orig); /* free original */
308 + /* schedule qdisc dequeue */
309 + netif_schedule(dev);
311 + spin_unlock_bh(&dev->queue_lock);
315 + skb_restore_cb(skb_shared); /* restore skb->cb */
316 + /* qdisc dropped packet and decreased skb reference count of
317 + * skb, so we must not try to free it again here, as that would
318 + * actually destroy the skb. */
319 + spin_unlock_bh(&dev->queue_lock);
320 + goto packet_not_eaten_by_imq_dev;
323 +packet_not_eaten_by_imq_dev:
324 + /* cloned? restore original */
327 + entry->skb = skb_orig;
334 +static struct nf_queue_handler nfqh = {
336 + .outfn = imq_nf_queue,
339 +static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff *pskb,
340 + const struct net_device *indev,
341 + const struct net_device *outdev,
342 + int (*okfn)(struct sk_buff *))
344 + if (pskb->imq_flags & IMQ_F_ENQUEUE)
350 +static int imq_close(struct net_device *dev)
352 + netif_stop_queue(dev);
356 +static int imq_open(struct net_device *dev)
358 + netif_start_queue(dev);
362 +static void imq_setup(struct net_device *dev)
364 + dev->hard_start_xmit = imq_dev_xmit;
365 + dev->open = imq_open;
366 + dev->get_stats = imq_get_stats;
367 + dev->stop = imq_close;
368 + dev->type = ARPHRD_VOID;
370 + dev->tx_queue_len = 11000;
371 + dev->flags = IFF_NOARP;
372 + dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
373 + NETIF_F_GSO | NETIF_F_HW_CSUM |
377 +static int imq_validate(struct nlattr *tb[], struct nlattr *data[])
381 + if (tb[IFLA_ADDRESS]) {
382 + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
386 + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
387 + ret = -EADDRNOTAVAIL;
393 + printk(KERN_WARNING "IMQ: imq_validate failed (%d)\n", ret);
397 +static struct rtnl_link_ops imq_link_ops __read_mostly = {
400 + .setup = imq_setup,
401 + .validate = imq_validate,
404 +static int __init imq_init_hooks(void)
408 + nf_register_queue_imq_handler(&nfqh);
410 + err = nf_register_hook(&imq_ingress_ipv4);
414 + err = nf_register_hook(&imq_egress_ipv4);
418 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
419 + err = nf_register_hook(&imq_ingress_ipv6);
423 + err = nf_register_hook(&imq_egress_ipv6);
430 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
432 + nf_unregister_hook(&imq_ingress_ipv6);
434 + nf_unregister_hook(&imq_egress_ipv4);
437 + nf_unregister_hook(&imq_ingress_ipv4);
439 + nf_unregister_queue_imq_handler();
443 +static int __init imq_init_one(int index)
445 + struct net_device *dev;
448 + dev = alloc_netdev(0, "imq%d", imq_setup);
452 + ret = dev_alloc_name(dev, dev->name);
456 + dev->rtnl_link_ops = &imq_link_ops;
457 + ret = register_netdevice(dev);
467 +static int __init imq_init_devs(void)
471 + if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) {
472 + printk(KERN_ERR "IMQ: numdevs has to be betweed 1 and %u\n",
478 + err = __rtnl_link_register(&imq_link_ops);
480 + for (i = 0; i < numdevs && !err; i++)
481 + err = imq_init_one(i);
484 + __rtnl_link_unregister(&imq_link_ops);
485 + memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
492 +static int __init imq_init_module(void)
496 +#if defined(CONFIG_IMQ_NUM_DEVS)
497 + BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16);
498 + BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2);
499 + BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK);
502 + err = imq_init_devs();
504 + printk(KERN_ERR "IMQ: Error trying imq_init_devs(net)\n");
508 + err = imq_init_hooks();
510 + printk(KERN_ERR "IMQ: Error trying imq_init_hooks()\n");
511 + rtnl_link_unregister(&imq_link_ops);
512 + memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
516 + printk(KERN_INFO "IMQ driver loaded successfully.\n");
518 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
519 + printk(KERN_INFO "\tHooking IMQ before NAT on PREROUTING.\n");
521 + printk(KERN_INFO "\tHooking IMQ after NAT on PREROUTING.\n");
523 +#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB)
524 + printk(KERN_INFO "\tHooking IMQ before NAT on POSTROUTING.\n");
526 + printk(KERN_INFO "\tHooking IMQ after NAT on POSTROUTING.\n");
532 +static void __exit imq_unhook(void)
534 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
535 + nf_unregister_hook(&imq_ingress_ipv6);
536 + nf_unregister_hook(&imq_egress_ipv6);
538 + nf_unregister_hook(&imq_ingress_ipv4);
539 + nf_unregister_hook(&imq_egress_ipv4);
541 + nf_unregister_queue_imq_handler();
544 +static void __exit imq_cleanup_devs(void)
546 + rtnl_link_unregister(&imq_link_ops);
547 + memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
550 +static void __exit imq_exit_module(void)
553 + imq_cleanup_devs();
554 + printk(KERN_INFO "IMQ driver unloaded successfully.\n");
557 +module_init(imq_init_module);
558 +module_exit(imq_exit_module);
560 +module_param(numdevs, uint, 0); /* uint: numdevs is unsigned int */
561 +MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will "
563 +MODULE_AUTHOR("http://www.linuximq.net");
564 +MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See "
565 + "http://www.linuximq.net/ for more information.");
566 +MODULE_LICENSE("GPL");
567 +MODULE_ALIAS_RTNL_LINK("imq");
569 --- a/drivers/net/Kconfig
570 +++ b/drivers/net/Kconfig
571 @@ -117,6 +117,129 @@
572 To compile this driver as a module, choose M here: the module
573 will be called eql. If unsure, say N.
576 + tristate "IMQ (intermediate queueing device) support"
577 + depends on NETDEVICES && NETFILTER
579 + The IMQ device(s) is used as placeholder for QoS queueing
580 + disciplines. Every packet entering/leaving the IP stack can be
581 + directed through the IMQ device where it's enqueued/dequeued to the
582 + attached qdisc. This allows you to treat network devices as classes
583 + and distribute bandwidth among them. Iptables is used to specify
584 + through which IMQ device, if any, packets travel.
586 + More information at: http://www.linuximq.net/
588 + To compile this driver as a module, choose M here: the module
589 + will be called imq. If unsure, say N.
592 + prompt "IMQ behavior (PRE/POSTROUTING)"
594 + default IMQ_BEHAVIOR_AB
597 + This setting defines how IMQ behaves with respect to its
598 + hooking in PREROUTING and POSTROUTING.
600 + IMQ can work in any of the following ways:
602 + PREROUTING | POSTROUTING
603 + -----------------|-------------------
604 + #1 After NAT | After NAT
605 + #2 After NAT | Before NAT
606 + #3 Before NAT | After NAT
607 + #4 Before NAT | Before NAT
609 + The default behavior is to hook after NAT on PREROUTING
610 + and before NAT on POSTROUTING (#2).
612 + These settings are especially useful when trying to use IMQ
613 + to shape NATed clients.
615 + More information can be found at: www.linuximq.net
617 + If not sure leave the default settings alone.
619 +config IMQ_BEHAVIOR_AA
622 + This setting defines how IMQ behaves with respect to its
623 + hooking in PREROUTING and POSTROUTING.
625 + Choosing this option will make IMQ hook like this:
627 + PREROUTING: After NAT
628 + POSTROUTING: After NAT
630 + More information can be found at: www.linuximq.net
632 + If not sure leave the default settings alone.
634 +config IMQ_BEHAVIOR_AB
637 + This setting defines how IMQ behaves with respect to its
638 + hooking in PREROUTING and POSTROUTING.
640 + Choosing this option will make IMQ hook like this:
642 + PREROUTING: After NAT
643 + POSTROUTING: Before NAT
645 + More information can be found at: www.linuximq.net
647 + If not sure leave the default settings alone.
649 +config IMQ_BEHAVIOR_BA
652 + This setting defines how IMQ behaves with respect to its
653 + hooking in PREROUTING and POSTROUTING.
655 + Choosing this option will make IMQ hook like this:
657 + PREROUTING: Before NAT
658 + POSTROUTING: After NAT
660 + More information can be found at: www.linuximq.net
662 + If not sure leave the default settings alone.
664 +config IMQ_BEHAVIOR_BB
667 + This setting defines how IMQ behaves with respect to its
668 + hooking in PREROUTING and POSTROUTING.
670 + Choosing this option will make IMQ hook like this:
672 + PREROUTING: Before NAT
673 + POSTROUTING: Before NAT
675 + More information can be found at: www.linuximq.net
677 + If not sure leave the default settings alone.
683 + int "Number of IMQ devices"
689 + This setting defines how many IMQ devices will be
692 + The default value is 16.
694 + More information can be found at: www.linuximq.net
696 + If not sure leave the default settings alone.
699 tristate "Universal TUN/TAP device driver support"
701 --- a/drivers/net/Makefile
702 +++ b/drivers/net/Makefile
704 obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
706 obj-$(CONFIG_DUMMY) += dummy.o
707 +obj-$(CONFIG_IMQ) += imq.o
708 obj-$(CONFIG_IFB) += ifb.o
709 obj-$(CONFIG_MACVLAN) += macvlan.o
710 obj-$(CONFIG_DE600) += de600.o
712 +++ b/include/linux/imq.h
717 +/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */
718 +#define IMQ_F_BITS 5
720 +#define IMQ_F_IFMASK 0x0f
721 +#define IMQ_F_ENQUEUE 0x10
723 +#define IMQ_MAX_DEVS (IMQ_F_IFMASK + 1)
728 +++ b/include/linux/netfilter_ipv4/ipt_IMQ.h
733 +/* Backwards compatibility for old userspace */
734 +#include <linux/netfilter/xt_IMQ.h>
736 +#define ipt_imq_info xt_imq_info
738 +#endif /* _IPT_IMQ_H */
741 +++ b/include/linux/netfilter_ipv6/ip6t_IMQ.h
746 +/* Backwards compatibility for old userspace */
747 +#include <linux/netfilter/xt_IMQ.h>
749 +#define ip6t_imq_info xt_imq_info
751 +#endif /* _IP6T_IMQ_H */
753 --- a/include/linux/skbuff.h
754 +++ b/include/linux/skbuff.h
756 #include <linux/rcupdate.h>
757 #include <linux/dmaengine.h>
758 #include <linux/hrtimer.h>
759 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
760 +#include <linux/imq.h>
763 #define HAVE_ALLOC_SKB /* For the drivers to know */
764 #define HAVE_ALIGNABLE_SKB /* Ditto 8) */
766 * first. This is owned by whoever has the skb queued ATM.
769 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
776 struct nf_conntrack *nfct;
777 struct sk_buff *nfct_reasm;
779 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
780 + struct nf_queue_entry *nf_queue_entry;
782 #ifdef CONFIG_BRIDGE_NETFILTER
783 struct nf_bridge_info *nf_bridge;
786 __u8 ndisc_nodetype:2;
789 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
790 + __u8 imq_flags:IMQ_F_BITS;
793 #ifdef CONFIG_NET_DMA
794 dma_cookie_t dma_cookie;
797 #include <asm/system.h>
800 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
801 +extern int skb_save_cb(struct sk_buff *skb);
802 +extern int skb_restore_cb(struct sk_buff *skb);
805 extern void kfree_skb(struct sk_buff *skb);
806 extern void __kfree_skb(struct sk_buff *skb);
807 extern struct sk_buff *__alloc_skb(unsigned int size,
808 @@ -1633,6 +1651,10 @@
809 dst->nfct_reasm = src->nfct_reasm;
810 nf_conntrack_get_reasm(src->nfct_reasm);
812 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
813 + dst->imq_flags = src->imq_flags;
814 + dst->nf_queue_entry = src->nf_queue_entry;
816 #ifdef CONFIG_BRIDGE_NETFILTER
817 dst->nf_bridge = src->nf_bridge;
818 nf_bridge_get(src->nf_bridge);
822 #include <net/net_namespace.h>
823 #include <net/sock.h>
824 #include <linux/rtnetlink.h>
825 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
826 +#include <linux/imq.h>
828 #include <linux/proc_fs.h>
829 #include <linux/seq_file.h>
830 #include <linux/stat.h>
831 @@ -1569,7 +1572,11 @@
832 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
834 if (likely(!skb->next)) {
835 - if (!list_empty(&ptype_all))
836 + if (!list_empty(&ptype_all)
837 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
838 + && !(skb->imq_flags & IMQ_F_ENQUEUE)
841 dev_queue_xmit_nit(skb, dev);
843 if (netif_needs_gso(dev, skb)) {
845 +++ b/include/linux/netfilter/xt_IMQ.h
850 +struct xt_imq_info {
851 + unsigned int todev; /* target imq device */
854 +#endif /* _XT_IMQ_H */
856 --- a/include/net/netfilter/nf_queue.h
857 +++ b/include/net/netfilter/nf_queue.h
859 struct net_device *indev;
860 struct net_device *outdev;
861 int (*okfn)(struct sk_buff *);
863 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
864 + int (*next_outfn)(struct nf_queue_entry *entry,
865 + unsigned int queuenum);
866 + unsigned int next_queuenum;
870 #define nf_queue_entry_reroute(x) ((void *)x + sizeof(struct nf_queue_entry))
872 const struct nf_queue_handler *qh);
873 extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh);
874 extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
875 +extern void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
877 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
878 +extern void nf_register_queue_imq_handler(const struct nf_queue_handler *qh);
879 +extern void nf_unregister_queue_imq_handler(void);
882 #endif /* _NF_QUEUE_H */
883 --- a/net/core/skbuff.c
884 +++ b/net/core/skbuff.c
887 static struct kmem_cache *skbuff_head_cache __read_mostly;
888 static struct kmem_cache *skbuff_fclone_cache __read_mostly;
889 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
890 +static struct kmem_cache *skbuff_cb_store_cache __read_mostly;
893 static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
894 struct pipe_buffer *buf)
899 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
900 +/* Control buffer save/restore for IMQ devices */
901 +struct skb_cb_table {
907 +static DEFINE_SPINLOCK(skb_cb_store_lock);
909 +int skb_save_cb(struct sk_buff *skb)
911 + struct skb_cb_table *next;
913 + next = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC);
917 + BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
919 + memcpy(next->cb, skb->cb, sizeof(skb->cb));
920 + next->cb_next = skb->cb_next;
922 + atomic_set(&next->refcnt, 1);
925 + skb->cb_next = next;
928 +EXPORT_SYMBOL(skb_save_cb);
930 +int skb_restore_cb(struct sk_buff *skb)
932 + struct skb_cb_table *next;
937 + next = skb->cb_next;
939 + BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
941 + memcpy(skb->cb, next->cb, sizeof(skb->cb));
942 + skb->cb_next = next->cb_next;
944 + spin_lock(&skb_cb_store_lock);
946 + if (atomic_dec_and_test(&next->refcnt)) {
947 + kmem_cache_free(skbuff_cb_store_cache, next);
950 + spin_unlock(&skb_cb_store_lock);
954 +EXPORT_SYMBOL(skb_restore_cb);
956 +static void skb_copy_stored_cb(struct sk_buff *new, struct sk_buff *old)
958 + struct skb_cb_table *next;
960 + if (!old->cb_next) {
965 + spin_lock(&skb_cb_store_lock);
967 + next = old->cb_next;
968 + atomic_inc(&next->refcnt);
969 + new->cb_next = next;
971 + spin_unlock(&skb_cb_store_lock);
975 /* Pipe buffer operations for a socket. */
976 static struct pipe_buf_operations sock_pipe_buf_ops = {
979 skb->destructor(skb);
981 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
982 + /* This should not happen. When it does, avoid memleak by restoring
983 + the chain of cb-backups. */
984 + while(skb->cb_next != NULL) {
985 + printk(KERN_WARNING "kfree_skb: skb->cb_next: %08x\n",
987 + skb_restore_cb(skb);
990 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
991 nf_conntrack_put(skb->nfct);
992 nf_conntrack_put_reasm(skb->nfct_reasm);
994 new->sp = secpath_get(old->sp);
996 memcpy(new->cb, old->cb, sizeof(old->cb));
997 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
998 + skb_copy_stored_cb(new, old);
1000 new->csum_start = old->csum_start;
1001 new->csum_offset = old->csum_offset;
1002 new->local_df = old->local_df;
1003 @@ -2290,6 +2380,7 @@
1004 nskb->protocol = skb->protocol;
1005 nskb->dst = dst_clone(skb->dst);
1006 memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
1007 + skb_copy_stored_cb(nskb, skb);
1008 nskb->pkt_type = skb->pkt_type;
1009 nskb->mac_len = skb->mac_len;
1011 @@ -2371,6 +2462,13 @@
1013 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1015 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1016 + skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache",
1017 + sizeof(struct skb_cb_table),
1019 + SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1025 --- a/net/netfilter/Kconfig
1026 +++ b/net/netfilter/Kconfig
1027 @@ -334,6 +334,18 @@
1029 To compile it as a module, choose M here. If unsure, say N.
1031 +config NETFILTER_XT_TARGET_IMQ
1032 + tristate '"IMQ" target support'
1033 + depends on NETFILTER_XTABLES
1034 + depends on IP_NF_MANGLE || IP6_NF_MANGLE
1036 + default m if NETFILTER_ADVANCED=n
1038 + This option adds an `IMQ' target which is used to specify if and
1039 + to which imq device packets should get enqueued/dequeued.
1041 + To compile it as a module, choose M here. If unsure, say N.
1043 config NETFILTER_XT_TARGET_MARK
1044 tristate '"MARK" target support'
1045 depends on NETFILTER_XTABLES
1046 --- a/net/netfilter/Makefile
1047 +++ b/net/netfilter/Makefile
1049 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
1050 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
1051 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
1052 +obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o
1053 obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
1054 obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
1055 obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
1056 --- a/net/netfilter/nf_queue.c
1057 +++ b/net/netfilter/nf_queue.c
1060 static DEFINE_MUTEX(queue_handler_mutex);
1062 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1063 +static const struct nf_queue_handler *queue_imq_handler;
1065 +void nf_register_queue_imq_handler(const struct nf_queue_handler *qh)
1067 + mutex_lock(&queue_handler_mutex);
1068 + rcu_assign_pointer(queue_imq_handler, qh);
1069 + mutex_unlock(&queue_handler_mutex);
1071 +EXPORT_SYMBOL(nf_register_queue_imq_handler);
1073 +void nf_unregister_queue_imq_handler(void)
1075 + mutex_lock(&queue_handler_mutex);
1076 + rcu_assign_pointer(queue_imq_handler, NULL);
1077 + mutex_unlock(&queue_handler_mutex);
1079 +EXPORT_SYMBOL(nf_unregister_queue_imq_handler);
1082 /* return EBUSY when somebody else is registered, return EEXIST if the
1083 * same handler is registered, return 0 in case of success. */
1084 int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh)
1087 EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
1089 -static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
1090 +void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
1092 /* Release those devices we held, or Alexey will kill me. */
1095 /* Drop reference to owner of hook which queued us. */
1096 module_put(entry->elem->owner);
1098 +EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
1101 * Any packet that leaves via this function must come back
1102 @@ -121,12 +142,26 @@
1104 const struct nf_afinfo *afinfo;
1105 const struct nf_queue_handler *qh;
1106 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1107 + const struct nf_queue_handler *qih = NULL;
1110 /* QUEUE == DROP if noone is waiting, to be safe. */
1113 qh = rcu_dereference(queue_handler[pf]);
1114 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1115 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
1116 + if (pf == PF_INET || pf == PF_INET6)
1118 + if (pf == PF_INET)
1120 + qih = rcu_dereference(queue_imq_handler);
1128 afinfo = nf_get_afinfo(pf);
1129 @@ -145,6 +180,10 @@
1133 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1134 + .next_outfn = qh ? qh->outfn : NULL,
1135 + .next_queuenum = queuenum,
1139 /* If it's going away, ignore hook. */
1140 @@ -170,8 +209,19 @@
1143 afinfo->saveroute(skb, entry);
1145 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1147 + status = qih->outfn(entry, queuenum);
1148 + goto imq_skip_queue;
1152 status = qh->outfn(entry, queuenum);
1154 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1161 +++ b/net/netfilter/xt_IMQ.c
1164 + * This target marks packets to be enqueued to an imq device
1166 +#include <linux/module.h>
1167 +#include <linux/skbuff.h>
1168 +#include <linux/netfilter/x_tables.h>
1169 +#include <linux/netfilter/xt_IMQ.h>
1170 +#include <linux/imq.h>
1172 +static unsigned int imq_target(struct sk_buff *pskb,
1173 + const struct net_device *in,
1174 + const struct net_device *out,
1175 + unsigned int hooknum,
1176 + const struct xt_target *target,
1177 + const void *targinfo)
1179 + const struct xt_imq_info *mr = targinfo;
1181 + pskb->imq_flags = (mr->todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE;
1183 + return XT_CONTINUE;
1186 +static bool imq_checkentry(const char *tablename,
1187 + const void *entry,
1188 + const struct xt_target *target,
1190 + unsigned int hook_mask)
1192 + struct xt_imq_info *mr = targinfo;
1194 + if (mr->todev > IMQ_MAX_DEVS - 1) {
1195 + printk(KERN_WARNING
1196 + "IMQ: invalid device specified, highest is %u\n",
1197 + IMQ_MAX_DEVS - 1);
1204 +static struct xt_target xt_imq_reg[] __read_mostly = {
1207 + .family = AF_INET,
1208 + .target = imq_target,
1209 + .targetsize = sizeof(struct xt_imq_info),
1210 + .table = "mangle",
1211 + .checkentry = imq_checkentry,
1216 + .family = AF_INET6,
1217 + .target = imq_target,
1218 + .targetsize = sizeof(struct xt_imq_info),
1219 + .table = "mangle",
1220 + .checkentry = imq_checkentry,
1225 +static int __init imq_init(void)
1227 + return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
1230 +static void __exit imq_fini(void)
1232 + xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
1235 +module_init(imq_init);
1236 +module_exit(imq_fini);
1238 +MODULE_AUTHOR("http://www.linuximq.net");
1239 +MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See http://www.linuximq.net/ for more information.");
1240 +MODULE_LICENSE("GPL");
1241 +MODULE_ALIAS("ipt_IMQ");
1242 +MODULE_ALIAS("ip6t_IMQ");