1 Subject: netfilter: conntrack: cache route for forwarded connections
3 ... to avoid per-packet FIB lookup if possible.
5 The cached dst is re-used provided the input interface
6 is the same as that of the previous packet in the same direction.
8 If not, the cached dst is invalidated.
10 For IPv6 we also need to store the fib6 node sernum as a cookie, otherwise
11 dst_check() doesn't work; this was pointed out by Eric Dumazet.
13 This should speed up forwarding when conntrack is already in use
14 anyway, especially when using reverse path filtering -- active RPF
15 enforces two FIB lookups for each packet.
17 Before the routing cache removal this didn't matter, since RPF was performed
18 only when the route cache didn't yield a result; without the route cache it
19 comes at a higher price.
21 Julian Anastasov suggested adding a NETDEV_UNREGISTER handler to
22 avoid holding on to dsts of 'frozen' conntracks.
24 Signed-off-by: Florian Westphal <fw@strlen.de>
26 --- a/include/net/netfilter/nf_conntrack_extend.h
27 +++ b/include/net/netfilter/nf_conntrack_extend.h
28 @@ -30,6 +30,9 @@ enum nf_ct_ext_id {
29 #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
32 +#if IS_ENABLED(CONFIG_NF_CONNTRACK_RTCACHE)
38 @@ -43,6 +46,7 @@ enum nf_ct_ext_id {
39 #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout
40 #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels
41 #define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy
42 +#define NF_CT_EXT_RTCACHE_TYPE struct nf_conn_rtcache
44 /* Extensions: optional stuff which isn't permanently in struct. */
47 +++ b/include/net/netfilter/nf_conntrack_rtcache.h
49 +#include <linux/gfp.h>
50 +#include <net/netfilter/nf_conntrack.h>
51 +#include <net/netfilter/nf_conntrack_extend.h>
55 +struct nf_conn_dst_cache {
56 + struct dst_entry *dst;
58 +#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
64 +struct nf_conn_rtcache {
65 + struct nf_conn_dst_cache cached_dst[IP_CT_DIR_MAX];
69 +struct nf_conn_rtcache *nf_ct_rtcache_find(const struct nf_conn *ct)
71 +#if IS_ENABLED(CONFIG_NF_CONNTRACK_RTCACHE)
72 + return nf_ct_ext_find(ct, NF_CT_EXT_RTCACHE);
78 +static inline int nf_conn_rtcache_iif_get(const struct nf_conn_rtcache *rtc,
79 + enum ip_conntrack_dir dir)
81 + return rtc->cached_dst[dir].iif;
83 --- a/net/netfilter/Kconfig
84 +++ b/net/netfilter/Kconfig
85 @@ -106,6 +106,18 @@ config NF_CONNTRACK_EVENTS
89 +config NF_CONNTRACK_RTCACHE
90 + tristate "Cache route entries in conntrack objects"
91 + depends on NETFILTER_ADVANCED
92 + depends on NF_CONNTRACK
94 + If this option is enabled, the connection tracking code will
95 + cache routing information for each connection that is being
96 + forwarded, at a cost of 32 bytes per conntrack object.
98 + To compile it as a module, choose M here. If unsure, say N.
99 + The module will be called nf_conntrack_rtcache.
101 config NF_CONNTRACK_TIMEOUT
102 bool 'Connection tracking timeout'
103 depends on NETFILTER_ADVANCED
104 --- a/net/netfilter/Makefile
105 +++ b/net/netfilter/Makefile
106 @@ -18,6 +18,9 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += n
107 # connection tracking
108 obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
110 +# optional conntrack route cache extension
111 +obj-$(CONFIG_NF_CONNTRACK_RTCACHE) += nf_conntrack_rtcache.o
113 # SCTP protocol connection tracking
114 obj-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
115 obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
117 +++ b/net/netfilter/nf_conntrack_rtcache.c
119 +/* route cache for netfilter.
121 + * (C) 2014 Red Hat GmbH
123 + * This program is free software; you can redistribute it and/or modify
124 + * it under the terms of the GNU General Public License version 2 as
125 + * published by the Free Software Foundation.
128 +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
130 +#include <linux/types.h>
131 +#include <linux/netfilter.h>
132 +#include <linux/skbuff.h>
133 +#include <linux/stddef.h>
134 +#include <linux/kernel.h>
135 +#include <linux/netdevice.h>
136 +#include <linux/export.h>
137 +#include <linux/module.h>
139 +#include <net/dst.h>
141 +#include <net/netfilter/nf_conntrack.h>
142 +#include <net/netfilter/nf_conntrack_core.h>
143 +#include <net/netfilter/nf_conntrack_extend.h>
144 +#include <net/netfilter/nf_conntrack_rtcache.h>
146 +#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
147 +#include <net/ip6_fib.h>
150 +static void __nf_conn_rtcache_destroy(struct nf_conn_rtcache *rtc,
151 + enum ip_conntrack_dir dir)
153 + struct dst_entry *dst = rtc->cached_dst[dir].dst;
158 +static void nf_conn_rtcache_destroy(struct nf_conn *ct)
160 + struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct);
165 + __nf_conn_rtcache_destroy(rtc, IP_CT_DIR_ORIGINAL);
166 + __nf_conn_rtcache_destroy(rtc, IP_CT_DIR_REPLY);
169 +static void nf_ct_rtcache_ext_add(struct nf_conn *ct)
171 + struct nf_conn_rtcache *rtc;
173 + rtc = nf_ct_ext_add(ct, NF_CT_EXT_RTCACHE, GFP_ATOMIC);
175 + rtc->cached_dst[IP_CT_DIR_ORIGINAL].iif = -1;
176 + rtc->cached_dst[IP_CT_DIR_ORIGINAL].dst = NULL;
177 + rtc->cached_dst[IP_CT_DIR_REPLY].iif = -1;
178 + rtc->cached_dst[IP_CT_DIR_REPLY].dst = NULL;
182 +static struct nf_conn_rtcache *nf_ct_rtcache_find_usable(struct nf_conn *ct)
184 + if (nf_ct_is_untracked(ct))
186 + return nf_ct_rtcache_find(ct);
189 +static struct dst_entry *
190 +nf_conn_rtcache_dst_get(const struct nf_conn_rtcache *rtc,
191 + enum ip_conntrack_dir dir)
193 + return rtc->cached_dst[dir].dst;
196 +static u32 nf_rtcache_get_cookie(int pf, const struct dst_entry *dst)
198 +#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
199 + if (pf == NFPROTO_IPV6) {
200 + const struct rt6_info *rt = (const struct rt6_info *)dst;
203 + return (u32)rt->rt6i_node->fn_sernum;
209 +static void nf_conn_rtcache_dst_set(int pf,
210 + struct nf_conn_rtcache *rtc,
211 + struct dst_entry *dst,
212 + enum ip_conntrack_dir dir, int iif)
214 + if (rtc->cached_dst[dir].iif != iif)
215 + rtc->cached_dst[dir].iif = iif;
217 + if (rtc->cached_dst[dir].dst != dst) {
218 + struct dst_entry *old;
222 + old = xchg(&rtc->cached_dst[dir].dst, dst);
225 +#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
226 + if (pf == NFPROTO_IPV6)
227 + rtc->cached_dst[dir].cookie =
228 + nf_rtcache_get_cookie(pf, dst);
233 +static void nf_conn_rtcache_dst_obsolete(struct nf_conn_rtcache *rtc,
234 + enum ip_conntrack_dir dir)
236 + struct dst_entry *old;
238 + pr_debug("Invalidate iif %d for dir %d on cache %p\n",
239 + rtc->cached_dst[dir].iif, dir, rtc);
241 + old = xchg(&rtc->cached_dst[dir].dst, NULL);
243 + rtc->cached_dst[dir].iif = -1;
246 +static unsigned int nf_rtcache_in(const struct nf_hook_ops *ops,
247 + struct sk_buff *skb,
248 + const struct net_device *in,
249 + const struct net_device *out,
250 + int (*okfn)(struct sk_buff *))
252 + struct nf_conn_rtcache *rtc;
253 + enum ip_conntrack_info ctinfo;
254 + enum ip_conntrack_dir dir;
255 + struct dst_entry *dst;
256 + struct nf_conn *ct;
260 + if (skb_dst(skb) || skb->sk)
263 + ct = nf_ct_get(skb, &ctinfo);
267 + rtc = nf_ct_rtcache_find_usable(ct);
271 + /* if iif changes, don't use cache and let ip stack
274 + * If rp_filter is enabled it might toss skb, so
275 + * we don't want to avoid these checks.
277 + dir = CTINFO2DIR(ctinfo);
278 + iif = nf_conn_rtcache_iif_get(rtc, dir);
279 + if (in->ifindex != iif) {
280 + pr_debug("ct %p, iif %d, cached iif %d, skip cached entry\n",
281 + ct, iif, in->ifindex);
284 + dst = nf_conn_rtcache_dst_get(rtc, dir);
288 + cookie = nf_rtcache_get_cookie(ops->pf, dst);
290 + dst = dst_check(dst, cookie);
291 + pr_debug("obtained dst %p for skb %p, cookie %d\n", dst, skb, cookie);
293 + skb_dst_set_noref_force(skb, dst);
295 + nf_conn_rtcache_dst_obsolete(rtc, dir);
300 +static unsigned int nf_rtcache_forward(const struct nf_hook_ops *ops,
301 + struct sk_buff *skb,
302 + const struct net_device *in,
303 + const struct net_device *out,
304 + int (*okfn)(struct sk_buff *))
306 + struct nf_conn_rtcache *rtc;
307 + enum ip_conntrack_info ctinfo;
308 + enum ip_conntrack_dir dir;
309 + struct nf_conn *ct;
310 + struct dst_entry *dst = skb_dst(skb);
313 + ct = nf_ct_get(skb, &ctinfo);
317 + if (dst && dst_xfrm(dst))
320 + if (!nf_ct_is_confirmed(ct)) {
321 + if (WARN_ON(nf_ct_rtcache_find(ct)))
323 + nf_ct_rtcache_ext_add(ct);
327 + rtc = nf_ct_rtcache_find_usable(ct);
331 + dir = CTINFO2DIR(ctinfo);
332 + iif = nf_conn_rtcache_iif_get(rtc, dir);
333 + pr_debug("ct %p, skb %p, dir %d, iif %d, cached iif %d\n",
334 + ct, skb, dir, iif, in->ifindex);
335 + if (likely(in->ifindex == iif))
338 + nf_conn_rtcache_dst_set(ops->pf, rtc, skb_dst(skb), dir, in->ifindex);
342 +static int nf_rtcache_dst_remove(struct nf_conn *ct, void *data)
344 + struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct);
345 + struct net_device *dev = data;
350 + if (dev->ifindex == rtc->cached_dst[IP_CT_DIR_ORIGINAL].iif ||
351 + dev->ifindex == rtc->cached_dst[IP_CT_DIR_REPLY].iif) {
352 + nf_conn_rtcache_dst_obsolete(rtc, IP_CT_DIR_ORIGINAL);
353 + nf_conn_rtcache_dst_obsolete(rtc, IP_CT_DIR_REPLY);
359 +static int nf_rtcache_netdev_event(struct notifier_block *this,
360 + unsigned long event, void *ptr)
362 + struct net_device *dev = netdev_notifier_info_to_dev(ptr);
363 + struct net *net = dev_net(dev);
365 + if (event == NETDEV_DOWN)
366 + nf_ct_iterate_cleanup(net, nf_rtcache_dst_remove, dev, 0, 0);
368 + return NOTIFY_DONE;
371 +static struct notifier_block nf_rtcache_notifier = {
372 + .notifier_call = nf_rtcache_netdev_event,
375 +static struct nf_hook_ops rtcache_ops[] = {
377 + .hook = nf_rtcache_in,
378 + .owner = THIS_MODULE,
379 + .pf = NFPROTO_IPV4,
380 + .hooknum = NF_INET_PRE_ROUTING,
381 + .priority = NF_IP_PRI_LAST,
384 + .hook = nf_rtcache_forward,
385 + .owner = THIS_MODULE,
386 + .pf = NFPROTO_IPV4,
387 + .hooknum = NF_INET_FORWARD,
388 + .priority = NF_IP_PRI_LAST,
390 +#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
392 + .hook = nf_rtcache_in,
393 + .owner = THIS_MODULE,
394 + .pf = NFPROTO_IPV6,
395 + .hooknum = NF_INET_PRE_ROUTING,
396 + .priority = NF_IP_PRI_LAST,
399 + .hook = nf_rtcache_forward,
400 + .owner = THIS_MODULE,
401 + .pf = NFPROTO_IPV6,
402 + .hooknum = NF_INET_FORWARD,
403 + .priority = NF_IP_PRI_LAST,
408 +static struct nf_ct_ext_type rtcache_extend __read_mostly = {
409 + .len = sizeof(struct nf_conn_rtcache),
410 + .align = __alignof__(struct nf_conn_rtcache),
411 + .id = NF_CT_EXT_RTCACHE,
412 + .destroy = nf_conn_rtcache_destroy,
415 +static int __init nf_conntrack_rtcache_init(void)
417 + int ret = nf_ct_extend_register(&rtcache_extend);
420 + pr_err("nf_conntrack_rtcache: Unable to register extension\n");
424 + ret = nf_register_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops));
426 + nf_ct_extend_unregister(&rtcache_extend);
430 + ret = register_netdevice_notifier(&nf_rtcache_notifier);
432 + nf_unregister_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops));
433 + nf_ct_extend_unregister(&rtcache_extend);
439 +static int nf_rtcache_ext_remove(struct nf_conn *ct, void *data)
441 + struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct);
443 + return rtc != NULL;
446 +static bool __exit nf_conntrack_rtcache_wait_for_dying(struct net *net)
451 + for_each_possible_cpu(cpu) {
452 + struct nf_conntrack_tuple_hash *h;
453 + struct hlist_nulls_node *n;
454 + struct nf_conn *ct;
455 + struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
458 + spin_lock_bh(&pcpu->lock);
460 + hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
461 + ct = nf_ct_tuplehash_to_ctrack(h);
462 + if (nf_ct_rtcache_find(ct) != NULL) {
467 + spin_unlock_bh(&pcpu->lock);
474 +static void __exit nf_conntrack_rtcache_fini(void)
479 + /* remove hooks so no new connections get rtcache extension */
480 + nf_unregister_hooks(rtcache_ops, ARRAY_SIZE(rtcache_ops));
484 + unregister_netdevice_notifier(&nf_rtcache_notifier);
488 + /* zap all conntracks with rtcache extension */
490 + nf_ct_iterate_cleanup(net, nf_rtcache_ext_remove, NULL, 0, 0);
492 + for_each_net(net) {
493 + /* .. and make sure they're gone from dying list, too */
494 + while (nf_conntrack_rtcache_wait_for_dying(net)) {
496 + WARN_ONCE(++count > 25, "Waiting for all rtcache conntracks to go away\n");
502 + nf_ct_extend_unregister(&rtcache_extend);
504 +module_init(nf_conntrack_rtcache_init);
505 +module_exit(nf_conntrack_rtcache_fini);
507 +MODULE_LICENSE("GPL");
508 +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
509 +MODULE_DESCRIPTION("Conntrack route cache extension");