1 From: Pablo Neira Ayuso <pablo@netfilter.org>
2 Date: Sun, 7 Jan 2018 01:04:26 +0100
3 Subject: [PATCH] netfilter: nf_tables: flow offload expression
5 Add a new instruction for the nf_tables VM that allows us to specify which
6 flows are offloaded into a given flow table, referenced by name. This new
7 instruction creates the flow entry and adds it to the flow table.
9 Only established flows, i.e. flows for which we have seen traffic in both
10 directions, are added to the flow table. You can still decide to offload
11 entries at a later stage via packet counting or by checking the ct status,
12 in case you want to offload assured conntracks.
14 This new extension depends on the conntrack subsystem.
16 Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
18 create mode 100644 net/netfilter/nft_flow_offload.c
20 --- a/include/uapi/linux/netfilter/nf_tables.h
21 +++ b/include/uapi/linux/netfilter/nf_tables.h
22 @@ -957,6 +957,17 @@ enum nft_ct_attributes {
24 #define NFTA_CT_MAX (__NFTA_CT_MAX - 1)
27 + * enum nft_offload_attributes - ct offload expression attributes
28 + * @NFTA_FLOW_TABLE_NAME: flow table name (NLA_STRING)
30 +enum nft_offload_attributes {
32 + NFTA_FLOW_TABLE_NAME,
35 +#define NFTA_FLOW_MAX (__NFTA_FLOW_MAX - 1)
40 --- a/net/netfilter/Kconfig
41 +++ b/net/netfilter/Kconfig
42 @@ -515,6 +515,13 @@ config NFT_CT
43 This option adds the "ct" expression that you can use to match
44 connection tracking information such as the flow state.
46 +config NFT_FLOW_OFFLOAD
47 + depends on NF_CONNTRACK
48 + tristate "Netfilter nf_tables hardware flow offload module"
50 + This option adds the "flow_offload" expression that you can use to
51 + choose what flows are placed into the hardware.
54 tristate "Netfilter nf_tables rbtree set module"
56 --- a/net/netfilter/Makefile
57 +++ b/net/netfilter/Makefile
58 @@ -87,6 +87,7 @@ obj-$(CONFIG_NFT_META) += nft_meta.o
59 obj-$(CONFIG_NFT_RT) += nft_rt.o
60 obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
61 obj-$(CONFIG_NFT_CT) += nft_ct.o
62 +obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o
63 obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
64 obj-$(CONFIG_NFT_NAT) += nft_nat.o
65 obj-$(CONFIG_NFT_OBJREF) += nft_objref.o
67 +++ b/net/netfilter/nft_flow_offload.c
69 +#include <linux/kernel.h>
70 +#include <linux/module.h>
71 +#include <linux/init.h>
72 +#include <linux/netlink.h>
73 +#include <linux/netfilter.h>
74 +#include <linux/workqueue.h>
75 +#include <linux/spinlock.h>
76 +#include <linux/netfilter/nf_tables.h>
77 +#include <net/ip.h> /* for ipv4 options. */
78 +#include <net/netfilter/nf_tables.h>
79 +#include <net/netfilter/nf_tables_core.h>
80 +#include <net/netfilter/nf_conntrack_core.h>
81 +#include <linux/netfilter/nf_conntrack_common.h>
82 +#include <net/netfilter/nf_flow_table.h>
84 +struct nft_flow_offload {
85 + struct nft_flowtable *flowtable;
88 +static int nft_flow_route(const struct nft_pktinfo *pkt,
89 + const struct nf_conn *ct,
90 + struct nf_flow_route *route,
91 + enum ip_conntrack_dir dir)
93 + struct dst_entry *this_dst = skb_dst(pkt->skb);
94 + struct dst_entry *other_dst = NULL;
97 + memset(&fl, 0, sizeof(fl));
98 + switch (nft_pf(pkt)) {
100 + fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
103 + fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
107 + nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
111 + route->tuple[dir].dst = this_dst;
112 + route->tuple[dir].ifindex = nft_in(pkt)->ifindex;
113 + route->tuple[!dir].dst = other_dst;
114 + route->tuple[!dir].ifindex = nft_out(pkt)->ifindex;
119 +static bool nft_flow_offload_skip(struct sk_buff *skb)
121 + struct ip_options *opt = &(IPCB(skb)->opt);
123 + if (unlikely(opt->optlen))
125 + if (skb_sec_path(skb))
131 +static void nft_flow_offload_eval(const struct nft_expr *expr,
132 + struct nft_regs *regs,
133 + const struct nft_pktinfo *pkt)
135 + struct nft_flow_offload *priv = nft_expr_priv(expr);
136 + struct nf_flowtable *flowtable = &priv->flowtable->data;
137 + enum ip_conntrack_info ctinfo;
138 + struct nf_flow_route route;
139 + struct flow_offload *flow;
140 + enum ip_conntrack_dir dir;
141 + struct nf_conn *ct;
144 + if (nft_flow_offload_skip(pkt->skb))
147 + ct = nf_ct_get(pkt->skb, &ctinfo);
151 + switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
159 + if (test_bit(IPS_HELPER_BIT, &ct->status))
162 + if (ctinfo == IP_CT_NEW ||
163 + ctinfo == IP_CT_RELATED)
166 + if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
169 + dir = CTINFO2DIR(ctinfo);
170 + if (nft_flow_route(pkt, ct, &route, dir) < 0)
171 + goto err_flow_route;
173 + flow = flow_offload_alloc(ct, &route);
175 + goto err_flow_alloc;
177 + ret = flow_offload_add(flowtable, flow);
184 + flow_offload_free(flow);
186 + dst_release(route.tuple[!dir].dst);
188 + clear_bit(IPS_OFFLOAD_BIT, &ct->status);
190 + regs->verdict.code = NFT_BREAK;
193 +static int nft_flow_offload_validate(const struct nft_ctx *ctx,
194 + const struct nft_expr *expr,
195 + const struct nft_data **data)
197 + unsigned int hook_mask = (1 << NF_INET_FORWARD);
199 + return nft_chain_validate_hooks(ctx->chain, hook_mask);
202 +static int nft_flow_offload_init(const struct nft_ctx *ctx,
203 + const struct nft_expr *expr,
204 + const struct nlattr * const tb[])
206 + struct nft_flow_offload *priv = nft_expr_priv(expr);
207 + u8 genmask = nft_genmask_next(ctx->net);
208 + struct nft_flowtable *flowtable;
210 + if (!tb[NFTA_FLOW_TABLE_NAME])
213 + flowtable = nf_tables_flowtable_lookup(ctx->table,
214 + tb[NFTA_FLOW_TABLE_NAME],
216 + if (IS_ERR(flowtable))
217 + return PTR_ERR(flowtable);
219 + priv->flowtable = flowtable;
222 + return nf_ct_netns_get(ctx->net, ctx->afi->family);
225 +static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
226 + const struct nft_expr *expr)
228 + struct nft_flow_offload *priv = nft_expr_priv(expr);
230 + priv->flowtable->use--;
231 + nf_ct_netns_put(ctx->net, ctx->afi->family);
234 +static int nft_flow_offload_dump(struct sk_buff *skb, const struct nft_expr *expr)
236 + struct nft_flow_offload *priv = nft_expr_priv(expr);
238 + if (nla_put_string(skb, NFTA_FLOW_TABLE_NAME, priv->flowtable->name))
239 + goto nla_put_failure;
247 +static struct nft_expr_type nft_flow_offload_type;
248 +static const struct nft_expr_ops nft_flow_offload_ops = {
249 + .type = &nft_flow_offload_type,
250 + .size = NFT_EXPR_SIZE(sizeof(struct nft_flow_offload)),
251 + .eval = nft_flow_offload_eval,
252 + .init = nft_flow_offload_init,
253 + .destroy = nft_flow_offload_destroy,
254 + .validate = nft_flow_offload_validate,
255 + .dump = nft_flow_offload_dump,
258 +static struct nft_expr_type nft_flow_offload_type __read_mostly = {
259 + .name = "flow_offload",
260 + .ops = &nft_flow_offload_ops,
261 + .maxattr = NFTA_FLOW_MAX,
262 + .owner = THIS_MODULE,
265 +static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data)
267 + struct net_device *dev = data;
269 + if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
272 + flow_offload_dead(flow);
275 +static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable,
278 + nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data);
281 +static int flow_offload_netdev_event(struct notifier_block *this,
282 + unsigned long event, void *ptr)
284 + struct net_device *dev = netdev_notifier_info_to_dev(ptr);
286 + if (event != NETDEV_DOWN)
287 + return NOTIFY_DONE;
289 + nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev);
291 + return NOTIFY_DONE;
294 +static struct notifier_block flow_offload_netdev_notifier = {
295 + .notifier_call = flow_offload_netdev_event,
298 +static int __init nft_flow_offload_module_init(void)
302 + register_netdevice_notifier(&flow_offload_netdev_notifier);
304 + err = nft_register_expr(&nft_flow_offload_type);
306 + goto register_expr;
311 + unregister_netdevice_notifier(&flow_offload_netdev_notifier);
315 +static void __exit nft_flow_offload_module_exit(void)
319 + nft_unregister_expr(&nft_flow_offload_type);
320 + unregister_netdevice_notifier(&flow_offload_netdev_notifier);
323 + nft_flow_table_iterate(net, nft_flow_offload_iterate_cleanup, NULL);
327 +module_init(nft_flow_offload_module_init);
328 +module_exit(nft_flow_offload_module_exit);
330 +MODULE_LICENSE("GPL");
331 +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
332 +MODULE_ALIAS_NFT_EXPR("flow_offload");