kernel: bump 4.19 to 4.19.84
[openwrt/staging/wigyori.git] / target / linux / generic / pending-4.19 / 640-netfilter-nf_flow_table-add-hardware-offload-support.patch
1 From: Pablo Neira Ayuso <pablo@netfilter.org>
2 Date: Thu, 11 Jan 2018 16:32:00 +0100
3 Subject: [PATCH] netfilter: nf_flow_table: add hardware offload support
4
5 This patch adds the infrastructure to offload flows to hardware, in case
6 the NIC/switch comes with built-in flow table capabilities.
7
8 If the hardware comes with no hardware flow tables or they have
9 limitations in terms of features, the existing infrastructure falls back
10 to the software flow table implementation.
11
12 The software flow table garbage collector skips entries that reside in
13 the hardware, so the hardware is responsible for releasing these
14 flow table entries too, via flow_offload_dead().
15
16 Hardware configuration, either to add or to delete entries, is done from
17 the hardware offload workqueue, to ensure this is done from user context
18 given that we may sleep when grabbing the mdio mutex.
19
20 Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
21 ---
22 create mode 100644 net/netfilter/nf_flow_table_hw.c
23
24 --- a/include/linux/netdevice.h
25 +++ b/include/linux/netdevice.h
26 @@ -918,6 +918,13 @@ struct dev_ifalias {
27 char ifalias[];
28 };
29
30 +struct flow_offload;
31 +
32 +enum flow_offload_type {
33 + FLOW_OFFLOAD_ADD = 0, /* add the flow entry to the device flowtable */
34 + FLOW_OFFLOAD_DEL, /* delete the flow entry from the device flowtable */
35 +};
36 +
37 /*
38 * This structure defines the management hooks for network devices.
39 * The following hooks can be defined; unless noted otherwise, they are
40 @@ -1150,6 +1157,10 @@ struct dev_ifalias {
41 * int (*ndo_bridge_dellink)(struct net_device *dev, struct nlmsghdr *nlh,
42 * u16 flags);
43 *
44 + * int (*ndo_flow_offload)(enum flow_offload_type type,
45 + * struct flow_offload *flow);
46 + * Adds/deletes flow entry to/from net device flowtable.
47 + *
48 * int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier);
49 * Called to change device carrier. Soft-devices (like dummy, team, etc)
50 * which do not represent real hardware may define this to allow their
51 @@ -1377,6 +1388,8 @@ struct net_device_ops {
52 int (*ndo_bridge_dellink)(struct net_device *dev,
53 struct nlmsghdr *nlh,
54 u16 flags);
55 + int (*ndo_flow_offload)(enum flow_offload_type type,
56 + struct flow_offload *flow);
57 int (*ndo_change_carrier)(struct net_device *dev,
58 bool new_carrier);
59 int (*ndo_get_phys_port_id)(struct net_device *dev,
60 --- a/include/net/netfilter/nf_flow_table.h
61 +++ b/include/net/netfilter/nf_flow_table.h
62 @@ -20,11 +20,17 @@ struct nf_flowtable_type {
63 struct module *owner;
64 };
65
66 +enum nf_flowtable_flags {
67 + NF_FLOWTABLE_F_HW = 0x1, /* flowtable requests hardware offload */
68 +};
69 +
70 struct nf_flowtable {
71 struct list_head list;
72 struct rhashtable rhashtable;
73 const struct nf_flowtable_type *type;
74 + u32 flags;
75 struct delayed_work gc_work;
76 + possible_net_t ft_net;
77 };
78
79 enum flow_offload_tuple_dir {
80 @@ -69,6 +75,7 @@ struct flow_offload_tuple_rhash {
81 #define FLOW_OFFLOAD_DNAT 0x2
82 #define FLOW_OFFLOAD_DYING 0x4
83 #define FLOW_OFFLOAD_TEARDOWN 0x8
84 +#define FLOW_OFFLOAD_HW 0x10
85
86 struct flow_offload {
87 struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
88 @@ -125,6 +132,22 @@ unsigned int nf_flow_offload_ip_hook(voi
89 unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
90 const struct nf_hook_state *state);
91
92 +void nf_flow_offload_hw_add(struct net *net, struct flow_offload *flow,
93 + struct nf_conn *ct);
94 +void nf_flow_offload_hw_del(struct net *net, struct flow_offload *flow);
95 +
96 +struct nf_flow_table_hw {
97 + struct module *owner; /* backend module, pinned by HW flowtables */
98 + void (*add)(struct net *net, struct flow_offload *flow,
99 + struct nf_conn *ct); /* request offload of a new flow */
100 + void (*del)(struct net *net, struct flow_offload *flow); /* request removal */
101 +};
102 +
103 +int nf_flow_table_hw_register(const struct nf_flow_table_hw *offload);
104 +void nf_flow_table_hw_unregister(const struct nf_flow_table_hw *offload);
105 +
106 +extern struct work_struct nf_flow_offload_hw_work;
107 +
108 #define MODULE_ALIAS_NF_FLOWTABLE(family) \
109 MODULE_ALIAS("nf-flowtable-" __stringify(family))
110
111 --- a/include/uapi/linux/netfilter/nf_tables.h
112 +++ b/include/uapi/linux/netfilter/nf_tables.h
113 @@ -1464,6 +1464,7 @@ enum nft_object_attributes {
114 * @NFTA_FLOWTABLE_HOOK: netfilter hook configuration(NLA_U32)
115 * @NFTA_FLOWTABLE_USE: number of references to this flow table (NLA_U32)
116 * @NFTA_FLOWTABLE_HANDLE: object handle (NLA_U64)
117 + * @NFTA_FLOWTABLE_FLAGS: flags (NLA_U32)
118 */
119 enum nft_flowtable_attributes {
120 NFTA_FLOWTABLE_UNSPEC,
121 @@ -1473,6 +1474,7 @@ enum nft_flowtable_attributes {
122 NFTA_FLOWTABLE_USE,
123 NFTA_FLOWTABLE_HANDLE,
124 NFTA_FLOWTABLE_PAD,
125 + NFTA_FLOWTABLE_FLAGS,
126 __NFTA_FLOWTABLE_MAX
127 };
128 #define NFTA_FLOWTABLE_MAX (__NFTA_FLOWTABLE_MAX - 1)
129 --- a/net/netfilter/Kconfig
130 +++ b/net/netfilter/Kconfig
131 @@ -714,6 +714,15 @@ config NF_FLOW_TABLE
132
133 To compile it as a module, choose M here.
134
135 +config NF_FLOW_TABLE_HW
136 + tristate "Netfilter flow table hardware offload module"
137 + depends on NF_FLOW_TABLE
138 + help
139 + This option adds hardware offload support for the flow table core
140 + infrastructure.
141 +
142 + To compile it as a module, choose M here.
143 +
144 config NETFILTER_XTABLES
145 tristate "Netfilter Xtables support (required for ip_tables)"
146 default m if NETFILTER_ADVANCED=n
147 --- a/net/netfilter/Makefile
148 +++ b/net/netfilter/Makefile
149 @@ -126,6 +126,7 @@ obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_t
150 nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
151
152 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
153 +obj-$(CONFIG_NF_FLOW_TABLE_HW) += nf_flow_table_hw.o
154
155 # generic X tables
156 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
157 --- a/net/netfilter/nf_flow_table_core.c
158 +++ b/net/netfilter/nf_flow_table_core.c
159 @@ -228,10 +228,16 @@ int flow_offload_add(struct nf_flowtable
160 }
161 EXPORT_SYMBOL_GPL(flow_offload_add);
162
163 +static inline bool nf_flow_in_hw(const struct flow_offload *flow)
164 +{
165 + return (flow->flags & FLOW_OFFLOAD_HW) != 0; /* HW flag set on the flow? */
166 +}
167 +
168 static void flow_offload_del(struct nf_flowtable *flow_table,
169 struct flow_offload *flow)
170 {
171 struct flow_offload_entry *e;
172 + struct net *net = read_pnet(&flow_table->ft_net);
173
174 rhashtable_remove_fast(&flow_table->rhashtable,
175 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
176 @@ -246,6 +252,9 @@ static void flow_offload_del(struct nf_f
177 if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
178 flow_offload_fixup_ct_state(e->ct);
179
180 + if (nf_flow_in_hw(flow))
181 + nf_flow_offload_hw_del(net, flow);
182 +
183 flow_offload_free(flow);
184 }
185
186 @@ -359,6 +368,9 @@ static int nf_flow_offload_gc_step(struc
187 if (!teardown)
188 nf_ct_offload_timeout(flow);
189
190 + if (nf_flow_in_hw(flow) && !teardown)
191 + continue;
192 +
193 if (nf_flow_has_expired(flow) || teardown)
194 flow_offload_del(flow_table, flow);
195 }
196 @@ -494,10 +506,43 @@ int nf_flow_dnat_port(const struct flow_
197 }
198 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
199
200 +static const struct nf_flow_table_hw __rcu *nf_flow_table_hw_hook __read_mostly;
201 +
202 +/*
203 + * Bind a flowtable to the hardware offload backend.
204 + *
205 + * Tries to autoload the "nf-flow-table-hw" module when no backend hook is
206 + * registered yet, then takes a reference on the backend's owner module.
207 + *
208 + * Returns 0 on success, or -EOPNOTSUPP when no backend is registered or its
209 + * module reference cannot be taken.
210 + */
211 +static int nf_flow_offload_hw_init(struct nf_flowtable *flow_table)
212 +{
213 + const struct nf_flow_table_hw *hw;
214 + int err = -EOPNOTSUPP;
215 +
216 + if (!rcu_access_pointer(nf_flow_table_hw_hook))
217 + request_module("nf-flow-table-hw");
218 +
219 + rcu_read_lock();
220 + hw = rcu_dereference(nf_flow_table_hw_hook);
221 + if (hw && try_module_get(hw->owner))
222 + err = 0;
223 + rcu_read_unlock();
224 + return err;
225 +}
226 +
227 int nf_flow_table_init(struct nf_flowtable *flowtable)
228 {
229 int err;
230
231 + if (flowtable->flags & NF_FLOWTABLE_F_HW) {
232 + err = nf_flow_offload_hw_init(flowtable);
233 + if (err)
234 + return err;
235 + }
236 +
237 INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
238
239 err = rhashtable_init(&flowtable->rhashtable,
240 @@ -535,6 +580,8 @@ static void nf_flow_table_iterate_cleanu
241 {
242 nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
243 flush_delayed_work(&flowtable->gc_work);
244 + if (flowtable->flags & NF_FLOWTABLE_F_HW)
245 + flush_work(&nf_flow_offload_hw_work);
246 }
247
248 void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
249 @@ -548,6 +595,26 @@ void nf_flow_table_cleanup(struct net *n
250 }
251 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
252
253 +struct work_struct nf_flow_offload_hw_work;
254 +EXPORT_SYMBOL_GPL(nf_flow_offload_hw_work);
255 +
256 +/*
257 + * Let the hardware workqueue finish its pending requests, then drop the
258 + * reference on the backend's owner module (if a backend is registered).
259 + */
260 +static void nf_flow_offload_hw_free(struct nf_flowtable *flowtable)
261 +{
262 + const struct nf_flow_table_hw *hw;
263 +
264 + flush_work(&nf_flow_offload_hw_work);
265 +
266 + rcu_read_lock();
267 + hw = rcu_dereference(nf_flow_table_hw_hook);
268 + if (hw)
269 + module_put(hw->owner);
270 + rcu_read_unlock();
271 +}
272 +
273 void nf_flow_table_free(struct nf_flowtable *flow_table)
274 {
275 mutex_lock(&flowtable_lock);
276 @@ -557,9 +624,58 @@ void nf_flow_table_free(struct nf_flowta
277 nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
278 WARN_ON(!nf_flow_offload_gc_step(flow_table));
279 rhashtable_destroy(&flow_table->rhashtable);
280 + if (flow_table->flags & NF_FLOWTABLE_F_HW)
281 + nf_flow_offload_hw_free(flow_table);
282 }
283 EXPORT_SYMBOL_GPL(nf_flow_table_free);
284
285 +/* Must be called from user context. Relays the add to the hw hook, if set. */
286 +void nf_flow_offload_hw_add(struct net *net, struct flow_offload *flow,
287 + struct nf_conn *ct)
288 +{
289 + const struct nf_flow_table_hw *offload;
290 +
291 + rcu_read_lock();
292 + offload = rcu_dereference(nf_flow_table_hw_hook);
293 + if (offload)
294 + offload->add(net, flow, ct);
295 + rcu_read_unlock();
296 +}
297 +EXPORT_SYMBOL_GPL(nf_flow_offload_hw_add);
298 +
299 +/* Must be called from user context. Relays the del to the hw hook, if set. */
300 +void nf_flow_offload_hw_del(struct net *net, struct flow_offload *flow)
301 +{
302 + const struct nf_flow_table_hw *offload;
303 +
304 + rcu_read_lock();
305 + offload = rcu_dereference(nf_flow_table_hw_hook);
306 + if (offload)
307 + offload->del(net, flow);
308 + rcu_read_unlock();
309 +}
310 +EXPORT_SYMBOL_GPL(nf_flow_offload_hw_del);
311 +
312 +int nf_flow_table_hw_register(const struct nf_flow_table_hw *offload)
313 +{
314 + if (rcu_access_pointer(nf_flow_table_hw_hook))
315 + return -EBUSY; /* a backend hook is already registered */
316 +
317 + rcu_assign_pointer(nf_flow_table_hw_hook, offload);
318 +
319 + return 0;
320 +}
321 +EXPORT_SYMBOL_GPL(nf_flow_table_hw_register);
322 +
323 +void nf_flow_table_hw_unregister(const struct nf_flow_table_hw *offload)
324 +{
325 + WARN_ON(rcu_access_pointer(nf_flow_table_hw_hook) != offload);
326 + rcu_assign_pointer(nf_flow_table_hw_hook, NULL);
327 + /* Wait for RCU readers that may still be using the old hook. */
328 + synchronize_rcu();
329 +}
330 +EXPORT_SYMBOL_GPL(nf_flow_table_hw_unregister);
331 +
332 static int nf_flow_table_netdev_event(struct notifier_block *this,
333 unsigned long event, void *ptr)
334 {
335 --- /dev/null
336 +++ b/net/netfilter/nf_flow_table_hw.c
337 @@ -0,0 +1,169 @@
338 +#include <linux/kernel.h>
339 +#include <linux/init.h>
340 +#include <linux/module.h>
341 +#include <linux/netfilter.h>
342 +#include <linux/rhashtable.h>
343 +#include <linux/netdevice.h>
344 +#include <net/netfilter/nf_flow_table.h>
345 +#include <net/netfilter/nf_conntrack.h>
346 +#include <net/netfilter/nf_conntrack_core.h>
347 +#include <net/netfilter/nf_conntrack_tuple.h>
348 +
349 +static DEFINE_SPINLOCK(flow_offload_hw_pending_list_lock);
350 +static LIST_HEAD(flow_offload_hw_pending_list);
351 +
352 +static DEFINE_MUTEX(nf_flow_offload_hw_mutex);
353 +
354 +struct flow_offload_hw {
355 + struct list_head list; /* link in the pending request list */
356 + enum flow_offload_type type; /* FLOW_OFFLOAD_ADD or FLOW_OFFLOAD_DEL */
357 + struct flow_offload *flow; /* flow the request applies to */
358 + struct nf_conn *ct; /* referenced conntrack for ADD, NULL for DEL */
359 + possible_net_t flow_hw_net; /* netns used to look up the input device */
360 +};
361 +
362 +/* Run the ADD/DEL request on the flow's original-direction input device. */
363 +static int do_flow_offload_hw(struct net *net, struct flow_offload *flow,
364 + int type)
365 +{
366 + struct net_device *indev;
367 + int ret, ifindex;
368 +
369 + ifindex = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx;
370 + indev = dev_get_by_index(net, ifindex);
371 + if (WARN_ON(!indev))
372 + return -ENODEV; /* 0 would let the ADD path set FLOW_OFFLOAD_HW */
373 +
374 + mutex_lock(&nf_flow_offload_hw_mutex);
375 + ret = indev->netdev_ops->ndo_flow_offload(type, flow);
376 + mutex_unlock(&nf_flow_offload_hw_mutex);
377 + dev_put(indev);
378 +
379 + return ret;
380 +}
381 +
382 +/* Program one flow into hardware; mark it FLOW_OFFLOAD_HW on success. */
383 +static void flow_offload_hw_work_add(struct flow_offload_hw *offload)
384 +{
385 + struct flow_offload *flow = offload->flow;
386 +
387 + /* Skip the request when its conntrack entry is already dying. */
388 + if (nf_ct_is_dying(offload->ct))
389 + return;
390 +
391 + if (do_flow_offload_hw(read_pnet(&offload->flow_hw_net), flow,
392 + FLOW_OFFLOAD_ADD) >= 0)
393 + flow->flags |= FLOW_OFFLOAD_HW;
394 +}
395 +
396 +/* Remove one flow from hardware; the driver's result is not checked. */
397 +static void flow_offload_hw_work_del(struct flow_offload_hw *offload)
398 +{
399 + do_flow_offload_hw(read_pnet(&offload->flow_hw_net), offload->flow,
400 + FLOW_OFFLOAD_DEL);
401 +}
402 +
403 +/*
404 + * Workqueue handler: detach every queued request from the shared pending
405 + * list under its lock, then run and free the requests one by one.
406 + */
407 +static void flow_offload_hw_work(struct work_struct *work)
408 +{
409 + struct flow_offload_hw *req, *tmp;
410 + LIST_HEAD(batch);
411 +
412 + spin_lock_bh(&flow_offload_hw_pending_list_lock);
413 + list_replace_init(&flow_offload_hw_pending_list, &batch);
414 + spin_unlock_bh(&flow_offload_hw_pending_list_lock);
415 +
416 + list_for_each_entry_safe(req, tmp, &batch, list) {
417 + if (req->type == FLOW_OFFLOAD_ADD)
418 + flow_offload_hw_work_add(req);
419 + else if (req->type == FLOW_OFFLOAD_DEL)
420 + flow_offload_hw_work_del(req);
421 + if (req->ct)
422 + nf_conntrack_put(&req->ct->ct_general);
423 + list_del(&req->list);
424 + kfree(req);
425 + }
426 +}
427 +
428 +static void flow_offload_queue_work(struct flow_offload_hw *offload)
429 +{
430 + spin_lock_bh(&flow_offload_hw_pending_list_lock);
431 + list_add_tail(&offload->list, &flow_offload_hw_pending_list);
432 + spin_unlock_bh(&flow_offload_hw_pending_list_lock);
433 + /* Schedule the work item that drains the pending list. */
434 + schedule_work(&nf_flow_offload_hw_work);
435 +}
436 +
437 +/* Queue an ADD request; takes a conntrack reference the worker drops. */
438 +static void flow_offload_hw_add(struct net *net, struct flow_offload *flow,
439 + struct nf_conn *ct)
440 +{
441 + struct flow_offload_hw *req = kmalloc(sizeof(*req), GFP_ATOMIC);
442 +
443 + if (!req)
444 + return; /* nothing is queued when the allocation fails */
445 +
446 + req->type = FLOW_OFFLOAD_ADD;
447 + req->flow = flow;
448 + req->ct = ct;
449 + nf_conntrack_get(&ct->ct_general);
450 + write_pnet(&req->flow_hw_net, net);
451 +
452 + flow_offload_queue_work(req);
453 +}
454 +
455 +/* Queue a DEL request for the flow; no conntrack reference is taken. */
456 +static void flow_offload_hw_del(struct net *net, struct flow_offload *flow)
457 +{
458 + struct flow_offload_hw *req = kmalloc(sizeof(*req), GFP_ATOMIC);
459 +
460 + if (!req)
461 + return;
462 +
463 + req->type = FLOW_OFFLOAD_DEL;
464 + req->flow = flow;
465 + req->ct = NULL;
466 + write_pnet(&req->flow_hw_net, net);
467 +
468 + flow_offload_queue_work(req);
469 +}
470 +
471 +static const struct nf_flow_table_hw flow_offload_hw = {
472 + .owner = THIS_MODULE,
473 + .add = flow_offload_hw_add,
474 + .del = flow_offload_hw_del,
475 +};
476 +
477 +static int __init nf_flow_table_hw_module_init(void)
478 +{
479 + INIT_WORK(&nf_flow_offload_hw_work, flow_offload_hw_work);
480 +
481 + /* Fail the load (-EBUSY) when another backend is already registered. */
482 + return nf_flow_table_hw_register(&flow_offload_hw);
483 +}
484 +
485 +static void __exit nf_flow_table_hw_module_exit(void)
486 +{
487 + struct flow_offload_hw *offload, *next;
488 +
489 + nf_flow_table_hw_unregister(&flow_offload_hw);
490 + cancel_work_sync(&nf_flow_offload_hw_work);
491 +
492 + /* Free requests the cancelled work left on the global pending list. */
493 + list_for_each_entry_safe(offload, next, &flow_offload_hw_pending_list, list) {
494 + if (offload->ct)
495 + nf_conntrack_put(&offload->ct->ct_general);
496 + list_del(&offload->list);
497 + kfree(offload);
498 + }
499 +}
500 +
501 +module_init(nf_flow_table_hw_module_init);
502 +module_exit(nf_flow_table_hw_module_exit);
503 +
504 +MODULE_LICENSE("GPL");
505 +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
506 +MODULE_ALIAS("nf-flow-table-hw");
507 --- a/net/netfilter/nf_tables_api.c
508 +++ b/net/netfilter/nf_tables_api.c
509 @@ -5471,6 +5471,13 @@ static int nf_tables_flowtable_parse_hoo
510 if (err < 0)
511 return err;
512
513 + for (i = 0; i < n; i++) {
514 + if (flowtable->data.flags & NF_FLOWTABLE_F_HW &&
515 + !dev_array[i]->netdev_ops->ndo_flow_offload) {
516 + return -EOPNOTSUPP;
517 + }
518 + }
519 +
520 ops = kcalloc(n, sizeof(struct nf_hook_ops), GFP_KERNEL);
521 if (!ops)
522 return -ENOMEM;
523 @@ -5602,10 +5609,19 @@ static int nf_tables_newflowtable(struct
524 }
525
526 flowtable->data.type = type;
527 + write_pnet(&flowtable->data.ft_net, net);
528 +
529 err = type->init(&flowtable->data);
530 if (err < 0)
531 goto err3;
532
533 + if (nla[NFTA_FLOWTABLE_FLAGS]) {
534 + flowtable->data.flags =
535 + ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
536 + if (flowtable->data.flags & ~NF_FLOWTABLE_F_HW)
537 + goto err4;
538 + }
539 +
540 err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
541 flowtable);
542 if (err < 0)
543 @@ -5731,7 +5747,8 @@ static int nf_tables_fill_flowtable_info
544 nla_put_string(skb, NFTA_FLOWTABLE_NAME, flowtable->name) ||
545 nla_put_be32(skb, NFTA_FLOWTABLE_USE, htonl(flowtable->use)) ||
546 nla_put_be64(skb, NFTA_FLOWTABLE_HANDLE, cpu_to_be64(flowtable->handle),
547 - NFTA_FLOWTABLE_PAD))
548 + NFTA_FLOWTABLE_PAD) ||
549 + nla_put_be32(skb, NFTA_FLOWTABLE_FLAGS, htonl(flowtable->data.flags)))
550 goto nla_put_failure;
551
552 nest = nla_nest_start(skb, NFTA_FLOWTABLE_HOOK);
553 --- a/net/netfilter/nft_flow_offload.c
554 +++ b/net/netfilter/nft_flow_offload.c
555 @@ -127,6 +127,9 @@ static void nft_flow_offload_eval(const
556 if (ret < 0)
557 goto err_flow_add;
558
559 + if (flowtable->flags & NF_FLOWTABLE_F_HW)
560 + nf_flow_offload_hw_add(nft_net(pkt), flow, ct);
561 +
562 dst_release(route.tuple[!dir].dst);
563 return;
564