--- /dev/null
+From 13b1ecc3401653a355798eb1dee10cc1608202f4 Mon Sep 17 00:00:00 2001
+From: Felix Fietkau <nbd@nbd.name>
+Date: Mon, 18 Jan 2016 12:27:49 +0100
+Subject: [PATCH 33/34] Kbuild: don't hardcode path to awk in
+ scripts/ld-version.sh
+
+On some systems /usr/bin/awk does not exist, or is broken. Find it via
+$PATH instead.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+ scripts/ld-version.sh | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/scripts/ld-version.sh
++++ b/scripts/ld-version.sh
+@@ -1,6 +1,7 @@
+-#!/usr/bin/awk -f
++#!/bin/sh
+ # SPDX-License-Identifier: GPL-2.0
+ # extract linker version number from stdin and turn into single number
++exec awk '
+ {
+ gsub(".*\\)", "");
+ gsub(".*version ", "");
+@@ -9,3 +10,4 @@
+ print a[1]*100000000 + a[2]*1000000 + a[3]*10000;
+ exit
+ }
++'
--- /dev/null
+From 173019b66dcc9d68ad9333aa744dad1e369b5aa8 Mon Sep 17 00:00:00 2001
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sun, 9 Jul 2017 00:26:53 +0200
+Subject: [PATCH 34/34] kernel: add compile fix for linux 4.9 on x86
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+ Makefile | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/Makefile
++++ b/Makefile
+@@ -428,8 +428,8 @@ KBUILD_CFLAGS_MODULE := -DMODULE
+ KBUILD_LDFLAGS_MODULE := -T $(srctree)/scripts/module-common.lds
+ GCC_PLUGINS_CFLAGS :=
+
+-export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC
+-export CPP AR NM STRIP OBJCOPY OBJDUMP HOSTLDFLAGS HOST_LOADLIBES
++export ARCH SRCARCH SUBARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD
++export CC CPP AR NM STRIP OBJCOPY OBJDUMP HOSTLDFLAGS HOST_LOADLIBES
+ export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE
+ export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
+
--- /dev/null
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Tue, 19 Jun 2018 13:14:56 -0700
+Subject: [PATCH] kbuild: add macro for controlling warnings to
+ linux/compiler.h
+
+I have occasionally run into a situation where it would make sense to
+control a compiler warning from a source file rather than doing so from
+a Makefile using the $(cc-disable-warning, ...) or $(cc-option, ...)
+helpers.
+
+The approach here is similar to what glibc uses, using __diag() and
+related macros to encapsulate a _Pragma("GCC diagnostic ...") statement
+that gets turned into the respective "#pragma GCC diagnostic ..." by
+the preprocessor when the macro gets expanded.
+
+Like glibc, I also have an argument to pass the affected compiler
+version, but decided to actually evaluate that one. For now, this
+supports GCC_4_6, GCC_4_7, GCC_4_8, GCC_4_9, GCC_5, GCC_6, GCC_7,
+GCC_8 and GCC_9. Adding support for CLANG_5 and other interesting
+versions is straightforward here. GNU compilers starting with gcc-4.2
+could support it in principle, but "#pragma GCC diagnostic push"
+was only added in gcc-4.6, so it seems simpler to not deal with those
+at all. The same versions show a large number of warnings already,
+so it seems easier to just leave it at that and not do a more
+fine-grained control for them.
+
+The use cases I found so far include:
+
+- turning off the gcc-8 -Wattribute-alias warning inside of the
+ SYSCALL_DEFINEx() macro without having to do it globally.
+
+- Reducing the build time for a simple re-make after a change,
+ once we move the warnings from ./Makefile and
+ ./scripts/Makefile.extrawarn into linux/compiler.h
+
+- More control over the warnings based on other configurations,
+ using preprocessor syntax instead of Makefile syntax. This should make
+ it easier for the average developer to understand and change things.
+
+- Adding an easy way to turn the W=1 option on unconditionally
+ for a subdirectory or a specific file. This has been requested
+ by several developers in the past that want to have their subsystems
+ W=1 clean.
+
+- Integrating clang better into the build systems. Clang supports
+ more warnings than GCC, and we probably want to classify them
+ as default, W=1, W=2 etc, but there are cases in which the
+ warnings should be classified differently due to excessive false
+ positives from one or the other compiler.
+
+- Adding a way to turn the default warnings into errors (e.g. using
+ a new "make E=0" tag) while not also turning the W=1 warnings into
+ errors.
+
+This patch for now just adds the minimal infrastructure in order to
+do the first of the list above. As the #pragma GCC diagnostic
+takes precedence over command line options, the next step would be
+to convert a lot of the individual Makefiles that set nonstandard
+options to use __diag() instead.
+
+[paul.burton@mips.com:
+ - Rebase atop current master.
+ - Add __diag_GCC, or more generally __diag_<compiler>, abstraction to
+ avoid code outside of linux/compiler-gcc.h needing to duplicate
+ knowledge about different GCC versions.
+ - Add a comment argument to __diag_{ignore,warn,error} which isn't
+ used in the expansion of the macros but serves to push people to
+ document the reason for using them - per feedback from Kees Cook.
+ - Translate severity to GCC-specific pragmas in linux/compiler-gcc.h
+ rather than using GCC-specific pragmas in linux/compiler_types.h.
+ - Drop all but GCC 8 macros, since we only need to define macros for
+ versions that we need to introduce pragmas for, and as of this
+ series that's just GCC 8.
+ - Capitalize comments in linux/compiler-gcc.h to match the style of
+ the rest of the file.
+ - Line up macro definitions with tabs in linux/compiler-gcc.h.]
+
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Paul Burton <paul.burton@mips.com>
+Tested-by: Christophe Leroy <christophe.leroy@c-s.fr>
+Tested-by: Stafford Horne <shorne@gmail.com>
+Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
+---
+
+--- a/include/linux/compiler-gcc.h
++++ b/include/linux/compiler-gcc.h
+@@ -358,3 +358,28 @@
+ * code
+ */
+ #define uninitialized_var(x) x = x
++
++/*
++ * Turn individual warnings and errors on and off locally, depending
++ * on version.
++ */
++#define __diag_GCC(version, severity, s) \
++ __diag_GCC_ ## version(__diag_GCC_ ## severity s)
++
++/* Severity used in pragma directives */
++#define __diag_GCC_ignore ignored
++#define __diag_GCC_warn warning
++#define __diag_GCC_error error
++
++/* Compilers before gcc-4.6 do not understand "#pragma GCC diagnostic push" */
++#if GCC_VERSION >= 40600
++#define __diag_str1(s) #s
++#define __diag_str(s) __diag_str1(s)
++#define __diag(s) _Pragma(__diag_str(GCC diagnostic s))
++#endif
++
++#if GCC_VERSION >= 80000
++#define __diag_GCC_8(s) __diag(s)
++#else
++#define __diag_GCC_8(s)
++#endif
+--- a/include/linux/compiler_types.h
++++ b/include/linux/compiler_types.h
+@@ -283,4 +283,22 @@ struct ftrace_likely_data {
+ # define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long))
+ #endif
+
++#ifndef __diag
++#define __diag(string)
++#endif
++
++#ifndef __diag_GCC
++#define __diag_GCC(version, severity, string)
++#endif
++
++#define __diag_push() __diag(push)
++#define __diag_pop() __diag(pop)
++
++#define __diag_ignore(compiler, version, option, comment) \
++ __diag_ ## compiler(version, ignore, option)
++#define __diag_warn(compiler, version, option, comment) \
++ __diag_ ## compiler(version, warn, option)
++#define __diag_error(compiler, version, option, comment) \
++ __diag_ ## compiler(version, error, option)
++
+ #endif /* __LINUX_COMPILER_TYPES_H */
--- /dev/null
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Tue, 19 Jun 2018 13:14:57 -0700
+Subject: [PATCH] disable -Wattribute-alias warning for SYSCALL_DEFINEx()
+
+gcc-8 warns for every single definition of a system call entry
+point, e.g.:
+
+include/linux/compat.h:56:18: error: 'compat_sys_rt_sigprocmask' alias between functions of incompatible types 'long int(int, compat_sigset_t *, compat_sigset_t *, compat_size_t)' {aka 'long int(int, struct <anonymous> *, struct <anonymous> *, unsigned int)'} and 'long int(long int, long int, long int, long int)' [-Werror=attribute-alias]
+ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\
+ ^~~~~~~~~~
+include/linux/compat.h:45:2: note: in expansion of macro 'COMPAT_SYSCALL_DEFINEx'
+ COMPAT_SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)
+ ^~~~~~~~~~~~~~~~~~~~~~
+kernel/signal.c:2601:1: note: in expansion of macro 'COMPAT_SYSCALL_DEFINE4'
+ COMPAT_SYSCALL_DEFINE4(rt_sigprocmask, int, how, compat_sigset_t __user *, nset,
+ ^~~~~~~~~~~~~~~~~~~~~~
+include/linux/compat.h:60:18: note: aliased declaration here
+ asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
+ ^~~~~~~~~~
+
+The new warning seems reasonable in principle, but it doesn't
+help us here, since we rely on the type mismatch to sanitize the
+system call arguments. After I reported this as GCC PR82435, a new
+-Wno-attribute-alias option was added that could be used to turn the
+warning off globally on the command line, but I'd prefer to do it a
+little more fine-grained.
+
+Interestingly, turning a warning off and on again inside of
+a single macro doesn't always work; in this case I had to add
+an extra statement in between and decided to copy the __SC_TEST
+one from the native syscall to the compat syscall macro. See
+https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83256 for more details
+about this.
+
+[paul.burton@mips.com:
+ - Rebase atop current master.
+ - Split GCC & version arguments to __diag_ignore() in order to match
+ changes to the preceding patch.
+ - Add the comment argument to match the preceding patch.]
+
+Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82435
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Paul Burton <paul.burton@mips.com>
+Tested-by: Christophe Leroy <christophe.leroy@c-s.fr>
+Tested-by: Stafford Horne <shorne@gmail.com>
+Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
+---
+
+--- a/include/linux/compat.h
++++ b/include/linux/compat.h
+@@ -48,6 +48,9 @@
+ COMPAT_SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
+
+ #define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
++ __diag_push(); \
++ __diag_ignore(GCC, 8, "-Wattribute-alias", \
++ "Type aliasing is used to sanitize syscall arguments");\
+ asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))\
+ __attribute__((alias(__stringify(compat_SyS##name)))); \
+ static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
+@@ -56,6 +59,7 @@
+ { \
+ return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \
+ } \
++ __diag_pop(); \
+ static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+
+ #ifndef compat_user_stack_pointer
+--- a/include/linux/syscalls.h
++++ b/include/linux/syscalls.h
+@@ -208,6 +208,9 @@ static inline int is_syscall_trace_event
+
+ #define __PROTECT(...) asmlinkage_protect(__VA_ARGS__)
+ #define __SYSCALL_DEFINEx(x, name, ...) \
++ __diag_push(); \
++ __diag_ignore(GCC, 8, "-Wattribute-alias", \
++ "Type aliasing is used to sanitize syscall arguments");\
+ asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
+ __attribute__((alias(__stringify(SyS##name)))); \
+ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
+@@ -219,6 +222,7 @@ static inline int is_syscall_trace_event
+ __PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
+ return ret; \
+ } \
++ __diag_pop(); \
+ static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__))
+
+ /*
--- /dev/null
+From 1bb0c3ec899827cfa4668bb63a08713a40744d21 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Sun, 9 Jul 2017 08:58:30 +0200
+Subject: [PATCH] netfilter: conntrack: cache route for forwarded connections
+
+... to avoid per-packet FIB lookup if possible.
+
+The cached dst is re-used provided the input interface
+is the same as that of the previous packet in the same direction.
+
+If not, the cached dst is invalidated.
+
+For ipv6 we also need to store sernum, else dst_check doesn't work,
+pointed out by Eric Dumazet.
+
+This should speed up forwarding when conntrack is already in use
+anyway, especially when using reverse path filtering -- active RPF
+enforces two FIB lookups for each packet.
+
+Before the routing cache removal this didn't matter since RPF was performed
+only when the route cache didn't yield a result; but without the route cache
+it comes at a higher price.
+
+Julian Anastasov suggested adding a NETDEV_UNREGISTER handler to
+avoid holding on to dsts of 'frozen' conntracks; the notifier added
+here acts on NETDEV_DOWN.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+---
+ include/net/netfilter/nf_conntrack_extend.h | 4 +
+ include/net/netfilter/nf_conntrack_rtcache.h | 34 +++
+ net/netfilter/Kconfig | 12 +
+ net/netfilter/Makefile | 3 +
+ net/netfilter/nf_conntrack_rtcache.c | 428 +++++++++++++++++++++++++++
+ 5 files changed, 481 insertions(+)
+ create mode 100644 include/net/netfilter/nf_conntrack_rtcache.h
+ create mode 100644 net/netfilter/nf_conntrack_rtcache.c
+
+--- a/include/net/netfilter/nf_conntrack_extend.h
++++ b/include/net/netfilter/nf_conntrack_extend.h
+@@ -28,6 +28,9 @@ enum nf_ct_ext_id {
+ #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
+ NF_CT_EXT_SYNPROXY,
+ #endif
++#if IS_ENABLED(CONFIG_NF_CONNTRACK_RTCACHE)
++ NF_CT_EXT_RTCACHE,
++#endif
+ NF_CT_EXT_NUM,
+ };
+
+@@ -40,6 +43,7 @@ enum nf_ct_ext_id {
+ #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout
+ #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels
+ #define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy
++#define NF_CT_EXT_RTCACHE_TYPE struct nf_conn_rtcache
+
+ /* Extensions: optional stuff which isn't permanently in struct. */
+ struct nf_ct_ext {
+--- /dev/null
++++ b/include/net/netfilter/nf_conntrack_rtcache.h
+@@ -0,0 +1,34 @@
++#include <linux/gfp.h>
++#include <net/netfilter/nf_conntrack.h>
++#include <net/netfilter/nf_conntrack_extend.h>
++
++struct dst_entry;
++
++struct nf_conn_dst_cache {
++ struct dst_entry *dst;
++ int iif;
++#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
++ u32 cookie;
++#endif
++
++};
++
++struct nf_conn_rtcache {
++ struct nf_conn_dst_cache cached_dst[IP_CT_DIR_MAX];
++};
++
++static inline
++struct nf_conn_rtcache *nf_ct_rtcache_find(const struct nf_conn *ct)
++{
++#if IS_ENABLED(CONFIG_NF_CONNTRACK_RTCACHE)
++ return nf_ct_ext_find(ct, NF_CT_EXT_RTCACHE);
++#else
++ return NULL;
++#endif
++}
++
++static inline int nf_conn_rtcache_iif_get(const struct nf_conn_rtcache *rtc,
++ enum ip_conntrack_dir dir)
++{
++ return rtc->cached_dst[dir].iif;
++}
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -118,6 +118,18 @@ config NF_CONNTRACK_EVENTS
+
+ If unsure, say `N'.
+
++config NF_CONNTRACK_RTCACHE
++ tristate "Cache route entries in conntrack objects"
++ depends on NETFILTER_ADVANCED
++ depends on NF_CONNTRACK
++ help
++ If this option is enabled, the connection tracking code will
++ cache routing information for each connection that is being
++ forwarded, at a cost of 32 bytes per conntrack object.
++
++ To compile it as a module, choose M here. If unsure, say N.
++ The module will be called nf_conntrack_rtcache.
++
+ config NF_CONNTRACK_TIMEOUT
+ bool 'Connection tracking timeout'
+ depends on NETFILTER_ADVANCED
+--- a/net/netfilter/Makefile
++++ b/net/netfilter/Makefile
+@@ -19,6 +19,9 @@ obj-$(CONFIG_NETFILTER_NETLINK_LOG) += n
+ # connection tracking
+ obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
+
++# optional conntrack route cache extension
++obj-$(CONFIG_NF_CONNTRACK_RTCACHE) += nf_conntrack_rtcache.o
++
+ obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
+
+ # netlink interface for nf_conntrack
+--- /dev/null
++++ b/net/netfilter/nf_conntrack_rtcache.c
+@@ -0,0 +1,428 @@
++/* route cache for netfilter.
++ *
++ * (C) 2014 Red Hat GmbH
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
++
++#include <linux/types.h>
++#include <linux/netfilter.h>
++#include <linux/skbuff.h>
++#include <linux/stddef.h>
++#include <linux/kernel.h>
++#include <linux/netdevice.h>
++#include <linux/export.h>
++#include <linux/module.h>
++
++#include <net/dst.h>
++
++#include <net/netfilter/nf_conntrack.h>
++#include <net/netfilter/nf_conntrack_core.h>
++#include <net/netfilter/nf_conntrack_extend.h>
++#include <net/netfilter/nf_conntrack_rtcache.h>
++
++#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
++#include <net/ip6_fib.h>
++#endif
++
++static void __nf_conn_rtcache_destroy(struct nf_conn_rtcache *rtc,
++ enum ip_conntrack_dir dir)
++{
++ struct dst_entry *dst = rtc->cached_dst[dir].dst;
++
++ dst_release(dst);
++}
++
++static void nf_conn_rtcache_destroy(struct nf_conn *ct)
++{
++ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct);
++
++ if (!rtc)
++ return;
++
++ __nf_conn_rtcache_destroy(rtc, IP_CT_DIR_ORIGINAL);
++ __nf_conn_rtcache_destroy(rtc, IP_CT_DIR_REPLY);
++}
++
++static void nf_ct_rtcache_ext_add(struct nf_conn *ct)
++{
++ struct nf_conn_rtcache *rtc;
++
++ rtc = nf_ct_ext_add(ct, NF_CT_EXT_RTCACHE, GFP_ATOMIC);
++ if (rtc) {
++ rtc->cached_dst[IP_CT_DIR_ORIGINAL].iif = -1;
++ rtc->cached_dst[IP_CT_DIR_ORIGINAL].dst = NULL;
++ rtc->cached_dst[IP_CT_DIR_REPLY].iif = -1;
++ rtc->cached_dst[IP_CT_DIR_REPLY].dst = NULL;
++ }
++}
++
++static struct nf_conn_rtcache *nf_ct_rtcache_find_usable(struct nf_conn *ct)
++{
++ return nf_ct_rtcache_find(ct);
++}
++
++static struct dst_entry *
++nf_conn_rtcache_dst_get(const struct nf_conn_rtcache *rtc,
++ enum ip_conntrack_dir dir)
++{
++ return rtc->cached_dst[dir].dst;
++}
++
++static u32 nf_rtcache_get_cookie(int pf, const struct dst_entry *dst)
++{
++#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
++ if (pf == NFPROTO_IPV6) {
++ const struct rt6_info *rt = (const struct rt6_info *)dst;
++
++ if (rt->rt6i_node)
++ return (u32)rt->rt6i_node->fn_sernum;
++ }
++#endif
++ return 0;
++}
++
++static void nf_conn_rtcache_dst_set(int pf,
++ struct nf_conn_rtcache *rtc,
++ struct dst_entry *dst,
++ enum ip_conntrack_dir dir, int iif)
++{
++ if (rtc->cached_dst[dir].iif != iif)
++ rtc->cached_dst[dir].iif = iif;
++
++ if (rtc->cached_dst[dir].dst != dst) {
++ struct dst_entry *old;
++
++ dst_hold(dst);
++
++ old = xchg(&rtc->cached_dst[dir].dst, dst);
++ dst_release(old);
++
++#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
++ if (pf == NFPROTO_IPV6)
++ rtc->cached_dst[dir].cookie =
++ nf_rtcache_get_cookie(pf, dst);
++#endif
++ }
++}
++
++static void nf_conn_rtcache_dst_obsolete(struct nf_conn_rtcache *rtc,
++ enum ip_conntrack_dir dir)
++{
++ struct dst_entry *old;
++
++ pr_debug("Invalidate iif %d for dir %d on cache %p\n",
++ rtc->cached_dst[dir].iif, dir, rtc);
++
++ old = xchg(&rtc->cached_dst[dir].dst, NULL);
++ dst_release(old);
++ rtc->cached_dst[dir].iif = -1;
++}
++
++static unsigned int nf_rtcache_in(u_int8_t pf,
++				  struct sk_buff *skb,
++				  const struct nf_hook_state *state)
++{
++	struct nf_conn_rtcache *rtc;
++	enum ip_conntrack_info ctinfo;
++	enum ip_conntrack_dir dir;
++	struct dst_entry *dst;
++	struct nf_conn *ct;
++	int iif;
++	u32 cookie;
++
++	if (skb_dst(skb) || skb->sk)
++		return NF_ACCEPT;
++
++	ct = nf_ct_get(skb, &ctinfo);
++	if (!ct)
++		return NF_ACCEPT;
++
++	rtc = nf_ct_rtcache_find_usable(ct);
++	if (!rtc)
++		return NF_ACCEPT;
++
++	/* If the input interface differs from the cached one, don't use
++	 * the cache and let the ip stack do the route lookup.
++	 *
++	 * If rp_filter is enabled it might toss the skb, so
++	 * we don't want to bypass these checks.
++	 */
++	dir = CTINFO2DIR(ctinfo);
++	iif = nf_conn_rtcache_iif_get(rtc, dir);
++	if (state->in->ifindex != iif) {
++		pr_debug("ct %p, iif %d, cached iif %d, skip cached entry\n",
++			 ct, state->in->ifindex, iif);
++		return NF_ACCEPT;
++	}
++	dst = nf_conn_rtcache_dst_get(rtc, dir);
++	if (dst == NULL)
++		return NF_ACCEPT;
++
++	cookie = nf_rtcache_get_cookie(pf, dst);
++
++	dst = dst_check(dst, cookie);
++	pr_debug("obtained dst %p for skb %p, cookie %u\n", dst, skb, cookie);
++	if (likely(dst))
++		skb_dst_set_noref(skb, dst);
++	else
++		nf_conn_rtcache_dst_obsolete(rtc, dir);
++
++	return NF_ACCEPT;
++}
++
++static unsigned int nf_rtcache_forward(u_int8_t pf,
++				       struct sk_buff *skb,
++				       const struct nf_hook_state *state)
++{
++	struct nf_conn_rtcache *rtc;
++	enum ip_conntrack_info ctinfo;
++	enum ip_conntrack_dir dir;
++	struct nf_conn *ct;
++	struct dst_entry *dst = skb_dst(skb);
++	int iif;
++
++	ct = nf_ct_get(skb, &ctinfo);
++	if (!ct)
++		return NF_ACCEPT;
++
++	if (dst && dst_xfrm(dst))
++		return NF_ACCEPT;
++
++	if (!nf_ct_is_confirmed(ct)) {
++		if (WARN_ON(nf_ct_rtcache_find(ct)))
++			return NF_ACCEPT;
++		nf_ct_rtcache_ext_add(ct);
++		return NF_ACCEPT;
++	}
++
++	rtc = nf_ct_rtcache_find_usable(ct);
++	if (!rtc)
++		return NF_ACCEPT;
++
++	dir = CTINFO2DIR(ctinfo);
++	iif = nf_conn_rtcache_iif_get(rtc, dir);
++	pr_debug("ct %p, skb %p, dir %d, iif %d, cached iif %d\n",
++		 ct, skb, dir, state->in->ifindex, iif);
++	if (likely(state->in->ifindex == iif))
++		return NF_ACCEPT;
++
++	nf_conn_rtcache_dst_set(pf, rtc, skb_dst(skb), dir, state->in->ifindex);
++	return NF_ACCEPT;
++}
++
++static unsigned int nf_rtcache_in4(void *priv,
++ struct sk_buff *skb,
++ const struct nf_hook_state *state)
++{
++ return nf_rtcache_in(NFPROTO_IPV4, skb, state);
++}
++
++static unsigned int nf_rtcache_forward4(void *priv,
++ struct sk_buff *skb,
++ const struct nf_hook_state *state)
++{
++ return nf_rtcache_forward(NFPROTO_IPV4, skb, state);
++}
++
++#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
++static unsigned int nf_rtcache_in6(void *priv,
++ struct sk_buff *skb,
++ const struct nf_hook_state *state)
++{
++ return nf_rtcache_in(NFPROTO_IPV6, skb, state);
++}
++
++static unsigned int nf_rtcache_forward6(void *priv,
++ struct sk_buff *skb,
++ const struct nf_hook_state *state)
++{
++ return nf_rtcache_forward(NFPROTO_IPV6, skb, state);
++}
++#endif
++
++static int nf_rtcache_dst_remove(struct nf_conn *ct, void *data)
++{
++ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct);
++ struct net_device *dev = data;
++
++ if (!rtc)
++ return 0;
++
++ if (dev->ifindex == rtc->cached_dst[IP_CT_DIR_ORIGINAL].iif ||
++ dev->ifindex == rtc->cached_dst[IP_CT_DIR_REPLY].iif) {
++ nf_conn_rtcache_dst_obsolete(rtc, IP_CT_DIR_ORIGINAL);
++ nf_conn_rtcache_dst_obsolete(rtc, IP_CT_DIR_REPLY);
++ }
++
++ return 0;
++}
++
++static int nf_rtcache_netdev_event(struct notifier_block *this,
++ unsigned long event, void *ptr)
++{
++ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
++ struct net *net = dev_net(dev);
++
++ if (event == NETDEV_DOWN)
++ nf_ct_iterate_cleanup_net(net, nf_rtcache_dst_remove, dev, 0, 0);
++
++ return NOTIFY_DONE;
++}
++
++static struct notifier_block nf_rtcache_notifier = {
++ .notifier_call = nf_rtcache_netdev_event,
++};
++
++static struct nf_hook_ops rtcache_ops[] = {
++ {
++ .hook = nf_rtcache_in4,
++ .pf = NFPROTO_IPV4,
++ .hooknum = NF_INET_PRE_ROUTING,
++ .priority = NF_IP_PRI_LAST,
++ },
++ {
++ .hook = nf_rtcache_forward4,
++ .pf = NFPROTO_IPV4,
++ .hooknum = NF_INET_FORWARD,
++ .priority = NF_IP_PRI_LAST,
++ },
++#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
++ {
++ .hook = nf_rtcache_in6,
++ .pf = NFPROTO_IPV6,
++ .hooknum = NF_INET_PRE_ROUTING,
++ .priority = NF_IP_PRI_LAST,
++ },
++ {
++ .hook = nf_rtcache_forward6,
++ .pf = NFPROTO_IPV6,
++ .hooknum = NF_INET_FORWARD,
++ .priority = NF_IP_PRI_LAST,
++ },
++#endif
++};
++
++static struct nf_ct_ext_type rtcache_extend __read_mostly = {
++ .len = sizeof(struct nf_conn_rtcache),
++ .align = __alignof__(struct nf_conn_rtcache),
++ .id = NF_CT_EXT_RTCACHE,
++ .destroy = nf_conn_rtcache_destroy,
++};
++
++static void __net_exit rtcache_net_exit(struct net *net)
++{
++ /* remove hooks so no new connections get rtcache extension */
++ nf_unregister_net_hooks(net, rtcache_ops, ARRAY_SIZE(rtcache_ops));
++}
++
++static struct pernet_operations rtcache_ops_net_ops = {
++ .exit = rtcache_net_exit,
++};
++
++static int __init nf_conntrack_rtcache_init(void)
++{
++ int ret = nf_ct_extend_register(&rtcache_extend);
++
++ if (ret < 0) {
++ pr_err("nf_conntrack_rtcache: Unable to register extension\n");
++ return ret;
++ }
++
++ ret = register_pernet_subsys(&rtcache_ops_net_ops);
++ if (ret) {
++ nf_ct_extend_unregister(&rtcache_extend);
++ return ret;
++ }
++
++ ret = nf_register_net_hooks(&init_net, rtcache_ops,
++ ARRAY_SIZE(rtcache_ops));
++ if (ret < 0) {
++ nf_ct_extend_unregister(&rtcache_extend);
++ unregister_pernet_subsys(&rtcache_ops_net_ops);
++ return ret;
++ }
++
++ ret = register_netdevice_notifier(&nf_rtcache_notifier);
++ if (ret) {
++ nf_unregister_net_hooks(&init_net, rtcache_ops,
++ ARRAY_SIZE(rtcache_ops));
++ nf_ct_extend_unregister(&rtcache_extend);
++ unregister_pernet_subsys(&rtcache_ops_net_ops);
++ }
++
++ return ret;
++}
++
++static int nf_rtcache_ext_remove(struct nf_conn *ct, void *data)
++{
++ struct nf_conn_rtcache *rtc = nf_ct_rtcache_find(ct);
++
++ return rtc != NULL;
++}
++
++static bool __exit nf_conntrack_rtcache_wait_for_dying(struct net *net)
++{
++ bool wait = false;
++ int cpu;
++
++ for_each_possible_cpu(cpu) {
++ struct nf_conntrack_tuple_hash *h;
++ struct hlist_nulls_node *n;
++ struct nf_conn *ct;
++ struct ct_pcpu *pcpu = per_cpu_ptr(net->ct.pcpu_lists, cpu);
++
++ rcu_read_lock();
++ spin_lock_bh(&pcpu->lock);
++
++ hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) {
++ ct = nf_ct_tuplehash_to_ctrack(h);
++ if (nf_ct_rtcache_find(ct) != NULL) {
++ wait = true;
++ break;
++ }
++ }
++ spin_unlock_bh(&pcpu->lock);
++ rcu_read_unlock();
++ }
++
++ return wait;
++}
++
++static void __exit nf_conntrack_rtcache_fini(void)
++{
++ struct net *net;
++ int count = 0;
++
++ synchronize_net();
++
++ unregister_netdevice_notifier(&nf_rtcache_notifier);
++
++ rtnl_lock();
++
++ /* zap all conntracks with rtcache extension */
++ for_each_net(net)
++ nf_ct_iterate_cleanup_net(net, nf_rtcache_ext_remove, NULL, 0, 0);
++
++ for_each_net(net) {
++ /* .. and make sure they're gone from dying list, too */
++ while (nf_conntrack_rtcache_wait_for_dying(net)) {
++ msleep(200);
++ WARN_ONCE(++count > 25, "Waiting for all rtcache conntracks to go away\n");
++ }
++ }
++
++ rtnl_unlock();
++ synchronize_net();
++ nf_ct_extend_unregister(&rtcache_extend);
++}
++module_init(nf_conntrack_rtcache_init);
++module_exit(nf_conntrack_rtcache_fini);
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
++MODULE_DESCRIPTION("Conntrack route cache extension");
--- /dev/null
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 11 Nov 2017 15:54:12 -0800
+Subject: [PATCH] tcp: allow drivers to tweak TSQ logic
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+I had many reports that TSQ logic breaks wifi aggregation.
+
+Current logic is to allow up to 1 ms of bytes to be queued into qdisc
+and drivers queues.
+
+But wifi aggregation needs a bigger budget to allow bigger rates to
+be discovered by the various TCP congestion control algorithms.
+
+This patch adds an extra socket field, allowing wifi drivers to select
+another log scale to derive TCP Small Queue credit from current pacing
+rate.
+
+Initial value is 10, meaning that this patch does not change current
+behavior.
+
+We expect wifi drivers to set this field to smaller values (tests have
+been done with values from 6 to 9).
+
+They would have to use the following template:
+
+if (skb->sk && skb->sk->sk_pacing_shift != MY_PACING_SHIFT)
+ skb->sk->sk_pacing_shift = MY_PACING_SHIFT;
+
+Ref: https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1670041
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Johannes Berg <johannes.berg@intel.com>
+Cc: Toke Høiland-Jørgensen <toke@toke.dk>
+Cc: Kir Kolyshkin <kir@openvz.org>
+---
+--- a/include/net/sock.h
++++ b/include/net/sock.h
+@@ -267,6 +267,7 @@ struct sock_common {
+ * @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
+ * @sk_gso_max_size: Maximum GSO segment size to build
+ * @sk_gso_max_segs: Maximum number of GSO segments
++ * @sk_pacing_shift: scaling factor for TCP Small Queues
+ * @sk_lingertime: %SO_LINGER l_linger setting
+ * @sk_backlog: always used with the per-socket spinlock held
+ * @sk_callback_lock: used with the callbacks in the end of this struct
+@@ -445,6 +446,8 @@ struct sock {
+ sk_type : 16;
+ #define SK_PROTOCOL_MAX U8_MAX
+ u16 sk_gso_max_segs;
++#define sk_pacing_shift sk_pacing_shift /* for backport checks */
++ u8 sk_pacing_shift;
+ unsigned long sk_lingertime;
+ struct proto *sk_prot_creator;
+ rwlock_t sk_callback_lock;
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -2739,6 +2739,7 @@ void sock_init_data(struct socket *sock,
+
+ sk->sk_max_pacing_rate = ~0U;
+ sk->sk_pacing_rate = ~0U;
++ sk->sk_pacing_shift = 10;
+ sk->sk_incoming_cpu = -1;
+ /*
+ * Before updating sk_refcnt, we must commit prior changes to memory
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -1683,7 +1683,7 @@ u32 tcp_tso_autosize(const struct sock *
+ {
+ u32 bytes, segs;
+
+- bytes = min(sk->sk_pacing_rate >> 10,
++ bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift,
+ sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
+
+ /* Goal is to send at least one packet per ms,
+@@ -2184,7 +2184,7 @@ static bool tcp_small_queue_check(struct
+ {
+ unsigned int limit;
+
+- limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
++ limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift);
+ limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
+ limit <<= factor;
+
--- /dev/null
+From 4d304a6fe93538ce356b4593dc43476b50c023e7 Mon Sep 17 00:00:00 2001
+From: Giuseppe Lippolis <giu.lippolis@gmail.com>
+Date: Mon, 23 Apr 2018 09:03:06 +0200
+Subject: USB: serial: option: blacklist unused dwm-158 interfaces
+
+Interfaces 4 and 5 of the DWM-158 do not answer AT commands and do not
+appear to be option interfaces.
+Tested on the OpenWrt distribution (kernel 4.14, using the old blacklist
+definitions).
+
+Lars Melin also writes:
+
+ Blacklisting interface 4 and 5 is correct because:
+
+ MI_00 D-Link Mobile Broadband Device (cdc_ether)
+ MI_02 D-Link HSPA+DataCard Diagnostics Interface (also ppp modem)
+ MI_03 D-Link HSPA+DataCard NMEA Device
+ MI_04 D-Link HSPA+DataCard Speech Port
+ MI_05 D-Link HSPA+DataCard Debug Port
+ MI_06 USB Mass Storage Device
+
+Signed-off-by: Giuseppe Lippolis <giu.lippolis@gmail.com>
+[ johan: add Lars's comment on the interface layout and reword summary ]
+Cc: Lars Melin <larsm17@gmail.com>
+Cc: Dan Williams <dcbw@redhat.com>
+Signed-off-by: Johan Hovold <johan@kernel.org>
+---
+ drivers/usb/serial/option.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/usb/serial/option.c
++++ b/drivers/usb/serial/option.c
+@@ -1927,7 +1927,8 @@ static const struct usb_device_id option
+ { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d01, 0xff) }, /* D-Link DWM-156 (variant) */
+ { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d02, 0xff) },
+ { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d03, 0xff) },
+- { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d04, 0xff) }, /* D-Link DWM-158 */
++ { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d04, 0xff), /* D-Link DWM-158 */
++ .driver_info = RSVD(4) | RSVD(5) },
+ { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7d0e, 0xff) }, /* D-Link DWM-157 C1 */
+ { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e19, 0xff), /* D-Link DWM-221 B1 */
+ .driver_info = RSVD(4) },
--- /dev/null
+From 531ef5ebea96394ddb7f554d4d88e017dde30a59 Mon Sep 17 00:00:00 2001
+From: Amelie Delaunay <amelie.delaunay@st.com>
+Date: Tue, 13 Feb 2018 09:28:12 +0100
+Subject: [PATCH] usb: dwc2: add support for host mode external vbus supply
+
+This patch adds a way to enable an external vbus supply in host mode,
+when dwc2 drvvbus signal is not used.
+
+This patch is very similar to the one done in U-Boot dwc2 driver [1]. It
+also adds dynamic vbus supply management depending on the role and state
+of the core.
+
+[1] https://lists.denx.de/pipermail/u-boot/2017-March/283434.html
+
+Signed-off-by: Amelie Delaunay <amelie.delaunay@st.com>
+Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
+---
+ drivers/usb/dwc2/core.h | 2 ++
+ drivers/usb/dwc2/hcd.c | 26 ++++++++++++++++++++++++++
+ 2 files changed, 28 insertions(+)
+
+--- a/drivers/usb/dwc2/core.h
++++ b/drivers/usb/dwc2/core.h
+@@ -777,6 +777,7 @@ struct dwc2_hregs_backup {
+ * @plat: The platform specific configuration data. This can be
+ * removed once all SoCs support usb transceiver.
+ * @supplies: Definition of USB power supplies
++ * @vbus_supply: Regulator supplying vbus.
+ * @phyif: PHY interface width
+ * @lock: Spinlock that protects all the driver data structures
+ * @priv: Stores a pointer to the struct usb_hcd
+@@ -914,6 +915,7 @@ struct dwc2_hsotg {
+ struct usb_phy *uphy;
+ struct dwc2_hsotg_plat *plat;
+ struct regulator_bulk_data supplies[DWC2_NUM_SUPPLIES];
++ struct regulator *vbus_supply;
+ u32 phyif;
+
+ spinlock_t lock;
+--- a/drivers/usb/dwc2/hcd.c
++++ b/drivers/usb/dwc2/hcd.c
+@@ -359,6 +359,23 @@ static void dwc2_gusbcfg_init(struct dwc
+ dwc2_writel(usbcfg, hsotg->regs + GUSBCFG);
+ }
+
++static int dwc2_vbus_supply_init(struct dwc2_hsotg *hsotg)
++{
++ hsotg->vbus_supply = devm_regulator_get_optional(hsotg->dev, "vbus");
++ if (IS_ERR(hsotg->vbus_supply))
++ return 0;
++
++ return regulator_enable(hsotg->vbus_supply);
++}
++
++static int dwc2_vbus_supply_exit(struct dwc2_hsotg *hsotg)
++{
++ if (hsotg->vbus_supply)
++ return regulator_disable(hsotg->vbus_supply);
++
++ return 0;
++}
++
+ /**
+ * dwc2_enable_host_interrupts() - Enables the Host mode interrupts
+ *
+@@ -3342,6 +3359,7 @@ static void dwc2_conn_id_status_change(s
+
+ /* B-Device connector (Device Mode) */
+ if (gotgctl & GOTGCTL_CONID_B) {
++ dwc2_vbus_supply_exit(hsotg);
+ /* Wait for switch to device mode */
+ dev_dbg(hsotg->dev, "connId B\n");
+ if (hsotg->bus_suspended) {
+@@ -4448,6 +4466,9 @@ static int _dwc2_hcd_start(struct usb_hc
+ }
+
+ spin_unlock_irqrestore(&hsotg->lock, flags);
++
++ dwc2_vbus_supply_init(hsotg);
++
+ return 0;
+ }
+
+@@ -4475,6 +4496,8 @@ static void _dwc2_hcd_stop(struct usb_hc
+ clear_bit(HCD_FLAG_HW_ACCESSIBLE, &hcd->flags);
+ spin_unlock_irqrestore(&hsotg->lock, flags);
+
++ dwc2_vbus_supply_exit(hsotg);
++
+ usleep_range(1000, 3000);
+ }
+
+@@ -4511,6 +4534,7 @@ static int _dwc2_hcd_suspend(struct usb_
+ hprt0 |= HPRT0_SUSP;
+ hprt0 &= ~HPRT0_PWR;
+ dwc2_writel(hprt0, hsotg->regs + HPRT0);
++ dwc2_vbus_supply_exit(hsotg);
+ }
+
+ /* Enter hibernation */
+@@ -4591,6 +4615,8 @@ static int _dwc2_hcd_resume(struct usb_h
+ spin_unlock_irqrestore(&hsotg->lock, flags);
+ dwc2_port_resume(hsotg);
+ } else {
++ dwc2_vbus_supply_init(hsotg);
++
+ /* Wait for controller to correctly update D+/D- level */
+ usleep_range(3000, 5000);
+
--- /dev/null
+From 438fea2a6325933868aebc20279e2669c9a21207 Mon Sep 17 00:00:00 2001
+From: Tomeu Vizoso <tomeu.vizoso@collabora.com>
+Date: Mon, 26 Mar 2018 11:00:01 +0200
+Subject: [PATCH] usb: dwc2: dwc2_vbus_supply_init: fix error check
+
+devm_regulator_get_optional returns -ENODEV if the regulator isn't
+there, so if that's the case we have to make sure not to leave -ENODEV
+in the regulator pointer.
+
+Also, make sure we return 0 in that case, but correctly propagate any
+other errors. Also propagate the error from _dwc2_hcd_start.
+
+Fixes: 531ef5ebea96 ("usb: dwc2: add support for host mode external vbus supply")
+Cc: Amelie Delaunay <amelie.delaunay@st.com>
+Reviewed-by: Amelie Delaunay <amelie.delaunay@st.com>
+Reviewed-by: Heiko Stuebner <heiko@sntech.de>
+Reviewed-by: Grigor Tovmasyan <tovmasya@synopsys.com>
+Tested-by: Heiko Stuebner <heiko@sntech.de>
+Acked-by: Minas Harutyunyan <hminas@synopsys.com>
+Signed-off-by: Tomeu Vizoso <tomeu.vizoso@collabora.com>
+Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
+---
+ drivers/usb/dwc2/hcd.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+--- a/drivers/usb/dwc2/hcd.c
++++ b/drivers/usb/dwc2/hcd.c
+@@ -361,9 +361,14 @@ static void dwc2_gusbcfg_init(struct dwc
+
+ static int dwc2_vbus_supply_init(struct dwc2_hsotg *hsotg)
+ {
++ int ret;
++
+ hsotg->vbus_supply = devm_regulator_get_optional(hsotg->dev, "vbus");
+- if (IS_ERR(hsotg->vbus_supply))
+- return 0;
++ if (IS_ERR(hsotg->vbus_supply)) {
++ ret = PTR_ERR(hsotg->vbus_supply);
++ hsotg->vbus_supply = NULL;
++ return ret == -ENODEV ? 0 : ret;
++ }
+
+ return regulator_enable(hsotg->vbus_supply);
+ }
+@@ -4467,9 +4472,7 @@ static int _dwc2_hcd_start(struct usb_hc
+
+ spin_unlock_irqrestore(&hsotg->lock, flags);
+
+- dwc2_vbus_supply_init(hsotg);
+-
+- return 0;
++ return dwc2_vbus_supply_init(hsotg);
+ }
+
+ /*
--- /dev/null
+From 2c77c57d22adb05b21cdb333a0c42bdfa0e19835 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
+Date: Tue, 16 Jan 2018 16:45:41 +0100
+Subject: [PATCH] mtd: move code adding master MTD out of
+ mtd_add_device_partitions()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This change is a small cleanup of mtd_device_parse_register(). When
+using MTD_PARTITIONED_MASTER it makes sure a master MTD is registered
+before dealing with partitions. The advantage of this is not mixing
+code handling master MTD with code handling partitions.
+
+This commit doesn't change any behavior except for a slightly different
+failure code path. The new code may need to call del_mtd_device when
+something goes wrong.
+
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
+---
+ drivers/mtd/mtdcore.c | 25 +++++++++++++------------
+ 1 file changed, 13 insertions(+), 12 deletions(-)
+
+--- a/drivers/mtd/mtdcore.c
++++ b/drivers/mtd/mtdcore.c
+@@ -641,20 +641,12 @@ static int mtd_add_device_partitions(str
+ {
+ const struct mtd_partition *real_parts = parts->parts;
+ int nbparts = parts->nr_parts;
+- int ret;
+
+- if (nbparts == 0 || IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
+- ret = add_mtd_device(mtd);
+- if (ret)
+- return ret;
+- }
++ if (!nbparts && !device_is_registered(&mtd->dev))
++ return add_mtd_device(mtd);
+
+- if (nbparts > 0) {
+- ret = add_mtd_partitions(mtd, real_parts, nbparts);
+- if (ret && IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER))
+- del_mtd_device(mtd);
+- return ret;
+- }
++ if (nbparts > 0)
++ return add_mtd_partitions(mtd, real_parts, nbparts);
+
+ return 0;
+ }
+@@ -714,6 +706,12 @@ int mtd_device_parse_register(struct mtd
+
+ mtd_set_dev_defaults(mtd);
+
++ if (IS_ENABLED(CONFIG_MTD_PARTITIONED_MASTER)) {
++ ret = add_mtd_device(mtd);
++ if (ret)
++ return ret;
++ }
++
+ memset(&parsed, 0, sizeof(parsed));
+
+ ret = parse_mtd_partitions(mtd, types, &parsed, parser_data);
+@@ -753,6 +751,9 @@ int mtd_device_parse_register(struct mtd
+ out:
+ /* Cleanup any parsed partitions */
+ mtd_part_parser_cleanup(&parsed);
++ if (ret && device_is_registered(&mtd->dev))
++ del_mtd_device(mtd);
++
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(mtd_device_parse_register);
--- /dev/null
+From 0dbe4ea78d69756efeb0bba0764f6bd4a9ee9567 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
+Date: Tue, 16 Jan 2018 16:45:42 +0100
+Subject: [PATCH] mtd: get rid of the mtd_add_device_partitions()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This simplifies code a bit by:
+1) Avoiding an extra (tiny) function
+2) Checking the number of parsed (found) partitions just once
+3) Avoiding clearing/filling struct mtd_partitions manually
+
+With this commit proper functions are called directly from the
+mtd_device_parse_register(). It doesn't need to use minor tricks like
+memsetting struct to 0 to trigger an expected
+mtd_add_device_partitions() behavior.
+
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
+---
+ drivers/mtd/mtdcore.c | 43 ++++++++++++-------------------------------
+ 1 file changed, 12 insertions(+), 31 deletions(-)
+
+--- a/drivers/mtd/mtdcore.c
++++ b/drivers/mtd/mtdcore.c
+@@ -636,21 +636,6 @@ out_error:
+ return ret;
+ }
+
+-static int mtd_add_device_partitions(struct mtd_info *mtd,
+- struct mtd_partitions *parts)
+-{
+- const struct mtd_partition *real_parts = parts->parts;
+- int nbparts = parts->nr_parts;
+-
+- if (!nbparts && !device_is_registered(&mtd->dev))
+- return add_mtd_device(mtd);
+-
+- if (nbparts > 0)
+- return add_mtd_partitions(mtd, real_parts, nbparts);
+-
+- return 0;
+-}
+-
+ /*
+ * Set a few defaults based on the parent devices, if not provided by the
+ * driver
+@@ -701,7 +686,7 @@ int mtd_device_parse_register(struct mtd
+ const struct mtd_partition *parts,
+ int nr_parts)
+ {
+- struct mtd_partitions parsed;
++ struct mtd_partitions parsed = { };
+ int ret;
+
+ mtd_set_dev_defaults(mtd);
+@@ -712,24 +697,20 @@ int mtd_device_parse_register(struct mtd
+ return ret;
+ }
+
+- memset(&parsed, 0, sizeof(parsed));
+-
++ /* Prefer parsed partitions over driver-provided fallback */
+ ret = parse_mtd_partitions(mtd, types, &parsed, parser_data);
+- if ((ret < 0 || parsed.nr_parts == 0) && parts && nr_parts) {
+- /* Fall back to driver-provided partitions */
+- parsed = (struct mtd_partitions){
+- .parts = parts,
+- .nr_parts = nr_parts,
+- };
+- } else if (ret < 0) {
+- /* Didn't come up with parsed OR fallback partitions */
+- pr_info("mtd: failed to find partitions; one or more parsers reports errors (%d)\n",
+- ret);
+- /* Don't abort on errors; we can still use unpartitioned MTD */
+- memset(&parsed, 0, sizeof(parsed));
++ if (!ret && parsed.nr_parts) {
++ parts = parsed.parts;
++ nr_parts = parsed.nr_parts;
+ }
+
+- ret = mtd_add_device_partitions(mtd, &parsed);
++ if (nr_parts)
++ ret = add_mtd_partitions(mtd, parts, nr_parts);
++ else if (!device_is_registered(&mtd->dev))
++ ret = add_mtd_device(mtd);
++ else
++ ret = 0;
++
+ if (ret)
+ goto out;
+
--- /dev/null
+From 5b644aa012f67fd211138a067b9f351f30bdcc60 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
+Date: Wed, 14 Mar 2018 13:10:42 +0100
+Subject: [PATCH] mtd: partitions: add of_match_table parser matching for the
+ "ofpart" type
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+In order to properly support compatibility strings as described in the
+bindings/mtd/partition.txt "ofpart" type should be treated as an
+indication for looking into OF. MTD should check "compatible" property
+and search for a matching parser rather than blindly trying the one
+supporting "fixed-partitions".
+
+It also means that existing "fixed-partitions" parser should get renamed
+to use a more meaningful name.
+
+This commit achieves that aim by introducing a new mtd_part_of_parse().
+It works by looking for a matching parser for every string in the
+"compatible" property (starting with the most specific one).
+
+Please note that driver-specified parsers still take precedence. It's
+assumed that driver providing a parser type has a good reason for that
+(e.g. having platform data with device-specific info). Also doing
+otherwise could break existing setups. The same applies to using default
+parsers (including "cmdlinepart") as some overwrite DT data with cmdline
+argument.
+
+Partition parsers can now provide an of_match_table to enable
+flash<-->parser matching via device tree as documented in the
+mtd/partition.txt.
+
+This support is currently limited to built-in parsers as compatible-based
+matching doesn't use request_module() and friends. This should be sufficient
+for most cases though as compiling parsers as modules isn't a common choice.
+
+Signed-off-by: Brian Norris <computersforpeace@gmail.com>
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Tested-by: Peter Rosin <peda@axentia.se>
+Reviewed-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
+---
+ drivers/mtd/mtdpart.c | 116 +++++++++++++++++++++++++++++++++++++----
+ include/linux/mtd/partitions.h | 1 +
+ 2 files changed, 108 insertions(+), 9 deletions(-)
+
+--- a/drivers/mtd/mtdpart.c
++++ b/drivers/mtd/mtdpart.c
+@@ -30,6 +30,7 @@
+ #include <linux/mtd/mtd.h>
+ #include <linux/mtd/partitions.h>
+ #include <linux/err.h>
++#include <linux/of.h>
+
+ #include "mtdcore.h"
+
+@@ -894,6 +895,92 @@ static int mtd_part_do_parse(struct mtd_
+ }
+
+ /**
++ * mtd_part_get_compatible_parser - find MTD parser by a compatible string
++ *
++ * @compat: compatible string describing partitions in a device tree
++ *
++ * MTD parsers can specify supported partitions by providing a table of
++ * compatibility strings. This function finds a parser that advertises support
++ * for a passed value of "compatible".
++ */
++static struct mtd_part_parser *mtd_part_get_compatible_parser(const char *compat)
++{
++ struct mtd_part_parser *p, *ret = NULL;
++
++ spin_lock(&part_parser_lock);
++
++ list_for_each_entry(p, &part_parsers, list) {
++ const struct of_device_id *matches;
++
++ matches = p->of_match_table;
++ if (!matches)
++ continue;
++
++ for (; matches->compatible[0]; matches++) {
++ if (!strcmp(matches->compatible, compat) &&
++ try_module_get(p->owner)) {
++ ret = p;
++ break;
++ }
++ }
++
++ if (ret)
++ break;
++ }
++
++ spin_unlock(&part_parser_lock);
++
++ return ret;
++}
++
++static int mtd_part_of_parse(struct mtd_info *master,
++ struct mtd_partitions *pparts)
++{
++ struct mtd_part_parser *parser;
++ struct device_node *np;
++ struct property *prop;
++ const char *compat;
++ const char *fixed = "ofpart";
++ int ret, err = 0;
++
++ np = of_get_child_by_name(mtd_get_of_node(master), "partitions");
++ of_property_for_each_string(np, "compatible", prop, compat) {
++ parser = mtd_part_get_compatible_parser(compat);
++ if (!parser)
++ continue;
++ ret = mtd_part_do_parse(parser, master, pparts, NULL);
++ if (ret > 0) {
++ of_node_put(np);
++ return ret;
++ }
++ mtd_part_parser_put(parser);
++ if (ret < 0 && !err)
++ err = ret;
++ }
++ of_node_put(np);
++
++ /*
++ * For backward compatibility we have to try the "ofpart"
++ * parser. It supports old DT format with partitions specified as a
++ * direct subnodes of a flash device DT node without any compatibility
++ * specified we could match.
++ */
++ parser = mtd_part_parser_get(fixed);
++ if (!parser && !request_module("%s", fixed))
++ parser = mtd_part_parser_get(fixed);
++ if (parser) {
++ ret = mtd_part_do_parse(parser, master, pparts, NULL);
++ if (ret > 0)
++ return ret;
++ mtd_part_parser_put(parser);
++ if (ret < 0 && !err)
++ err = ret;
++ }
++
++ return err;
++}
++
++/**
+ * parse_mtd_partitions - parse MTD partitions
+ * @master: the master partition (describes whole MTD device)
+ * @types: names of partition parsers to try or %NULL
+@@ -925,19 +1012,30 @@ int parse_mtd_partitions(struct mtd_info
+ types = default_mtd_part_types;
+
+ for ( ; *types; types++) {
+- pr_debug("%s: parsing partitions %s\n", master->name, *types);
+- parser = mtd_part_parser_get(*types);
+- if (!parser && !request_module("%s", *types))
++ /*
++ * ofpart is a special type that means OF partitioning info
++ * should be used. It requires a bit different logic so it is
++ * handled in a separated function.
++ */
++ if (!strcmp(*types, "ofpart")) {
++ ret = mtd_part_of_parse(master, pparts);
++ } else {
++ pr_debug("%s: parsing partitions %s\n", master->name,
++ *types);
+ parser = mtd_part_parser_get(*types);
+- pr_debug("%s: got parser %s\n", master->name,
+- parser ? parser->name : NULL);
+- if (!parser)
+- continue;
+- ret = mtd_part_do_parse(parser, master, pparts, data);
++ if (!parser && !request_module("%s", *types))
++ parser = mtd_part_parser_get(*types);
++ pr_debug("%s: got parser %s\n", master->name,
++ parser ? parser->name : NULL);
++ if (!parser)
++ continue;
++ ret = mtd_part_do_parse(parser, master, pparts, data);
++ if (ret <= 0)
++ mtd_part_parser_put(parser);
++ }
+ /* Found partitions! */
+ if (ret > 0)
+ return 0;
+- mtd_part_parser_put(parser);
+ /*
+ * Stash the first error we see; only report it if no parser
+ * succeeds
+--- a/include/linux/mtd/partitions.h
++++ b/include/linux/mtd/partitions.h
+@@ -77,6 +77,7 @@ struct mtd_part_parser {
+ struct list_head list;
+ struct module *owner;
+ const char *name;
++ const struct of_device_id *of_match_table;
+ int (*parse_fn)(struct mtd_info *, const struct mtd_partition **,
+ struct mtd_part_parser_data *);
+ void (*cleanup)(const struct mtd_partition *pparts, int nr_parts);
--- /dev/null
+From c0faf43482e7f7dfb6d61847cb93d17748560b24 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
+Date: Wed, 14 Mar 2018 13:10:43 +0100
+Subject: [PATCH] mtd: rename "ofpart" parser to "fixed-partitions" as it fits
+ it better
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Type "ofpart" means that OF should be used to get partitioning info and
+this driver supports "fixed-partitions" binding only. Renaming it should
+lead to less confusion especially when parsers for new compatibility
+strings start to appear.
+
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Reviewed-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
+---
+ drivers/mtd/mtdpart.c | 4 ++--
+ drivers/mtd/ofpart.c | 11 ++++++-----
+ 2 files changed, 8 insertions(+), 7 deletions(-)
+
+--- a/drivers/mtd/mtdpart.c
++++ b/drivers/mtd/mtdpart.c
+@@ -940,7 +940,7 @@ static int mtd_part_of_parse(struct mtd_
+ struct device_node *np;
+ struct property *prop;
+ const char *compat;
+- const char *fixed = "ofpart";
++ const char *fixed = "fixed-partitions";
+ int ret, err = 0;
+
+ np = of_get_child_by_name(mtd_get_of_node(master), "partitions");
+@@ -960,7 +960,7 @@ static int mtd_part_of_parse(struct mtd_
+ of_node_put(np);
+
+ /*
+- * For backward compatibility we have to try the "ofpart"
++ * For backward compatibility we have to try the "fixed-partitions"
+ * parser. It supports old DT format with partitions specified as a
+ * direct subnodes of a flash device DT node without any compatibility
+ * specified we could match.
+--- a/drivers/mtd/ofpart.c
++++ b/drivers/mtd/ofpart.c
+@@ -25,9 +25,9 @@ static bool node_has_compatible(struct d
+ return of_get_property(pp, "compatible", NULL);
+ }
+
+-static int parse_ofpart_partitions(struct mtd_info *master,
+- const struct mtd_partition **pparts,
+- struct mtd_part_parser_data *data)
++static int parse_fixed_partitions(struct mtd_info *master,
++ const struct mtd_partition **pparts,
++ struct mtd_part_parser_data *data)
+ {
+ struct mtd_partition *parts;
+ struct device_node *mtd_node;
+@@ -141,8 +141,8 @@ ofpart_none:
+ }
+
+ static struct mtd_part_parser ofpart_parser = {
+- .parse_fn = parse_ofpart_partitions,
+- .name = "ofpart",
++ .parse_fn = parse_fixed_partitions,
++ .name = "fixed-partitions",
+ };
+
+ static int parse_ofoldpart_partitions(struct mtd_info *master,
+@@ -229,4 +229,5 @@ MODULE_AUTHOR("Vitaly Wool, David Gibson
+ * with the same name. Since we provide the ofoldpart parser, we should have
+ * the corresponding alias.
+ */
++MODULE_ALIAS("fixed-partitions");
+ MODULE_ALIAS("ofoldpart");
--- /dev/null
+From 97b0c7c0df3efd7048ed39d7e2dee34cafd55887 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
+Date: Wed, 14 Mar 2018 13:10:44 +0100
+Subject: [PATCH] mtd: ofpart: add of_match_table with "fixed-partitions"
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This allows using this parser with any flash driver that takes care of
+setting of_node (using mtd_set_of_node helper) correctly. Up to now
+support for "fixed-partitions" DT compatibility string was working only
+with flash drivers that were specifying "ofpart" (manually or by letting
+mtd use the default set of parsers).
+
+This matches existing bindings documentation.
+
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Reviewed-by: Brian Norris <computersforpeace@gmail.com>
+Tested-by: Brian Norris <computersforpeace@gmail.com>
+Reviewed-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
+---
+ drivers/mtd/ofpart.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/mtd/ofpart.c
++++ b/drivers/mtd/ofpart.c
+@@ -140,9 +140,16 @@ ofpart_none:
+ return ret;
+ }
+
++static const struct of_device_id parse_ofpart_match_table[] = {
++ { .compatible = "fixed-partitions" },
++ {},
++};
++MODULE_DEVICE_TABLE(of, parse_ofpart_match_table);
++
+ static struct mtd_part_parser ofpart_parser = {
+ .parse_fn = parse_fixed_partitions,
+ .name = "fixed-partitions",
++ .of_match_table = parse_ofpart_match_table,
+ };
+
+ static int parse_ofoldpart_partitions(struct mtd_info *master,
--- /dev/null
+From 5ac67ce36cfe38b4c104a42ce52c5c8d526f1c95 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
+Date: Tue, 27 Mar 2018 22:35:41 +0200
+Subject: [PATCH] mtd: move code adding (registering) partitions to the
+ parse_mtd_partitions()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This commit slightly simplifies the code. Every parse_mtd_partitions()
+caller (out of two existing ones) had to add partitions & cleanup parser
+on its own. This moves that responsibility into the function.
+
+That change also allows dropping struct mtd_partitions argument.
+
+There is one minor behavior change caused by this cleanup. If
+parse_mtd_partitions() fails to add partitions (add_mtd_partitions()
+returns an error) then mtd_device_parse_register() will still try to
+add (register) fallback partitions. It's a real corner case affecting
+one of uncommon error paths and shouldn't cause any harm.
+
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
+---
+ drivers/mtd/mtdcore.c | 14 ++++----------
+ drivers/mtd/mtdcore.h | 1 -
+ drivers/mtd/mtdpart.c | 44 ++++++++++++++++----------------------------
+ 3 files changed, 20 insertions(+), 39 deletions(-)
+
+--- a/drivers/mtd/mtdcore.c
++++ b/drivers/mtd/mtdcore.c
+@@ -686,7 +686,6 @@ int mtd_device_parse_register(struct mtd
+ const struct mtd_partition *parts,
+ int nr_parts)
+ {
+- struct mtd_partitions parsed = { };
+ int ret;
+
+ mtd_set_dev_defaults(mtd);
+@@ -698,13 +697,10 @@ int mtd_device_parse_register(struct mtd
+ }
+
+ /* Prefer parsed partitions over driver-provided fallback */
+- ret = parse_mtd_partitions(mtd, types, &parsed, parser_data);
+- if (!ret && parsed.nr_parts) {
+- parts = parsed.parts;
+- nr_parts = parsed.nr_parts;
+- }
+-
+- if (nr_parts)
++ ret = parse_mtd_partitions(mtd, types, parser_data);
++ if (ret > 0)
++ ret = 0;
++ else if (nr_parts)
+ ret = add_mtd_partitions(mtd, parts, nr_parts);
+ else if (!device_is_registered(&mtd->dev))
+ ret = add_mtd_device(mtd);
+@@ -730,8 +726,6 @@ int mtd_device_parse_register(struct mtd
+ }
+
+ out:
+- /* Cleanup any parsed partitions */
+- mtd_part_parser_cleanup(&parsed);
+ if (ret && device_is_registered(&mtd->dev))
+ del_mtd_device(mtd);
+
+--- a/drivers/mtd/mtdcore.h
++++ b/drivers/mtd/mtdcore.h
+@@ -15,7 +15,6 @@ int del_mtd_partitions(struct mtd_info *
+ struct mtd_partitions;
+
+ int parse_mtd_partitions(struct mtd_info *master, const char * const *types,
+- struct mtd_partitions *pparts,
+ struct mtd_part_parser_data *data);
+
+ void mtd_part_parser_cleanup(struct mtd_partitions *parts);
+--- a/drivers/mtd/mtdpart.c
++++ b/drivers/mtd/mtdpart.c
+@@ -383,20 +383,7 @@ static inline void free_partition(struct
+ */
+ static int mtd_parse_part(struct mtd_part *slave, const char *const *types)
+ {
+- struct mtd_partitions parsed;
+- int err;
+-
+- err = parse_mtd_partitions(&slave->mtd, types, &parsed, NULL);
+- if (err)
+- return err;
+- else if (!parsed.nr_parts)
+- return -ENOENT;
+-
+- err = add_mtd_partitions(&slave->mtd, parsed.parts, parsed.nr_parts);
+-
+- mtd_part_parser_cleanup(&parsed);
+-
+- return err;
++ return parse_mtd_partitions(&slave->mtd, types, NULL);
+ }
+
+ static struct mtd_part *allocate_partition(struct mtd_info *parent,
+@@ -981,30 +968,27 @@ static int mtd_part_of_parse(struct mtd_
+ }
+
+ /**
+- * parse_mtd_partitions - parse MTD partitions
++ * parse_mtd_partitions - parse and register MTD partitions
++ *
+ * @master: the master partition (describes whole MTD device)
+ * @types: names of partition parsers to try or %NULL
+- * @pparts: info about partitions found is returned here
+ * @data: MTD partition parser-specific data
+ *
+- * This function tries to find partition on MTD device @master. It uses MTD
+- * partition parsers, specified in @types. However, if @types is %NULL, then
+- * the default list of parsers is used. The default list contains only the
++ * This function tries to find & register partitions on MTD device @master. It
++ * uses MTD partition parsers, specified in @types. However, if @types is %NULL,
++ * then the default list of parsers is used. The default list contains only the
+ * "cmdlinepart" and "ofpart" parsers ATM.
+ * Note: If there are more then one parser in @types, the kernel only takes the
+ * partitions parsed out by the first parser.
+ *
+ * This function may return:
+ * o a negative error code in case of failure
+- * o zero otherwise, and @pparts will describe the partitions, number of
+- * partitions, and the parser which parsed them. Caller must release
+- * resources with mtd_part_parser_cleanup() when finished with the returned
+- * data.
++ * o number of found partitions otherwise
+ */
+ int parse_mtd_partitions(struct mtd_info *master, const char *const *types,
+- struct mtd_partitions *pparts,
+ struct mtd_part_parser_data *data)
+ {
++ struct mtd_partitions pparts = { };
+ struct mtd_part_parser *parser;
+ int ret, err = 0;
+
+@@ -1018,7 +1002,7 @@ int parse_mtd_partitions(struct mtd_info
+ * handled in a separated function.
+ */
+ if (!strcmp(*types, "ofpart")) {
+- ret = mtd_part_of_parse(master, pparts);
++ ret = mtd_part_of_parse(master, &pparts);
+ } else {
+ pr_debug("%s: parsing partitions %s\n", master->name,
+ *types);
+@@ -1029,13 +1013,17 @@ int parse_mtd_partitions(struct mtd_info
+ parser ? parser->name : NULL);
+ if (!parser)
+ continue;
+- ret = mtd_part_do_parse(parser, master, pparts, data);
++ ret = mtd_part_do_parse(parser, master, &pparts, data);
+ if (ret <= 0)
+ mtd_part_parser_put(parser);
+ }
+ /* Found partitions! */
+- if (ret > 0)
+- return 0;
++ if (ret > 0) {
++ err = add_mtd_partitions(master, pparts.parts,
++ pparts.nr_parts);
++ mtd_part_parser_cleanup(&pparts);
++ return err ? err : pparts.nr_parts;
++ }
+ /*
+ * Stash the first error we see; only report it if no parser
+ * succeeds
--- /dev/null
+From 237ea0d4762cc14d0fc80e80d61f0f08e1050c7f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
+Date: Thu, 12 Apr 2018 07:24:52 +0200
+Subject: [PATCH] mtd: bcm47xxpart: improve handling TRX partition size
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When bcm47xxpart finds a TRX partition (container) it's supposed to jump
+to the end of it and keep looking for more partitions. TRX and its
+subpartitions are handled by a separate parser.
+
+The problem with old code was relying on the length specified in a TRX
+header. That isn't reliable as TRX is commonly modified to have checksum
+cover only non-changing subpartitions. Otherwise modifying e.g. a rootfs
+would result in CRC32 mismatch and bootloader refusing to boot a
+firmware.
+
+Fix it by trying harder to figure out the real TRX size. We can safely
+assume that TRX has to cover all subpartitions and the last one is at
+least one block in size. Then compare that with the length field.
+
+This makes code more optimal & reliable thanks to skipping data that
+shouldn't be parsed.
+
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
+---
+ drivers/mtd/bcm47xxpart.c | 22 ++++++++++++++++++----
+ 1 file changed, 18 insertions(+), 4 deletions(-)
+
+--- a/drivers/mtd/bcm47xxpart.c
++++ b/drivers/mtd/bcm47xxpart.c
+@@ -186,6 +186,8 @@ static int bcm47xxpart_parse(struct mtd_
+ /* TRX */
+ if (buf[0x000 / 4] == TRX_MAGIC) {
+ struct trx_header *trx;
++ uint32_t last_subpart;
++ uint32_t trx_size;
+
+ if (trx_num >= ARRAY_SIZE(trx_parts))
+ pr_warn("No enough space to store another TRX found at 0x%X\n",
+@@ -195,11 +197,23 @@ static int bcm47xxpart_parse(struct mtd_
+ bcm47xxpart_add_part(&parts[curr_part++], "firmware",
+ offset, 0);
+
+- /* Jump to the end of TRX */
++ /*
++ * Try to find TRX size. The "length" field isn't fully
++ * reliable as it could be decreased to make CRC32 cover
++ * only part of TRX data. It's commonly used as checksum
++ * can't cover e.g. ever-changing rootfs partition.
++ * Use offsets as helpers for assuming min TRX size.
++ */
+ trx = (struct trx_header *)buf;
+- offset = roundup(offset + trx->length, blocksize);
+- /* Next loop iteration will increase the offset */
+- offset -= blocksize;
++ last_subpart = max3(trx->offset[0], trx->offset[1],
++ trx->offset[2]);
++ trx_size = max(trx->length, last_subpart + blocksize);
++
++ /*
++ * Skip the TRX data. Decrease offset by block size as
++ * the next loop iteration will increase it.
++ */
++ offset += roundup(trx_size, blocksize) - blocksize;
+ continue;
+ }
+
--- /dev/null
+From cf589ce71e84d3b8811c65740645af254c5248c0 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
+Date: Wed, 9 May 2018 10:17:29 +0200
+Subject: [PATCH] mtd: bcm47xxpart: add of_match_table with a new DT binding
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This allows using bcm47xxpart parser to find partitions on flash
+described in DT using the "brcm,bcm947xx-cfe-partitions" compatible
+property. It means this parser doesn't have to be explicitly selected by
+a flash driver anymore. It can be used e.g. together with a generic
+m25p80 / spi-nor if device is just properly described.
+
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
+---
+ drivers/mtd/bcm47xxpart.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/mtd/bcm47xxpart.c
++++ b/drivers/mtd/bcm47xxpart.c
+@@ -304,9 +304,16 @@ static int bcm47xxpart_parse(struct mtd_
+ return curr_part;
+ };
+
++static const struct of_device_id bcm47xxpart_of_match_table[] = {
++ { .compatible = "brcm,bcm947xx-cfe-partitions" },
++ {},
++};
++MODULE_DEVICE_TABLE(of, bcm47xxpart_of_match_table);
++
+ static struct mtd_part_parser bcm47xxpart_mtd_parser = {
+ .parse_fn = bcm47xxpart_parse,
+ .name = "bcm47xxpart",
++ .of_match_table = bcm47xxpart_of_match_table,
+ };
+ module_mtd_part_parser(bcm47xxpart_mtd_parser);
+
--- /dev/null
+From 98534a58c8a40cdc9e3bcb04d74719fbcedfeb52 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
+Date: Tue, 26 Jun 2018 00:05:08 +0200
+Subject: [PATCH] mtd: parsers: trx: add of_match_table with the new DT binding
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This allows using TRX parser to find TRX partitions on flash device
+described in DT using a proper binding. It's useful for devices storing
+firmware on a separate flash and having a rootfs partition in it.
+
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
+---
+ drivers/mtd/parsers/parser_trx.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/drivers/mtd/parsers/parser_trx.c
++++ b/drivers/mtd/parsers/parser_trx.c
+@@ -116,9 +116,16 @@ static int parser_trx_parse(struct mtd_i
+ return i;
+ };
+
++static const struct of_device_id mtd_parser_trx_of_match_table[] = {
++ { .compatible = "brcm,trx" },
++ {},
++};
++MODULE_DEVICE_TABLE(of, mtd_parser_trx_of_match_table);
++
+ static struct mtd_part_parser mtd_parser_trx = {
+ .parse_fn = parser_trx_parse,
+ .name = "trx",
++ .of_match_table = mtd_parser_trx_of_match_table,
+ };
+ module_mtd_part_parser(mtd_parser_trx);
+
--- /dev/null
+From 76a832254ab05502c9394cc51ded6f0abe0e0bee Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
+Date: Fri, 13 Jul 2018 16:32:21 +0200
+Subject: [PATCH] mtd: partitions: use DT info for parsing partitions with
+ "compatible" prop
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+So far only flash devices could be described in DT regarding partitions
+parsing. That could be done with "partitions" subnode and a proper
+"compatible" string.
+
+Some devices may use hierarchical (multi-level) layouts and may mix used
+layouts (fixed and dynamic). Describing that in DT is done by specifying
+"compatible" for DT-represented partition plus optionally more
+properties and/or subnodes.
+
+To support such layouts each DT partition has to be checked for
+additional description.
+
+Please note this implementation will work in parallel with support for
+partition type specified for non-DT setups. That already works since
+commit 1a0915be1926 ("mtd: partitions: add support for partition
+parsers").
+
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
+---
+ drivers/mtd/mtdpart.c | 33 +++++++++++++--------------------
+ 1 file changed, 13 insertions(+), 20 deletions(-)
+
+--- a/drivers/mtd/mtdpart.c
++++ b/drivers/mtd/mtdpart.c
+@@ -370,22 +370,6 @@ static inline void free_partition(struct
+ kfree(p);
+ }
+
+-/**
+- * mtd_parse_part - parse MTD partition looking for subpartitions
+- *
+- * @slave: part that is supposed to be a container and should be parsed
+- * @types: NULL-terminated array with names of partition parsers to try
+- *
+- * Some partitions are kind of containers with extra subpartitions (volumes).
+- * There can be various formats of such containers. This function tries to use
+- * specified parsers to analyze given partition and registers found
+- * subpartitions on success.
+- */
+-static int mtd_parse_part(struct mtd_part *slave, const char *const *types)
+-{
+- return parse_mtd_partitions(&slave->mtd, types, NULL);
+-}
+-
+ static struct mtd_part *allocate_partition(struct mtd_info *parent,
+ const struct mtd_partition *part, int partno,
+ uint64_t cur_offset)
+@@ -783,8 +767,8 @@ int add_mtd_partitions(struct mtd_info *
+
+ add_mtd_device(&slave->mtd);
+ mtd_add_partition_attrs(slave);
+- if (parts[i].types)
+- mtd_parse_part(slave, parts[i].types);
++ /* Look for subpartitions */
++ parse_mtd_partitions(&slave->mtd, parts[i].types, NULL);
+
+ cur_offset = slave->offset + slave->mtd.size;
+ }
+@@ -860,6 +844,12 @@ static const char * const default_mtd_pa
+ NULL
+ };
+
++/* Check DT only when looking for subpartitions. */
++static const char * const default_subpartition_types[] = {
++ "ofpart",
++ NULL
++};
++
+ static int mtd_part_do_parse(struct mtd_part_parser *parser,
+ struct mtd_info *master,
+ struct mtd_partitions *pparts,
+@@ -930,7 +920,9 @@ static int mtd_part_of_parse(struct mtd_
+ const char *fixed = "fixed-partitions";
+ int ret, err = 0;
+
+- np = of_get_child_by_name(mtd_get_of_node(master), "partitions");
++ np = mtd_get_of_node(master);
++ if (!mtd_is_partition(master))
++ np = of_get_child_by_name(np, "partitions");
+ of_property_for_each_string(np, "compatible", prop, compat) {
+ parser = mtd_part_get_compatible_parser(compat);
+ if (!parser)
+@@ -993,7 +985,8 @@ int parse_mtd_partitions(struct mtd_info
+ int ret, err = 0;
+
+ if (!types)
+- types = default_mtd_part_types;
++ types = mtd_is_partition(master) ? default_subpartition_types :
++ default_mtd_part_types;
+
+ for ( ; *types; types++) {
+ /*
--- /dev/null
+From 1186af457cc186c5ed01708da71b1ffbdf0a2638 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
+Date: Tue, 20 Nov 2018 09:55:45 +0100
+Subject: [PATCH] mtd: keep original flags for every struct mtd_info
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When allocating a new partition mtd subsystem runs internal tests in the
+allocate_partition(). They may result in modifying specified flags (e.g.
+dropping some /features/ like write access).
+
+Those constraints don't have to be necessarily true for subpartitions. It
+may happen parent partition isn't block aligned (effectively disabling
+write access) while subpartition may fit blocks nicely. In such case all
+checks should be run again (starting with original flags value).
+
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
+---
+ drivers/mtd/mtdcore.c | 2 ++
+ drivers/mtd/mtdpart.c | 3 ++-
+ include/linux/mtd/mtd.h | 1 +
+ 3 files changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/mtd/mtdcore.c
++++ b/drivers/mtd/mtdcore.c
+@@ -650,6 +650,8 @@ static void mtd_set_dev_defaults(struct
+ } else {
+ pr_debug("mtd device won't show a device symlink in sysfs\n");
+ }
++
++ mtd->orig_flags = mtd->flags;
+ }
+
+ /**
+--- a/drivers/mtd/mtdpart.c
++++ b/drivers/mtd/mtdpart.c
+@@ -394,7 +394,8 @@ static struct mtd_part *allocate_partiti
+
+ /* set up the MTD object for this partition */
+ slave->mtd.type = parent->type;
+- slave->mtd.flags = parent->flags & ~part->mask_flags;
++ slave->mtd.flags = parent->orig_flags & ~part->mask_flags;
++ slave->mtd.orig_flags = slave->mtd.flags;
+ slave->mtd.size = part->size;
+ slave->mtd.writesize = parent->writesize;
+ slave->mtd.writebufsize = parent->writebufsize;
+--- a/include/linux/mtd/mtd.h
++++ b/include/linux/mtd/mtd.h
+@@ -218,6 +218,7 @@ struct mtd_debug_info {
+ struct mtd_info {
+ u_char type;
+ uint32_t flags;
++ uint32_t orig_flags; /* Flags as before running mtd checks */
+ uint64_t size; // Total size of the MTD
+
+ /* "Major" erase size for the device. Naïve users may take this
--- /dev/null
+From 6750f61a13a0197c40e4a40739117493b15f19e8 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
+Date: Tue, 20 Nov 2018 10:24:09 +0100
+Subject: [PATCH] mtd: improve calculating partition boundaries when checking
+ for alignment
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+When checking for alignment mtd should check absolute offsets. It's
+important for subpartitions as it doesn't make sense to check their
+relative addresses.
+
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
+---
+ drivers/mtd/mtdpart.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/drivers/mtd/mtdpart.c
++++ b/drivers/mtd/mtdpart.c
+@@ -61,6 +61,15 @@ static inline struct mtd_part *mtd_to_pa
+ return container_of(mtd, struct mtd_part, mtd);
+ }
+
++static u64 part_absolute_offset(struct mtd_info *mtd)
++{
++ struct mtd_part *part = mtd_to_part(mtd);
++
++ if (!mtd_is_partition(mtd))
++ return 0;
++
++ return part_absolute_offset(part->parent) + part->offset;
++}
+
+ /*
+ * MTD methods which simply translate the effective address and pass through
+@@ -562,7 +571,7 @@ static struct mtd_part *allocate_partiti
+ if (!(slave->mtd.flags & MTD_NO_ERASE))
+ wr_alignment = slave->mtd.erasesize;
+
+- tmp = slave->offset;
++ tmp = part_absolute_offset(parent) + slave->offset;
+ remainder = do_div(tmp, wr_alignment);
+ if ((slave->mtd.flags & MTD_WRITEABLE) && remainder) {
+ /* Doesn't start on a boundary of major erase size */
+@@ -573,7 +582,7 @@ static struct mtd_part *allocate_partiti
+ part->name);
+ }
+
+- tmp = slave->mtd.size;
++ tmp = part_absolute_offset(parent) + slave->mtd.size;
+ remainder = do_div(tmp, wr_alignment);
+ if ((slave->mtd.flags & MTD_WRITEABLE) && remainder) {
+ slave->mtd.flags &= ~MTD_WRITEABLE;
--- /dev/null
+From 12acd136913ccdf394eeb2bc8686ff5505368119 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
+Date: Thu, 12 Oct 2017 10:21:26 +0200
+Subject: [PATCH] net: bgmac: enable master mode for BCM54210E and B50212E PHYs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+There are 4 very similar PHYs:
+0x600d84a1: BCM54210E (rev B0)
+0x600d84a2: BCM54210E (rev B1)
+0x600d84a5: B50212E (rev B0)
+0x600d84a6: B50212E (rev B1)
+that need setting master mode manually. It's because they run in slave
+mode by default with Automatic Slave/Master configuration disabled which
+can lead to unreliable connection with massive ping loss.
+
+So far it was reported for a board with BCM47189 SoC and B50212E B1 PHY
+connected to the bgmac supported ethernet device. Telling PHY driver to
+setup PHY properly solves this issue.
+
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/ethernet/broadcom/bgmac-bcma.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
++++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
+@@ -184,13 +184,19 @@ static int bgmac_probe(struct bcma_devic
+
+ if (!bgmac_is_bcm4707_family(core) &&
+ !(ci->id == BCMA_CHIP_ID_BCM53573 && core->core_unit == 1)) {
++ struct phy_device *phydev;
++
+ mii_bus = bcma_mdio_mii_register(bgmac);
+ if (IS_ERR(mii_bus)) {
+ err = PTR_ERR(mii_bus);
+ goto err;
+ }
+-
+ bgmac->mii_bus = mii_bus;
++
++ phydev = mdiobus_get_phy(bgmac->mii_bus, bgmac->phyaddr);
++ if (ci->id == BCMA_CHIP_ID_BCM53573 && phydev &&
++ (phydev->drv->phy_id & phydev->drv->phy_id_mask) == PHY_ID_BCM54210E)
++ phydev->dev_flags |= PHY_BRCM_EN_MASTER_MODE;
+ }
+
+ if (core->bus->hosttype == BCMA_HOSTTYPE_PCI) {
--- /dev/null
+From 2355a6546a053b1c16ebefd6ce1f0cccc00e1da5 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
+Date: Thu, 12 Oct 2017 10:21:25 +0200
+Subject: [PATCH] net: phy: broadcom: support new device flag for setting
+ master mode
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Some of Broadcom's PHYs run by default in slave mode with Automatic
+Slave/Master configuration disabled. It stops them from working properly
+with some devices.
+
+So far it has been verified for BCM54210E and BCM50212E which don't
+work well with Intel's I217-LM and I218-LM:
+http://ark.intel.com/products/60019/Intel-Ethernet-Connection-I217-LM
+http://ark.intel.com/products/71307/Intel-Ethernet-Connection-I218-LM
+I was told there is massive ping loss.
+
+This commit adds support for a new flag which can be set by an ethernet
+driver to fixup PHY setup.
+
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ drivers/net/phy/broadcom.c | 6 ++++++
+ include/linux/brcmphy.h | 1 +
+ 2 files changed, 7 insertions(+)
+
+--- a/drivers/net/phy/broadcom.c
++++ b/drivers/net/phy/broadcom.c
+@@ -43,6 +43,12 @@ static int bcm54210e_config_init(struct
+ val &= ~BCM54810_SHD_CLK_CTL_GTXCLK_EN;
+ bcm_phy_write_shadow(phydev, BCM54810_SHD_CLK_CTL, val);
+
++ if (phydev->dev_flags & PHY_BRCM_EN_MASTER_MODE) {
++ val = phy_read(phydev, MII_CTRL1000);
++ val |= CTL1000_AS_MASTER | CTL1000_ENABLE_MASTER;
++ phy_write(phydev, MII_CTRL1000, val);
++ }
++
+ return 0;
+ }
+
+--- a/include/linux/brcmphy.h
++++ b/include/linux/brcmphy.h
+@@ -64,6 +64,7 @@
+ #define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00002000
+ #define PHY_BRCM_CLEAR_RGMII_MODE 0x00004000
+ #define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00008000
++#define PHY_BRCM_EN_MASTER_MODE 0x00010000
+
+ /* Broadcom BCM7xxx specific workarounds */
+ #define PHY_BRCM_7XXX_REV(x) (((x) >> 8) & 0xff)
--- /dev/null
+From f11a04464ae57e8db1bb7634547842b43e36a898 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Jan=20Kundr=C3=A1t?= <jan.kundrat@cesnet.cz>
+Date: Fri, 22 Dec 2017 22:47:16 +0100
+Subject: i2c: gpio: Enable working over slow can_sleep GPIOs
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+"Slow" GPIOs (usually those connected over an SPI or an I2C bus) are,
+well, slow in their operation. It is generally a good idea to avoid
+using them for time-critical operation, but sometimes the hardware just
+sucks, and the software has to cope. In addition to that, the I2C bus
+itself does not actually define any strict timing limits; the bus is
+free to go all the way down to DC. The timeouts (and therefore the
+slowest acceptable frequency) are present only in SMBus.
+
+The `can_sleep` is IMHO a wrong concept to use here. My SPI-to-quad-UART
+chip (MAX14830) is connected via a 26MHz SPI bus, and it happily drives
+SCL at 200kHz (5µs pulses) during my benchmarks. That's faster than the
+maximal allowed speed of the traditional I2C.
+
+The previous version of this code did not really block operation over
+slow GPIO pins, anyway. Instead, it just resorted to printing a warning
+with a backtrace each time a GPIO pin was accessed, thereby slowing
+things down even more.
+
+Finally, it's not just me. A similar patch was originally submitted in
+2015 [1].
+
+[1] https://patchwork.ozlabs.org/patch/450956/
+
+Signed-off-by: Jan Kundrát <jan.kundrat@cesnet.cz>
+Acked-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
+Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
+---
+ drivers/i2c/busses/i2c-gpio.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/drivers/i2c/busses/i2c-gpio.c
++++ b/drivers/i2c/busses/i2c-gpio.c
+@@ -44,7 +44,7 @@ static void i2c_gpio_setsda_val(void *da
+ {
+ struct i2c_gpio_platform_data *pdata = data;
+
+- gpio_set_value(pdata->sda_pin, state);
++ gpio_set_value_cansleep(pdata->sda_pin, state);
+ }
+
+ /* Toggle SCL by changing the direction of the pin. */
+@@ -68,21 +68,21 @@ static void i2c_gpio_setscl_val(void *da
+ {
+ struct i2c_gpio_platform_data *pdata = data;
+
+- gpio_set_value(pdata->scl_pin, state);
++ gpio_set_value_cansleep(pdata->scl_pin, state);
+ }
+
+ static int i2c_gpio_getsda(void *data)
+ {
+ struct i2c_gpio_platform_data *pdata = data;
+
+- return gpio_get_value(pdata->sda_pin);
++ return gpio_get_value_cansleep(pdata->sda_pin);
+ }
+
+ static int i2c_gpio_getscl(void *data)
+ {
+ struct i2c_gpio_platform_data *pdata = data;
+
+- return gpio_get_value(pdata->scl_pin);
++ return gpio_get_value_cansleep(pdata->scl_pin);
+ }
+
+ static int of_i2c_gpio_get_pins(struct device_node *np,
+@@ -175,6 +175,9 @@ static int i2c_gpio_probe(struct platfor
+ memcpy(pdata, dev_get_platdata(&pdev->dev), sizeof(*pdata));
+ }
+
++ if (gpiod_cansleep(gpio_to_desc(pdata->sda_pin)) || gpiod_cansleep(gpio_to_desc(pdata->scl_pin)))
++ dev_warn(&pdev->dev, "Slow GPIO pins might wreak havoc into I2C/SMBus bus timing");
++
+ if (pdata->sda_is_open_drain) {
+ gpio_direction_output(pdata->sda_pin, 1);
+ bit_data->setsda = i2c_gpio_setsda_val;
--- /dev/null
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Thu, 24 May 2018 11:56:48 +0300
+Subject: [PATCH] net: bridge: add support for port isolation
+
+This patch adds support for a new port flag - BR_ISOLATED. If it is set
+then isolated ports cannot communicate between each other, but they can
+still communicate with non-isolated ports. The same can be achieved via
+ACLs but they can't scale with large number of ports and also the
+complexity of the rules grows. This feature can be used to achieve
+isolated vlan functionality (similar to pvlan) as well, though currently
+it will be port-wide (for all vlans on the port). The new test in
+should_deliver uses data that is already cache hot and the new boolean
+is used to avoid an additional source port test in should_deliver.
+
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Reviewed-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+
+--- a/include/uapi/linux/if_link.h
++++ b/include/uapi/linux/if_link.h
+@@ -326,6 +326,8 @@ enum {
+ IFLA_BRPORT_MCAST_TO_UCAST,
+ IFLA_BRPORT_VLAN_TUNNEL,
+ IFLA_BRPORT_BCAST_FLOOD,
++ IFLA_BRPORT_NEIGH_SUPPRESS,
++ IFLA_BRPORT_ISOLATED,
+ __IFLA_BRPORT_MAX
+ };
+ #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
+--- a/net/bridge/br_forward.c
++++ b/net/bridge/br_forward.c
+@@ -30,7 +30,8 @@ static inline int should_deliver(const s
+ vg = nbp_vlan_group_rcu(p);
+ return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
+ br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING &&
+- nbp_switchdev_allowed_egress(p, skb);
++ nbp_switchdev_allowed_egress(p, skb) &&
++ !br_skb_isolated(p, skb);
+ }
+
+ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
+--- a/net/bridge/br_input.c
++++ b/net/bridge/br_input.c
+@@ -170,6 +170,7 @@ int br_handle_frame_finish(struct net *n
+ goto drop;
+
+ BR_INPUT_SKB_CB(skb)->brdev = br->dev;
++ BR_INPUT_SKB_CB(skb)->src_port_isolated = !!(p->flags & BR_ISOLATED);
+
+ if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP))
+ br_do_proxy_arp(skb, br, vid, p);
+--- a/net/bridge/br_netlink.c
++++ b/net/bridge/br_netlink.c
+@@ -138,6 +138,7 @@ static inline size_t br_port_info_size(v
+ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */
+ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */
+ + nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */
++ + nla_total_size(1) /* IFLA_BRPORT_ISOLATED */
+ + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */
+ + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */
+ + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */
+@@ -208,7 +209,8 @@ static int br_port_fill_attrs(struct sk_
+ p->topology_change_ack) ||
+ nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) ||
+ nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags &
+- BR_VLAN_TUNNEL)))
++ BR_VLAN_TUNNEL)) ||
++ nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)))
+ return -EMSGSIZE;
+
+ timerval = br_timer_value(&p->message_age_timer);
+@@ -637,6 +639,7 @@ static const struct nla_policy br_port_p
+ [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 },
+ [IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 },
+ [IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 },
++ [IFLA_BRPORT_ISOLATED] = { .type = NLA_U8 },
+ };
+
+ /* Change the state of the port and notify spanning tree */
+@@ -773,6 +776,11 @@ static int br_setport(struct net_bridge_
+ return err;
+ }
+ #endif
++
++ err = br_set_port_flag(p, tb, IFLA_BRPORT_ISOLATED, BR_ISOLATED);
++ if (err)
++ return err;
++
+ br_port_flags_change(p, old_flags ^ p->flags);
+ return 0;
+ }
+--- a/net/bridge/br_private.h
++++ b/net/bridge/br_private.h
+@@ -407,6 +407,7 @@ struct br_input_skb_cb {
+ #endif
+
+ bool proxyarp_replied;
++ bool src_port_isolated;
+
+ #ifdef CONFIG_BRIDGE_VLAN_FILTERING
+ bool vlan_filtered;
+@@ -554,6 +555,14 @@ int br_forward_finish(struct net *net, s
+ void br_flood(struct net_bridge *br, struct sk_buff *skb,
+ enum br_pkt_type pkt_type, bool local_rcv, bool local_orig);
+
++/* return true if both source port and dest port are isolated */
++static inline bool br_skb_isolated(const struct net_bridge_port *to,
++ const struct sk_buff *skb)
++{
++ return BR_INPUT_SKB_CB(skb)->src_port_isolated &&
++ (to->flags & BR_ISOLATED);
++}
++
+ /* br_if.c */
+ void br_port_carrier_check(struct net_bridge_port *p);
+ int br_add_bridge(struct net *net, const char *name);
+--- a/net/bridge/br_sysfs_if.c
++++ b/net/bridge/br_sysfs_if.c
+@@ -174,6 +174,7 @@ BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP);
+ BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI);
+ BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD);
+ BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD);
++BRPORT_ATTR_FLAG(isolated, BR_ISOLATED);
+
+ #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
+@@ -223,6 +224,7 @@ static const struct brport_attribute *br
+ &brport_attr_proxyarp_wifi,
+ &brport_attr_multicast_flood,
+ &brport_attr_broadcast_flood,
++ &brport_attr_isolated,
+ NULL
+ };
+
+--- a/include/linux/if_bridge.h
++++ b/include/linux/if_bridge.h
+@@ -49,6 +49,7 @@ struct br_ip_list {
+ #define BR_MULTICAST_TO_UNICAST BIT(12)
+ #define BR_VLAN_TUNNEL BIT(13)
+ #define BR_BCAST_FLOOD BIT(14)
++#define BR_ISOLATED BIT(16)
+
+ #define BR_DEFAULT_AGEING_TIME (300 * HZ)
+
--- /dev/null
+From 649affd04813c43e0a72886517fcfccd63230981 Mon Sep 17 00:00:00 2001
+From: Hauke Mehrtens <hauke@hauke-m.de>
+Date: Mon, 29 Jun 2015 16:53:03 +0200
+Subject: uapi/if_ether.h: prevent redefinition of struct ethhdr
+
+Musl provides its own ethhdr struct definition. Add a guard to prevent
+its definition if the appropriate musl header has already been included.
+
+glibc does not implement this header, but when glibc will implement this
+they can just define __UAPI_DEF_ETHHDR 0 to make it work with the
+kernel.
+
+Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
+---
+ include/uapi/linux/if_ether.h | 3 +++
+ include/uapi/linux/libc-compat.h | 6 ++++++
+ 2 files changed, 9 insertions(+)
+
+--- a/include/uapi/linux/if_ether.h
++++ b/include/uapi/linux/if_ether.h
+@@ -23,6 +23,7 @@
+ #define _UAPI_LINUX_IF_ETHER_H
+
+ #include <linux/types.h>
++#include <linux/libc-compat.h>
+
+ /*
+ * IEEE 802.3 Ethernet magic constants. The frame sizes omit the preamble
+@@ -150,11 +151,13 @@
+ * This is an Ethernet frame header.
+ */
+
++#if __UAPI_DEF_ETHHDR
+ struct ethhdr {
+ unsigned char h_dest[ETH_ALEN]; /* destination eth addr */
+ unsigned char h_source[ETH_ALEN]; /* source ether addr */
+ __be16 h_proto; /* packet type ID field */
+ } __attribute__((packed));
++#endif
+
+
+ #endif /* _UAPI_LINUX_IF_ETHER_H */
+--- a/include/uapi/linux/libc-compat.h
++++ b/include/uapi/linux/libc-compat.h
+@@ -264,4 +264,10 @@
+
+ #endif /* __GLIBC__ */
+
++/* Definitions for if_ether.h */
++/* allow libcs like musl to deactivate this, glibc does not implement this. */
++#ifndef __UAPI_DEF_ETHHDR
++#define __UAPI_DEF_ETHHDR 1
++#endif
++
+ #endif /* _UAPI_LIBC_COMPAT_H */
--- /dev/null
+From e58f33cc84bc089c430ac955f3cad6380ae98591 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Thu, 7 Dec 2017 16:28:23 +0100
+Subject: [PATCH] netfilter: add defines for arp/decnet max hooks
+
+The kernel already has defines for this, but they are in uapi exposed
+headers.
+
+Including these from netns.h causes build errors and also adds unneeded
+dependencies on headers that we don't need.
+
+So move these defines to netfilter_defs.h and place the uapi ones
+in ifndef __KERNEL__ to keep them for userspace.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ include/linux/netfilter_defs.h | 6 ++++++
+ include/uapi/linux/netfilter_arp.h | 3 +++
+ include/uapi/linux/netfilter_decnet.h | 4 +++-
+ 3 files changed, 12 insertions(+), 1 deletion(-)
+
+--- a/include/linux/netfilter_defs.h
++++ b/include/linux/netfilter_defs.h
+@@ -7,4 +7,10 @@
+ /* Largest hook number + 1, see uapi/linux/netfilter_decnet.h */
+ #define NF_MAX_HOOKS 8
+
++/* in/out/forward only */
++#define NF_ARP_NUMHOOKS 3
++
++/* max hook is NF_DN_ROUTE (6), also see uapi/linux/netfilter_decnet.h */
++#define NF_DN_NUMHOOKS 7
++
+ #endif
+--- a/include/uapi/linux/netfilter_arp.h
++++ b/include/uapi/linux/netfilter_arp.h
+@@ -15,6 +15,9 @@
+ #define NF_ARP_IN 0
+ #define NF_ARP_OUT 1
+ #define NF_ARP_FORWARD 2
++
++#ifndef __KERNEL__
+ #define NF_ARP_NUMHOOKS 3
++#endif
+
+ #endif /* __LINUX_ARP_NETFILTER_H */
+--- a/include/uapi/linux/netfilter_decnet.h
++++ b/include/uapi/linux/netfilter_decnet.h
+@@ -24,6 +24,9 @@
+ #define NFC_DN_IF_IN 0x0004
+ /* Output device. */
+ #define NFC_DN_IF_OUT 0x0008
++
++/* kernel define is in netfilter_defs.h */
++#define NF_DN_NUMHOOKS 7
+ #endif /* ! __KERNEL__ */
+
+ /* DECnet Hooks */
+@@ -41,7 +44,6 @@
+ #define NF_DN_HELLO 5
+ /* Input Routing Packets */
+ #define NF_DN_ROUTE 6
+-#define NF_DN_NUMHOOKS 7
+
+ enum nf_dn_hook_priorities {
+ NF_DN_PRI_FIRST = INT_MIN,
--- /dev/null
+From 4e645b47c4f000a503b9c90163ad905786b9bc1d Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 1 Dec 2017 00:21:02 +0100
+Subject: [PATCH 02/11] netfilter: core: make nf_unregister_net_hooks simple
+ wrapper again
+
+This reverts commit d3ad2c17b4047
+("netfilter: core: batch nf_unregister_net_hooks synchronize_net calls").
+
+Nothing wrong with it. However, followup patch will delay freeing of hooks
+with call_rcu, so all synchronize_net() calls become obsolete and there
+is no need anymore for this batching.
+
+This revert causes a temporary performance degradation when destroying
+network namespace, but it's resolved with the upcoming call_rcu conversion.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ net/netfilter/core.c | 59 +++-------------------------------------------------
+ 1 file changed, 3 insertions(+), 56 deletions(-)
+
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -395,63 +395,10 @@ EXPORT_SYMBOL(nf_register_net_hooks);
+ void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
+ unsigned int hookcount)
+ {
+- struct nf_hook_entries *to_free[16], *p;
+- struct nf_hook_entries __rcu **pp;
+- unsigned int i, j, n;
++ unsigned int i;
+
+- mutex_lock(&nf_hook_mutex);
+- for (i = 0; i < hookcount; i++) {
+- pp = nf_hook_entry_head(net, &reg[i]);
+- if (!pp)
+- continue;
+-
+- p = nf_entry_dereference(*pp);
+- if (WARN_ON_ONCE(!p))
+- continue;
+- __nf_unregister_net_hook(p, &reg[i]);
+- }
+- mutex_unlock(&nf_hook_mutex);
+-
+- do {
+- n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free));
+-
+- mutex_lock(&nf_hook_mutex);
+-
+- for (i = 0, j = 0; i < hookcount && j < n; i++) {
+- pp = nf_hook_entry_head(net, &reg[i]);
+- if (!pp)
+- continue;
+-
+- p = nf_entry_dereference(*pp);
+- if (!p)
+- continue;
+-
+- to_free[j] = __nf_hook_entries_try_shrink(pp);
+- if (to_free[j])
+- ++j;
+- }
+-
+- mutex_unlock(&nf_hook_mutex);
+-
+- if (j) {
+- unsigned int nfq;
+-
+- synchronize_net();
+-
+- /* need 2nd synchronize_net() if nfqueue is used, skb
+- * can get reinjected right before nf_queue_hook_drop()
+- */
+- nfq = nf_queue_nf_hook_drop(net);
+- if (nfq)
+- synchronize_net();
+-
+- for (i = 0; i < j; i++)
+- kvfree(to_free[i]);
+- }
+-
+- reg += n;
+- hookcount -= n;
+- } while (hookcount > 0);
++ for (i = 0; i < hookcount; i++)
++ nf_unregister_net_hook(net, &reg[i]);
+ }
+ EXPORT_SYMBOL(nf_unregister_net_hooks);
+
--- /dev/null
+From 26888dfd7e7454686b8d3ea9ba5045d5f236e4d7 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 1 Dec 2017 00:21:03 +0100
+Subject: [PATCH 03/11] netfilter: core: remove synchronize_net call if nfqueue
+ is used
+
+since commit 960632ece6949b ("netfilter: convert hook list to an array")
+nfqueue no longer stores a pointer to the hook that caused the packet
+to be queued. Therefore no extra synchronize_net() call is needed after
+dropping the packets enqueued by the old rule blob.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ include/net/netfilter/nf_queue.h | 2 +-
+ net/netfilter/core.c | 6 +-----
+ net/netfilter/nf_internals.h | 2 +-
+ net/netfilter/nf_queue.c | 7 ++-----
+ net/netfilter/nfnetlink_queue.c | 9 ++-------
+ 5 files changed, 7 insertions(+), 19 deletions(-)
+
+--- a/include/net/netfilter/nf_queue.h
++++ b/include/net/netfilter/nf_queue.h
+@@ -25,7 +25,7 @@ struct nf_queue_entry {
+ struct nf_queue_handler {
+ int (*outfn)(struct nf_queue_entry *entry,
+ unsigned int queuenum);
+- unsigned int (*nf_hook_drop)(struct net *net);
++ void (*nf_hook_drop)(struct net *net);
+ };
+
+ void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh);
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -341,7 +341,6 @@ void nf_unregister_net_hook(struct net *
+ {
+ struct nf_hook_entries __rcu **pp;
+ struct nf_hook_entries *p;
+- unsigned int nfq;
+
+ pp = nf_hook_entry_head(net, reg);
+ if (!pp)
+@@ -364,10 +363,7 @@ void nf_unregister_net_hook(struct net *
+
+ synchronize_net();
+
+- /* other cpu might still process nfqueue verdict that used reg */
+- nfq = nf_queue_nf_hook_drop(net);
+- if (nfq)
+- synchronize_net();
++ nf_queue_nf_hook_drop(net);
+ kvfree(p);
+ }
+ EXPORT_SYMBOL(nf_unregister_net_hook);
+--- a/net/netfilter/nf_internals.h
++++ b/net/netfilter/nf_internals.h
+@@ -10,7 +10,7 @@
+ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
+ const struct nf_hook_entries *entries, unsigned int index,
+ unsigned int verdict);
+-unsigned int nf_queue_nf_hook_drop(struct net *net);
++void nf_queue_nf_hook_drop(struct net *net);
+
+ /* nf_log.c */
+ int __init netfilter_log_init(void);
+--- a/net/netfilter/nf_queue.c
++++ b/net/netfilter/nf_queue.c
+@@ -96,18 +96,15 @@ void nf_queue_entry_get_refs(struct nf_q
+ }
+ EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
+
+-unsigned int nf_queue_nf_hook_drop(struct net *net)
++void nf_queue_nf_hook_drop(struct net *net)
+ {
+ const struct nf_queue_handler *qh;
+- unsigned int count = 0;
+
+ rcu_read_lock();
+ qh = rcu_dereference(net->nf.queue_handler);
+ if (qh)
+- count = qh->nf_hook_drop(net);
++ qh->nf_hook_drop(net);
+ rcu_read_unlock();
+-
+- return count;
+ }
+ EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
+
+--- a/net/netfilter/nfnetlink_queue.c
++++ b/net/netfilter/nfnetlink_queue.c
+@@ -941,23 +941,18 @@ static struct notifier_block nfqnl_dev_n
+ .notifier_call = nfqnl_rcv_dev_event,
+ };
+
+-static unsigned int nfqnl_nf_hook_drop(struct net *net)
++static void nfqnl_nf_hook_drop(struct net *net)
+ {
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+- unsigned int instances = 0;
+ int i;
+
+ for (i = 0; i < INSTANCE_BUCKETS; i++) {
+ struct nfqnl_instance *inst;
+ struct hlist_head *head = &q->instance_table[i];
+
+- hlist_for_each_entry_rcu(inst, head, hlist) {
++ hlist_for_each_entry_rcu(inst, head, hlist)
+ nfqnl_flush(inst, NULL, 0);
+- instances++;
+- }
+ }
+-
+- return instances;
+ }
+
+ static int
--- /dev/null
+From 8c873e2199700c2de7dbd5eedb9d90d5f109462b Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 1 Dec 2017 00:21:04 +0100
+Subject: [PATCH 04/11] netfilter: core: free hooks with call_rcu
+
+Giuseppe Scrivano says:
+ "SELinux, if enabled, registers for each new network namespace 6
+ netfilter hooks."
+
+Cost for this is high. With synchronize_net() removed:
+ "The net benefit on an SMP machine with two cores is that creating a
+ new network namespace takes -40% of the original time."
+
+This patch replaces synchronize_net+kvfree with call_rcu().
+We store rcu_head at the tail of a structure that has no fixed layout,
+i.e. we cannot use offsetof() to compute the start of the original
+allocation. Thus store this information right after the rcu head.
+
+We could simplify this by just placing the rcu_head at the start
+of struct nf_hook_entries. However, this structure is used in
+packet processing hotpath, so only place what is needed for that
+at the beginning of the struct.
+
+Reported-by: Giuseppe Scrivano <gscrivan@redhat.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ include/linux/netfilter.h | 19 +++++++++++++++----
+ net/netfilter/core.c | 34 ++++++++++++++++++++++++++++------
+ 2 files changed, 43 insertions(+), 10 deletions(-)
+
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -77,17 +77,28 @@ struct nf_hook_entry {
+ void *priv;
+ };
+
++struct nf_hook_entries_rcu_head {
++ struct rcu_head head;
++ void *allocation;
++};
++
+ struct nf_hook_entries {
+ u16 num_hook_entries;
+ /* padding */
+ struct nf_hook_entry hooks[];
+
+- /* trailer: pointers to original orig_ops of each hook.
+- *
+- * This is not part of struct nf_hook_entry since its only
+- * needed in slow path (hook register/unregister).
++ /* trailer: pointers to original orig_ops of each hook,
++ * followed by rcu_head and scratch space used for freeing
++ * the structure via call_rcu.
+ *
++ * This is not part of struct nf_hook_entry since its only
++ * needed in slow path (hook register/unregister):
+ * const struct nf_hook_ops *orig_ops[]
++ *
++ * For the same reason, we store this at end -- its
++ * only needed when a hook is deleted, not during
++ * packet path processing:
++ * struct nf_hook_entries_rcu_head head
+ */
+ };
+
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -74,7 +74,8 @@ static struct nf_hook_entries *allocate_
+ struct nf_hook_entries *e;
+ size_t alloc = sizeof(*e) +
+ sizeof(struct nf_hook_entry) * num +
+- sizeof(struct nf_hook_ops *) * num;
++ sizeof(struct nf_hook_ops *) * num +
++ sizeof(struct nf_hook_entries_rcu_head);
+
+ if (num == 0)
+ return NULL;
+@@ -85,6 +86,30 @@ static struct nf_hook_entries *allocate_
+ return e;
+ }
+
++static void __nf_hook_entries_free(struct rcu_head *h)
++{
++ struct nf_hook_entries_rcu_head *head;
++
++ head = container_of(h, struct nf_hook_entries_rcu_head, head);
++ kvfree(head->allocation);
++}
++
++static void nf_hook_entries_free(struct nf_hook_entries *e)
++{
++ struct nf_hook_entries_rcu_head *head;
++ struct nf_hook_ops **ops;
++ unsigned int num;
++
++ if (!e)
++ return;
++
++ num = e->num_hook_entries;
++ ops = nf_hook_entries_get_hook_ops(e);
++ head = (void *)&ops[num];
++ head->allocation = e;
++ call_rcu(&head->head, __nf_hook_entries_free);
++}
++
+ static unsigned int accept_all(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+@@ -291,9 +316,8 @@ int nf_register_net_hook(struct net *net
+ #ifdef HAVE_JUMP_LABEL
+ static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
+ #endif
+- synchronize_net();
+ BUG_ON(p == new_hooks);
+- kvfree(p);
++ nf_hook_entries_free(p);
+ return 0;
+ }
+ EXPORT_SYMBOL(nf_register_net_hook);
+@@ -361,10 +385,8 @@ void nf_unregister_net_hook(struct net *
+ if (!p)
+ return;
+
+- synchronize_net();
+-
+ nf_queue_nf_hook_drop(net);
+- kvfree(p);
++ nf_hook_entries_free(p);
+ }
+ EXPORT_SYMBOL(nf_unregister_net_hook);
+
--- /dev/null
+From b0f38338aef2dae5ade3c16acf713737e3b15a73 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Sun, 3 Dec 2017 00:58:47 +0100
+Subject: [PATCH 05/11] netfilter: reduce size of hook entry point locations
+
+struct net contains:
+
+struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+
+which store the hook entry point locations for the various protocol
+families and the hooks.
+
+Using an array results in compact C code when doing accesses, i.e.
+ x = rcu_dereference(net->nf.hooks[pf][hook]);
+
+but it's also wasting a lot of memory, as most families are
+not used.
+
+So split the array into those families that are used, which
+are only 5 (instead of 13). In most cases, the 'pf' argument is
+constant, i.e. gcc removes switch statement.
+
+struct net before:
+ /* size: 5184, cachelines: 81, members: 46 */
+after:
+ /* size: 4672, cachelines: 73, members: 46 */
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ include/linux/netfilter.h | 24 ++++++++++++++++++++++--
+ include/net/netns/netfilter.h | 6 +++++-
+ net/bridge/br_netfilter_hooks.c | 2 +-
+ net/netfilter/core.c | 38 ++++++++++++++++++++++++++++++--------
+ net/netfilter/nf_queue.c | 21 +++++++++++++++++++--
+ 5 files changed, 77 insertions(+), 14 deletions(-)
+
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -195,7 +195,7 @@ static inline int nf_hook(u_int8_t pf, u
+ struct net_device *indev, struct net_device *outdev,
+ int (*okfn)(struct net *, struct sock *, struct sk_buff *))
+ {
+- struct nf_hook_entries *hook_head;
++ struct nf_hook_entries *hook_head = NULL;
+ int ret = 1;
+
+ #ifdef HAVE_JUMP_LABEL
+@@ -206,7 +206,27 @@ static inline int nf_hook(u_int8_t pf, u
+ #endif
+
+ rcu_read_lock();
+- hook_head = rcu_dereference(net->nf.hooks[pf][hook]);
++ switch (pf) {
++ case NFPROTO_IPV4:
++ hook_head = rcu_dereference(net->nf.hooks_ipv4[hook]);
++ break;
++ case NFPROTO_IPV6:
++ hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]);
++ break;
++ case NFPROTO_ARP:
++ hook_head = rcu_dereference(net->nf.hooks_arp[hook]);
++ break;
++ case NFPROTO_BRIDGE:
++ hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
++ break;
++ case NFPROTO_DECNET:
++ hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
++ break;
++ default:
++ WARN_ON_ONCE(1);
++ break;
++ }
++
+ if (hook_head) {
+ struct nf_hook_state state;
+
+--- a/include/net/netns/netfilter.h
++++ b/include/net/netns/netfilter.h
+@@ -17,7 +17,11 @@ struct netns_nf {
+ #ifdef CONFIG_SYSCTL
+ struct ctl_table_header *nf_log_dir_header;
+ #endif
+- struct nf_hook_entries __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
++ struct nf_hook_entries __rcu *hooks_ipv4[NF_MAX_HOOKS];
++ struct nf_hook_entries __rcu *hooks_ipv6[NF_MAX_HOOKS];
++ struct nf_hook_entries __rcu *hooks_arp[NF_MAX_HOOKS];
++ struct nf_hook_entries __rcu *hooks_bridge[NF_MAX_HOOKS];
++ struct nf_hook_entries __rcu *hooks_decnet[NF_MAX_HOOKS];
+ #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+ bool defrag_ipv4;
+ #endif
+--- a/net/bridge/br_netfilter_hooks.c
++++ b/net/bridge/br_netfilter_hooks.c
+@@ -992,7 +992,7 @@ int br_nf_hook_thresh(unsigned int hook,
+ unsigned int i;
+ int ret;
+
+- e = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
++ e = rcu_dereference(net->nf.hooks_bridge[hook]);
+ if (!e)
+ return okfn(net, sk, skb);
+
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -264,8 +264,23 @@ out_assign:
+
+ static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
+ {
+- if (reg->pf != NFPROTO_NETDEV)
+- return net->nf.hooks[reg->pf]+reg->hooknum;
++ switch (reg->pf) {
++ case NFPROTO_NETDEV:
++ break;
++ case NFPROTO_ARP:
++ return net->nf.hooks_arp + reg->hooknum;
++ case NFPROTO_BRIDGE:
++ return net->nf.hooks_bridge + reg->hooknum;
++ case NFPROTO_IPV4:
++ return net->nf.hooks_ipv4 + reg->hooknum;
++ case NFPROTO_IPV6:
++ return net->nf.hooks_ipv6 + reg->hooknum;
++ case NFPROTO_DECNET:
++ return net->nf.hooks_decnet + reg->hooknum;
++ default:
++ WARN_ON_ONCE(1);
++ return NULL;
++ }
+
+ #ifdef CONFIG_NETFILTER_INGRESS
+ if (reg->hooknum == NF_NETDEV_INGRESS) {
+@@ -534,14 +549,21 @@ void (*nf_nat_decode_session_hook)(struc
+ EXPORT_SYMBOL(nf_nat_decode_session_hook);
+ #endif
+
+-static int __net_init netfilter_net_init(struct net *net)
++static void __net_init __netfilter_net_init(struct nf_hook_entries *e[NF_MAX_HOOKS])
+ {
+- int i, h;
++ int h;
+
+- for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
+- for (h = 0; h < NF_MAX_HOOKS; h++)
+- RCU_INIT_POINTER(net->nf.hooks[i][h], NULL);
+- }
++ for (h = 0; h < NF_MAX_HOOKS; h++)
++ RCU_INIT_POINTER(e[h], NULL);
++}
++
++static int __net_init netfilter_net_init(struct net *net)
++{
++ __netfilter_net_init(net->nf.hooks_ipv4);
++ __netfilter_net_init(net->nf.hooks_ipv6);
++ __netfilter_net_init(net->nf.hooks_arp);
++ __netfilter_net_init(net->nf.hooks_bridge);
++ __netfilter_net_init(net->nf.hooks_decnet);
+
+ #ifdef CONFIG_PROC_FS
+ net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
+--- a/net/netfilter/nf_queue.c
++++ b/net/netfilter/nf_queue.c
+@@ -201,6 +201,23 @@ repeat:
+ return NF_ACCEPT;
+ }
+
++static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum)
++{
++ switch (pf) {
++ case NFPROTO_BRIDGE:
++ return rcu_dereference(net->nf.hooks_bridge[hooknum]);
++ case NFPROTO_IPV4:
++ return rcu_dereference(net->nf.hooks_ipv4[hooknum]);
++ case NFPROTO_IPV6:
++ return rcu_dereference(net->nf.hooks_ipv6[hooknum]);
++ default:
++ WARN_ON_ONCE(1);
++ return NULL;
++ }
++
++ return NULL;
++}
++
+ /* Caller must hold rcu read-side lock */
+ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
+ {
+@@ -216,12 +233,12 @@ void nf_reinject(struct nf_queue_entry *
+ net = entry->state.net;
+ pf = entry->state.pf;
+
+- hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]);
++ hooks = nf_hook_entries_head(net, pf, entry->state.hook);
+
+ nf_queue_entry_release_refs(entry);
+
+ i = entry->hook_index;
+- if (WARN_ON_ONCE(i >= hooks->num_hook_entries)) {
++ if (WARN_ON_ONCE(!hooks || i >= hooks->num_hook_entries)) {
+ kfree_skb(skb);
+ kfree(entry);
+ return;
--- /dev/null
+From ef57170bbfdd6958281011332b1fd237712f69f0 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Thu, 7 Dec 2017 16:28:24 +0100
+Subject: [PATCH 06/11] netfilter: reduce hook array sizes to what is needed
+
+Not all families share the same hook count, adjust sizes to what is
+needed.
+
+struct net before:
+/* size: 6592, cachelines: 103, members: 46 */
+after:
+/* size: 5952, cachelines: 93, members: 46 */
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ include/net/netns/netfilter.h | 10 +++++-----
+ net/netfilter/core.c | 24 +++++++++++++++++-------
+ 2 files changed, 22 insertions(+), 12 deletions(-)
+
+--- a/include/net/netns/netfilter.h
++++ b/include/net/netns/netfilter.h
+@@ -17,11 +17,11 @@ struct netns_nf {
+ #ifdef CONFIG_SYSCTL
+ struct ctl_table_header *nf_log_dir_header;
+ #endif
+- struct nf_hook_entries __rcu *hooks_ipv4[NF_MAX_HOOKS];
+- struct nf_hook_entries __rcu *hooks_ipv6[NF_MAX_HOOKS];
+- struct nf_hook_entries __rcu *hooks_arp[NF_MAX_HOOKS];
+- struct nf_hook_entries __rcu *hooks_bridge[NF_MAX_HOOKS];
+- struct nf_hook_entries __rcu *hooks_decnet[NF_MAX_HOOKS];
++ struct nf_hook_entries __rcu *hooks_ipv4[NF_INET_NUMHOOKS];
++ struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS];
++ struct nf_hook_entries __rcu *hooks_arp[NF_ARP_NUMHOOKS];
++ struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS];
++ struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS];
+ #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+ bool defrag_ipv4;
+ #endif
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -268,14 +268,24 @@ static struct nf_hook_entries __rcu **nf
+ case NFPROTO_NETDEV:
+ break;
+ case NFPROTO_ARP:
++ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= reg->hooknum))
++ return NULL;
+ return net->nf.hooks_arp + reg->hooknum;
+ case NFPROTO_BRIDGE:
++ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= reg->hooknum))
++ return NULL;
+ return net->nf.hooks_bridge + reg->hooknum;
+ case NFPROTO_IPV4:
++ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= reg->hooknum))
++ return NULL;
+ return net->nf.hooks_ipv4 + reg->hooknum;
+ case NFPROTO_IPV6:
++ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= reg->hooknum))
++ return NULL;
+ return net->nf.hooks_ipv6 + reg->hooknum;
+ case NFPROTO_DECNET:
++ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= reg->hooknum))
++ return NULL;
+ return net->nf.hooks_decnet + reg->hooknum;
+ default:
+ WARN_ON_ONCE(1);
+@@ -549,21 +559,21 @@ void (*nf_nat_decode_session_hook)(struc
+ EXPORT_SYMBOL(nf_nat_decode_session_hook);
+ #endif
+
+-static void __net_init __netfilter_net_init(struct nf_hook_entries *e[NF_MAX_HOOKS])
++static void __net_init __netfilter_net_init(struct nf_hook_entries **e, int max)
+ {
+ int h;
+
+- for (h = 0; h < NF_MAX_HOOKS; h++)
++ for (h = 0; h < max; h++)
+ RCU_INIT_POINTER(e[h], NULL);
+ }
+
+ static int __net_init netfilter_net_init(struct net *net)
+ {
+- __netfilter_net_init(net->nf.hooks_ipv4);
+- __netfilter_net_init(net->nf.hooks_ipv6);
+- __netfilter_net_init(net->nf.hooks_arp);
+- __netfilter_net_init(net->nf.hooks_bridge);
+- __netfilter_net_init(net->nf.hooks_decnet);
++ __netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4));
++ __netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6));
++ __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp));
++ __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
++ __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
+
+ #ifdef CONFIG_PROC_FS
+ net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
--- /dev/null
+From bb4badf3a3dc81190f7c1c1fa063cdefb18df45f Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Thu, 7 Dec 2017 16:28:25 +0100
+Subject: [PATCH 07/11] netfilter: don't allocate space for decnet hooks unless
+ needed
+
+no need to define hook points if the family isn't supported.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ include/linux/netfilter.h | 2 ++
+ include/net/netns/netfilter.h | 2 ++
+ net/netfilter/core.c | 4 ++++
+ 3 files changed, 8 insertions(+)
+
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -219,9 +219,11 @@ static inline int nf_hook(u_int8_t pf, u
+ case NFPROTO_BRIDGE:
+ hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
+ break;
++#if IS_ENABLED(CONFIG_DECNET)
+ case NFPROTO_DECNET:
+ hook_head = rcu_dereference(net->nf.hooks_decnet[hook]);
+ break;
++#endif
+ default:
+ WARN_ON_ONCE(1);
+ break;
+--- a/include/net/netns/netfilter.h
++++ b/include/net/netns/netfilter.h
+@@ -21,7 +21,9 @@ struct netns_nf {
+ struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS];
+ struct nf_hook_entries __rcu *hooks_arp[NF_ARP_NUMHOOKS];
+ struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS];
++#if IS_ENABLED(CONFIG_DECNET)
+ struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS];
++#endif
+ #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+ bool defrag_ipv4;
+ #endif
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -283,10 +283,12 @@ static struct nf_hook_entries __rcu **nf
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= reg->hooknum))
+ return NULL;
+ return net->nf.hooks_ipv6 + reg->hooknum;
++#if IS_ENABLED(CONFIG_DECNET)
+ case NFPROTO_DECNET:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= reg->hooknum))
+ return NULL;
+ return net->nf.hooks_decnet + reg->hooknum;
++#endif
+ default:
+ WARN_ON_ONCE(1);
+ return NULL;
+@@ -573,7 +575,9 @@ static int __net_init netfilter_net_init
+ __netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6));
+ __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp));
+ __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
++#if IS_ENABLED(CONFIG_DECNET)
+ __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
++#endif
+
+ #ifdef CONFIG_PROC_FS
+ net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
--- /dev/null
+From 2a95183a5e0375df756efb2ca37602d71e8455f9 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Thu, 7 Dec 2017 16:28:26 +0100
+Subject: [PATCH 08/11] netfilter: don't allocate space for arp/bridge hooks
+ unless needed
+
+No need to define hook points if the family isn't supported.
+Because we need these hooks for either nftables, arp/ebtables
+or the 'call-iptables' hack we have in the bridge layer, add two
+new dependencies, NETFILTER_FAMILY_{ARP,BRIDGE}, and have the
+users select them.
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ include/linux/netfilter.h | 4 ++++
+ include/net/netns/netfilter.h | 4 ++++
+ net/Kconfig | 1 +
+ net/bridge/netfilter/Kconfig | 2 ++
+ net/ipv4/netfilter/Kconfig | 2 ++
+ net/netfilter/Kconfig | 6 ++++++
+ net/netfilter/core.c | 8 ++++++++
+ net/netfilter/nf_queue.c | 2 ++
+ 8 files changed, 29 insertions(+)
+
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -214,10 +214,14 @@ static inline int nf_hook(u_int8_t pf, u
+ hook_head = rcu_dereference(net->nf.hooks_ipv6[hook]);
+ break;
+ case NFPROTO_ARP:
++#ifdef CONFIG_NETFILTER_FAMILY_ARP
+ hook_head = rcu_dereference(net->nf.hooks_arp[hook]);
++#endif
+ break;
+ case NFPROTO_BRIDGE:
++#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ hook_head = rcu_dereference(net->nf.hooks_bridge[hook]);
++#endif
+ break;
+ #if IS_ENABLED(CONFIG_DECNET)
+ case NFPROTO_DECNET:
+--- a/include/net/netns/netfilter.h
++++ b/include/net/netns/netfilter.h
+@@ -19,8 +19,12 @@ struct netns_nf {
+ #endif
+ struct nf_hook_entries __rcu *hooks_ipv4[NF_INET_NUMHOOKS];
+ struct nf_hook_entries __rcu *hooks_ipv6[NF_INET_NUMHOOKS];
++#ifdef CONFIG_NETFILTER_FAMILY_ARP
+ struct nf_hook_entries __rcu *hooks_arp[NF_ARP_NUMHOOKS];
++#endif
++#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ struct nf_hook_entries __rcu *hooks_bridge[NF_INET_NUMHOOKS];
++#endif
+ #if IS_ENABLED(CONFIG_DECNET)
+ struct nf_hook_entries __rcu *hooks_decnet[NF_DN_NUMHOOKS];
+ #endif
+--- a/net/Kconfig
++++ b/net/Kconfig
+@@ -182,6 +182,7 @@ config BRIDGE_NETFILTER
+ depends on BRIDGE
+ depends on NETFILTER && INET
+ depends on NETFILTER_ADVANCED
++ select NETFILTER_FAMILY_BRIDGE
+ default m
+ ---help---
+ Enabling this option will let arptables resp. iptables see bridged
+--- a/net/bridge/netfilter/Kconfig
++++ b/net/bridge/netfilter/Kconfig
+@@ -4,6 +4,7 @@
+ #
+ menuconfig NF_TABLES_BRIDGE
+ depends on BRIDGE && NETFILTER && NF_TABLES
++ select NETFILTER_FAMILY_BRIDGE
+ tristate "Ethernet Bridge nf_tables support"
+
+ if NF_TABLES_BRIDGE
+@@ -29,6 +30,7 @@ endif # NF_TABLES_BRIDGE
+ menuconfig BRIDGE_NF_EBTABLES
+ tristate "Ethernet Bridge tables (ebtables) support"
+ depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
++ select NETFILTER_FAMILY_BRIDGE
+ help
+ ebtables is a general, extensible frame/packet identification
+ framework. Say 'Y' or 'M' here if you want to do Ethernet
+--- a/net/ipv4/netfilter/Kconfig
++++ b/net/ipv4/netfilter/Kconfig
+@@ -72,6 +72,7 @@ endif # NF_TABLES_IPV4
+
+ config NF_TABLES_ARP
+ tristate "ARP nf_tables support"
++ select NETFILTER_FAMILY_ARP
+ help
+ This option enables the ARP support for nf_tables.
+
+@@ -392,6 +393,7 @@ endif # IP_NF_IPTABLES
+ config IP_NF_ARPTABLES
+ tristate "ARP tables support"
+ select NETFILTER_XTABLES
++ select NETFILTER_FAMILY_ARP
+ depends on NETFILTER_ADVANCED
+ help
+ arptables is a general, extensible packet identification framework.
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -12,6 +12,12 @@ config NETFILTER_INGRESS
+ config NETFILTER_NETLINK
+ tristate
+
++config NETFILTER_FAMILY_BRIDGE
++ bool
++
++config NETFILTER_FAMILY_ARP
++ bool
++
+ config NETFILTER_NETLINK_ACCT
+ tristate "Netfilter NFACCT over NFNETLINK interface"
+ depends on NETFILTER_ADVANCED
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -267,14 +267,18 @@ static struct nf_hook_entries __rcu **nf
+ switch (reg->pf) {
+ case NFPROTO_NETDEV:
+ break;
++#ifdef CONFIG_NETFILTER_FAMILY_ARP
+ case NFPROTO_ARP:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= reg->hooknum))
+ return NULL;
+ return net->nf.hooks_arp + reg->hooknum;
++#endif
++#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ case NFPROTO_BRIDGE:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= reg->hooknum))
+ return NULL;
+ return net->nf.hooks_bridge + reg->hooknum;
++#endif
+ case NFPROTO_IPV4:
+ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= reg->hooknum))
+ return NULL;
+@@ -573,8 +577,12 @@ static int __net_init netfilter_net_init
+ {
+ __netfilter_net_init(net->nf.hooks_ipv4, ARRAY_SIZE(net->nf.hooks_ipv4));
+ __netfilter_net_init(net->nf.hooks_ipv6, ARRAY_SIZE(net->nf.hooks_ipv6));
++#ifdef CONFIG_NETFILTER_FAMILY_ARP
+ __netfilter_net_init(net->nf.hooks_arp, ARRAY_SIZE(net->nf.hooks_arp));
++#endif
++#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ __netfilter_net_init(net->nf.hooks_bridge, ARRAY_SIZE(net->nf.hooks_bridge));
++#endif
+ #if IS_ENABLED(CONFIG_DECNET)
+ __netfilter_net_init(net->nf.hooks_decnet, ARRAY_SIZE(net->nf.hooks_decnet));
+ #endif
+--- a/net/netfilter/nf_queue.c
++++ b/net/netfilter/nf_queue.c
+@@ -204,8 +204,10 @@ repeat:
+ static struct nf_hook_entries *nf_hook_entries_head(const struct net *net, u8 pf, u8 hooknum)
+ {
+ switch (pf) {
++#ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ case NFPROTO_BRIDGE:
+ return rcu_dereference(net->nf.hooks_bridge[hooknum]);
++#endif
+ case NFPROTO_IPV4:
+ return rcu_dereference(net->nf.hooks_ipv4[hooknum]);
+ case NFPROTO_IPV6:
--- /dev/null
+From 62a0fe46e2aaba1812d3cbcae014a41539f9eb09 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sat, 9 Dec 2017 15:23:51 +0100
+Subject: [PATCH 09/11] netfilter: core: pass hook number, family and device to
+ nf_find_hook_list()
+
+Pass the hook number, family and device instead of struct nf_hook_ops;
+this is needed by follow up patches to handle NFPROTO_INET from the core.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ net/netfilter/core.c | 36 +++++++++++++++++++-----------------
+ 1 file changed, 19 insertions(+), 17 deletions(-)
+
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -262,36 +262,38 @@ out_assign:
+ return old;
+ }
+
+-static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg)
++static struct nf_hook_entries __rcu **
++nf_hook_entry_head(struct net *net, int pf, unsigned int hooknum,
++ struct net_device *dev)
+ {
+- switch (reg->pf) {
++ switch (pf) {
+ case NFPROTO_NETDEV:
+ break;
+ #ifdef CONFIG_NETFILTER_FAMILY_ARP
+ case NFPROTO_ARP:
+- if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= reg->hooknum))
++ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_arp) <= hooknum))
+ return NULL;
+- return net->nf.hooks_arp + reg->hooknum;
++ return net->nf.hooks_arp + hooknum;
+ #endif
+ #ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
+ case NFPROTO_BRIDGE:
+- if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= reg->hooknum))
++ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_bridge) <= hooknum))
+ return NULL;
+- return net->nf.hooks_bridge + reg->hooknum;
++ return net->nf.hooks_bridge + hooknum;
+ #endif
+ case NFPROTO_IPV4:
+- if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= reg->hooknum))
++ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv4) <= hooknum))
+ return NULL;
+- return net->nf.hooks_ipv4 + reg->hooknum;
++ return net->nf.hooks_ipv4 + hooknum;
+ case NFPROTO_IPV6:
+- if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= reg->hooknum))
++ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_ipv6) <= hooknum))
+ return NULL;
+- return net->nf.hooks_ipv6 + reg->hooknum;
++ return net->nf.hooks_ipv6 + hooknum;
+ #if IS_ENABLED(CONFIG_DECNET)
+ case NFPROTO_DECNET:
+- if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= reg->hooknum))
++ if (WARN_ON_ONCE(ARRAY_SIZE(net->nf.hooks_decnet) <= hooknum))
+ return NULL;
+- return net->nf.hooks_decnet + reg->hooknum;
++ return net->nf.hooks_decnet + hooknum;
+ #endif
+ default:
+ WARN_ON_ONCE(1);
+@@ -299,9 +301,9 @@ static struct nf_hook_entries __rcu **nf
+ }
+
+ #ifdef CONFIG_NETFILTER_INGRESS
+- if (reg->hooknum == NF_NETDEV_INGRESS) {
+- if (reg->dev && dev_net(reg->dev) == net)
+- return &reg->dev->nf_hooks_ingress;
++ if (hooknum == NF_NETDEV_INGRESS) {
++ if (dev && dev_net(dev) == net)
++ return &dev->nf_hooks_ingress;
+ }
+ #endif
+ WARN_ON_ONCE(1);
+@@ -323,7 +325,7 @@ int nf_register_net_hook(struct net *net
+ return -EINVAL;
+ }
+
+- pp = nf_hook_entry_head(net, reg);
++ pp = nf_hook_entry_head(net, reg->pf, reg->hooknum, reg->dev);
+ if (!pp)
+ return -EINVAL;
+
+@@ -397,7 +399,7 @@ void nf_unregister_net_hook(struct net *
+ struct nf_hook_entries __rcu **pp;
+ struct nf_hook_entries *p;
+
+- pp = nf_hook_entry_head(net, reg);
++ pp = nf_hook_entry_head(net, reg->pf, reg->hooknum, reg->dev);
+ if (!pp)
+ return;
+
--- /dev/null
+From 3d3cdc38e8c265a9f9d3825e823e772872bca1b8 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sat, 9 Dec 2017 15:19:14 +0100
+Subject: [PATCH 01/11] netfilter: core: add nf_remove_net_hook
+
+Just a cleanup: rename __nf_unregister_net_hook(), as that name is used by a
+follow up patch when handling NFPROTO_INET as a real family from the core.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ net/netfilter/core.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -356,7 +356,7 @@ int nf_register_net_hook(struct net *net
+ EXPORT_SYMBOL(nf_register_net_hook);
+
+ /*
+- * __nf_unregister_net_hook - remove a hook from blob
++ * nf_remove_net_hook - remove a hook from blob
+ *
+ * @oldp: current address of hook blob
+ * @unreg: hook to unregister
+@@ -364,8 +364,8 @@ EXPORT_SYMBOL(nf_register_net_hook);
+ * This cannot fail, hook unregistration must always succeed.
+ * Therefore replace the to-be-removed hook with a dummy hook.
+ */
+-static void __nf_unregister_net_hook(struct nf_hook_entries *old,
+- const struct nf_hook_ops *unreg)
++static void nf_remove_net_hook(struct nf_hook_entries *old,
++ const struct nf_hook_ops *unreg)
+ {
+ struct nf_hook_ops **orig_ops;
+ bool found = false;
+@@ -411,7 +411,7 @@ void nf_unregister_net_hook(struct net *
+ return;
+ }
+
+- __nf_unregister_net_hook(p, reg);
++ nf_remove_net_hook(p, reg);
+
+ p = __nf_hook_entries_try_shrink(pp);
+ mutex_unlock(&nf_hook_mutex);
--- /dev/null
+From 30259408118f550f5969fda19c0d67020d21eda8 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sat, 9 Dec 2017 15:26:37 +0100
+Subject: [PATCH 10/11] netfilter: core: pass family as parameter to
+ nf_remove_net_hook()
+
+So static_key_slow_dec applies to the family behind NFPROTO_INET.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ net/netfilter/core.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -365,7 +365,7 @@ EXPORT_SYMBOL(nf_register_net_hook);
+ * Therefore replace the to-be-removed hook with a dummy hook.
+ */
+ static void nf_remove_net_hook(struct nf_hook_entries *old,
+- const struct nf_hook_ops *unreg)
++ const struct nf_hook_ops *unreg, int pf)
+ {
+ struct nf_hook_ops **orig_ops;
+ bool found = false;
+@@ -383,14 +383,14 @@ static void nf_remove_net_hook(struct nf
+
+ if (found) {
+ #ifdef CONFIG_NETFILTER_INGRESS
+- if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
++ if (pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS)
+ net_dec_ingress_queue();
+ #endif
+ #ifdef HAVE_JUMP_LABEL
+- static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]);
++ static_key_slow_dec(&nf_hooks_needed[pf][unreg->hooknum]);
+ #endif
+ } else {
+- WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum);
++ WARN_ONCE(1, "hook not found, pf %d num %d", pf, unreg->hooknum);
+ }
+ }
+
+@@ -411,7 +411,7 @@ void nf_unregister_net_hook(struct net *
+ return;
+ }
+
+- nf_remove_net_hook(p, reg);
++ nf_remove_net_hook(p, reg, reg->pf);
+
+ p = __nf_hook_entries_try_shrink(pp);
+ mutex_unlock(&nf_hook_mutex);
--- /dev/null
+From cb7ccd835ebb333669e400f99c650e4f3abf11c0 Mon Sep 17 00:00:00 2001
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sat, 9 Dec 2017 15:30:26 +0100
+Subject: [PATCH 11/11] netfilter: core: support for NFPROTO_INET hook
+ registration
+
+Expand NFPROTO_INET in two hook registrations, one for NFPROTO_IPV4 and
+another for NFPROTO_IPV6. Hence, we handle NFPROTO_INET from the core.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ net/netfilter/core.c | 53 +++++++++++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 44 insertions(+), 9 deletions(-)
+
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -310,12 +310,13 @@ nf_hook_entry_head(struct net *net, int
+ return NULL;
+ }
+
+-int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
++static int __nf_register_net_hook(struct net *net, int pf,
++ const struct nf_hook_ops *reg)
+ {
+ struct nf_hook_entries *p, *new_hooks;
+ struct nf_hook_entries __rcu **pp;
+
+- if (reg->pf == NFPROTO_NETDEV) {
++ if (pf == NFPROTO_NETDEV) {
+ #ifndef CONFIG_NETFILTER_INGRESS
+ if (reg->hooknum == NF_NETDEV_INGRESS)
+ return -EOPNOTSUPP;
+@@ -325,7 +326,7 @@ int nf_register_net_hook(struct net *net
+ return -EINVAL;
+ }
+
+- pp = nf_hook_entry_head(net, reg->pf, reg->hooknum, reg->dev);
++ pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
+ if (!pp)
+ return -EINVAL;
+
+@@ -343,17 +344,16 @@ int nf_register_net_hook(struct net *net
+
+ hooks_validate(new_hooks);
+ #ifdef CONFIG_NETFILTER_INGRESS
+- if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
++ if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
+ net_inc_ingress_queue();
+ #endif
+ #ifdef HAVE_JUMP_LABEL
+- static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
++ static_key_slow_inc(&nf_hooks_needed[pf][reg->hooknum]);
+ #endif
+ BUG_ON(p == new_hooks);
+ nf_hook_entries_free(p);
+ return 0;
+ }
+-EXPORT_SYMBOL(nf_register_net_hook);
+
+ /*
+ * nf_remove_net_hook - remove a hook from blob
+@@ -394,12 +394,13 @@ static void nf_remove_net_hook(struct nf
+ }
+ }
+
+-void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
++void __nf_unregister_net_hook(struct net *net, int pf,
++ const struct nf_hook_ops *reg)
+ {
+ struct nf_hook_entries __rcu **pp;
+ struct nf_hook_entries *p;
+
+- pp = nf_hook_entry_head(net, reg->pf, reg->hooknum, reg->dev);
++ pp = nf_hook_entry_head(net, pf, reg->hooknum, reg->dev);
+ if (!pp)
+ return;
+
+@@ -411,7 +412,7 @@ void nf_unregister_net_hook(struct net *
+ return;
+ }
+
+- nf_remove_net_hook(p, reg, reg->pf);
++ nf_remove_net_hook(p, reg, pf);
+
+ p = __nf_hook_entries_try_shrink(pp);
+ mutex_unlock(&nf_hook_mutex);
+@@ -421,8 +422,42 @@ void nf_unregister_net_hook(struct net *
+ nf_queue_nf_hook_drop(net);
+ nf_hook_entries_free(p);
+ }
++
++void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
++{
++ if (reg->pf == NFPROTO_INET) {
++ __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
++ __nf_unregister_net_hook(net, NFPROTO_IPV6, reg);
++ } else {
++ __nf_unregister_net_hook(net, reg->pf, reg);
++ }
++}
+ EXPORT_SYMBOL(nf_unregister_net_hook);
+
++int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
++{
++ int err;
++
++ if (reg->pf == NFPROTO_INET) {
++ err = __nf_register_net_hook(net, NFPROTO_IPV4, reg);
++ if (err < 0)
++ return err;
++
++ err = __nf_register_net_hook(net, NFPROTO_IPV6, reg);
++ if (err < 0) {
++ __nf_unregister_net_hook(net, NFPROTO_IPV4, reg);
++ return err;
++ }
++ } else {
++ err = __nf_register_net_hook(net, reg->pf, reg);
++ if (err < 0)
++ return err;
++ }
++
++ return 0;
++}
++EXPORT_SYMBOL(nf_register_net_hook);
++
+ int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
+ unsigned int n)
+ {
--- /dev/null
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sun, 10 Dec 2017 01:43:14 +0100
+Subject: [PATCH] netfilter: nf_tables: explicit nft_set_pktinfo() call from
+ hook path
+
+Call nft_set_pktinfo() explicitly instead of from the family specific
+variants; this reduces the code size in the fast path for the netdev,
+bridge and inet families. After this change, we must call
+nft_set_pktinfo() upfront from the chain hook indirection.
+
+Before:
+
+ text data bss dec hex filename
+ 2145 208 0 2353 931 net/netfilter/nf_tables_netdev.o
+
+After:
+
+ text data bss dec hex filename
+ 2125 208 0 2333 91d net/netfilter/nf_tables_netdev.o
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -54,8 +54,8 @@ static inline void nft_set_pktinfo(struc
+ pkt->xt.state = state;
+ }
+
+-static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt,
+- struct sk_buff *skb)
++static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
++ struct sk_buff *skb)
+ {
+ pkt->tprot_set = false;
+ pkt->tprot = 0;
+@@ -63,14 +63,6 @@ static inline void nft_set_pktinfo_proto
+ pkt->xt.fragoff = 0;
+ }
+
+-static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
+- struct sk_buff *skb,
+- const struct nf_hook_state *state)
+-{
+- nft_set_pktinfo(pkt, skb, state);
+- nft_set_pktinfo_proto_unspec(pkt, skb);
+-}
+-
+ /**
+ * struct nft_verdict - nf_tables verdict
+ *
+--- a/include/net/netfilter/nf_tables_ipv4.h
++++ b/include/net/netfilter/nf_tables_ipv4.h
+@@ -5,15 +5,11 @@
+ #include <net/netfilter/nf_tables.h>
+ #include <net/ip.h>
+
+-static inline void
+-nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
+- struct sk_buff *skb,
+- const struct nf_hook_state *state)
++static inline void nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
++ struct sk_buff *skb)
+ {
+ struct iphdr *ip;
+
+- nft_set_pktinfo(pkt, skb, state);
+-
+ ip = ip_hdr(pkt->skb);
+ pkt->tprot_set = true;
+ pkt->tprot = ip->protocol;
+@@ -21,10 +17,8 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo
+ pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
+ }
+
+-static inline int
+-__nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
+- struct sk_buff *skb,
+- const struct nf_hook_state *state)
++static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
++ struct sk_buff *skb)
+ {
+ struct iphdr *iph, _iph;
+ u32 len, thoff;
+@@ -52,14 +46,11 @@ __nft_set_pktinfo_ipv4_validate(struct n
+ return 0;
+ }
+
+-static inline void
+-nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
+- struct sk_buff *skb,
+- const struct nf_hook_state *state)
++static inline void nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
++ struct sk_buff *skb)
+ {
+- nft_set_pktinfo(pkt, skb, state);
+- if (__nft_set_pktinfo_ipv4_validate(pkt, skb, state) < 0)
+- nft_set_pktinfo_proto_unspec(pkt, skb);
++ if (__nft_set_pktinfo_ipv4_validate(pkt, skb) < 0)
++ nft_set_pktinfo_unspec(pkt, skb);
+ }
+
+ extern struct nft_af_info nft_af_ipv4;
+--- a/include/net/netfilter/nf_tables_ipv6.h
++++ b/include/net/netfilter/nf_tables_ipv6.h
+@@ -5,20 +5,16 @@
+ #include <linux/netfilter_ipv6/ip6_tables.h>
+ #include <net/ipv6.h>
+
+-static inline void
+-nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
+- struct sk_buff *skb,
+- const struct nf_hook_state *state)
++static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
++ struct sk_buff *skb)
+ {
+ unsigned int flags = IP6_FH_F_AUTH;
+ int protohdr, thoff = 0;
+ unsigned short frag_off;
+
+- nft_set_pktinfo(pkt, skb, state);
+-
+ protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
+ if (protohdr < 0) {
+- nft_set_pktinfo_proto_unspec(pkt, skb);
++ nft_set_pktinfo_unspec(pkt, skb);
+ return;
+ }
+
+@@ -28,10 +24,8 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo
+ pkt->xt.fragoff = frag_off;
+ }
+
+-static inline int
+-__nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
+- struct sk_buff *skb,
+- const struct nf_hook_state *state)
++static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
++ struct sk_buff *skb)
+ {
+ #if IS_ENABLED(CONFIG_IPV6)
+ unsigned int flags = IP6_FH_F_AUTH;
+@@ -68,14 +62,11 @@ __nft_set_pktinfo_ipv6_validate(struct n
+ #endif
+ }
+
+-static inline void
+-nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
+- struct sk_buff *skb,
+- const struct nf_hook_state *state)
++static inline void nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
++ struct sk_buff *skb)
+ {
+- nft_set_pktinfo(pkt, skb, state);
+- if (__nft_set_pktinfo_ipv6_validate(pkt, skb, state) < 0)
+- nft_set_pktinfo_proto_unspec(pkt, skb);
++ if (__nft_set_pktinfo_ipv6_validate(pkt, skb) < 0)
++ nft_set_pktinfo_unspec(pkt, skb);
+ }
+
+ extern struct nft_af_info nft_af_ipv6;
+--- a/net/bridge/netfilter/nf_tables_bridge.c
++++ b/net/bridge/netfilter/nf_tables_bridge.c
+@@ -25,15 +25,17 @@ nft_do_chain_bridge(void *priv,
+ {
+ struct nft_pktinfo pkt;
+
++ nft_set_pktinfo(&pkt, skb, state);
++
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+- nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
++ nft_set_pktinfo_ipv4_validate(&pkt, skb);
+ break;
+ case htons(ETH_P_IPV6):
+- nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
++ nft_set_pktinfo_ipv6_validate(&pkt, skb);
+ break;
+ default:
+- nft_set_pktinfo_unspec(&pkt, skb, state);
++ nft_set_pktinfo_unspec(&pkt, skb);
+ break;
+ }
+
+--- a/net/ipv4/netfilter/nf_tables_arp.c
++++ b/net/ipv4/netfilter/nf_tables_arp.c
+@@ -21,7 +21,8 @@ nft_do_chain_arp(void *priv,
+ {
+ struct nft_pktinfo pkt;
+
+- nft_set_pktinfo_unspec(&pkt, skb, state);
++ nft_set_pktinfo(&pkt, skb, state);
++ nft_set_pktinfo_unspec(&pkt, skb);
+
+ return nft_do_chain(&pkt, priv);
+ }
+--- a/net/ipv4/netfilter/nf_tables_ipv4.c
++++ b/net/ipv4/netfilter/nf_tables_ipv4.c
+@@ -24,7 +24,8 @@ static unsigned int nft_do_chain_ipv4(vo
+ {
+ struct nft_pktinfo pkt;
+
+- nft_set_pktinfo_ipv4(&pkt, skb, state);
++ nft_set_pktinfo(&pkt, skb, state);
++ nft_set_pktinfo_ipv4(&pkt, skb);
+
+ return nft_do_chain(&pkt, priv);
+ }
+--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
++++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+@@ -33,7 +33,8 @@ static unsigned int nft_nat_do_chain(voi
+ {
+ struct nft_pktinfo pkt;
+
+- nft_set_pktinfo_ipv4(&pkt, skb, state);
++ nft_set_pktinfo(&pkt, skb, state);
++ nft_set_pktinfo_ipv4(&pkt, skb);
+
+ return nft_do_chain(&pkt, priv);
+ }
+--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
++++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
+@@ -38,7 +38,8 @@ static unsigned int nf_route_table_hook(
+ ip_hdrlen(skb) < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+- nft_set_pktinfo_ipv4(&pkt, skb, state);
++ nft_set_pktinfo(&pkt, skb, state);
++ nft_set_pktinfo_ipv4(&pkt, skb);
+
+ mark = skb->mark;
+ iph = ip_hdr(skb);
+--- a/net/ipv6/netfilter/nf_tables_ipv6.c
++++ b/net/ipv6/netfilter/nf_tables_ipv6.c
+@@ -22,7 +22,8 @@ static unsigned int nft_do_chain_ipv6(vo
+ {
+ struct nft_pktinfo pkt;
+
+- nft_set_pktinfo_ipv6(&pkt, skb, state);
++ nft_set_pktinfo(&pkt, skb, state);
++ nft_set_pktinfo_ipv6(&pkt, skb);
+
+ return nft_do_chain(&pkt, priv);
+ }
+--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
++++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+@@ -31,7 +31,8 @@ static unsigned int nft_nat_do_chain(voi
+ {
+ struct nft_pktinfo pkt;
+
+- nft_set_pktinfo_ipv6(&pkt, skb, state);
++ nft_set_pktinfo(&pkt, skb, state);
++ nft_set_pktinfo_ipv6(&pkt, skb);
+
+ return nft_do_chain(&pkt, priv);
+ }
+--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
++++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
+@@ -33,7 +33,8 @@ static unsigned int nf_route_table_hook(
+ u32 mark, flowlabel;
+ int err;
+
+- nft_set_pktinfo_ipv6(&pkt, skb, state);
++ nft_set_pktinfo(&pkt, skb, state);
++ nft_set_pktinfo_ipv6(&pkt, skb);
+
+ /* save source/dest address, mark, hoplimit, flowlabel, priority */
+ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+--- a/net/netfilter/nf_tables_netdev.c
++++ b/net/netfilter/nf_tables_netdev.c
+@@ -21,15 +21,17 @@ nft_do_chain_netdev(void *priv, struct s
+ {
+ struct nft_pktinfo pkt;
+
++ nft_set_pktinfo(&pkt, skb, state);
++
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+- nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
++ nft_set_pktinfo_ipv4_validate(&pkt, skb);
+ break;
+ case htons(ETH_P_IPV6):
+- nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
++ nft_set_pktinfo_ipv6_validate(&pkt, skb);
+ break;
+ default:
+- nft_set_pktinfo_unspec(&pkt, skb, state);
++ nft_set_pktinfo_unspec(&pkt, skb);
+ break;
+ }
+
--- /dev/null
+From: Florian Westphal <fw@strlen.de>
+Date: Fri, 8 Dec 2017 17:01:54 +0100
+Subject: [PATCH] netfilter: core: only allow one nat hook per hook point
+
+The netfilter NAT core cannot deal with more than one NAT hook per hook
+location (prerouting, input ...), because the NAT hooks install a NAT null
+binding in case the iptables nat table (iptable_nat hooks) or the
+corresponding nftables chain (nft nat hooks) doesn't specify a nat
+transformation.
+
+Null bindings are needed to detect port collisions between NAT-ed and
+non-NAT-ed connections.
+
+This causes nftables NAT rules to not work when iptable_nat module is
+loaded, and vice versa because nat binding has already been attached
+when the second nat hook is consulted.
+
+The netfilter core is not really the correct location to handle this
+(hooks are just hooks, the core has no notion of what kinds of side
+ effects a hook implements), but it's the only place where we can check
+for conflicts between both iptables hooks and nftables hooks without
+adding dependencies.
+
+So add nat annotation to hook_ops to describe those hooks that will
+add NAT bindings and then make core reject if such a hook already exists.
+The annotation fills a padding hole; in case further restrictions appear
+we might change this to a 'u8 type' instead of bool.
+
+iptables error if nft nat hook active:
+iptables -t nat -A POSTROUTING -j MASQUERADE
+iptables v1.4.21: can't initialize iptables table `nat': File exists
+Perhaps iptables or your kernel needs to be upgraded.
+
+nftables error if iptables nat table present:
+nft -f /etc/nftables/ipv4-nat
+/usr/etc/nftables/ipv4-nat:3:1-2: Error: Could not process rule: File exists
+table nat {
+^^
+
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -67,6 +67,7 @@ struct nf_hook_ops {
+ struct net_device *dev;
+ void *priv;
+ u_int8_t pf;
++ bool nat_hook;
+ unsigned int hooknum;
+ /* Hooks are ordered in ascending priority. */
+ int priority;
+--- a/net/ipv4/netfilter/iptable_nat.c
++++ b/net/ipv4/netfilter/iptable_nat.c
+@@ -72,6 +72,7 @@ static const struct nf_hook_ops nf_nat_i
+ {
+ .hook = iptable_nat_ipv4_in,
+ .pf = NFPROTO_IPV4,
++ .nat_hook = true,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_NAT_DST,
+ },
+@@ -79,6 +80,7 @@ static const struct nf_hook_ops nf_nat_i
+ {
+ .hook = iptable_nat_ipv4_out,
+ .pf = NFPROTO_IPV4,
++ .nat_hook = true,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_NAT_SRC,
+ },
+@@ -86,6 +88,7 @@ static const struct nf_hook_ops nf_nat_i
+ {
+ .hook = iptable_nat_ipv4_local_fn,
+ .pf = NFPROTO_IPV4,
++ .nat_hook = true,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_NAT_DST,
+ },
+@@ -93,6 +96,7 @@ static const struct nf_hook_ops nf_nat_i
+ {
+ .hook = iptable_nat_ipv4_fn,
+ .pf = NFPROTO_IPV4,
++ .nat_hook = true,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_NAT_SRC,
+ },
+--- a/net/ipv6/netfilter/ip6table_nat.c
++++ b/net/ipv6/netfilter/ip6table_nat.c
+@@ -74,6 +74,7 @@ static const struct nf_hook_ops nf_nat_i
+ {
+ .hook = ip6table_nat_in,
+ .pf = NFPROTO_IPV6,
++ .nat_hook = true,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP6_PRI_NAT_DST,
+ },
+@@ -81,6 +82,7 @@ static const struct nf_hook_ops nf_nat_i
+ {
+ .hook = ip6table_nat_out,
+ .pf = NFPROTO_IPV6,
++ .nat_hook = true,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP6_PRI_NAT_SRC,
+ },
+@@ -88,12 +90,14 @@ static const struct nf_hook_ops nf_nat_i
+ {
+ .hook = ip6table_nat_local_fn,
+ .pf = NFPROTO_IPV6,
++ .nat_hook = true,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP6_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = ip6table_nat_fn,
++ .nat_hook = true,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP6_PRI_NAT_SRC,
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -160,6 +160,12 @@ nf_hook_entries_grow(const struct nf_hoo
+ ++i;
+ continue;
+ }
++
++ if (reg->nat_hook && orig_ops[i]->nat_hook) {
++ kvfree(new);
++ return ERR_PTR(-EEXIST);
++ }
++
+ if (inserted || reg->priority > orig_ops[i]->priority) {
+ new_ops[nhooks] = (void *)orig_ops[i];
+ new->hooks[nhooks] = old->hooks[i];
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -1431,6 +1431,8 @@ static int nf_tables_addchain(struct nft
+ ops->hook = hookfn;
+ if (afi->hook_ops_init)
+ afi->hook_ops_init(ops, i);
++ if (basechain->type->type == NFT_CHAIN_T_NAT)
++ ops->nat_hook = true;
+ }
+
+ chain->flags |= NFT_BASE_CHAIN;
--- /dev/null
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sat, 9 Dec 2017 15:36:24 +0100
+Subject: [PATCH] netfilter: nf_tables_inet: don't use multihook infrastructure
+ anymore
+
+Use new native NFPROTO_INET support in netfilter core, this gets rid of
+ad-hoc code in the nf_tables API codebase.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/net/netfilter/nf_tables_ipv4.h
++++ b/include/net/netfilter/nf_tables_ipv4.h
+@@ -53,6 +53,4 @@ static inline void nft_set_pktinfo_ipv4_
+ nft_set_pktinfo_unspec(pkt, skb);
+ }
+
+-extern struct nft_af_info nft_af_ipv4;
+-
+ #endif
+--- a/include/net/netfilter/nf_tables_ipv6.h
++++ b/include/net/netfilter/nf_tables_ipv6.h
+@@ -69,6 +69,4 @@ static inline void nft_set_pktinfo_ipv6_
+ nft_set_pktinfo_unspec(pkt, skb);
+ }
+
+-extern struct nft_af_info nft_af_ipv6;
+-
+ #endif
+--- a/net/ipv4/netfilter/nf_tables_ipv4.c
++++ b/net/ipv4/netfilter/nf_tables_ipv4.c
+@@ -45,7 +45,7 @@ static unsigned int nft_ipv4_output(void
+ return nft_do_chain_ipv4(priv, skb, state);
+ }
+
+-struct nft_af_info nft_af_ipv4 __read_mostly = {
++static struct nft_af_info nft_af_ipv4 __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+@@ -58,7 +58,6 @@ struct nft_af_info nft_af_ipv4 __read_mo
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
+ },
+ };
+-EXPORT_SYMBOL_GPL(nft_af_ipv4);
+
+ static int nf_tables_ipv4_init_net(struct net *net)
+ {
+--- a/net/ipv6/netfilter/nf_tables_ipv6.c
++++ b/net/ipv6/netfilter/nf_tables_ipv6.c
+@@ -42,7 +42,7 @@ static unsigned int nft_ipv6_output(void
+ return nft_do_chain_ipv6(priv, skb, state);
+ }
+
+-struct nft_af_info nft_af_ipv6 __read_mostly = {
++static struct nft_af_info nft_af_ipv6 __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+@@ -55,7 +55,6 @@ struct nft_af_info nft_af_ipv6 __read_mo
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
+ },
+ };
+-EXPORT_SYMBOL_GPL(nft_af_ipv6);
+
+ static int nf_tables_ipv6_init_net(struct net *net)
+ {
+--- a/net/netfilter/nf_tables_inet.c
++++ b/net/netfilter/nf_tables_inet.c
+@@ -9,6 +9,7 @@
+ #include <linux/init.h>
+ #include <linux/module.h>
+ #include <linux/ip.h>
++#include <linux/ipv6.h>
+ #include <linux/netfilter_ipv4.h>
+ #include <linux/netfilter_ipv6.h>
+ #include <net/netfilter/nf_tables.h>
+@@ -16,26 +17,71 @@
+ #include <net/netfilter/nf_tables_ipv6.h>
+ #include <net/ip.h>
+
+-static void nft_inet_hook_ops_init(struct nf_hook_ops *ops, unsigned int n)
++static unsigned int nft_do_chain_inet(void *priv, struct sk_buff *skb,
++ const struct nf_hook_state *state)
+ {
+- struct nft_af_info *afi;
++ struct nft_pktinfo pkt;
+
+- if (n == 1)
+- afi = &nft_af_ipv4;
+- else
+- afi = &nft_af_ipv6;
+-
+- ops->pf = afi->family;
+- if (afi->hooks[ops->hooknum])
+- ops->hook = afi->hooks[ops->hooknum];
++ nft_set_pktinfo(&pkt, skb, state);
++
++ switch (state->pf) {
++ case NFPROTO_IPV4:
++ nft_set_pktinfo_ipv4(&pkt, skb);
++ break;
++ case NFPROTO_IPV6:
++ nft_set_pktinfo_ipv6(&pkt, skb);
++ break;
++ default:
++ break;
++ }
++
++ return nft_do_chain(&pkt, priv);
++}
++
++static unsigned int nft_inet_output(void *priv, struct sk_buff *skb,
++ const struct nf_hook_state *state)
++{
++ struct nft_pktinfo pkt;
++
++ nft_set_pktinfo(&pkt, skb, state);
++
++ switch (state->pf) {
++ case NFPROTO_IPV4:
++ if (unlikely(skb->len < sizeof(struct iphdr) ||
++ ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
++ if (net_ratelimit())
++ pr_info("ignoring short SOCK_RAW packet\n");
++ return NF_ACCEPT;
++ }
++ nft_set_pktinfo_ipv4(&pkt, skb);
++ break;
++ case NFPROTO_IPV6:
++ if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
++ if (net_ratelimit())
++ pr_info("ignoring short SOCK_RAW packet\n");
++ return NF_ACCEPT;
++ }
++ nft_set_pktinfo_ipv6(&pkt, skb);
++ break;
++ default:
++ break;
++ }
++
++ return nft_do_chain(&pkt, priv);
+ }
+
+ static struct nft_af_info nft_af_inet __read_mostly = {
+ .family = NFPROTO_INET,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+- .nops = 2,
+- .hook_ops_init = nft_inet_hook_ops_init,
++ .nops = 1,
++ .hooks = {
++ [NF_INET_LOCAL_IN] = nft_do_chain_inet,
++ [NF_INET_LOCAL_OUT] = nft_inet_output,
++ [NF_INET_FORWARD] = nft_do_chain_inet,
++ [NF_INET_PRE_ROUTING] = nft_do_chain_inet,
++ [NF_INET_POST_ROUTING] = nft_do_chain_inet,
++ },
+ };
+
+ static int __net_init nf_tables_inet_init_net(struct net *net)
--- /dev/null
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sat, 9 Dec 2017 15:40:25 +0100
+Subject: [PATCH] netfilter: nf_tables: remove multihook chains and families
+
+Since NFPROTO_INET is handled from the core, we don't need to maintain
+extra infrastructure in nf_tables to handle the double hook
+registration, one for IPv4 and another for IPv6.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -897,8 +897,6 @@ struct nft_stats {
+ struct u64_stats_sync syncp;
+ };
+
+-#define NFT_HOOK_OPS_MAX 2
+-
+ /**
+ * struct nft_base_chain - nf_tables base chain
+ *
+@@ -910,7 +908,7 @@ struct nft_stats {
+ * @dev_name: device name that this base chain is attached to (if any)
+ */
+ struct nft_base_chain {
+- struct nf_hook_ops ops[NFT_HOOK_OPS_MAX];
++ struct nf_hook_ops ops;
+ const struct nf_chain_type *type;
+ u8 policy;
+ u8 flags;
+@@ -971,8 +969,6 @@ enum nft_af_flags {
+ * @owner: module owner
+ * @tables: used internally
+ * @flags: family flags
+- * @nops: number of hook ops in this family
+- * @hook_ops_init: initialization function for chain hook ops
+ * @hooks: hookfn overrides for packet validation
+ */
+ struct nft_af_info {
+@@ -982,9 +978,6 @@ struct nft_af_info {
+ struct module *owner;
+ struct list_head tables;
+ u32 flags;
+- unsigned int nops;
+- void (*hook_ops_init)(struct nf_hook_ops *,
+- unsigned int);
+ nf_hookfn *hooks[NF_MAX_HOOKS];
+ };
+
+--- a/net/bridge/netfilter/nf_tables_bridge.c
++++ b/net/bridge/netfilter/nf_tables_bridge.c
+@@ -46,7 +46,6 @@ static struct nft_af_info nft_af_bridge
+ .family = NFPROTO_BRIDGE,
+ .nhooks = NF_BR_NUMHOOKS,
+ .owner = THIS_MODULE,
+- .nops = 1,
+ .hooks = {
+ [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
+ [NF_BR_LOCAL_IN] = nft_do_chain_bridge,
+--- a/net/ipv4/netfilter/nf_tables_arp.c
++++ b/net/ipv4/netfilter/nf_tables_arp.c
+@@ -31,7 +31,6 @@ static struct nft_af_info nft_af_arp __r
+ .family = NFPROTO_ARP,
+ .nhooks = NF_ARP_NUMHOOKS,
+ .owner = THIS_MODULE,
+- .nops = 1,
+ .hooks = {
+ [NF_ARP_IN] = nft_do_chain_arp,
+ [NF_ARP_OUT] = nft_do_chain_arp,
+--- a/net/ipv4/netfilter/nf_tables_ipv4.c
++++ b/net/ipv4/netfilter/nf_tables_ipv4.c
+@@ -49,7 +49,6 @@ static struct nft_af_info nft_af_ipv4 __
+ .family = NFPROTO_IPV4,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+- .nops = 1,
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
+ [NF_INET_LOCAL_OUT] = nft_ipv4_output,
+--- a/net/ipv6/netfilter/nf_tables_ipv6.c
++++ b/net/ipv6/netfilter/nf_tables_ipv6.c
+@@ -46,7 +46,6 @@ static struct nft_af_info nft_af_ipv6 __
+ .family = NFPROTO_IPV6,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+- .nops = 1,
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
+ [NF_INET_LOCAL_OUT] = nft_ipv6_output,
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -139,29 +139,26 @@ static void nft_trans_destroy(struct nft
+ kfree(trans);
+ }
+
+-static int nf_tables_register_hooks(struct net *net,
+- const struct nft_table *table,
+- struct nft_chain *chain,
+- unsigned int hook_nops)
++static int nf_tables_register_hook(struct net *net,
++ const struct nft_table *table,
++ struct nft_chain *chain)
+ {
+ if (table->flags & NFT_TABLE_F_DORMANT ||
+ !nft_is_base_chain(chain))
+ return 0;
+
+- return nf_register_net_hooks(net, nft_base_chain(chain)->ops,
+- hook_nops);
++ return nf_register_net_hook(net, &nft_base_chain(chain)->ops);
+ }
+
+-static void nf_tables_unregister_hooks(struct net *net,
+- const struct nft_table *table,
+- struct nft_chain *chain,
+- unsigned int hook_nops)
++static void nf_tables_unregister_hook(struct net *net,
++ const struct nft_table *table,
++ struct nft_chain *chain)
+ {
+ if (table->flags & NFT_TABLE_F_DORMANT ||
+ !nft_is_base_chain(chain))
+ return;
+
+- nf_unregister_net_hooks(net, nft_base_chain(chain)->ops, hook_nops);
++ nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
+ }
+
+ static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
+@@ -624,8 +621,7 @@ static void _nf_tables_table_disable(str
+ if (cnt && i++ == cnt)
+ break;
+
+- nf_unregister_net_hooks(net, nft_base_chain(chain)->ops,
+- afi->nops);
++ nf_unregister_net_hook(net, &nft_base_chain(chain)->ops);
+ }
+ }
+
+@@ -642,8 +638,7 @@ static int nf_tables_table_enable(struct
+ if (!nft_is_base_chain(chain))
+ continue;
+
+- err = nf_register_net_hooks(net, nft_base_chain(chain)->ops,
+- afi->nops);
++ err = nf_register_net_hook(net, &nft_base_chain(chain)->ops);
+ if (err < 0)
+ goto err;
+
+@@ -1055,7 +1050,7 @@ static int nf_tables_fill_chain_info(str
+
+ if (nft_is_base_chain(chain)) {
+ const struct nft_base_chain *basechain = nft_base_chain(chain);
+- const struct nf_hook_ops *ops = &basechain->ops[0];
++ const struct nf_hook_ops *ops = &basechain->ops;
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, NFTA_CHAIN_HOOK);
+@@ -1283,8 +1278,8 @@ static void nf_tables_chain_destroy(stru
+ free_percpu(basechain->stats);
+ if (basechain->stats)
+ static_branch_dec(&nft_counters_enabled);
+- if (basechain->ops[0].dev != NULL)
+- dev_put(basechain->ops[0].dev);
++ if (basechain->ops.dev != NULL)
++ dev_put(basechain->ops.dev);
+ kfree(chain->name);
+ kfree(basechain);
+ } else {
+@@ -1380,7 +1375,6 @@ static int nf_tables_addchain(struct nft
+ struct nft_stats __percpu *stats;
+ struct net *net = ctx->net;
+ struct nft_chain *chain;
+- unsigned int i;
+ int err;
+
+ if (table->use == UINT_MAX)
+@@ -1419,21 +1413,18 @@ static int nf_tables_addchain(struct nft
+ basechain->type = hook.type;
+ chain = &basechain->chain;
+
+- for (i = 0; i < afi->nops; i++) {
+- ops = &basechain->ops[i];
+- ops->pf = family;
+- ops->hooknum = hook.num;
+- ops->priority = hook.priority;
+- ops->priv = chain;
+- ops->hook = afi->hooks[ops->hooknum];
+- ops->dev = hook.dev;
+- if (hookfn)
+- ops->hook = hookfn;
+- if (afi->hook_ops_init)
+- afi->hook_ops_init(ops, i);
+- if (basechain->type->type == NFT_CHAIN_T_NAT)
+- ops->nat_hook = true;
+- }
++ ops = &basechain->ops;
++ ops->pf = family;
++ ops->hooknum = hook.num;
++ ops->priority = hook.priority;
++ ops->priv = chain;
++ ops->hook = afi->hooks[ops->hooknum];
++ ops->dev = hook.dev;
++ if (hookfn)
++ ops->hook = hookfn;
++
++ if (basechain->type->type == NFT_CHAIN_T_NAT)
++ ops->nat_hook = true;
+
+ chain->flags |= NFT_BASE_CHAIN;
+ basechain->policy = policy;
+@@ -1451,7 +1442,7 @@ static int nf_tables_addchain(struct nft
+ goto err1;
+ }
+
+- err = nf_tables_register_hooks(net, table, chain, afi->nops);
++ err = nf_tables_register_hook(net, table, chain);
+ if (err < 0)
+ goto err1;
+
+@@ -1465,7 +1456,7 @@ static int nf_tables_addchain(struct nft
+
+ return 0;
+ err2:
+- nf_tables_unregister_hooks(net, table, chain, afi->nops);
++ nf_tables_unregister_hook(net, table, chain);
+ err1:
+ nf_tables_chain_destroy(chain);
+
+@@ -1478,13 +1469,12 @@ static int nf_tables_updchain(struct nft
+ const struct nlattr * const *nla = ctx->nla;
+ struct nft_table *table = ctx->table;
+ struct nft_chain *chain = ctx->chain;
+- struct nft_af_info *afi = ctx->afi;
+ struct nft_base_chain *basechain;
+ struct nft_stats *stats = NULL;
+ struct nft_chain_hook hook;
+ struct nf_hook_ops *ops;
+ struct nft_trans *trans;
+- int err, i;
++ int err;
+
+ if (nla[NFTA_CHAIN_HOOK]) {
+ if (!nft_is_base_chain(chain))
+@@ -1501,14 +1491,12 @@ static int nf_tables_updchain(struct nft
+ return -EBUSY;
+ }
+
+- for (i = 0; i < afi->nops; i++) {
+- ops = &basechain->ops[i];
+- if (ops->hooknum != hook.num ||
+- ops->priority != hook.priority ||
+- ops->dev != hook.dev) {
+- nft_chain_release_hook(&hook);
+- return -EBUSY;
+- }
++ ops = &basechain->ops;
++ if (ops->hooknum != hook.num ||
++ ops->priority != hook.priority ||
++ ops->dev != hook.dev) {
++ nft_chain_release_hook(&hook);
++ return -EBUSY;
+ }
+ nft_chain_release_hook(&hook);
+ }
+@@ -5135,10 +5123,9 @@ static int nf_tables_commit(struct net *
+ case NFT_MSG_DELCHAIN:
+ list_del_rcu(&trans->ctx.chain->list);
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
+- nf_tables_unregister_hooks(trans->ctx.net,
+- trans->ctx.table,
+- trans->ctx.chain,
+- trans->ctx.afi->nops);
++ nf_tables_unregister_hook(trans->ctx.net,
++ trans->ctx.table,
++ trans->ctx.chain);
+ break;
+ case NFT_MSG_NEWRULE:
+ nft_clear(trans->ctx.net, nft_trans_rule(trans));
+@@ -5275,10 +5262,9 @@ static int nf_tables_abort(struct net *n
+ } else {
+ trans->ctx.table->use--;
+ list_del_rcu(&trans->ctx.chain->list);
+- nf_tables_unregister_hooks(trans->ctx.net,
+- trans->ctx.table,
+- trans->ctx.chain,
+- trans->ctx.afi->nops);
++ nf_tables_unregister_hook(trans->ctx.net,
++ trans->ctx.table,
++ trans->ctx.chain);
+ }
+ break;
+ case NFT_MSG_DELCHAIN:
+@@ -5381,7 +5367,7 @@ int nft_chain_validate_hooks(const struc
+ if (nft_is_base_chain(chain)) {
+ basechain = nft_base_chain(chain);
+
+- if ((1 << basechain->ops[0].hooknum) & hook_flags)
++ if ((1 << basechain->ops.hooknum) & hook_flags)
+ return 0;
+
+ return -EOPNOTSUPP;
+@@ -5863,8 +5849,7 @@ int __nft_release_basechain(struct nft_c
+
+ BUG_ON(!nft_is_base_chain(ctx->chain));
+
+- nf_tables_unregister_hooks(ctx->net, ctx->chain->table, ctx->chain,
+- ctx->afi->nops);
++ nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
+ list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
+ list_del(&rule->list);
+ ctx->chain->use--;
+@@ -5893,8 +5878,7 @@ static void __nft_release_afinfo(struct
+
+ list_for_each_entry_safe(table, nt, &afi->tables, list) {
+ list_for_each_entry(chain, &table->chains, list)
+- nf_tables_unregister_hooks(net, table, chain,
+- afi->nops);
++ nf_tables_unregister_hook(net, table, chain);
+ /* No packets are walking on these chains anymore. */
+ ctx.table = table;
+ list_for_each_entry(chain, &table->chains, list) {
+--- a/net/netfilter/nf_tables_inet.c
++++ b/net/netfilter/nf_tables_inet.c
+@@ -74,7 +74,6 @@ static struct nft_af_info nft_af_inet __
+ .family = NFPROTO_INET,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+- .nops = 1,
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_inet,
+ [NF_INET_LOCAL_OUT] = nft_inet_output,
+--- a/net/netfilter/nf_tables_netdev.c
++++ b/net/netfilter/nf_tables_netdev.c
+@@ -43,7 +43,6 @@ static struct nft_af_info nft_af_netdev
+ .nhooks = NF_NETDEV_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .flags = NFT_AF_NEEDS_DEV,
+- .nops = 1,
+ .hooks = {
+ [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
+ },
+@@ -98,7 +97,7 @@ static void nft_netdev_event(unsigned lo
+ __nft_release_basechain(ctx);
+ break;
+ case NETDEV_CHANGENAME:
+- if (dev->ifindex != basechain->ops[0].dev->ifindex)
++ if (dev->ifindex != basechain->ops.dev->ifindex)
+ return;
+
+ strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
+--- a/net/netfilter/nft_compat.c
++++ b/net/netfilter/nft_compat.c
+@@ -186,7 +186,7 @@ nft_target_set_tgchk_param(struct xt_tgc
+ if (nft_is_base_chain(ctx->chain)) {
+ const struct nft_base_chain *basechain =
+ nft_base_chain(ctx->chain);
+- const struct nf_hook_ops *ops = &basechain->ops[0];
++ const struct nf_hook_ops *ops = &basechain->ops;
+
+ par->hook_mask = 1 << ops->hooknum;
+ } else {
+@@ -317,7 +317,7 @@ static int nft_target_validate(const str
+ if (nft_is_base_chain(ctx->chain)) {
+ const struct nft_base_chain *basechain =
+ nft_base_chain(ctx->chain);
+- const struct nf_hook_ops *ops = &basechain->ops[0];
++ const struct nf_hook_ops *ops = &basechain->ops;
+
+ hook_mask = 1 << ops->hooknum;
+ if (target->hooks && !(hook_mask & target->hooks))
+@@ -414,7 +414,7 @@ nft_match_set_mtchk_param(struct xt_mtch
+ if (nft_is_base_chain(ctx->chain)) {
+ const struct nft_base_chain *basechain =
+ nft_base_chain(ctx->chain);
+- const struct nf_hook_ops *ops = &basechain->ops[0];
++ const struct nf_hook_ops *ops = &basechain->ops;
+
+ par->hook_mask = 1 << ops->hooknum;
+ } else {
+@@ -564,7 +564,7 @@ static int nft_match_validate(const stru
+ if (nft_is_base_chain(ctx->chain)) {
+ const struct nft_base_chain *basechain =
+ nft_base_chain(ctx->chain);
+- const struct nf_hook_ops *ops = &basechain->ops[0];
++ const struct nf_hook_ops *ops = &basechain->ops;
+
+ hook_mask = 1 << ops->hooknum;
+ if (match->hooks && !(hook_mask & match->hooks))
--- /dev/null
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Mon, 27 Nov 2017 21:55:14 +0100
+Subject: [PATCH] netfilter: move checksum indirection to struct nf_ipv6_ops
+
+We cannot make a direct call to nf_ip6_checksum() because that would
+result in autoloading the 'ipv6' module because of symbol dependencies.
+Therefore, define the checksum indirection in nf_ipv6_ops, where it really
+belongs.
+
+For IPv4, we can indeed make a direct function call, which is faster,
+given IPv4 is built-in in the networking code by default. Still,
+CONFIG_INET=n and CONFIG_NETFILTER=y is possible, so define an empty inline
+stub for IPv4 in that case.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ create mode 100644 net/netfilter/utils.c
+
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -311,8 +311,6 @@ struct nf_queue_entry;
+
+ struct nf_afinfo {
+ unsigned short family;
+- __sum16 (*checksum)(struct sk_buff *skb, unsigned int hook,
+- unsigned int dataoff, u_int8_t protocol);
+ __sum16 (*checksum_partial)(struct sk_buff *skb,
+ unsigned int hook,
+ unsigned int dataoff,
+@@ -333,20 +331,9 @@ static inline const struct nf_afinfo *nf
+ return rcu_dereference(nf_afinfo[family]);
+ }
+
+-static inline __sum16
+-nf_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff,
+- u_int8_t protocol, unsigned short family)
+-{
+- const struct nf_afinfo *afinfo;
+- __sum16 csum = 0;
+-
+- rcu_read_lock();
+- afinfo = nf_get_afinfo(family);
+- if (afinfo)
+- csum = afinfo->checksum(skb, hook, dataoff, protocol);
+- rcu_read_unlock();
+- return csum;
+-}
++__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
++ unsigned int dataoff, u_int8_t protocol,
++ unsigned short family);
+
+ static inline __sum16
+ nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
+--- a/include/linux/netfilter_ipv4.h
++++ b/include/linux/netfilter_ipv4.h
+@@ -7,6 +7,16 @@
+ #include <uapi/linux/netfilter_ipv4.h>
+
+ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type);
++
++#ifdef CONFIG_INET
+ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol);
++#else
++static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
++ unsigned int dataoff, u_int8_t protocol)
++{
++ return 0;
++}
++#endif /* CONFIG_INET */
++
+ #endif /*__LINUX_IP_NETFILTER_H*/
+--- a/include/linux/netfilter_ipv6.h
++++ b/include/linux/netfilter_ipv6.h
+@@ -19,6 +19,8 @@ struct nf_ipv6_ops {
+ void (*route_input)(struct sk_buff *skb);
+ int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct net *, struct sock *, struct sk_buff *));
++ __sum16 (*checksum)(struct sk_buff *skb, unsigned int hook,
++ unsigned int dataoff, u_int8_t protocol);
+ };
+
+ #ifdef CONFIG_NETFILTER
+--- a/net/bridge/netfilter/nf_tables_bridge.c
++++ b/net/bridge/netfilter/nf_tables_bridge.c
+@@ -106,12 +106,6 @@ static int nf_br_reroute(struct net *net
+ return 0;
+ }
+
+-static __sum16 nf_br_checksum(struct sk_buff *skb, unsigned int hook,
+- unsigned int dataoff, u_int8_t protocol)
+-{
+- return 0;
+-}
+-
+ static __sum16 nf_br_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol)
+@@ -127,7 +121,6 @@ static int nf_br_route(struct net *net,
+
+ static const struct nf_afinfo nf_br_afinfo = {
+ .family = AF_BRIDGE,
+- .checksum = nf_br_checksum,
+ .checksum_partial = nf_br_checksum_partial,
+ .route = nf_br_route,
+ .saveroute = nf_br_saveroute,
+--- a/net/ipv4/netfilter.c
++++ b/net/ipv4/netfilter.c
+@@ -188,7 +188,6 @@ static int nf_ip_route(struct net *net,
+
+ static const struct nf_afinfo nf_ip_afinfo = {
+ .family = AF_INET,
+- .checksum = nf_ip_checksum,
+ .checksum_partial = nf_ip_checksum_partial,
+ .route = nf_ip_route,
+ .saveroute = nf_ip_saveroute,
+--- a/net/ipv6/netfilter.c
++++ b/net/ipv6/netfilter.c
+@@ -193,12 +193,12 @@ static __sum16 nf_ip6_checksum_partial(s
+ static const struct nf_ipv6_ops ipv6ops = {
+ .chk_addr = ipv6_chk_addr,
+ .route_input = ip6_route_input,
+- .fragment = ip6_fragment
++ .fragment = ip6_fragment,
++ .checksum = nf_ip6_checksum,
+ };
+
+ static const struct nf_afinfo nf_ip6_afinfo = {
+ .family = AF_INET6,
+- .checksum = nf_ip6_checksum,
+ .checksum_partial = nf_ip6_checksum_partial,
+ .route = nf_ip6_route,
+ .saveroute = nf_ip6_saveroute,
+--- a/net/netfilter/Makefile
++++ b/net/netfilter/Makefile
+@@ -1,5 +1,5 @@
+ # SPDX-License-Identifier: GPL-2.0
+-netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o
++netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o utils.o
+
+ nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o
+ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
+--- /dev/null
++++ b/net/netfilter/utils.c
+@@ -0,0 +1,26 @@
++#include <linux/kernel.h>
++#include <linux/netfilter.h>
++#include <linux/netfilter_ipv4.h>
++#include <linux/netfilter_ipv6.h>
++
++__sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
++ unsigned int dataoff, u_int8_t protocol,
++ unsigned short family)
++{
++ const struct nf_ipv6_ops *v6ops;
++ __sum16 csum = 0;
++
++ switch (family) {
++ case AF_INET:
++ csum = nf_ip_checksum(skb, hook, dataoff, protocol);
++ break;
++ case AF_INET6:
++ v6ops = rcu_dereference(nf_ipv6_ops);
++ if (v6ops)
++ csum = v6ops->checksum(skb, hook, dataoff, protocol);
++ break;
++ }
++
++ return csum;
++}
++EXPORT_SYMBOL_GPL(nf_checksum);
--- /dev/null
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Wed, 20 Dec 2017 16:04:18 +0100
+Subject: [PATCH] netfilter: move checksum_partial indirection to struct
+ nf_ipv6_ops
+
+We cannot make a direct call to nf_ip6_checksum_partial() because the
+resulting symbol dependency would autoload the 'ipv6' module.
+Therefore, define the checksum_partial indirection in nf_ipv6_ops,
+where it really belongs.
+
+For IPv4, we can make a direct function call, which is faster, given
+that IPv4 is built into the networking code by default. Still,
+CONFIG_INET=n with CONFIG_NETFILTER=y is possible, so define an empty
+inline stub for IPv4 in that case.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -311,11 +311,6 @@ struct nf_queue_entry;
+
+ struct nf_afinfo {
+ unsigned short family;
+- __sum16 (*checksum_partial)(struct sk_buff *skb,
+- unsigned int hook,
+- unsigned int dataoff,
+- unsigned int len,
+- u_int8_t protocol);
+ int (*route)(struct net *net, struct dst_entry **dst,
+ struct flowi *fl, bool strict);
+ void (*saveroute)(const struct sk_buff *skb,
+@@ -335,22 +330,9 @@ __sum16 nf_checksum(struct sk_buff *skb,
+ unsigned int dataoff, u_int8_t protocol,
+ unsigned short family);
+
+-static inline __sum16
+-nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
+- unsigned int dataoff, unsigned int len,
+- u_int8_t protocol, unsigned short family)
+-{
+- const struct nf_afinfo *afinfo;
+- __sum16 csum = 0;
+-
+- rcu_read_lock();
+- afinfo = nf_get_afinfo(family);
+- if (afinfo)
+- csum = afinfo->checksum_partial(skb, hook, dataoff, len,
+- protocol);
+- rcu_read_unlock();
+- return csum;
+-}
++__sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
++ unsigned int dataoff, unsigned int len,
++ u_int8_t protocol, unsigned short family);
+
+ int nf_register_afinfo(const struct nf_afinfo *afinfo);
+ void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
+--- a/include/linux/netfilter_ipv4.h
++++ b/include/linux/netfilter_ipv4.h
+@@ -11,12 +11,23 @@ int ip_route_me_harder(struct net *net,
+ #ifdef CONFIG_INET
+ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol);
++__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
++ unsigned int dataoff, unsigned int len,
++ u_int8_t protocol);
+ #else
+ static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol)
+ {
+ return 0;
+ }
++static inline __sum16 nf_ip_checksum_partial(struct sk_buff *skb,
++ unsigned int hook,
++ unsigned int dataoff,
++ unsigned int len,
++ u_int8_t protocol)
++{
++ return 0;
++}
+ #endif /* CONFIG_INET */
+
+ #endif /*__LINUX_IP_NETFILTER_H*/
+--- a/include/linux/netfilter_ipv6.h
++++ b/include/linux/netfilter_ipv6.h
+@@ -21,6 +21,9 @@ struct nf_ipv6_ops {
+ int (*output)(struct net *, struct sock *, struct sk_buff *));
+ __sum16 (*checksum)(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol);
++ __sum16 (*checksum_partial)(struct sk_buff *skb, unsigned int hook,
++ unsigned int dataoff, unsigned int len,
++ u_int8_t protocol);
+ };
+
+ #ifdef CONFIG_NETFILTER
+--- a/net/bridge/netfilter/nf_tables_bridge.c
++++ b/net/bridge/netfilter/nf_tables_bridge.c
+@@ -106,13 +106,6 @@ static int nf_br_reroute(struct net *net
+ return 0;
+ }
+
+-static __sum16 nf_br_checksum_partial(struct sk_buff *skb, unsigned int hook,
+- unsigned int dataoff, unsigned int len,
+- u_int8_t protocol)
+-{
+- return 0;
+-}
+-
+ static int nf_br_route(struct net *net, struct dst_entry **dst,
+ struct flowi *fl, bool strict __always_unused)
+ {
+@@ -121,7 +114,6 @@ static int nf_br_route(struct net *net,
+
+ static const struct nf_afinfo nf_br_afinfo = {
+ .family = AF_BRIDGE,
+- .checksum_partial = nf_br_checksum_partial,
+ .route = nf_br_route,
+ .saveroute = nf_br_saveroute,
+ .reroute = nf_br_reroute,
+--- a/net/ipv4/netfilter.c
++++ b/net/ipv4/netfilter.c
+@@ -155,9 +155,9 @@ __sum16 nf_ip_checksum(struct sk_buff *s
+ }
+ EXPORT_SYMBOL(nf_ip_checksum);
+
+-static __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
+- unsigned int dataoff, unsigned int len,
+- u_int8_t protocol)
++__sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
++ unsigned int dataoff, unsigned int len,
++ u_int8_t protocol)
+ {
+ const struct iphdr *iph = ip_hdr(skb);
+ __sum16 csum = 0;
+@@ -175,6 +175,7 @@ static __sum16 nf_ip_checksum_partial(st
+ }
+ return csum;
+ }
++EXPORT_SYMBOL_GPL(nf_ip_checksum_partial);
+
+ static int nf_ip_route(struct net *net, struct dst_entry **dst,
+ struct flowi *fl, bool strict __always_unused)
+@@ -188,7 +189,6 @@ static int nf_ip_route(struct net *net,
+
+ static const struct nf_afinfo nf_ip_afinfo = {
+ .family = AF_INET,
+- .checksum_partial = nf_ip_checksum_partial,
+ .route = nf_ip_route,
+ .saveroute = nf_ip_saveroute,
+ .reroute = nf_ip_reroute,
+--- a/net/ipv6/netfilter.c
++++ b/net/ipv6/netfilter.c
+@@ -191,15 +191,15 @@ static __sum16 nf_ip6_checksum_partial(s
+ };
+
+ static const struct nf_ipv6_ops ipv6ops = {
+- .chk_addr = ipv6_chk_addr,
+- .route_input = ip6_route_input,
+- .fragment = ip6_fragment,
+- .checksum = nf_ip6_checksum,
++ .chk_addr = ipv6_chk_addr,
++ .route_input = ip6_route_input,
++ .fragment = ip6_fragment,
++ .checksum = nf_ip6_checksum,
++ .checksum_partial = nf_ip6_checksum_partial,
+ };
+
+ static const struct nf_afinfo nf_ip6_afinfo = {
+ .family = AF_INET6,
+- .checksum_partial = nf_ip6_checksum_partial,
+ .route = nf_ip6_route,
+ .saveroute = nf_ip6_saveroute,
+ .reroute = nf_ip6_reroute,
+--- a/net/netfilter/utils.c
++++ b/net/netfilter/utils.c
+@@ -24,3 +24,27 @@ __sum16 nf_checksum(struct sk_buff *skb,
+ return csum;
+ }
+ EXPORT_SYMBOL_GPL(nf_checksum);
++
++__sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
++ unsigned int dataoff, unsigned int len,
++ u_int8_t protocol, unsigned short family)
++{
++ const struct nf_ipv6_ops *v6ops;
++ __sum16 csum = 0;
++
++ switch (family) {
++ case AF_INET:
++ csum = nf_ip_checksum_partial(skb, hook, dataoff, len,
++ protocol);
++ break;
++ case AF_INET6:
++ v6ops = rcu_dereference(nf_ipv6_ops);
++ if (v6ops)
++ csum = v6ops->checksum_partial(skb, hook, dataoff, len,
++ protocol);
++ break;
++ }
++
++ return csum;
++}
++EXPORT_SYMBOL_GPL(nf_checksum_partial);
--- /dev/null
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Wed, 20 Dec 2017 16:12:55 +0100
+Subject: [PATCH] netfilter: remove saveroute indirection in struct nf_afinfo
+
+This is only used by nf_queue.c, and the function has no symbol
+dependencies on IPv6; it just refers to structure layouts. Therefore,
+we can replace it with a direct function call from where it belongs.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -313,8 +313,6 @@ struct nf_afinfo {
+ unsigned short family;
+ int (*route)(struct net *net, struct dst_entry **dst,
+ struct flowi *fl, bool strict);
+- void (*saveroute)(const struct sk_buff *skb,
+- struct nf_queue_entry *entry);
+ int (*reroute)(struct net *net, struct sk_buff *skb,
+ const struct nf_queue_entry *entry);
+ int route_key_size;
+--- a/include/linux/netfilter_ipv4.h
++++ b/include/linux/netfilter_ipv4.h
+@@ -6,6 +6,16 @@
+
+ #include <uapi/linux/netfilter_ipv4.h>
+
++/* Extra routing may needed on local out, as the QUEUE target never returns
++ * control to the table.
++ */
++struct ip_rt_info {
++ __be32 daddr;
++ __be32 saddr;
++ u_int8_t tos;
++ u_int32_t mark;
++};
++
+ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type);
+
+ #ifdef CONFIG_INET
+--- a/include/linux/netfilter_ipv6.h
++++ b/include/linux/netfilter_ipv6.h
+@@ -9,6 +9,15 @@
+
+ #include <uapi/linux/netfilter_ipv6.h>
+
++/* Extra routing may needed on local out, as the QUEUE target never returns
++ * control to the table.
++ */
++struct ip6_rt_info {
++ struct in6_addr daddr;
++ struct in6_addr saddr;
++ u_int32_t mark;
++};
++
+ /*
+ * Hook functions for ipv6 to allow xt_* modules to be built-in even
+ * if IPv6 is a module.
+--- a/net/bridge/netfilter/nf_tables_bridge.c
++++ b/net/bridge/netfilter/nf_tables_bridge.c
+@@ -95,11 +95,6 @@ static const struct nf_chain_type filter
+ (1 << NF_BR_POST_ROUTING),
+ };
+
+-static void nf_br_saveroute(const struct sk_buff *skb,
+- struct nf_queue_entry *entry)
+-{
+-}
+-
+ static int nf_br_reroute(struct net *net, struct sk_buff *skb,
+ const struct nf_queue_entry *entry)
+ {
+@@ -115,7 +110,6 @@ static int nf_br_route(struct net *net,
+ static const struct nf_afinfo nf_br_afinfo = {
+ .family = AF_BRIDGE,
+ .route = nf_br_route,
+- .saveroute = nf_br_saveroute,
+ .reroute = nf_br_reroute,
+ .route_key_size = 0,
+ };
+--- a/net/ipv4/netfilter.c
++++ b/net/ipv4/netfilter.c
+@@ -80,33 +80,6 @@ int ip_route_me_harder(struct net *net,
+ }
+ EXPORT_SYMBOL(ip_route_me_harder);
+
+-/*
+- * Extra routing may needed on local out, as the QUEUE target never
+- * returns control to the table.
+- */
+-
+-struct ip_rt_info {
+- __be32 daddr;
+- __be32 saddr;
+- u_int8_t tos;
+- u_int32_t mark;
+-};
+-
+-static void nf_ip_saveroute(const struct sk_buff *skb,
+- struct nf_queue_entry *entry)
+-{
+- struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
+-
+- if (entry->state.hook == NF_INET_LOCAL_OUT) {
+- const struct iphdr *iph = ip_hdr(skb);
+-
+- rt_info->tos = iph->tos;
+- rt_info->daddr = iph->daddr;
+- rt_info->saddr = iph->saddr;
+- rt_info->mark = skb->mark;
+- }
+-}
+-
+ static int nf_ip_reroute(struct net *net, struct sk_buff *skb,
+ const struct nf_queue_entry *entry)
+ {
+@@ -190,7 +163,6 @@ static int nf_ip_route(struct net *net,
+ static const struct nf_afinfo nf_ip_afinfo = {
+ .family = AF_INET,
+ .route = nf_ip_route,
+- .saveroute = nf_ip_saveroute,
+ .reroute = nf_ip_reroute,
+ .route_key_size = sizeof(struct ip_rt_info),
+ };
+--- a/net/ipv6/netfilter.c
++++ b/net/ipv6/netfilter.c
+@@ -69,31 +69,6 @@ int ip6_route_me_harder(struct net *net,
+ }
+ EXPORT_SYMBOL(ip6_route_me_harder);
+
+-/*
+- * Extra routing may needed on local out, as the QUEUE target never
+- * returns control to the table.
+- */
+-
+-struct ip6_rt_info {
+- struct in6_addr daddr;
+- struct in6_addr saddr;
+- u_int32_t mark;
+-};
+-
+-static void nf_ip6_saveroute(const struct sk_buff *skb,
+- struct nf_queue_entry *entry)
+-{
+- struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
+-
+- if (entry->state.hook == NF_INET_LOCAL_OUT) {
+- const struct ipv6hdr *iph = ipv6_hdr(skb);
+-
+- rt_info->daddr = iph->daddr;
+- rt_info->saddr = iph->saddr;
+- rt_info->mark = skb->mark;
+- }
+-}
+-
+ static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
+ const struct nf_queue_entry *entry)
+ {
+@@ -201,7 +176,6 @@ static const struct nf_ipv6_ops ipv6ops
+ static const struct nf_afinfo nf_ip6_afinfo = {
+ .family = AF_INET6,
+ .route = nf_ip6_route,
+- .saveroute = nf_ip6_saveroute,
+ .reroute = nf_ip6_reroute,
+ .route_key_size = sizeof(struct ip6_rt_info),
+ };
+--- a/net/netfilter/nf_queue.c
++++ b/net/netfilter/nf_queue.c
+@@ -10,6 +10,8 @@
+ #include <linux/proc_fs.h>
+ #include <linux/skbuff.h>
+ #include <linux/netfilter.h>
++#include <linux/netfilter_ipv4.h>
++#include <linux/netfilter_ipv6.h>
+ #include <linux/netfilter_bridge.h>
+ #include <linux/seq_file.h>
+ #include <linux/rcupdate.h>
+@@ -108,6 +110,35 @@ void nf_queue_nf_hook_drop(struct net *n
+ }
+ EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop);
+
++static void nf_ip_saveroute(const struct sk_buff *skb,
++ struct nf_queue_entry *entry)
++{
++ struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
++
++ if (entry->state.hook == NF_INET_LOCAL_OUT) {
++ const struct iphdr *iph = ip_hdr(skb);
++
++ rt_info->tos = iph->tos;
++ rt_info->daddr = iph->daddr;
++ rt_info->saddr = iph->saddr;
++ rt_info->mark = skb->mark;
++ }
++}
++
++static void nf_ip6_saveroute(const struct sk_buff *skb,
++ struct nf_queue_entry *entry)
++{
++ struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
++
++ if (entry->state.hook == NF_INET_LOCAL_OUT) {
++ const struct ipv6hdr *iph = ipv6_hdr(skb);
++
++ rt_info->daddr = iph->daddr;
++ rt_info->saddr = iph->saddr;
++ rt_info->mark = skb->mark;
++ }
++}
++
+ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
+ const struct nf_hook_entries *entries,
+ unsigned int index, unsigned int queuenum)
+@@ -144,7 +175,16 @@ static int __nf_queue(struct sk_buff *sk
+
+ nf_queue_entry_get_refs(entry);
+ skb_dst_force(skb);
+- afinfo->saveroute(skb, entry);
++
++ switch (entry->state.pf) {
++ case AF_INET:
++ nf_ip_saveroute(skb, entry);
++ break;
++ case AF_INET6:
++ nf_ip6_saveroute(skb, entry);
++ break;
++ }
++
+ status = qh->outfn(entry, queuenum);
+
+ if (status < 0) {
--- /dev/null
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Mon, 27 Nov 2017 22:29:52 +0100
+Subject: [PATCH] netfilter: move route indirection to struct nf_ipv6_ops
+
+We cannot make a direct call to nf_ip6_route() because the resulting
+symbol dependency would autoload the 'ipv6' module.
+Therefore, define the route indirection in nf_ipv6_ops, where it really
+belongs.
+
+For IPv4, we can make a direct function call, which is faster, given
+that IPv4 is built into the networking code by default. Still,
+CONFIG_INET=n with CONFIG_NETFILTER=y is possible, so define an empty
+inline stub for IPv4 in that case.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -311,8 +311,6 @@ struct nf_queue_entry;
+
+ struct nf_afinfo {
+ unsigned short family;
+- int (*route)(struct net *net, struct dst_entry **dst,
+- struct flowi *fl, bool strict);
+ int (*reroute)(struct net *net, struct sk_buff *skb,
+ const struct nf_queue_entry *entry);
+ int route_key_size;
+@@ -331,6 +329,8 @@ __sum16 nf_checksum(struct sk_buff *skb,
+ __sum16 nf_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol, unsigned short family);
++int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
++ bool strict, unsigned short family);
+
+ int nf_register_afinfo(const struct nf_afinfo *afinfo);
+ void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
+--- a/include/linux/netfilter_ipv4.h
++++ b/include/linux/netfilter_ipv4.h
+@@ -24,6 +24,8 @@ __sum16 nf_ip_checksum(struct sk_buff *s
+ __sum16 nf_ip_checksum_partial(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol);
++int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
++ bool strict);
+ #else
+ static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol)
+@@ -38,6 +40,11 @@ static inline __sum16 nf_ip_checksum_par
+ {
+ return 0;
+ }
++static inline int nf_ip_route(struct net *net, struct dst_entry **dst,
++ struct flowi *fl, bool strict)
++{
++ return -EOPNOTSUPP;
++}
+ #endif /* CONFIG_INET */
+
+ #endif /*__LINUX_IP_NETFILTER_H*/
+--- a/include/linux/netfilter_ipv6.h
++++ b/include/linux/netfilter_ipv6.h
+@@ -33,6 +33,8 @@ struct nf_ipv6_ops {
+ __sum16 (*checksum_partial)(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, unsigned int len,
+ u_int8_t protocol);
++ int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl,
++ bool strict);
+ };
+
+ #ifdef CONFIG_NETFILTER
+--- a/net/bridge/netfilter/nf_tables_bridge.c
++++ b/net/bridge/netfilter/nf_tables_bridge.c
+@@ -101,15 +101,8 @@ static int nf_br_reroute(struct net *net
+ return 0;
+ }
+
+-static int nf_br_route(struct net *net, struct dst_entry **dst,
+- struct flowi *fl, bool strict __always_unused)
+-{
+- return 0;
+-}
+-
+ static const struct nf_afinfo nf_br_afinfo = {
+ .family = AF_BRIDGE,
+- .route = nf_br_route,
+ .reroute = nf_br_reroute,
+ .route_key_size = 0,
+ };
+--- a/net/ipv4/netfilter.c
++++ b/net/ipv4/netfilter.c
+@@ -150,8 +150,8 @@ __sum16 nf_ip_checksum_partial(struct sk
+ }
+ EXPORT_SYMBOL_GPL(nf_ip_checksum_partial);
+
+-static int nf_ip_route(struct net *net, struct dst_entry **dst,
+- struct flowi *fl, bool strict __always_unused)
++int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
++ bool strict __always_unused)
+ {
+ struct rtable *rt = ip_route_output_key(net, &fl->u.ip4);
+ if (IS_ERR(rt))
+@@ -159,10 +159,10 @@ static int nf_ip_route(struct net *net,
+ *dst = &rt->dst;
+ return 0;
+ }
++EXPORT_SYMBOL_GPL(nf_ip_route);
+
+ static const struct nf_afinfo nf_ip_afinfo = {
+ .family = AF_INET,
+- .route = nf_ip_route,
+ .reroute = nf_ip_reroute,
+ .route_key_size = sizeof(struct ip_rt_info),
+ };
+--- a/net/ipv6/netfilter.c
++++ b/net/ipv6/netfilter.c
+@@ -171,11 +171,11 @@ static const struct nf_ipv6_ops ipv6ops
+ .fragment = ip6_fragment,
+ .checksum = nf_ip6_checksum,
+ .checksum_partial = nf_ip6_checksum_partial,
++ .route = nf_ip6_route,
+ };
+
+ static const struct nf_afinfo nf_ip6_afinfo = {
+ .family = AF_INET6,
+- .route = nf_ip6_route,
+ .reroute = nf_ip6_reroute,
+ .route_key_size = sizeof(struct ip6_rt_info),
+ };
+--- a/net/ipv6/netfilter/nft_fib_ipv6.c
++++ b/net/ipv6/netfilter/nft_fib_ipv6.c
+@@ -60,7 +60,6 @@ static u32 __nft_fib6_eval_type(const st
+ {
+ const struct net_device *dev = NULL;
+ const struct nf_ipv6_ops *v6ops;
+- const struct nf_afinfo *afinfo;
+ int route_err, addrtype;
+ struct rt6_info *rt;
+ struct flowi6 fl6 = {
+@@ -69,8 +68,8 @@ static u32 __nft_fib6_eval_type(const st
+ };
+ u32 ret = 0;
+
+- afinfo = nf_get_afinfo(NFPROTO_IPV6);
+- if (!afinfo)
++ v6ops = nf_get_ipv6_ops();
++ if (!v6ops)
+ return RTN_UNREACHABLE;
+
+ if (priv->flags & NFTA_FIB_F_IIF)
+@@ -80,12 +79,11 @@ static u32 __nft_fib6_eval_type(const st
+
+ nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
+
+- v6ops = nf_get_ipv6_ops();
+- if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
++ if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
+ ret = RTN_LOCAL;
+
+- route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt,
+- flowi6_to_flowi(&fl6), false);
++ route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt,
++ flowi6_to_flowi(&fl6), false);
+ if (route_err)
+ goto err;
+
+--- a/net/netfilter/nf_conntrack_h323_main.c
++++ b/net/netfilter/nf_conntrack_h323_main.c
+@@ -24,6 +24,7 @@
+ #include <linux/skbuff.h>
+ #include <net/route.h>
+ #include <net/ip6_route.h>
++#include <linux/netfilter_ipv6.h>
+
+ #include <net/netfilter/nf_conntrack.h>
+ #include <net/netfilter/nf_conntrack_core.h>
+@@ -732,14 +733,8 @@ static int callforward_do_filter(struct
+ const union nf_inet_addr *dst,
+ u_int8_t family)
+ {
+- const struct nf_afinfo *afinfo;
+ int ret = 0;
+
+- /* rcu_read_lock()ed by nf_hook_thresh */
+- afinfo = nf_get_afinfo(family);
+- if (!afinfo)
+- return 0;
+-
+ switch (family) {
+ case AF_INET: {
+ struct flowi4 fl1, fl2;
+@@ -750,10 +745,10 @@ static int callforward_do_filter(struct
+
+ memset(&fl2, 0, sizeof(fl2));
+ fl2.daddr = dst->ip;
+- if (!afinfo->route(net, (struct dst_entry **)&rt1,
+- flowi4_to_flowi(&fl1), false)) {
+- if (!afinfo->route(net, (struct dst_entry **)&rt2,
+- flowi4_to_flowi(&fl2), false)) {
++ if (!nf_ip_route(net, (struct dst_entry **)&rt1,
++ flowi4_to_flowi(&fl1), false)) {
++ if (!nf_ip_route(net, (struct dst_entry **)&rt2,
++ flowi4_to_flowi(&fl2), false)) {
+ if (rt_nexthop(rt1, fl1.daddr) ==
+ rt_nexthop(rt2, fl2.daddr) &&
+ rt1->dst.dev == rt2->dst.dev)
+@@ -766,18 +761,23 @@ static int callforward_do_filter(struct
+ }
+ #if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)
+ case AF_INET6: {
+- struct flowi6 fl1, fl2;
++ const struct nf_ipv6_ops *v6ops;
+ struct rt6_info *rt1, *rt2;
++ struct flowi6 fl1, fl2;
++
++ v6ops = nf_get_ipv6_ops();
++ if (!v6ops)
++ return 0;
+
+ memset(&fl1, 0, sizeof(fl1));
+ fl1.daddr = src->in6;
+
+ memset(&fl2, 0, sizeof(fl2));
+ fl2.daddr = dst->in6;
+- if (!afinfo->route(net, (struct dst_entry **)&rt1,
+- flowi6_to_flowi(&fl1), false)) {
+- if (!afinfo->route(net, (struct dst_entry **)&rt2,
+- flowi6_to_flowi(&fl2), false)) {
++ if (!v6ops->route(net, (struct dst_entry **)&rt1,
++ flowi6_to_flowi(&fl1), false)) {
++ if (!v6ops->route(net, (struct dst_entry **)&rt2,
++ flowi6_to_flowi(&fl2), false)) {
+ if (ipv6_addr_equal(rt6_nexthop(rt1, &fl1.daddr),
+ rt6_nexthop(rt2, &fl2.daddr)) &&
+ rt1->dst.dev == rt2->dst.dev)
+--- a/net/netfilter/nft_rt.c
++++ b/net/netfilter/nft_rt.c
+@@ -27,7 +27,7 @@ static u16 get_tcpmss(const struct nft_p
+ {
+ u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst);
+ const struct sk_buff *skb = pkt->skb;
+- const struct nf_afinfo *ai;
++ struct dst_entry *dst = NULL;
+ struct flowi fl;
+
+ memset(&fl, 0, sizeof(fl));
+@@ -43,15 +43,10 @@ static u16 get_tcpmss(const struct nft_p
+ break;
+ }
+
+- ai = nf_get_afinfo(nft_pf(pkt));
+- if (ai) {
+- struct dst_entry *dst = NULL;
+-
+- ai->route(nft_net(pkt), &dst, &fl, false);
+- if (dst) {
+- mtu = min(mtu, dst_mtu(dst));
+- dst_release(dst);
+- }
++ nf_route(nft_net(pkt), &dst, &fl, false, nft_pf(pkt));
++ if (dst) {
++ mtu = min(mtu, dst_mtu(dst));
++ dst_release(dst);
+ }
+
+ if (mtu <= minlen || mtu > 0xffff)
+--- a/net/netfilter/utils.c
++++ b/net/netfilter/utils.c
+@@ -48,3 +48,24 @@ __sum16 nf_checksum_partial(struct sk_bu
+ return csum;
+ }
+ EXPORT_SYMBOL_GPL(nf_checksum_partial);
++
++int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
++ bool strict, unsigned short family)
++{
++ const struct nf_ipv6_ops *v6ops;
++ int ret = 0;
++
++ switch (family) {
++ case AF_INET:
++ ret = nf_ip_route(net, dst, fl, strict);
++ break;
++ case AF_INET6:
++ v6ops = rcu_dereference(nf_ipv6_ops);
++ if (v6ops)
++ ret = v6ops->route(net, dst, fl, strict);
++ break;
++ }
++
++ return ret;
++}
++EXPORT_SYMBOL_GPL(nf_route);
+--- a/net/netfilter/xt_TCPMSS.c
++++ b/net/netfilter/xt_TCPMSS.c
+@@ -48,7 +48,6 @@ static u_int32_t tcpmss_reverse_mtu(stru
+ unsigned int family)
+ {
+ struct flowi fl;
+- const struct nf_afinfo *ai;
+ struct rtable *rt = NULL;
+ u_int32_t mtu = ~0U;
+
+@@ -62,10 +61,8 @@ static u_int32_t tcpmss_reverse_mtu(stru
+ memset(fl6, 0, sizeof(*fl6));
+ fl6->daddr = ipv6_hdr(skb)->saddr;
+ }
+- ai = nf_get_afinfo(family);
+- if (ai != NULL)
+- ai->route(net, (struct dst_entry **)&rt, &fl, false);
+
++ nf_route(net, (struct dst_entry **)&rt, &fl, false, family);
+ if (rt != NULL) {
+ mtu = dst_mtu(&rt->dst);
+ dst_release(&rt->dst);
+--- a/net/netfilter/xt_addrtype.c
++++ b/net/netfilter/xt_addrtype.c
+@@ -36,7 +36,7 @@ MODULE_ALIAS("ip6t_addrtype");
+ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
+ const struct in6_addr *addr, u16 mask)
+ {
+- const struct nf_afinfo *afinfo;
++ const struct nf_ipv6_ops *v6ops;
+ struct flowi6 flow;
+ struct rt6_info *rt;
+ u32 ret = 0;
+@@ -47,17 +47,14 @@ static u32 match_lookup_rt6(struct net *
+ if (dev)
+ flow.flowi6_oif = dev->ifindex;
+
+- afinfo = nf_get_afinfo(NFPROTO_IPV6);
+- if (afinfo != NULL) {
+- const struct nf_ipv6_ops *v6ops;
+-
++ v6ops = nf_get_ipv6_ops();
++ if (v6ops) {
+ if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
+- v6ops = nf_get_ipv6_ops();
+- if (v6ops && v6ops->chk_addr(net, addr, dev, true))
++ if (v6ops->chk_addr(net, addr, dev, true))
+ ret = XT_ADDRTYPE_LOCAL;
+ }
+- route_err = afinfo->route(net, (struct dst_entry **)&rt,
+- flowi6_to_flowi(&flow), false);
++ route_err = v6ops->route(net, (struct dst_entry **)&rt,
++ flowi6_to_flowi(&flow), false);
+ } else {
+ route_err = 1;
+ }
--- /dev/null
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Mon, 27 Nov 2017 22:50:26 +0100
+Subject: [PATCH] netfilter: move reroute indirection to struct nf_ipv6_ops
+
+We cannot make a direct call to nf_ip6_reroute() because the resulting
+symbol dependency would autoload the 'ipv6' module.
+Therefore, define the reroute indirection in nf_ipv6_ops, where it
+really belongs.
+
+For IPv4, we can make a direct function call, which is faster, given
+that IPv4 is built into the networking code by default. Still,
+CONFIG_INET=n with CONFIG_NETFILTER=y is possible, so define an empty
+inline stub for IPv4 in that case.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -311,8 +311,6 @@ struct nf_queue_entry;
+
+ struct nf_afinfo {
+ unsigned short family;
+- int (*reroute)(struct net *net, struct sk_buff *skb,
+- const struct nf_queue_entry *entry);
+ int route_key_size;
+ };
+
+@@ -331,6 +329,7 @@ __sum16 nf_checksum_partial(struct sk_bu
+ u_int8_t protocol, unsigned short family);
+ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
+ bool strict, unsigned short family);
++int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry);
+
+ int nf_register_afinfo(const struct nf_afinfo *afinfo);
+ void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
+--- a/include/linux/netfilter_ipv4.h
++++ b/include/linux/netfilter_ipv4.h
+@@ -18,6 +18,8 @@ struct ip_rt_info {
+
+ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type);
+
++struct nf_queue_entry;
++
+ #ifdef CONFIG_INET
+ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol);
+@@ -26,6 +28,7 @@ __sum16 nf_ip_checksum_partial(struct sk
+ u_int8_t protocol);
+ int nf_ip_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
+ bool strict);
++int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry);
+ #else
+ static inline __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol)
+@@ -45,6 +48,11 @@ static inline int nf_ip_route(struct net
+ {
+ return -EOPNOTSUPP;
+ }
++static inline int nf_ip_reroute(struct sk_buff *skb,
++ const struct nf_queue_entry *entry)
++{
++ return -EOPNOTSUPP;
++}
+ #endif /* CONFIG_INET */
+
+ #endif /*__LINUX_IP_NETFILTER_H*/
+--- a/include/linux/netfilter_ipv6.h
++++ b/include/linux/netfilter_ipv6.h
+@@ -18,6 +18,8 @@ struct ip6_rt_info {
+ u_int32_t mark;
+ };
+
++struct nf_queue_entry;
++
+ /*
+ * Hook functions for ipv6 to allow xt_* modules to be built-in even
+ * if IPv6 is a module.
+@@ -35,6 +37,7 @@ struct nf_ipv6_ops {
+ u_int8_t protocol);
+ int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl,
+ bool strict);
++ int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry);
+ };
+
+ #ifdef CONFIG_NETFILTER
+--- a/net/bridge/netfilter/nf_tables_bridge.c
++++ b/net/bridge/netfilter/nf_tables_bridge.c
+@@ -95,15 +95,8 @@ static const struct nf_chain_type filter
+ (1 << NF_BR_POST_ROUTING),
+ };
+
+-static int nf_br_reroute(struct net *net, struct sk_buff *skb,
+- const struct nf_queue_entry *entry)
+-{
+- return 0;
+-}
+-
+ static const struct nf_afinfo nf_br_afinfo = {
+ .family = AF_BRIDGE,
+- .reroute = nf_br_reroute,
+ .route_key_size = 0,
+ };
+
+--- a/net/ipv4/netfilter.c
++++ b/net/ipv4/netfilter.c
+@@ -80,8 +80,7 @@ int ip_route_me_harder(struct net *net,
+ }
+ EXPORT_SYMBOL(ip_route_me_harder);
+
+-static int nf_ip_reroute(struct net *net, struct sk_buff *skb,
+- const struct nf_queue_entry *entry)
++int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
+ {
+ const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+@@ -92,10 +91,12 @@ static int nf_ip_reroute(struct net *net
+ skb->mark == rt_info->mark &&
+ iph->daddr == rt_info->daddr &&
+ iph->saddr == rt_info->saddr))
+- return ip_route_me_harder(net, skb, RTN_UNSPEC);
++ return ip_route_me_harder(entry->state.net, skb,
++ RTN_UNSPEC);
+ }
+ return 0;
+ }
++EXPORT_SYMBOL_GPL(nf_ip_reroute);
+
+ __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol)
+@@ -163,7 +164,6 @@ EXPORT_SYMBOL_GPL(nf_ip_route);
+
+ static const struct nf_afinfo nf_ip_afinfo = {
+ .family = AF_INET,
+- .reroute = nf_ip_reroute,
+ .route_key_size = sizeof(struct ip_rt_info),
+ };
+
+--- a/net/ipv6/netfilter.c
++++ b/net/ipv6/netfilter.c
+@@ -69,7 +69,7 @@ int ip6_route_me_harder(struct net *net,
+ }
+ EXPORT_SYMBOL(ip6_route_me_harder);
+
+-static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
++static int nf_ip6_reroute(struct sk_buff *skb,
+ const struct nf_queue_entry *entry)
+ {
+ struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
+@@ -79,7 +79,7 @@ static int nf_ip6_reroute(struct net *ne
+ if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
+ !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
+ skb->mark != rt_info->mark)
+- return ip6_route_me_harder(net, skb);
++ return ip6_route_me_harder(entry->state.net, skb);
+ }
+ return 0;
+ }
+@@ -172,11 +172,11 @@ static const struct nf_ipv6_ops ipv6ops
+ .checksum = nf_ip6_checksum,
+ .checksum_partial = nf_ip6_checksum_partial,
+ .route = nf_ip6_route,
++ .reroute = nf_ip6_reroute,
+ };
+
+ static const struct nf_afinfo nf_ip6_afinfo = {
+ .family = AF_INET6,
+- .reroute = nf_ip6_reroute,
+ .route_key_size = sizeof(struct ip6_rt_info),
+ };
+
+--- a/net/netfilter/nf_queue.c
++++ b/net/netfilter/nf_queue.c
+@@ -266,7 +266,6 @@ void nf_reinject(struct nf_queue_entry *
+ const struct nf_hook_entry *hook_entry;
+ const struct nf_hook_entries *hooks;
+ struct sk_buff *skb = entry->skb;
+- const struct nf_afinfo *afinfo;
+ const struct net *net;
+ unsigned int i;
+ int err;
+@@ -293,8 +292,7 @@ void nf_reinject(struct nf_queue_entry *
+ verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state);
+
+ if (verdict == NF_ACCEPT) {
+- afinfo = nf_get_afinfo(entry->state.pf);
+- if (!afinfo || afinfo->reroute(entry->state.net, skb, entry) < 0)
++ if (nf_reroute(skb, entry) < 0)
+ verdict = NF_DROP;
+ }
+
+--- a/net/netfilter/utils.c
++++ b/net/netfilter/utils.c
+@@ -2,6 +2,7 @@
+ #include <linux/netfilter.h>
+ #include <linux/netfilter_ipv4.h>
+ #include <linux/netfilter_ipv6.h>
++#include <net/netfilter/nf_queue.h>
+
+ __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol,
+@@ -69,3 +70,21 @@ int nf_route(struct net *net, struct dst
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(nf_route);
++
++int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
++{
++ const struct nf_ipv6_ops *v6ops;
++ int ret = 0;
++
++ switch (entry->state.pf) {
++ case AF_INET:
++ ret = nf_ip_reroute(skb, entry);
++ break;
++ case AF_INET6:
++ v6ops = rcu_dereference(nf_ipv6_ops);
++ if (v6ops)
++ ret = v6ops->reroute(skb, entry);
++ break;
++ }
++ return ret;
++}
--- /dev/null
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Mon, 27 Nov 2017 22:58:37 +0100
+Subject: [PATCH] netfilter: remove route_key_size field in struct nf_afinfo
+
+This is only needed by nf_queue, place this code where it belongs.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -311,7 +311,6 @@ struct nf_queue_entry;
+
+ struct nf_afinfo {
+ unsigned short family;
+- int route_key_size;
+ };
+
+ extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO];
+--- a/net/ipv4/netfilter.c
++++ b/net/ipv4/netfilter.c
+@@ -164,7 +164,6 @@ EXPORT_SYMBOL_GPL(nf_ip_route);
+
+ static const struct nf_afinfo nf_ip_afinfo = {
+ .family = AF_INET,
+- .route_key_size = sizeof(struct ip_rt_info),
+ };
+
+ static int __init ipv4_netfilter_init(void)
+--- a/net/ipv6/netfilter.c
++++ b/net/ipv6/netfilter.c
+@@ -177,7 +177,6 @@ static const struct nf_ipv6_ops ipv6ops
+
+ static const struct nf_afinfo nf_ip6_afinfo = {
+ .family = AF_INET6,
+- .route_key_size = sizeof(struct ip6_rt_info),
+ };
+
+ int __init ipv6_netfilter_init(void)
+--- a/net/netfilter/nf_queue.c
++++ b/net/netfilter/nf_queue.c
+@@ -15,6 +15,8 @@
+ #include <linux/netfilter_bridge.h>
+ #include <linux/seq_file.h>
+ #include <linux/rcupdate.h>
++#include <linux/netfilter_ipv4.h>
++#include <linux/netfilter_ipv6.h>
+ #include <net/protocol.h>
+ #include <net/netfilter/nf_queue.h>
+ #include <net/dst.h>
+@@ -145,9 +147,9 @@ static int __nf_queue(struct sk_buff *sk
+ {
+ int status = -ENOENT;
+ struct nf_queue_entry *entry = NULL;
+- const struct nf_afinfo *afinfo;
+ const struct nf_queue_handler *qh;
+ struct net *net = state->net;
++ unsigned int route_key_size;
+
+ /* QUEUE == DROP if no one is waiting, to be safe. */
+ qh = rcu_dereference(net->nf.queue_handler);
+@@ -156,11 +158,19 @@ static int __nf_queue(struct sk_buff *sk
+ goto err;
+ }
+
+- afinfo = nf_get_afinfo(state->pf);
+- if (!afinfo)
+- goto err;
++ switch (state->pf) {
++ case AF_INET:
++ route_key_size = sizeof(struct ip_rt_info);
++ break;
++ case AF_INET6:
++ route_key_size = sizeof(struct ip6_rt_info);
++ break;
++ default:
++ route_key_size = 0;
++ break;
++ }
+
+- entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
++ entry = kmalloc(sizeof(*entry) + route_key_size, GFP_ATOMIC);
+ if (!entry) {
+ status = -ENOMEM;
+ goto err;
+@@ -170,7 +180,7 @@ static int __nf_queue(struct sk_buff *sk
+ .skb = skb,
+ .state = *state,
+ .hook_index = index,
+- .size = sizeof(*entry) + afinfo->route_key_size,
++ .size = sizeof(*entry) + route_key_size,
+ };
+
+ nf_queue_entry_get_refs(entry);
--- /dev/null
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sat, 9 Dec 2017 17:05:53 +0100
+Subject: [PATCH] netfilter: remove struct nf_afinfo and its helper functions
+
+This abstraction has no clients anymore, remove it.
+
+This is what remains from previous authors, so correct the copyright
+statement after recent modifications and code removal.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/linux/netfilter.h
++++ b/include/linux/netfilter.h
+@@ -309,16 +309,6 @@ int skb_make_writable(struct sk_buff *sk
+ struct flowi;
+ struct nf_queue_entry;
+
+-struct nf_afinfo {
+- unsigned short family;
+-};
+-
+-extern const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO];
+-static inline const struct nf_afinfo *nf_get_afinfo(unsigned short family)
+-{
+- return rcu_dereference(nf_afinfo[family]);
+-}
+-
+ __sum16 nf_checksum(struct sk_buff *skb, unsigned int hook,
+ unsigned int dataoff, u_int8_t protocol,
+ unsigned short family);
+@@ -330,9 +320,6 @@ int nf_route(struct net *net, struct dst
+ bool strict, unsigned short family);
+ int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry);
+
+-int nf_register_afinfo(const struct nf_afinfo *afinfo);
+-void nf_unregister_afinfo(const struct nf_afinfo *afinfo);
+-
+ #include <net/flow.h>
+ extern void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
+
+--- a/net/bridge/netfilter/nf_tables_bridge.c
++++ b/net/bridge/netfilter/nf_tables_bridge.c
+@@ -95,30 +95,23 @@ static const struct nf_chain_type filter
+ (1 << NF_BR_POST_ROUTING),
+ };
+
+-static const struct nf_afinfo nf_br_afinfo = {
+- .family = AF_BRIDGE,
+- .route_key_size = 0,
+-};
+-
+ static int __init nf_tables_bridge_init(void)
+ {
+ int ret;
+
+- nf_register_afinfo(&nf_br_afinfo);
+ ret = nft_register_chain_type(&filter_bridge);
+ if (ret < 0)
+- goto err1;
++ return ret;
+
+ ret = register_pernet_subsys(&nf_tables_bridge_net_ops);
+ if (ret < 0)
+- goto err2;
++ goto err_register_subsys;
+
+ return ret;
+
+-err2:
++err_register_subsys:
+ nft_unregister_chain_type(&filter_bridge);
+-err1:
+- nf_unregister_afinfo(&nf_br_afinfo);
++
+ return ret;
+ }
+
+@@ -126,7 +119,6 @@ static void __exit nf_tables_bridge_exit
+ {
+ unregister_pernet_subsys(&nf_tables_bridge_net_ops);
+ nft_unregister_chain_type(&filter_bridge);
+- nf_unregister_afinfo(&nf_br_afinfo);
+ }
+
+ module_init(nf_tables_bridge_init);
+--- a/net/ipv4/netfilter.c
++++ b/net/ipv4/netfilter.c
+@@ -161,13 +161,3 @@ int nf_ip_route(struct net *net, struct
+ return 0;
+ }
+ EXPORT_SYMBOL_GPL(nf_ip_route);
+-
+-static const struct nf_afinfo nf_ip_afinfo = {
+- .family = AF_INET,
+-};
+-
+-static int __init ipv4_netfilter_init(void)
+-{
+- return nf_register_afinfo(&nf_ip_afinfo);
+-}
+-subsys_initcall(ipv4_netfilter_init);
+--- a/net/ipv6/netfilter.c
++++ b/net/ipv6/netfilter.c
+@@ -175,14 +175,10 @@ static const struct nf_ipv6_ops ipv6ops
+ .reroute = nf_ip6_reroute,
+ };
+
+-static const struct nf_afinfo nf_ip6_afinfo = {
+- .family = AF_INET6,
+-};
+-
+ int __init ipv6_netfilter_init(void)
+ {
+ RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
+- return nf_register_afinfo(&nf_ip6_afinfo);
++ return 0;
+ }
+
+ /* This can be called from inet6_init() on errors, so it cannot
+@@ -191,5 +187,4 @@ int __init ipv6_netfilter_init(void)
+ void ipv6_netfilter_fini(void)
+ {
+ RCU_INIT_POINTER(nf_ipv6_ops, NULL);
+- nf_unregister_afinfo(&nf_ip6_afinfo);
+ }
+--- a/net/netfilter/core.c
++++ b/net/netfilter/core.c
+@@ -4,8 +4,7 @@
+ * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
+ * way.
+ *
+- * Rusty Russell (C)2000 -- This code is GPL.
+- * Patrick McHardy (c) 2006-2012
++ * This code is GPL.
+ */
+ #include <linux/kernel.h>
+ #include <linux/netfilter.h>
+@@ -28,34 +27,12 @@
+
+ #include "nf_internals.h"
+
+-static DEFINE_MUTEX(afinfo_mutex);
+-
+-const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
+-EXPORT_SYMBOL(nf_afinfo);
+ const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
+ EXPORT_SYMBOL_GPL(nf_ipv6_ops);
+
+ DEFINE_PER_CPU(bool, nf_skb_duplicated);
+ EXPORT_SYMBOL_GPL(nf_skb_duplicated);
+
+-int nf_register_afinfo(const struct nf_afinfo *afinfo)
+-{
+- mutex_lock(&afinfo_mutex);
+- RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
+- mutex_unlock(&afinfo_mutex);
+- return 0;
+-}
+-EXPORT_SYMBOL_GPL(nf_register_afinfo);
+-
+-void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
+-{
+- mutex_lock(&afinfo_mutex);
+- RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL);
+- mutex_unlock(&afinfo_mutex);
+- synchronize_rcu();
+-}
+-EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
+-
+ #ifdef HAVE_JUMP_LABEL
+ struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+ EXPORT_SYMBOL(nf_hooks_needed);
--- /dev/null
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sun, 10 Dec 2017 01:42:58 +0100
+Subject: [PATCH] netfilter: nf_tables_arp: don't set forward chain
+
+46928a0b49f3 ("netfilter: nf_tables: remove multihook chains and
+families") already removed this; this is a leftover.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/net/ipv4/netfilter/nf_tables_arp.c
++++ b/net/ipv4/netfilter/nf_tables_arp.c
+@@ -34,7 +34,6 @@ static struct nft_af_info nft_af_arp __r
+ .hooks = {
+ [NF_ARP_IN] = nft_do_chain_arp,
+ [NF_ARP_OUT] = nft_do_chain_arp,
+- [NF_ARP_FORWARD] = nft_do_chain_arp,
+ },
+ };
+
--- /dev/null
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sat, 9 Dec 2017 15:43:17 +0100
+Subject: [PATCH] netfilter: nf_tables: remove hooks from family definition
+
+They don't belong to the family definition, move them to the filter
+chain type definition instead.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/include/net/netfilter/nf_tables.h
++++ b/include/net/netfilter/nf_tables.h
+@@ -875,7 +875,7 @@ enum nft_chain_type {
+ * @family: address family
+ * @owner: module owner
+ * @hook_mask: mask of valid hooks
+- * @hooks: hookfn overrides
++ * @hooks: array of hook functions
+ */
+ struct nf_chain_type {
+ const char *name;
+@@ -969,7 +969,6 @@ enum nft_af_flags {
+ * @owner: module owner
+ * @tables: used internally
+ * @flags: family flags
+- * @hooks: hookfn overrides for packet validation
+ */
+ struct nft_af_info {
+ struct list_head list;
+@@ -978,7 +977,6 @@ struct nft_af_info {
+ struct module *owner;
+ struct list_head tables;
+ u32 flags;
+- nf_hookfn *hooks[NF_MAX_HOOKS];
+ };
+
+ int nft_register_afinfo(struct net *, struct nft_af_info *);
+--- a/net/bridge/netfilter/nf_tables_bridge.c
++++ b/net/bridge/netfilter/nf_tables_bridge.c
+@@ -46,13 +46,6 @@ static struct nft_af_info nft_af_bridge
+ .family = NFPROTO_BRIDGE,
+ .nhooks = NF_BR_NUMHOOKS,
+ .owner = THIS_MODULE,
+- .hooks = {
+- [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
+- [NF_BR_LOCAL_IN] = nft_do_chain_bridge,
+- [NF_BR_FORWARD] = nft_do_chain_bridge,
+- [NF_BR_LOCAL_OUT] = nft_do_chain_bridge,
+- [NF_BR_POST_ROUTING] = nft_do_chain_bridge,
+- },
+ };
+
+ static int nf_tables_bridge_init_net(struct net *net)
+@@ -93,6 +86,13 @@ static const struct nf_chain_type filter
+ (1 << NF_BR_FORWARD) |
+ (1 << NF_BR_LOCAL_OUT) |
+ (1 << NF_BR_POST_ROUTING),
++ .hooks = {
++ [NF_BR_PRE_ROUTING] = nft_do_chain_bridge,
++ [NF_BR_LOCAL_IN] = nft_do_chain_bridge,
++ [NF_BR_FORWARD] = nft_do_chain_bridge,
++ [NF_BR_LOCAL_OUT] = nft_do_chain_bridge,
++ [NF_BR_POST_ROUTING] = nft_do_chain_bridge,
++ },
+ };
+
+ static int __init nf_tables_bridge_init(void)
+--- a/net/ipv4/netfilter/nf_tables_arp.c
++++ b/net/ipv4/netfilter/nf_tables_arp.c
+@@ -31,10 +31,6 @@ static struct nft_af_info nft_af_arp __r
+ .family = NFPROTO_ARP,
+ .nhooks = NF_ARP_NUMHOOKS,
+ .owner = THIS_MODULE,
+- .hooks = {
+- [NF_ARP_IN] = nft_do_chain_arp,
+- [NF_ARP_OUT] = nft_do_chain_arp,
+- },
+ };
+
+ static int nf_tables_arp_init_net(struct net *net)
+@@ -72,6 +68,10 @@ static const struct nf_chain_type filter
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_ARP_IN) |
+ (1 << NF_ARP_OUT),
++ .hooks = {
++ [NF_ARP_IN] = nft_do_chain_arp,
++ [NF_ARP_OUT] = nft_do_chain_arp,
++ },
+ };
+
+ static int __init nf_tables_arp_init(void)
+--- a/net/ipv4/netfilter/nf_tables_ipv4.c
++++ b/net/ipv4/netfilter/nf_tables_ipv4.c
+@@ -49,13 +49,6 @@ static struct nft_af_info nft_af_ipv4 __
+ .family = NFPROTO_IPV4,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+- .hooks = {
+- [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
+- [NF_INET_LOCAL_OUT] = nft_ipv4_output,
+- [NF_INET_FORWARD] = nft_do_chain_ipv4,
+- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
+- [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
+- },
+ };
+
+ static int nf_tables_ipv4_init_net(struct net *net)
+@@ -96,6 +89,13 @@ static const struct nf_chain_type filter
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
++ .hooks = {
++ [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
++ [NF_INET_LOCAL_OUT] = nft_ipv4_output,
++ [NF_INET_FORWARD] = nft_do_chain_ipv4,
++ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
++ [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
++ },
+ };
+
+ static int __init nf_tables_ipv4_init(void)
+--- a/net/ipv6/netfilter/nf_tables_ipv6.c
++++ b/net/ipv6/netfilter/nf_tables_ipv6.c
+@@ -46,13 +46,6 @@ static struct nft_af_info nft_af_ipv6 __
+ .family = NFPROTO_IPV6,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+- .hooks = {
+- [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
+- [NF_INET_LOCAL_OUT] = nft_ipv6_output,
+- [NF_INET_FORWARD] = nft_do_chain_ipv6,
+- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
+- [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
+- },
+ };
+
+ static int nf_tables_ipv6_init_net(struct net *net)
+@@ -93,6 +86,13 @@ static const struct nf_chain_type filter
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
++ .hooks = {
++ [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
++ [NF_INET_LOCAL_OUT] = nft_ipv6_output,
++ [NF_INET_FORWARD] = nft_do_chain_ipv6,
++ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
++ [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
++ },
+ };
+
+ static int __init nf_tables_ipv6_init(void)
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -1383,7 +1383,6 @@ static int nf_tables_addchain(struct nft
+ if (nla[NFTA_CHAIN_HOOK]) {
+ struct nft_chain_hook hook;
+ struct nf_hook_ops *ops;
+- nf_hookfn *hookfn;
+
+ err = nft_chain_parse_hook(net, nla, afi, &hook, create);
+ if (err < 0)
+@@ -1409,7 +1408,6 @@ static int nf_tables_addchain(struct nft
+ static_branch_inc(&nft_counters_enabled);
+ }
+
+- hookfn = hook.type->hooks[hook.num];
+ basechain->type = hook.type;
+ chain = &basechain->chain;
+
+@@ -1418,10 +1416,8 @@ static int nf_tables_addchain(struct nft
+ ops->hooknum = hook.num;
+ ops->priority = hook.priority;
+ ops->priv = chain;
+- ops->hook = afi->hooks[ops->hooknum];
++ ops->hook = hook.type->hooks[ops->hooknum];
+ ops->dev = hook.dev;
+- if (hookfn)
+- ops->hook = hookfn;
+
+ if (basechain->type->type == NFT_CHAIN_T_NAT)
+ ops->nat_hook = true;
+--- a/net/netfilter/nf_tables_inet.c
++++ b/net/netfilter/nf_tables_inet.c
+@@ -74,13 +74,6 @@ static struct nft_af_info nft_af_inet __
+ .family = NFPROTO_INET,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+- .hooks = {
+- [NF_INET_LOCAL_IN] = nft_do_chain_inet,
+- [NF_INET_LOCAL_OUT] = nft_inet_output,
+- [NF_INET_FORWARD] = nft_do_chain_inet,
+- [NF_INET_PRE_ROUTING] = nft_do_chain_inet,
+- [NF_INET_POST_ROUTING] = nft_do_chain_inet,
+- },
+ };
+
+ static int __net_init nf_tables_inet_init_net(struct net *net)
+@@ -121,6 +114,13 @@ static const struct nf_chain_type filter
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
++ .hooks = {
++ [NF_INET_LOCAL_IN] = nft_do_chain_inet,
++ [NF_INET_LOCAL_OUT] = nft_inet_output,
++ [NF_INET_FORWARD] = nft_do_chain_inet,
++ [NF_INET_PRE_ROUTING] = nft_do_chain_inet,
++ [NF_INET_POST_ROUTING] = nft_do_chain_inet,
++ },
+ };
+
+ static int __init nf_tables_inet_init(void)
+--- a/net/netfilter/nf_tables_netdev.c
++++ b/net/netfilter/nf_tables_netdev.c
+@@ -43,9 +43,6 @@ static struct nft_af_info nft_af_netdev
+ .nhooks = NF_NETDEV_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .flags = NFT_AF_NEEDS_DEV,
+- .hooks = {
+- [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
+- },
+ };
+
+ static int nf_tables_netdev_init_net(struct net *net)
+@@ -82,6 +79,9 @@ static const struct nf_chain_type nft_fi
+ .family = NFPROTO_NETDEV,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_NETDEV_INGRESS),
++ .hooks = {
++ [NF_NETDEV_INGRESS] = nft_do_chain_netdev,
++ },
+ };
+
+ static void nft_netdev_event(unsigned long event, struct net_device *dev,
--- /dev/null
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+Date: Sat, 30 Dec 2017 22:41:46 +0100
+Subject: [PATCH] netfilter: remove defensive check on malformed packets from
+ raw sockets
+
+Users cannot forge malformed IPv4/IPv6 headers via raw sockets that they
+can inject into the stack. Specifically, not for IPv4 since 55888dfb6ba7
+("AF_RAW: Augment raw_send_hdrinc to expand skb to fit iphdr->ihl
+(v2)"). IPv6 raw sockets also ensure that packets have a well-formed
+IPv6 header available in the skbuff.
+
+At a quick glance, br_netfilter also validates layer 3 headers and it
+drops both malformed IPv4 and IPv6 packets.
+
+Therefore, let's remove this defensive check all over the place.
+
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+
+--- a/net/ipv4/netfilter/iptable_filter.c
++++ b/net/ipv4/netfilter/iptable_filter.c
+@@ -38,12 +38,6 @@ static unsigned int
+ iptable_filter_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+ {
+- if (state->hook == NF_INET_LOCAL_OUT &&
+- (skb->len < sizeof(struct iphdr) ||
+- ip_hdrlen(skb) < sizeof(struct iphdr)))
+- /* root is playing with raw sockets. */
+- return NF_ACCEPT;
+-
+ return ipt_do_table(skb, state, state->net->ipv4.iptable_filter);
+ }
+
+--- a/net/ipv4/netfilter/iptable_mangle.c
++++ b/net/ipv4/netfilter/iptable_mangle.c
+@@ -49,11 +49,6 @@ ipt_mangle_out(struct sk_buff *skb, cons
+ u_int32_t mark;
+ int err;
+
+- /* root is playing with raw sockets. */
+- if (skb->len < sizeof(struct iphdr) ||
+- ip_hdrlen(skb) < sizeof(struct iphdr))
+- return NF_ACCEPT;
+-
+ /* Save things which could affect route */
+ mark = skb->mark;
+ iph = ip_hdr(skb);
+--- a/net/ipv4/netfilter/iptable_raw.c
++++ b/net/ipv4/netfilter/iptable_raw.c
+@@ -26,12 +26,6 @@ static unsigned int
+ iptable_raw_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+ {
+- if (state->hook == NF_INET_LOCAL_OUT &&
+- (skb->len < sizeof(struct iphdr) ||
+- ip_hdrlen(skb) < sizeof(struct iphdr)))
+- /* root is playing with raw sockets. */
+- return NF_ACCEPT;
+-
+ return ipt_do_table(skb, state, state->net->ipv4.iptable_raw);
+ }
+
+--- a/net/ipv4/netfilter/iptable_security.c
++++ b/net/ipv4/netfilter/iptable_security.c
+@@ -43,12 +43,6 @@ static unsigned int
+ iptable_security_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+ {
+- if (state->hook == NF_INET_LOCAL_OUT &&
+- (skb->len < sizeof(struct iphdr) ||
+- ip_hdrlen(skb) < sizeof(struct iphdr)))
+- /* Somebody is playing with raw sockets. */
+- return NF_ACCEPT;
+-
+ return ipt_do_table(skb, state, state->net->ipv4.iptable_security);
+ }
+
+--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
++++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+@@ -154,11 +154,6 @@ static unsigned int ipv4_conntrack_local
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+ {
+- /* root is playing with raw sockets. */
+- if (skb->len < sizeof(struct iphdr) ||
+- ip_hdrlen(skb) < sizeof(struct iphdr))
+- return NF_ACCEPT;
+-
+ if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *tmpl;
+--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
++++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+@@ -355,11 +355,6 @@ nf_nat_ipv4_out(void *priv, struct sk_bu
+ #endif
+ unsigned int ret;
+
+- /* root is playing with raw sockets. */
+- if (skb->len < sizeof(struct iphdr) ||
+- ip_hdrlen(skb) < sizeof(struct iphdr))
+- return NF_ACCEPT;
+-
+ ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
+ #ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+@@ -395,11 +390,6 @@ nf_nat_ipv4_local_fn(void *priv, struct
+ unsigned int ret;
+ int err;
+
+- /* root is playing with raw sockets. */
+- if (skb->len < sizeof(struct iphdr) ||
+- ip_hdrlen(skb) < sizeof(struct iphdr))
+- return NF_ACCEPT;
+-
+ ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+--- a/net/ipv4/netfilter/nf_tables_ipv4.c
++++ b/net/ipv4/netfilter/nf_tables_ipv4.c
+@@ -30,21 +30,6 @@ static unsigned int nft_do_chain_ipv4(vo
+ return nft_do_chain(&pkt, priv);
+ }
+
+-static unsigned int nft_ipv4_output(void *priv,
+- struct sk_buff *skb,
+- const struct nf_hook_state *state)
+-{
+- if (unlikely(skb->len < sizeof(struct iphdr) ||
+- ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
+- if (net_ratelimit())
+- pr_info("nf_tables_ipv4: ignoring short SOCK_RAW "
+- "packet\n");
+- return NF_ACCEPT;
+- }
+-
+- return nft_do_chain_ipv4(priv, skb, state);
+-}
+-
+ static struct nft_af_info nft_af_ipv4 __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .nhooks = NF_INET_NUMHOOKS,
+@@ -91,7 +76,7 @@ static const struct nf_chain_type filter
+ (1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
+- [NF_INET_LOCAL_OUT] = nft_ipv4_output,
++ [NF_INET_LOCAL_OUT] = nft_do_chain_ipv4,
+ [NF_INET_FORWARD] = nft_do_chain_ipv4,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
+--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
++++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
+@@ -33,11 +33,6 @@ static unsigned int nf_route_table_hook(
+ const struct iphdr *iph;
+ int err;
+
+- /* root is playing with raw sockets. */
+- if (skb->len < sizeof(struct iphdr) ||
+- ip_hdrlen(skb) < sizeof(struct iphdr))
+- return NF_ACCEPT;
+-
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb);
+
+--- a/net/ipv6/netfilter/ip6table_mangle.c
++++ b/net/ipv6/netfilter/ip6table_mangle.c
+@@ -42,14 +42,6 @@ ip6t_mangle_out(struct sk_buff *skb, con
+ u_int8_t hop_limit;
+ u_int32_t flowlabel, mark;
+ int err;
+-#if 0
+- /* root is playing with raw sockets. */
+- if (skb->len < sizeof(struct iphdr) ||
+- ip_hdrlen(skb) < sizeof(struct iphdr)) {
+- net_warn_ratelimited("ip6t_hook: happy cracking\n");
+- return NF_ACCEPT;
+- }
+-#endif
+
+ /* save source/dest address, mark, hoplimit, flowlabel, priority, */
+ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
+--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
++++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+@@ -176,11 +176,6 @@ static unsigned int ipv6_conntrack_local
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+ {
+- /* root is playing with raw sockets. */
+- if (skb->len < sizeof(struct ipv6hdr)) {
+- net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
+- return NF_ACCEPT;
+- }
+ return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
+ }
+
+--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
++++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+@@ -372,10 +372,6 @@ nf_nat_ipv6_out(void *priv, struct sk_bu
+ #endif
+ unsigned int ret;
+
+- /* root is playing with raw sockets. */
+- if (skb->len < sizeof(struct ipv6hdr))
+- return NF_ACCEPT;
+-
+ ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
+ #ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+@@ -411,10 +407,6 @@ nf_nat_ipv6_local_fn(void *priv, struct
+ unsigned int ret;
+ int err;
+
+- /* root is playing with raw sockets. */
+- if (skb->len < sizeof(struct ipv6hdr))
+- return NF_ACCEPT;
+-
+ ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+--- a/net/ipv6/netfilter/nf_tables_ipv6.c
++++ b/net/ipv6/netfilter/nf_tables_ipv6.c
+@@ -28,20 +28,6 @@ static unsigned int nft_do_chain_ipv6(vo
+ return nft_do_chain(&pkt, priv);
+ }
+
+-static unsigned int nft_ipv6_output(void *priv,
+- struct sk_buff *skb,
+- const struct nf_hook_state *state)
+-{
+- if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
+- if (net_ratelimit())
+- pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
+- "packet\n");
+- return NF_ACCEPT;
+- }
+-
+- return nft_do_chain_ipv6(priv, skb, state);
+-}
+-
+ static struct nft_af_info nft_af_ipv6 __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .nhooks = NF_INET_NUMHOOKS,
+@@ -88,7 +74,7 @@ static const struct nf_chain_type filter
+ (1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
+- [NF_INET_LOCAL_OUT] = nft_ipv6_output,
++ [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6,
+ [NF_INET_FORWARD] = nft_do_chain_ipv6,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
+--- a/net/netfilter/nf_tables_inet.c
++++ b/net/netfilter/nf_tables_inet.c
+@@ -38,38 +38,6 @@ static unsigned int nft_do_chain_inet(vo
+ return nft_do_chain(&pkt, priv);
+ }
+
+-static unsigned int nft_inet_output(void *priv, struct sk_buff *skb,
+- const struct nf_hook_state *state)
+-{
+- struct nft_pktinfo pkt;
+-
+- nft_set_pktinfo(&pkt, skb, state);
+-
+- switch (state->pf) {
+- case NFPROTO_IPV4:
+- if (unlikely(skb->len < sizeof(struct iphdr) ||
+- ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
+- if (net_ratelimit())
+- pr_info("ignoring short SOCK_RAW packet\n");
+- return NF_ACCEPT;
+- }
+- nft_set_pktinfo_ipv4(&pkt, skb);
+- break;
+- case NFPROTO_IPV6:
+- if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
+- if (net_ratelimit())
+- pr_info("ignoring short SOCK_RAW packet\n");
+- return NF_ACCEPT;
+- }
+- nft_set_pktinfo_ipv6(&pkt, skb);
+- break;
+- default:
+- break;
+- }
+-
+- return nft_do_chain(&pkt, priv);
+-}
+-
+ static struct nft_af_info nft_af_inet __read_mostly = {
+ .family = NFPROTO_INET,
+ .nhooks = NF_INET_NUMHOOKS,
+@@ -116,7 +84,7 @@ static const struct nf_chain_type filter
+ (1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_inet,
+- [NF_INET_LOCAL_OUT] = nft_inet_output,
++ [NF_INET_LOCAL_OUT] = nft_do_chain_inet,
+ [NF_INET_FORWARD] = nft_do_chain_inet,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_inet,
+ [NF_INET_POST_ROUTING] = nft_do_chain_inet,