kernel: Backport upstream flowtable patches from 5.15
[openwrt/openwrt.git] / target / linux / generic / backport-5.10 / 613-v5.15-02-netfilter-conntrack-remove-offload_pickup-sysctl-aga.patch
diff --git a/target/linux/generic/backport-5.10/613-v5.15-02-netfilter-conntrack-remove-offload_pickup-sysctl-aga.patch b/target/linux/generic/backport-5.10/613-v5.15-02-netfilter-conntrack-remove-offload_pickup-sysctl-aga.patch
new file mode 100644 (file)
index 0000000..0ed23e9
--- /dev/null
@@ -0,0 +1,184 @@
+From 4592ee7f525c4683ec9e290381601fdee50ae110 Mon Sep 17 00:00:00 2001
+From: Florian Westphal <fw@strlen.de>
+Date: Wed, 4 Aug 2021 15:02:15 +0200
+Subject: [PATCH] netfilter: conntrack: remove offload_pickup sysctl again
+
+These two sysctls were added because the hardcoded defaults (2 minutes
+for tcp, 30 seconds for udp) turned out to be too low for some setups.
+
+They appeared in 5.14-rc1, so it should be fine to remove them again.
+
+Marcelo convinced me that there should be no difference between a flow
+that was offloaded vs. a flow that was not wrt. timeout handling.
+Thus the default is changed to those for TCP established and UDP stream,
+5 days and 120 seconds, respectively.
+
+Marcelo also suggested accounting for the timeout value used for the
+offloading; this avoids an increase beyond the value in the conntrack sysctl
+and will also instantly expire the conntrack entry with altered sysctls.
+
+Example:
+   nf_conntrack_udp_timeout_stream=60
+   nf_flowtable_udp_timeout=60
+
+This will remove offloaded udp flows after one minute, rather than two.
+
+An earlier version of this patch also cleared the ASSURED bit to
+allow nf_conntrack to evict the entry via early_drop (i.e., table full).
+However, it looks like we can safely assume that a connection timed out
+via HW is still in the established state, so this isn't needed.
+
+Quoting Oz:
+ [..] the hardware sends all packets with a set FIN flags to sw.
+ [..] Connections that are aged in hardware are expected to be in the
+ established state.
+
+In case it turns out that back-to-sw-path transition can occur for
+'dodgy' connections too (e.g., one side disappeared while software-path
+would have been in RETRANS timeout), we can adjust this later.
+
+Cc: Oz Shlomo <ozsh@nvidia.com>
+Cc: Paul Blakey <paulb@nvidia.com>
+Suggested-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Reviewed-by: Oz Shlomo <ozsh@nvidia.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+---
+ Documentation/networking/nf_conntrack-sysctl.rst | 10 ----------
+ include/net/netns/conntrack.h                    |  2 --
+ net/netfilter/nf_conntrack_proto_tcp.c           |  1 -
+ net/netfilter/nf_conntrack_proto_udp.c           |  1 -
+ net/netfilter/nf_conntrack_standalone.c          | 16 ----------------
+ net/netfilter/nf_flow_table_core.c               | 11 ++++++++---
+ 6 files changed, 8 insertions(+), 33 deletions(-)
+
+--- a/include/net/netns/conntrack.h
++++ b/include/net/netns/conntrack.h
+@@ -29,7 +29,6 @@ struct nf_tcp_net {
+       int tcp_max_retrans;
+ #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+       unsigned int offload_timeout;
+-      unsigned int offload_pickup;
+ #endif
+ };
+@@ -43,7 +42,6 @@ struct nf_udp_net {
+       unsigned int timeouts[UDP_CT_MAX];
+ #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+       unsigned int offload_timeout;
+-      unsigned int offload_pickup;
+ #endif
+ };
+--- a/net/netfilter/nf_conntrack_proto_tcp.c
++++ b/net/netfilter/nf_conntrack_proto_tcp.c
+@@ -1450,7 +1450,6 @@ void nf_conntrack_tcp_init_net(struct ne
+ #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+       tn->offload_timeout = 30 * HZ;
+-      tn->offload_pickup = 120 * HZ;
+ #endif
+ }
+--- a/net/netfilter/nf_conntrack_proto_udp.c
++++ b/net/netfilter/nf_conntrack_proto_udp.c
+@@ -276,7 +276,6 @@ void nf_conntrack_udp_init_net(struct ne
+ #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+       un->offload_timeout = 30 * HZ;
+-      un->offload_pickup = 30 * HZ;
+ #endif
+ }
+--- a/net/netfilter/nf_conntrack_standalone.c
++++ b/net/netfilter/nf_conntrack_standalone.c
+@@ -569,7 +569,6 @@ enum nf_ct_sysctl_index {
+       NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
+ #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+       NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
+-      NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP,
+ #endif
+       NF_SYSCTL_CT_PROTO_TCP_LOOSE,
+       NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
+@@ -578,7 +577,6 @@ enum nf_ct_sysctl_index {
+       NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
+ #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+       NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
+-      NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP,
+ #endif
+       NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
+       NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
+@@ -773,12 +771,6 @@ static struct ctl_table nf_ct_sysctl_tab
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_jiffies,
+       },
+-      [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP] = {
+-              .procname       = "nf_flowtable_tcp_pickup",
+-              .maxlen         = sizeof(unsigned int),
+-              .mode           = 0644,
+-              .proc_handler   = proc_dointvec_jiffies,
+-      },
+ #endif
+       [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
+               .procname       = "nf_conntrack_tcp_loose",
+@@ -821,12 +813,6 @@ static struct ctl_table nf_ct_sysctl_tab
+               .mode           = 0644,
+               .proc_handler   = proc_dointvec_jiffies,
+       },
+-      [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP] = {
+-              .procname       = "nf_flowtable_udp_pickup",
+-              .maxlen         = sizeof(unsigned int),
+-              .mode           = 0644,
+-              .proc_handler   = proc_dointvec_jiffies,
+-      },
+ #endif
+       [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
+               .procname       = "nf_conntrack_icmp_timeout",
+@@ -1006,7 +992,6 @@ static void nf_conntrack_standalone_init
+ #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+       table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
+-      table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD_PICKUP].data = &tn->offload_pickup;
+ #endif
+ }
+@@ -1098,7 +1083,6 @@ static int nf_conntrack_standalone_init_
+       table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
+ #if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
+       table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
+-      table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD_PICKUP].data = &un->offload_pickup;
+ #endif
+       nf_conntrack_standalone_init_tcp_sysctl(net, table);
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -182,20 +182,25 @@ static void flow_offload_fixup_ct_timeou
+ {
+       struct net *net = nf_ct_net(ct);
+       int l4num = nf_ct_protonum(ct);
+-      unsigned int timeout;
++      s32 timeout;
+       if (l4num == IPPROTO_TCP) {
+               struct nf_tcp_net *tn = nf_tcp_pernet(net);
+-              timeout = tn->offload_pickup;
++              timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
++              timeout -= tn->offload_timeout;
+       } else if (l4num == IPPROTO_UDP) {
+               struct nf_udp_net *tn = nf_udp_pernet(net);
+-              timeout = tn->offload_pickup;
++              timeout = tn->timeouts[UDP_CT_REPLIED];
++              timeout -= tn->offload_timeout;
+       } else {
+               return;
+       }
++      if (timeout < 0)
++              timeout = 0;
++
+       if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
+               WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
+ }