kernel: add minimal TCP state tracking to flow offload support
author Felix Fietkau <nbd@nbd.name>
Sun, 25 Feb 2018 14:48:23 +0000 (15:48 +0100)
committer Felix Fietkau <nbd@nbd.name>
Sun, 25 Feb 2018 15:14:23 +0000 (16:14 +0100)
Fixes issues with connections hanging after more than 30 seconds of idle
time: when an idle flow offload entry expired, its conntrack entry was
deleted along with it instead of being handed back to the slow path.
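
In rough terms, the mechanism added by the patches below works like this:
the offload hook marks a flow for teardown when it sees a TCP FIN or RST,
lookups and the garbage collector then treat the flow as dead, and freeing
the flow hands the conntrack entry back to the slow path with a fresh
timeout instead of deleting it. A minimal userspace model of that state
machine (an illustration only; the flag and function names mirror the
kernel code in the patches, nothing here is literal kernel code):

/* Model of the teardown logic; see patches 361-364 for the real code. */
#include <stdbool.h>
#include <stdio.h>

#define FLOW_OFFLOAD_DYING    0x4
#define FLOW_OFFLOAD_TEARDOWN 0x8

struct flow_offload { unsigned int flags; };

/* patch 364: the offload hook sets TEARDOWN on FIN/RST */
static void tcp_state_check(struct flow_offload *flow, bool fin, bool rst)
{
	if (fin || rst)
		flow->flags |= FLOW_OFFLOAD_TEARDOWN;
}

/* patches 361/362: lookup and GC treat DYING and TEARDOWN as dead */
static bool flow_is_dead(const struct flow_offload *flow)
{
	return flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN);
}

/* patch 363: only DYING also deletes the conntrack entry; TEARDOWN
 * resets its timeout/state so the slow path takes over */
static void flow_free(struct flow_offload *flow)
{
	if (flow->flags & FLOW_OFFLOAD_DYING)
		puts("nf_ct_delete(): connection is gone");
	else
		puts("fixup ct timeout/state: slow path takes over");
}

int main(void)
{
	struct flow_offload flow = { 0 };

	tcp_state_check(&flow, false, true);	/* peer sent RST */
	if (flow_is_dead(&flow))
		flow_free(&flow);
	return 0;
}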

Signed-off-by: Felix Fietkau <nbd@nbd.name>
target/linux/generic/backport-4.14/360-netfilter-nf_flow_table-make-flow_offload_dead-inlin.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/361-netfilter-nf_flow_table-add-a-new-flow-state-for-tea.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/362-netfilter-nf_flow_table-in-flow_offload_lookup-skip-.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/363-netfilter-nf_flow_table-add-support-for-sending-flow.patch [new file with mode: 0644]
target/linux/generic/backport-4.14/364-netfilter-nf_flow_table-tear-down-TCP-flows-if-RST-o.patch [new file with mode: 0644]

diff --git a/target/linux/generic/backport-4.14/360-netfilter-nf_flow_table-make-flow_offload_dead-inlin.patch b/target/linux/generic/backport-4.14/360-netfilter-nf_flow_table-make-flow_offload_dead-inlin.patch
new file mode 100644 (file)
index 0000000..d33400c
--- /dev/null
@@ -0,0 +1,38 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sun, 25 Feb 2018 15:37:27 +0100
+Subject: [PATCH] netfilter: nf_flow_table: make flow_offload_dead inline
+
+It is too trivial to keep as a separate exported function.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -103,7 +103,10 @@ void nf_flow_table_cleanup(struct net *n
+ int nf_flow_table_init(struct nf_flowtable *flow_table);
+ void nf_flow_table_free(struct nf_flowtable *flow_table);
+-void flow_offload_dead(struct flow_offload *flow);
++static inline void flow_offload_dead(struct flow_offload *flow)
++{
++      flow->flags |= FLOW_OFFLOAD_DYING;
++}
+ int nf_flow_snat_port(const struct flow_offload *flow,
+                     struct sk_buff *skb, unsigned int thoff,
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -113,12 +113,6 @@ void flow_offload_free(struct flow_offlo
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_free);
+-void flow_offload_dead(struct flow_offload *flow)
+-{
+-      flow->flags |= FLOW_OFFLOAD_DYING;
+-}
+-EXPORT_SYMBOL_GPL(flow_offload_dead);
+-
+ static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
+ {
+       const struct flow_offload_tuple *tuple = data;
diff --git a/target/linux/generic/backport-4.14/361-netfilter-nf_flow_table-add-a-new-flow-state-for-tea.patch b/target/linux/generic/backport-4.14/361-netfilter-nf_flow_table-add-a-new-flow-state-for-tea.patch
new file mode 100644 (file)
index 0000000..83751d8
--- /dev/null
@@ -0,0 +1,74 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sun, 25 Feb 2018 15:38:31 +0100
+Subject: [PATCH] netfilter: nf_flow_table: add a new flow state for
+ tearing down offloading
+
+This state will be used to tear down the offload entry while keeping
+the conntrack entry alive.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/include/net/netfilter/nf_flow_table.h
++++ b/include/net/netfilter/nf_flow_table.h
+@@ -68,6 +68,7 @@ struct flow_offload_tuple_rhash {
+ #define FLOW_OFFLOAD_SNAT     0x1
+ #define FLOW_OFFLOAD_DNAT     0x2
+ #define FLOW_OFFLOAD_DYING    0x4
++#define FLOW_OFFLOAD_TEARDOWN 0x8
+ struct flow_offload {
+       struct flow_offload_tuple_rhash         tuplehash[FLOW_OFFLOAD_DIR_MAX];
+@@ -108,6 +109,11 @@ static inline void flow_offload_dead(str
+       flow->flags |= FLOW_OFFLOAD_DYING;
+ }
++static inline void flow_offload_teardown(struct flow_offload *flow)
++{
++      flow->flags |= FLOW_OFFLOAD_TEARDOWN;
++}
++
+ int nf_flow_snat_port(const struct flow_offload *flow,
+                     struct sk_buff *skb, unsigned int thoff,
+                     u8 protocol, enum flow_offload_tuple_dir dir);
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -226,11 +226,6 @@ static inline bool nf_flow_has_expired(c
+       return (__s32)(flow->timeout - (u32)jiffies) <= 0;
+ }
+-static inline bool nf_flow_is_dying(const struct flow_offload *flow)
+-{
+-      return flow->flags & FLOW_OFFLOAD_DYING;
+-}
+-
+ static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
+ {
+       struct flow_offload_tuple_rhash *tuplehash;
+@@ -258,7 +253,8 @@ static int nf_flow_offload_gc_step(struc
+               flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+               if (nf_flow_has_expired(flow) ||
+-                  nf_flow_is_dying(flow))
++                  (flow->flags & (FLOW_OFFLOAD_DYING |
++                                  FLOW_OFFLOAD_TEARDOWN)))
+                       flow_offload_del(flow_table, flow);
+       }
+ out:
+@@ -419,10 +415,14 @@ static void nf_flow_table_do_cleanup(str
+ {
+       struct net_device *dev = data;
+-      if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
++      if (!dev) {
++              flow_offload_teardown(flow);
+               return;
++      }
+-      flow_offload_dead(flow);
++      if (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
++          flow->tuplehash[1].tuple.iifidx == dev->ifindex)
++              flow_offload_dead(flow);
+ }
+ static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
diff --git a/target/linux/generic/backport-4.14/362-netfilter-nf_flow_table-in-flow_offload_lookup-skip-.patch b/target/linux/generic/backport-4.14/362-netfilter-nf_flow_table-in-flow_offload_lookup-skip-.patch
new file mode 100644 (file)
index 0000000..e6d7dd8
--- /dev/null
@@ -0,0 +1,36 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sun, 25 Feb 2018 15:39:56 +0100
+Subject: [PATCH] netfilter: nf_flow_table: in flow_offload_lookup, skip
+ entries being deleted
+
+Preparation for sending flows back to the slow path.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -178,8 +178,21 @@ struct flow_offload_tuple_rhash *
+ flow_offload_lookup(struct nf_flowtable *flow_table,
+                   struct flow_offload_tuple *tuple)
+ {
+-      return rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
+-                                    nf_flow_offload_rhash_params);
++      struct flow_offload_tuple_rhash *tuplehash;
++      struct flow_offload *flow;
++      int dir;
++
++      tuplehash = rhashtable_lookup_fast(&flow_table->rhashtable, tuple,
++                                         nf_flow_offload_rhash_params);
++      if (!tuplehash)
++              return NULL;
++
++      dir = tuplehash->tuple.dir;
++      flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
++      if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
++              return NULL;
++
++      return tuplehash;
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_lookup);
diff --git a/target/linux/generic/backport-4.14/363-netfilter-nf_flow_table-add-support-for-sending-flow.patch b/target/linux/generic/backport-4.14/363-netfilter-nf_flow_table-add-support-for-sending-flow.patch
new file mode 100644 (file)
index 0000000..b4d80a9
--- /dev/null
@@ -0,0 +1,64 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sun, 25 Feb 2018 15:41:11 +0100
+Subject: [PATCH] netfilter: nf_flow_table: add support for sending flows
+ back to the slow path
+
+Reset the timeout. For TCP, also set the state so that the next
+incoming packets are used to re-establish window tracking.
+This allows the slow path to take over again once the offload state
+has been torn down.
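+
+For illustration, a minimal sketch of what the fixup amounts to for an
+offloaded TCP flow (jiffies-based, as in the code below; the default
+established timeout is 5 days):
+
+	ct->proto.tcp.state = TCP_CONNTRACK_IGNORE; /* relearn state/window */
+	ct->timeout = nfct_time_stamp + timeouts[TCP_CONNTRACK_ESTABLISHED];
+	clear_bit(IPS_OFFLOAD_BIT, &ct->status); /* slow path owns it again */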
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -100,6 +100,36 @@ err_ct_refcnt:
+ }
+ EXPORT_SYMBOL_GPL(flow_offload_alloc);
++static void flow_offload_fixup_ct_state(struct nf_conn *ct)
++{
++      const struct nf_conntrack_l4proto *l4proto;
++      struct net *net = nf_ct_net(ct);
++      unsigned int *timeouts;
++      unsigned int timeout;
++      int l4num;
++
++      l4num = nf_ct_protonum(ct);
++      l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), l4num);
++      if (!l4proto)
++              return;
++
++      timeouts = l4proto->get_timeouts(net);
++      if (!timeouts)
++              return;
++
++      if (l4num == IPPROTO_TCP) {
++              timeout = timeouts[TCP_CONNTRACK_ESTABLISHED];
++              ct->proto.tcp.state = TCP_CONNTRACK_IGNORE;
++      } else if (l4num == IPPROTO_UDP) {
++              timeout = timeouts[UDP_CT_REPLIED];
++      } else {
++              return;
++      }
++
++      ct->timeout = nfct_time_stamp + timeout;
++      clear_bit(IPS_OFFLOAD_BIT, &ct->status);
++}
++
+ void flow_offload_free(struct flow_offload *flow)
+ {
+       struct flow_offload_entry *e;
+@@ -107,7 +137,10 @@ void flow_offload_free(struct flow_offlo
+       dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
+       dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
+       e = container_of(flow, struct flow_offload_entry, flow);
+-      nf_ct_delete(e->ct, 0, 0);
++      if (flow->flags & FLOW_OFFLOAD_DYING)
++              nf_ct_delete(e->ct, 0, 0);
++      else
++              flow_offload_fixup_ct_state(e->ct);
+       nf_ct_put(e->ct);
+       kfree_rcu(e, rcu_head);
+ }
diff --git a/target/linux/generic/backport-4.14/364-netfilter-nf_flow_table-tear-down-TCP-flows-if-RST-o.patch b/target/linux/generic/backport-4.14/364-netfilter-nf_flow_table-tear-down-TCP-flows-if-RST-o.patch
new file mode 100644 (file)
index 0000000..8b0024c
--- /dev/null
@@ -0,0 +1,81 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Sun, 25 Feb 2018 15:42:58 +0100
+Subject: [PATCH] netfilter: nf_flow_table: tear down TCP flows if RST or
+ FIN was seen
+
+Allow the slow path to handle the shutdown of the connection with
+proper timeouts.
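+
+A sketch of the check this adds to the IPv4/IPv6 offload hooks
+(illustration only; the real code is in the hunks below):
+
+	if (unlikely(tcph->fin || tcph->rst)) {
+		flow_offload_teardown(flow);
+		return -1;	/* caller returns NF_ACCEPT: use slow path */
+	}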
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/netfilter/nf_flow_table_ip.c
++++ b/net/netfilter/nf_flow_table_ip.c
+@@ -15,6 +15,23 @@
+ #include <linux/tcp.h>
+ #include <linux/udp.h>
++static int nf_flow_tcp_state_check(struct flow_offload *flow,
++                                 struct sk_buff *skb, unsigned int thoff)
++{
++      struct tcphdr *tcph;
++
++      if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
++              return -1;
++
++      tcph = (void *)(skb_network_header(skb) + thoff);
++      if (unlikely(tcph->fin || tcph->rst)) {
++              flow_offload_teardown(flow);
++              return -1;
++      }
++
++      return 0;
++}
++
+ static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
+                             __be32 addr, __be32 new_addr)
+ {
+@@ -118,10 +135,9 @@ static int nf_flow_dnat_ip(const struct
+ }
+ static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
+-                        enum flow_offload_tuple_dir dir)
++                        unsigned int thoff, enum flow_offload_tuple_dir dir)
+ {
+       struct iphdr *iph = ip_hdr(skb);
+-      unsigned int thoff = iph->ihl * 4;
+       if (flow->flags & FLOW_OFFLOAD_SNAT &&
+           (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
+@@ -201,6 +217,7 @@ nf_flow_offload_ip_hook(void *priv, stru
+       struct flow_offload *flow;
+       struct net_device *outdev;
+       const struct rtable *rt;
++      unsigned int thoff;
+       struct iphdr *iph;
+       __be32 nexthop;
+@@ -229,8 +246,12 @@ nf_flow_offload_ip_hook(void *priv, stru
+       if (skb_try_make_writable(skb, sizeof(*iph)))
+               return NF_DROP;
++      thoff = ip_hdr(skb)->ihl * 4;
++      if (nf_flow_tcp_state_check(flow, skb, thoff))
++              return NF_ACCEPT;
++
+       if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+-          nf_flow_nat_ip(flow, skb, dir) < 0)
++          nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
+               return NF_DROP;
+       flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+@@ -438,6 +459,9 @@ nf_flow_offload_ipv6_hook(void *priv, st
+       if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
+               return NF_ACCEPT;
++      if (nf_flow_tcp_state_check(flow, skb, sizeof(*ip6h)))
++              return NF_ACCEPT;
++
+       if (skb_try_make_writable(skb, sizeof(*ip6h)))
+               return NF_DROP;