kernel: fix conntrack leak for flow_offload connections
author Felix Fietkau <nbd@nbd.name>
Wed, 13 Jun 2018 10:46:54 +0000 (12:46 +0200)
committer Felix Fietkau <nbd@nbd.name>
Wed, 13 Jun 2018 10:54:45 +0000 (12:54 +0200)
This was caused by a race condition between offload teardown and
conntrack gc bumping the timeout of offloaded connections.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
target/linux/generic/backport-4.14/370-netfilter-nf_flow_table-fix-offloaded-connection-tim.patch [new file with mode: 0644]
target/linux/generic/pending-4.14/640-netfilter-nf_flow_table-add-hardware-offload-support.patch
target/linux/generic/pending-4.14/645-netfilter-nf_flow_table-rework-hardware-offload-time.patch

diff --git a/target/linux/generic/backport-4.14/370-netfilter-nf_flow_table-fix-offloaded-connection-tim.patch b/target/linux/generic/backport-4.14/370-netfilter-nf_flow_table-fix-offloaded-connection-tim.patch
new file mode 100644 (file)
index 0000000..e68395c
--- /dev/null
@@ -0,0 +1,110 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Wed, 13 Jun 2018 12:33:39 +0200
+Subject: [PATCH] netfilter: nf_flow_table: fix offloaded connection timeout
+ corner case
+
+The full teardown of offloaded flows is deferred to a gc work item,
+however processing of packets by netfilter needs to happen immediately
+after a teardown is requested, because the conntrack state needs to be
+fixed up.
+
+Since the IPS_OFFLOAD_BIT is still kept until the teardown is complete,
+the netfilter conntrack gc can accidentally bump the timeout of a
+connection where offload was just stopped, causing a conntrack entry
+leak.
+
+Fix this by moving the conntrack timeout bumping from conntrack core to
+the nf_flow_offload code and adding a check to prevent bogus timeout bumps.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -978,18 +978,6 @@ static bool gc_worker_can_early_drop(con
+       return false;
+ }
+-#define       DAY     (86400 * HZ)
+-
+-/* Set an arbitrary timeout large enough not to ever expire, this save
+- * us a check for the IPS_OFFLOAD_BIT from the packet path via
+- * nf_ct_is_expired().
+- */
+-static void nf_ct_offload_timeout(struct nf_conn *ct)
+-{
+-      if (nf_ct_expires(ct) < DAY / 2)
+-              ct->timeout = nfct_time_stamp + DAY;
+-}
+-
+ static void gc_worker(struct work_struct *work)
+ {
+       unsigned int min_interval = max(HZ / GC_MAX_BUCKETS_DIV, 1u);
+@@ -1026,10 +1014,8 @@ static void gc_worker(struct work_struct
+                       tmp = nf_ct_tuplehash_to_ctrack(h);
+                       scanned++;
+-                      if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
+-                              nf_ct_offload_timeout(tmp);
++                      if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
+                               continue;
+-                      }
+                       if (nf_ct_is_expired(tmp)) {
+                               nf_ct_gc_expired(tmp);
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -185,8 +185,27 @@ static const struct rhashtable_params nf
+       .automatic_shrinking    = true,
+ };
++#define       DAY     (86400 * HZ)
++
++/* Set an arbitrary timeout large enough not to ever expire, this saves
++ * us a check for the IPS_OFFLOAD_BIT from the packet path via
++ * nf_ct_is_expired().
++ */
++static void nf_ct_offload_timeout(struct flow_offload *flow)
++{
++      struct flow_offload_entry *entry;
++      struct nf_conn *ct;
++
++      entry = container_of(flow, struct flow_offload_entry, flow);
++      ct = entry->ct;
++
++      if (nf_ct_expires(ct) < DAY / 2)
++              ct->timeout = nfct_time_stamp + DAY;
++}
++
+ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
+ {
++      nf_ct_offload_timeout(flow);
+       flow->timeout = (u32)jiffies;
+       rhashtable_insert_fast(&flow_table->rhashtable,
+@@ -307,6 +326,8 @@ static int nf_flow_offload_gc_step(struc
+       rhashtable_walk_start(&hti);
+       while ((tuplehash = rhashtable_walk_next(&hti))) {
++              bool teardown;
++
+               if (IS_ERR(tuplehash)) {
+                       err = PTR_ERR(tuplehash);
+                       if (err != -EAGAIN)
+@@ -319,9 +340,13 @@ static int nf_flow_offload_gc_step(struc
+               flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
+-              if (nf_flow_has_expired(flow) ||
+-                  (flow->flags & (FLOW_OFFLOAD_DYING |
+-                                  FLOW_OFFLOAD_TEARDOWN)))
++              teardown = flow->flags & (FLOW_OFFLOAD_DYING |
++                                        FLOW_OFFLOAD_TEARDOWN);
++
++              if (!teardown)
++                      nf_ct_offload_timeout(flow);
++
++              if (nf_flow_has_expired(flow) || teardown)
+                       flow_offload_del(flow_table, flow);
+       }
+ out:
index df48db04eb25fcb2ed1b0a82b1fc2d0aeac5723c..ad77215843e96be6235b80ecb333a81fac0971e8 100644 (file)
@@ -156,7 +156,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 --- a/net/netfilter/nf_flow_table_core.c
 +++ b/net/netfilter/nf_flow_table_core.c
-@@ -199,10 +199,16 @@ int flow_offload_add(struct nf_flowtable
+@@ -218,10 +218,16 @@ int flow_offload_add(struct nf_flowtable
  }
  EXPORT_SYMBOL_GPL(flow_offload_add);
  
@@ -173,7 +173,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  
        rhashtable_remove_fast(&flow_table->rhashtable,
                               &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
-@@ -214,6 +220,9 @@ static void flow_offload_del(struct nf_f
+@@ -233,6 +239,9 @@ static void flow_offload_del(struct nf_f
        e = container_of(flow, struct flow_offload_entry, flow);
        clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
  
@@ -183,32 +183,17 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
        flow_offload_free(flow);
  }
  
-@@ -307,6 +316,7 @@ static int nf_flow_offload_gc_step(struc
-       rhashtable_walk_start(&hti);
+@@ -346,6 +355,9 @@ static int nf_flow_offload_gc_step(struc
+               if (!teardown)
+                       nf_ct_offload_timeout(flow);
  
-       while ((tuplehash = rhashtable_walk_next(&hti))) {
-+              bool teardown;
-               if (IS_ERR(tuplehash)) {
-                       err = PTR_ERR(tuplehash);
-                       if (err != -EAGAIN)
-@@ -319,9 +329,13 @@ static int nf_flow_offload_gc_step(struc
-               flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
--              if (nf_flow_has_expired(flow) ||
--                  (flow->flags & (FLOW_OFFLOAD_DYING |
--                                  FLOW_OFFLOAD_TEARDOWN)))
-+              teardown = flow->flags & (FLOW_OFFLOAD_DYING |
-+                                        FLOW_OFFLOAD_TEARDOWN);
-+
 +              if (nf_flow_in_hw(flow) && !teardown)
 +                      continue;
 +
-+              if (nf_flow_has_expired(flow) || teardown)
+               if (nf_flow_has_expired(flow) || teardown)
                        flow_offload_del(flow_table, flow);
        }
- out:
-@@ -456,10 +470,43 @@ int nf_flow_dnat_port(const struct flow_
+@@ -481,10 +493,43 @@ int nf_flow_dnat_port(const struct flow_
  }
  EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
  
@@ -252,7 +237,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
        INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
  
        err = rhashtable_init(&flowtable->rhashtable,
-@@ -497,6 +544,8 @@ static void nf_flow_table_iterate_cleanu
+@@ -522,6 +567,8 @@ static void nf_flow_table_iterate_cleanu
  {
        nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
        flush_delayed_work(&flowtable->gc_work);
@@ -261,7 +246,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  }
  
  void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
-@@ -510,6 +559,26 @@ void nf_flow_table_cleanup(struct net *n
+@@ -535,6 +582,26 @@ void nf_flow_table_cleanup(struct net *n
  }
  EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
  
@@ -288,7 +273,7 @@ Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  void nf_flow_table_free(struct nf_flowtable *flow_table)
  {
        mutex_lock(&flowtable_lock);
-@@ -519,9 +588,58 @@ void nf_flow_table_free(struct nf_flowta
+@@ -544,9 +611,58 @@ void nf_flow_table_free(struct nf_flowta
        nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
        WARN_ON(!nf_flow_offload_gc_step(flow_table));
        rhashtable_destroy(&flow_table->rhashtable);
index 8da15bc33682dc593231d0580b3f7281d91f46b5..2b3725f81e05be7798292ee30e0ce5080d02812b 100644 (file)
@@ -26,9 +26,9 @@ Signed-off-by: Felix Fietkau <nbd@nbd.name>
        struct flow_offload_tuple_rhash         tuplehash[FLOW_OFFLOAD_DIR_MAX];
 --- a/net/netfilter/nf_flow_table_core.c
 +++ b/net/netfilter/nf_flow_table_core.c
-@@ -332,7 +332,7 @@ static int nf_flow_offload_gc_step(struc
-               teardown = flow->flags & (FLOW_OFFLOAD_DYING |
-                                         FLOW_OFFLOAD_TEARDOWN);
+@@ -355,7 +355,7 @@ static int nf_flow_offload_gc_step(struc
+               if (!teardown)
+                       nf_ct_offload_timeout(flow);
  
 -              if (nf_flow_in_hw(flow) && !teardown)
 +              if ((flow->flags & FLOW_OFFLOAD_KEEP) && !teardown)