kernel: delete Linux 5.4 config and patches
[openwrt/openwrt.git] / target / linux / generic / backport-5.4 / 080-wireguard-0133-wireguard-allowedips-remove-nodes-in-O-1.patch
diff --git a/target/linux/generic/backport-5.4/080-wireguard-0133-wireguard-allowedips-remove-nodes-in-O-1.patch b/target/linux/generic/backport-5.4/080-wireguard-0133-wireguard-allowedips-remove-nodes-in-O-1.patch
deleted file mode 100644 (file)
index 78da24e..0000000
+++ /dev/null
@@ -1,237 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: "Jason A. Donenfeld" <Jason@zx2c4.com>
-Date: Fri, 4 Jun 2021 17:17:36 +0200
-Subject: [PATCH] wireguard: allowedips: remove nodes in O(1)
-
-commit f634f418c227c912e7ea95a3299efdc9b10e4022 upstream.
-
-Previously, deleting peers would require traversing the entire trie in
-order to rebalance nodes and safely free them. This meant that removing
-1000 peers from a trie with a half million nodes would take an extremely
-long time, during which we're holding the rtnl lock. Large-scale users
-were reporting 200ms latencies added to the networking stack as a whole
-every time their userspace software would queue up significant removals.
-That's a serious situation.
-
-This commit fixes that by maintaining a double pointer to the parent's
-bit pointer for each node, and then using the already existing node list
-belonging to each peer to go directly to the node, fix up its pointers,
-and free it with RCU. This means removal is O(1) instead of O(n), and we
-don't use gobs of stack.
-
-The removal algorithm has the same downside as the code that it fixes:
-it won't collapse needlessly long runs of fillers.  We can enhance that
-in the future if it ever becomes a problem. This commit documents that
-limitation with a TODO comment in code, a small but meaningful
-improvement over the prior situation.
-
-Currently the biggest flaw, which the next commit addresses, is that
-because this increases the node size on 64-bit machines from 60 bytes to
-68 bytes. 60 rounds up to 64, but 68 rounds up to 128. So we wind up
-using twice as much memory per node, because of power-of-two
-allocations, which is a big bummer. We'll need to figure something out
-there.
-
-Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
-Cc: stable@vger.kernel.org
-Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
----
- drivers/net/wireguard/allowedips.c | 132 ++++++++++++-----------------
- drivers/net/wireguard/allowedips.h |   9 +-
- 2 files changed, 57 insertions(+), 84 deletions(-)
-
---- a/drivers/net/wireguard/allowedips.c
-+++ b/drivers/net/wireguard/allowedips.c
-@@ -66,60 +66,6 @@ static void root_remove_peer_lists(struc
-       }
- }
--static void walk_remove_by_peer(struct allowedips_node __rcu **top,
--                              struct wg_peer *peer, struct mutex *lock)
--{
--#define REF(p) rcu_access_pointer(p)
--#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock))
--#define PUSH(p) ({                                                             \
--              WARN_ON(IS_ENABLED(DEBUG) && len >= 128);                      \
--              stack[len++] = p;                                              \
--      })
--
--      struct allowedips_node __rcu **stack[128], **nptr;
--      struct allowedips_node *node, *prev;
--      unsigned int len;
--
--      if (unlikely(!peer || !REF(*top)))
--              return;
--
--      for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) {
--              nptr = stack[len - 1];
--              node = DEREF(nptr);
--              if (!node) {
--                      --len;
--                      continue;
--              }
--              if (!prev || REF(prev->bit[0]) == node ||
--                  REF(prev->bit[1]) == node) {
--                      if (REF(node->bit[0]))
--                              PUSH(&node->bit[0]);
--                      else if (REF(node->bit[1]))
--                              PUSH(&node->bit[1]);
--              } else if (REF(node->bit[0]) == prev) {
--                      if (REF(node->bit[1]))
--                              PUSH(&node->bit[1]);
--              } else {
--                      if (rcu_dereference_protected(node->peer,
--                              lockdep_is_held(lock)) == peer) {
--                              RCU_INIT_POINTER(node->peer, NULL);
--                              list_del_init(&node->peer_list);
--                              if (!node->bit[0] || !node->bit[1]) {
--                                      rcu_assign_pointer(*nptr, DEREF(
--                                             &node->bit[!REF(node->bit[0])]));
--                                      kfree_rcu(node, rcu);
--                                      node = DEREF(nptr);
--                              }
--                      }
--                      --len;
--              }
--      }
--
--#undef REF
--#undef DEREF
--#undef PUSH
--}
--
- static unsigned int fls128(u64 a, u64 b)
- {
-       return a ? fls64(a) + 64U : fls64(b);
-@@ -224,6 +170,7 @@ static int add(struct allowedips_node __
-               RCU_INIT_POINTER(node->peer, peer);
-               list_add_tail(&node->peer_list, &peer->allowedips_list);
-               copy_and_assign_cidr(node, key, cidr, bits);
-+              rcu_assign_pointer(node->parent_bit, trie);
-               rcu_assign_pointer(*trie, node);
-               return 0;
-       }
-@@ -243,9 +190,9 @@ static int add(struct allowedips_node __
-       if (!node) {
-               down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
-       } else {
--              down = rcu_dereference_protected(CHOOSE_NODE(node, key),
--                                               lockdep_is_held(lock));
-+              down = rcu_dereference_protected(CHOOSE_NODE(node, key), lockdep_is_held(lock));
-               if (!down) {
-+                      rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, key));
-                       rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
-                       return 0;
-               }
-@@ -254,29 +201,37 @@ static int add(struct allowedips_node __
-       parent = node;
-       if (newnode->cidr == cidr) {
-+              rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(newnode, down->bits));
-               rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
--              if (!parent)
-+              if (!parent) {
-+                      rcu_assign_pointer(newnode->parent_bit, trie);
-                       rcu_assign_pointer(*trie, newnode);
--              else
--                      rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits),
--                                         newnode);
--      } else {
--              node = kzalloc(sizeof(*node), GFP_KERNEL);
--              if (unlikely(!node)) {
--                      list_del(&newnode->peer_list);
--                      kfree(newnode);
--                      return -ENOMEM;
-+              } else {
-+                      rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(parent, newnode->bits));
-+                      rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits), newnode);
-               }
--              INIT_LIST_HEAD(&node->peer_list);
--              copy_and_assign_cidr(node, newnode->bits, cidr, bits);
-+              return 0;
-+      }
-+
-+      node = kzalloc(sizeof(*node), GFP_KERNEL);
-+      if (unlikely(!node)) {
-+              list_del(&newnode->peer_list);
-+              kfree(newnode);
-+              return -ENOMEM;
-+      }
-+      INIT_LIST_HEAD(&node->peer_list);
-+      copy_and_assign_cidr(node, newnode->bits, cidr, bits);
--              rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
--              rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
--              if (!parent)
--                      rcu_assign_pointer(*trie, node);
--              else
--                      rcu_assign_pointer(CHOOSE_NODE(parent, node->bits),
--                                         node);
-+      rcu_assign_pointer(down->parent_bit, &CHOOSE_NODE(node, down->bits));
-+      rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
-+      rcu_assign_pointer(newnode->parent_bit, &CHOOSE_NODE(node, newnode->bits));
-+      rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
-+      if (!parent) {
-+              rcu_assign_pointer(node->parent_bit, trie);
-+              rcu_assign_pointer(*trie, node);
-+      } else {
-+              rcu_assign_pointer(node->parent_bit, &CHOOSE_NODE(parent, node->bits));
-+              rcu_assign_pointer(CHOOSE_NODE(parent, node->bits), node);
-       }
-       return 0;
- }
-@@ -335,9 +290,30 @@ int wg_allowedips_insert_v6(struct allow
- void wg_allowedips_remove_by_peer(struct allowedips *table,
-                                 struct wg_peer *peer, struct mutex *lock)
- {
-+      struct allowedips_node *node, *child, *tmp;
-+
-+      if (list_empty(&peer->allowedips_list))
-+              return;
-       ++table->seq;
--      walk_remove_by_peer(&table->root4, peer, lock);
--      walk_remove_by_peer(&table->root6, peer, lock);
-+      list_for_each_entry_safe(node, tmp, &peer->allowedips_list, peer_list) {
-+              list_del_init(&node->peer_list);
-+              RCU_INIT_POINTER(node->peer, NULL);
-+              if (node->bit[0] && node->bit[1])
-+                      continue;
-+              child = rcu_dereference_protected(
-+                              node->bit[!rcu_access_pointer(node->bit[0])],
-+                              lockdep_is_held(lock));
-+              if (child)
-+                      child->parent_bit = node->parent_bit;
-+              *rcu_dereference_protected(node->parent_bit, lockdep_is_held(lock)) = child;
-+              kfree_rcu(node, rcu);
-+
-+              /* TODO: Note that we currently don't walk up and down in order to
-+               * free any potential filler nodes. This means that this function
-+               * doesn't free up as much as it could, which could be revisited
-+               * at some point.
-+               */
-+      }
- }
- int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr)
---- a/drivers/net/wireguard/allowedips.h
-+++ b/drivers/net/wireguard/allowedips.h
-@@ -15,14 +15,11 @@ struct wg_peer;
- struct allowedips_node {
-       struct wg_peer __rcu *peer;
-       struct allowedips_node __rcu *bit[2];
--      /* While it may seem scandalous that we waste space for v4,
--       * we're alloc'ing to the nearest power of 2 anyway, so this
--       * doesn't actually make a difference.
--       */
--      u8 bits[16] __aligned(__alignof(u64));
-       u8 cidr, bit_at_a, bit_at_b, bitlen;
-+      u8 bits[16] __aligned(__alignof(u64));
--      /* Keep rarely used list at bottom to be beyond cache line. */
-+      /* Keep rarely used members at bottom to be beyond cache line. */
-+      struct allowedips_node *__rcu *parent_bit; /* XXX: this puts us at 68->128 bytes instead of 60->64 bytes!! */
-       union {
-               struct list_head peer_list;
-               struct rcu_head rcu;