kernel: replace threaded NAPI implementation in 5.10 with upstream backport
[openwrt/openwrt.git] / target / linux / generic / backport-5.10 / 603-v5.12-net-fix-race-between-napi-kthread-mode-and-busy-poll.patch
diff --git a/target/linux/generic/backport-5.10/603-v5.12-net-fix-race-between-napi-kthread-mode-and-busy-poll.patch b/target/linux/generic/backport-5.10/603-v5.12-net-fix-race-between-napi-kthread-mode-and-busy-poll.patch
new file mode 100644 (file)
index 0000000..e0fd09b
--- /dev/null
@@ -0,0 +1,93 @@
+From: Wei Wang <weiwan@google.com>
+Date: Mon, 1 Mar 2021 17:21:13 -0800
+Subject: [PATCH] net: fix race between napi kthread mode and busy poll
+
+Currently, napi_thread_wait() checks for NAPI_STATE_SCHED bit to
+determine if the kthread owns this napi and could call napi->poll() on
+it. However, if socket busy poll is enabled, it is possible that the
+busy poll thread grabs this SCHED bit (after the previous napi->poll()
+invokes napi_complete_done() and clears SCHED bit) and tries to poll
+on the same napi. napi_disable() could grab the SCHED bit as well.
+This patch tries to fix this race by adding a new bit
+NAPI_STATE_SCHED_THREADED in napi->state. This bit gets set in
+____napi_schedule() if the threaded mode is enabled, and gets cleared
+in napi_complete_done(), and we only poll the napi in kthread if this
+bit is set. This helps distinguish the ownership of the napi between
+kthread and other scenarios and fixes the race issue.
+
+Fixes: 29863d41bb6e ("net: implement threaded-able napi poll loop support")
+Reported-by: Martin Zaharinov <micron10@gmail.com>
+Suggested-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Wei Wang <weiwan@google.com>
+Cc: Alexander Duyck <alexanderduyck@fb.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
+---
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -359,6 +359,7 @@ enum {
+       NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */
+       NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */
+       NAPI_STATE_THREADED,            /* The poll is performed inside its own thread*/
++      NAPI_STATE_SCHED_THREADED,      /* Napi is currently scheduled in threaded mode */
+ };
+ enum {
+@@ -370,6 +371,7 @@ enum {
+       NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL),
+       NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL),
+       NAPIF_STATE_THREADED     = BIT(NAPI_STATE_THREADED),
++      NAPIF_STATE_SCHED_THREADED      = BIT(NAPI_STATE_SCHED_THREADED),
+ };
+ enum gro_result {
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -4275,6 +4275,8 @@ static inline void ____napi_schedule(str
+                */
+               thread = READ_ONCE(napi->thread);
+               if (thread) {
++                      if (thread->state != TASK_INTERRUPTIBLE)
++                              set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
+                       wake_up_process(thread);
+                       return;
+               }
+@@ -6495,7 +6497,8 @@ bool napi_complete_done(struct napi_stru
+               WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
+-              new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
++              new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
++                            NAPIF_STATE_SCHED_THREADED);
+               /* If STATE_MISSED was set, leave STATE_SCHED set,
+                * because we will call napi->poll() one more time.
+@@ -6931,16 +6934,25 @@ static int napi_poll(struct napi_struct
+ static int napi_thread_wait(struct napi_struct *napi)
+ {
++      bool woken = false;
++
+       set_current_state(TASK_INTERRUPTIBLE);
+       while (!kthread_should_stop() && !napi_disable_pending(napi)) {
+-              if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
++              /* Testing SCHED_THREADED bit here to make sure the current
++               * kthread owns this napi and could poll on this napi.
++               * Testing SCHED bit is not enough because SCHED bit might be
++               * set by some other busy poll thread or by napi_disable().
++               */
++              if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) {
+                       WARN_ON(!list_empty(&napi->poll_list));
+                       __set_current_state(TASK_RUNNING);
+                       return 0;
+               }
+               schedule();
++              /* woken being true indicates this thread owns this napi. */
++              woken = true;
+               set_current_state(TASK_INTERRUPTIBLE);
+       }
+       __set_current_state(TASK_RUNNING);