generic: net: tcp: backport tcp tx performance patches
diff --git a/target/linux/generic/backport-4.9/024-8-tcp-tsq-move-tsq_flags-close-to-sk_wmem_alloc.patch b/target/linux/generic/backport-4.9/024-8-tcp-tsq-move-tsq_flags-close-to-sk_wmem_alloc.patch
new file mode 100644
index 0000000..545fe60
--- /dev/null
+++ b/target/linux/generic/backport-4.9/024-8-tcp-tsq-move-tsq_flags-close-to-sk_wmem_alloc.patch
@@ -0,0 +1,176 @@
+From 7aa5470c2c09265902b5e4289afa82e4e7c2987e Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 3 Dec 2016 11:14:57 -0800
+Subject: [PATCH 08/10] tcp: tsq: move tsq_flags close to sk_wmem_alloc
+
+tsq_flags being in the same cache line as sk_wmem_alloc
+makes a lot of sense. Both fields are changed from tcp_wfree()
+and more generally by various TSQ related functions.
+
+A prior patch made room in struct sock and added sk_tsq_flags;
+this patch deletes tsq_flags from struct tcp_sock.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ include/linux/tcp.h   |  1 -
+ net/ipv4/tcp.c        |  4 ++--
+ net/ipv4/tcp_ipv4.c   |  2 +-
+ net/ipv4/tcp_output.c | 24 +++++++++++-------------
+ net/ipv4/tcp_timer.c  |  4 ++--
+ net/ipv6/tcp_ipv6.c   |  2 +-
+ 6 files changed, 17 insertions(+), 20 deletions(-)
+
+--- a/include/linux/tcp.h
++++ b/include/linux/tcp.h
+@@ -192,7 +192,6 @@ struct tcp_sock {
+       u32     tsoffset;       /* timestamp offset */
+       struct list_head tsq_node; /* anchor in tsq_tasklet.head list */
+-      unsigned long   tsq_flags;
+       /* Data for direct copy to user */
+       struct {
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -665,9 +665,9 @@ static void tcp_push(struct sock *sk, in
+       if (tcp_should_autocork(sk, skb, size_goal)) {
+               /* avoid atomic op if TSQ_THROTTLED bit is already set */
+-              if (!test_bit(TSQ_THROTTLED, &tp->tsq_flags)) {
++              if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
+                       NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
+-                      set_bit(TSQ_THROTTLED, &tp->tsq_flags);
++                      set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
+               }
+               /* It is possible TX completion already happened
+                * before we set TSQ_THROTTLED.
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -446,7 +446,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb
+                       if (!sock_owned_by_user(sk)) {
+                               tcp_v4_mtu_reduced(sk);
+                       } else {
+-                              if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
++                              if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags))
+                                       sock_hold(sk);
+                       }
+                       goto out;
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -767,14 +767,15 @@ static void tcp_tasklet_func(unsigned lo
+       list_for_each_safe(q, n, &list) {
+               tp = list_entry(q, struct tcp_sock, tsq_node);
+               list_del(&tp->tsq_node);
+-              clear_bit(TSQ_QUEUED, &tp->tsq_flags);
+               sk = (struct sock *)tp;
++              clear_bit(TSQ_QUEUED, &sk->sk_tsq_flags);
++
+               if (!sk->sk_lock.owned &&
+-                  test_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags)) {
++                  test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) {
+                       bh_lock_sock(sk);
+                       if (!sock_owned_by_user(sk)) {
+-                              clear_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
++                              clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
+                               tcp_tsq_handler(sk);
+                       }
+                       bh_unlock_sock(sk);
+@@ -797,16 +798,15 @@ static void tcp_tasklet_func(unsigned lo
+  */
+ void tcp_release_cb(struct sock *sk)
+ {
+-      struct tcp_sock *tp = tcp_sk(sk);
+       unsigned long flags, nflags;
+       /* perform an atomic operation only if at least one flag is set */
+       do {
+-              flags = tp->tsq_flags;
++              flags = sk->sk_tsq_flags;
+               if (!(flags & TCP_DEFERRED_ALL))
+                       return;
+               nflags = flags & ~TCP_DEFERRED_ALL;
+-      } while (cmpxchg(&tp->tsq_flags, flags, nflags) != flags);
++      } while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
+       if (flags & TCPF_TSQ_DEFERRED)
+               tcp_tsq_handler(sk);
+@@ -878,7 +878,7 @@ void tcp_wfree(struct sk_buff *skb)
+       if (wmem >= SKB_TRUESIZE(1) && this_cpu_ksoftirqd() == current)
+               goto out;
+-      for (oval = READ_ONCE(tp->tsq_flags);; oval = nval) {
++      for (oval = READ_ONCE(sk->sk_tsq_flags);; oval = nval) {
+               struct tsq_tasklet *tsq;
+               bool empty;
+@@ -886,7 +886,7 @@ void tcp_wfree(struct sk_buff *skb)
+                       goto out;
+               nval = (oval & ~TSQF_THROTTLED) | TSQF_QUEUED | TCPF_TSQ_DEFERRED;
+-              nval = cmpxchg(&tp->tsq_flags, oval, nval);
++              nval = cmpxchg(&sk->sk_tsq_flags, oval, nval);
+               if (nval != oval)
+                       continue;
+@@ -2093,7 +2093,7 @@ static bool tcp_small_queue_check(struct
+                   skb->prev == sk->sk_write_queue.next)
+                       return false;
+-              set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags);
++              set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
+               /* It is possible TX completion already happened
+                * before we set TSQ_THROTTLED, so we must
+                * test again the condition.
+@@ -2191,8 +2191,8 @@ static bool tcp_write_xmit(struct sock *
+                   unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
+                       break;
+-              if (test_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags))
+-                      clear_bit(TCP_TSQ_DEFERRED, &tp->tsq_flags);
++              if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags))
++                      clear_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags);
+               if (tcp_small_queue_check(sk, skb, 0))
+                       break;
+@@ -3495,8 +3495,6 @@ void tcp_send_ack(struct sock *sk)
+       /* We do not want pure acks influencing TCP Small Queues or fq/pacing
+        * too much.
+        * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784
+-       * We also avoid tcp_wfree() overhead (cache line miss accessing
+-       * tp->tsq_flags) by using regular sock_wfree()
+        */
+       skb_set_tcp_pure_ack(buff);
+--- a/net/ipv4/tcp_timer.c
++++ b/net/ipv4/tcp_timer.c
+@@ -311,7 +311,7 @@ static void tcp_delack_timer(unsigned lo
+               inet_csk(sk)->icsk_ack.blocked = 1;
+               __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED);
+               /* deleguate our work to tcp_release_cb() */
+-              if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
++              if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags))
+                       sock_hold(sk);
+       }
+       bh_unlock_sock(sk);
+@@ -594,7 +594,7 @@ static void tcp_write_timer(unsigned lon
+               tcp_write_timer_handler(sk);
+       } else {
+               /* delegate our work to tcp_release_cb() */
+-              if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags))
++              if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &sk->sk_tsq_flags))
+                       sock_hold(sk);
+       }
+       bh_unlock_sock(sk);
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -404,7 +404,7 @@ static void tcp_v6_err(struct sk_buff *s
+               if (!sock_owned_by_user(sk))
+                       tcp_v6_mtu_reduced(sk);
+               else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
+-                                         &tp->tsq_flags))
++                                         &sk->sk_tsq_flags))
+                       sock_hold(sk);
+               goto out;
+       }