musl: restore lock skipping for mostly single-threaded programs, and related patches
author     Matthias Schiffer <mschiffer@universe-factory.net>
           Sat, 23 May 2020 19:17:15 +0000 (21:17 +0200)
committer  Matthias Schiffer <mschiffer@universe-factory.net>
           Tue, 26 May 2020 21:45:17 +0000 (23:45 +0200)
The remainder of the patch series proposed upstream [2] for the locking
synchronization issue [1].

[1] https://www.openwall.com/lists/musl/2020/05/22/3
[2] https://www.openwall.com/lists/musl/2020/05/22/10

Signed-off-by: Matthias Schiffer <mschiffer@universe-factory.net>
toolchain/musl/patches/500-0001-reorder-thread-list-unlink-in-pthread_exit-after-all.patch [new file with mode: 0644]
toolchain/musl/patches/500-0003-cut-down-size-of-some-libc-struct-members.patch [new file with mode: 0644]
toolchain/musl/patches/500-0004-restore-lock-skipping-for-processes-that-return-to-s.patch [new file with mode: 0644]
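
Background for the series: musl's internal LOCK() skips its atomic
operations entirely while the process has only one thread, since nothing
can contend. The hotfix for [1] keyed the affected locks off libc.threaded,
which never returns to zero once a thread has been created, so the
optimization was lost for the rest of the process lifetime; the patches
below restore it safely. A minimal standalone model of that fast path
(illustrative names, not musl source):

#include <stdatomic.h>

static struct {
	int threads_minus_1;	/* current thread count minus one */
} libc;

static atomic_int big_lock;

static void LOCK(atomic_int *l)
{
	if (!libc.threads_minus_1) return;	/* single-threaded: skip the RMW */
	while (atomic_exchange(l, 1))
		;	/* contended: real musl futex-waits here instead of spinning */
}

static void UNLOCK(atomic_int *l)
{
	atomic_store(l, 0);
}

int main(void)
{
	LOCK(&big_lock);	/* with one thread this costs only a flag test */
	UNLOCK(&big_lock);
	return 0;
}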

diff --git a/toolchain/musl/patches/500-0001-reorder-thread-list-unlink-in-pthread_exit-after-all.patch b/toolchain/musl/patches/500-0001-reorder-thread-list-unlink-in-pthread_exit-after-all.patch
new file mode 100644
index 0000000..d47f2f4
--- /dev/null
@@ -0,0 +1,51 @@
+From 4d5aa20a94a2d3fae3e69289dc23ecafbd0c16c4 Mon Sep 17 00:00:00 2001
+From: Rich Felker <dalias@aerifal.cx>
+Date: Fri, 22 May 2020 17:35:14 -0400
+Subject: [PATCH 1/4] reorder thread list unlink in pthread_exit after all
+ locks
+
+since the backend for LOCK() skips locking if single-threaded, it's
+unsafe to make the process appear single-threaded before the last use
+of lock.
+
+this fixes potential unsynchronized access to a linked list via
+__dl_thread_cleanup.
+---
+ src/thread/pthread_create.c | 19 +++++++++++--------
+ 1 file changed, 11 insertions(+), 8 deletions(-)
+
+--- a/src/thread/pthread_create.c
++++ b/src/thread/pthread_create.c
+@@ -90,14 +90,7 @@ _Noreturn void __pthread_exit(void *resu
+               exit(0);
+      }
+
+-      /* At this point we are committed to thread termination. Unlink
+-       * the thread from the list. This change will not be visible
+-       * until the lock is released, which only happens after SYS_exit
+-       * has been called, via the exit futex address pointing at the lock. */
+-      libc.threads_minus_1--;
+-      self->next->prev = self->prev;
+-      self->prev->next = self->next;
+-      self->prev = self->next = self;
++      /* At this point we are committed to thread termination. */
+
+       /* Process robust list in userspace to handle non-pshared mutexes
+        * and the detached thread case where the robust list head will
+@@ -121,6 +114,16 @@ _Noreturn void __pthread_exit(void *resu
+       __do_orphaned_stdio_locks();
+       __dl_thread_cleanup();
+
++      /* Last, unlink thread from the list. This change will not be visible
++       * until the lock is released, which only happens after SYS_exit
++       * has been called, via the exit futex address pointing at the lock.
++       * This needs to happen after any possible calls to LOCK() that might
++       * skip locking if libc.threads_minus_1 is zero. */
++      libc.threads_minus_1--;
++      self->next->prev = self->prev;
++      self->prev->next = self->next;
++      self->prev = self->next = self;
++
+       /* This atomic potentially competes with a concurrent pthread_detach
+        * call; the loser is responsible for freeing thread resources. */
+       int state = a_cas(&self->detach_state, DT_JOINABLE, DT_EXITING);
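
The hazard the reordering above closes, as a compilable model (an assumed
simplification; the real logic lives in musl's __pthread_exit, and these
names are illustrative): once threads_minus_1 reaches zero, every LOCK()
in the process starts skipping, so the decrement and the list unlink must
follow the exiting thread's last LOCK() user, __dl_thread_cleanup().

#include <stdio.h>

struct tcb { struct tcb *prev, *next; };

static struct { int threads_minus_1; } libc = { 1 };	/* two threads alive */
static int list_lock;	/* stands in for the lock __dl_thread_cleanup takes */

static void LOCK(int *l)
{
	if (!libc.threads_minus_1) return;	/* the skip that makes order matter */
	*l = 1;	/* model only; really an atomic acquire plus futex wait */
}

static void UNLOCK(int *l) { *l = 0; }

static void dl_thread_cleanup(void)
{
	LOCK(&list_lock);	/* guards a shared list in real musl, so it must
				 * really lock while another thread can run */
	UNLOCK(&list_lock);
}

static void pthread_exit_model(struct tcb *self)
{
	/* ... robust-list and stdio-lock cleanup elided ... */
	dl_thread_cleanup();	/* last LOCK() user on the exit path */

	/* Only now may the process begin to look single-threaded. Doing
	 * this before dl_thread_cleanup() would let both this thread and
	 * the surviving one skip list_lock at the same time. */
	libc.threads_minus_1--;
	self->next->prev = self->prev;
	self->prev->next = self->next;
	self->prev = self->next = self;
}

int main(void)
{
	struct tcb a, b;
	a.next = a.prev = &b;
	b.next = b.prev = &a;
	pthread_exit_model(&b);
	printf("list circular again: %d\n", a.next == &a && a.prev == &a);
	return 0;
}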
diff --git a/toolchain/musl/patches/500-0003-cut-down-size-of-some-libc-struct-members.patch b/toolchain/musl/patches/500-0003-cut-down-size-of-some-libc-struct-members.patch
new file mode 100644
index 0000000..6650434
--- /dev/null
@@ -0,0 +1,25 @@
+From f12888e9eb9eed60cc266b899dcafecb4752964a Mon Sep 17 00:00:00 2001
+From: Rich Felker <dalias@aerifal.cx>
+Date: Fri, 22 May 2020 17:25:38 -0400
+Subject: [PATCH 3/4] cut down size of some libc struct members
+
+these are all flags that can be single-byte values.
+---
+ src/internal/libc.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/src/internal/libc.h
++++ b/src/internal/libc.h
+@@ -18,9 +18,9 @@ struct tls_module {
+ };
+
+ struct __libc {
+-      int can_do_threads;
+-      int threaded;
+-      int secure;
++      char can_do_threads;
++      char threaded;
++      char secure;
+       int threads_minus_1;
+       size_t *auxv;
+       struct tls_module *tls_head;
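
Presumably the point of this shrink is to let the next patch add its
one-byte flag right next to threads_minus_1 without growing struct __libc
(an inference; upstream's message only notes the flags fit in one byte).
A quick layout check with a trimmed-down stand-in for the real struct:

#include <stdio.h>
#include <stddef.h>

struct libc_before {	/* member types as of patch 3's parent */
	int can_do_threads;
	int threaded;
	int secure;
	int threads_minus_1;
	size_t *auxv;
};

struct libc_after {	/* patch 3's chars plus patch 4's new member */
	char can_do_threads;
	char threaded;
	char secure;
	volatile signed char need_locks;
	int threads_minus_1;
	size_t *auxv;
};

int main(void)
{
	printf("before: %zu bytes, after: %zu bytes\n",
	       sizeof(struct libc_before), sizeof(struct libc_after));
	printf("need_locks at offset %zu, threads_minus_1 at offset %zu\n",
	       offsetof(struct libc_after, need_locks),
	       offsetof(struct libc_after, threads_minus_1));
	return 0;	/* on LP64: 24 vs 16 bytes; offsets 3 and 4 */
}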
diff --git a/toolchain/musl/patches/500-0004-restore-lock-skipping-for-processes-that-return-to-s.patch b/toolchain/musl/patches/500-0004-restore-lock-skipping-for-processes-that-return-to-s.patch
new file mode 100644
index 0000000..83a6d02
--- /dev/null
@@ -0,0 +1,90 @@
+From 8d81ba8c0bc6fe31136cb15c9c82ef4c24965040 Mon Sep 17 00:00:00 2001
+From: Rich Felker <dalias@aerifal.cx>
+Date: Fri, 22 May 2020 17:45:47 -0400
+Subject: [PATCH 4/4] restore lock-skipping for processes that return to
+ single-threaded state
+
+the design used here relies on the barrier provided by the first lock
+operation after the process returns to single-threaded state to
+synchronize with actions by the last thread that exited. by storing
+the intent to change modes in the same object used to detect whether
+locking is needed, it's possible to avoid an extra (possibly costly)
+memory load after the lock is taken.
+---
+ src/internal/libc.h         | 1 +
+ src/malloc/malloc.c         | 5 ++++-
+ src/thread/__lock.c         | 4 +++-
+ src/thread/pthread_create.c | 8 ++++----
+ 4 files changed, 12 insertions(+), 6 deletions(-)
+
+--- a/src/internal/libc.h
++++ b/src/internal/libc.h
+@@ -21,6 +21,7 @@ struct __libc {
+       char can_do_threads;
+       char threaded;
+       char secure;
++      volatile signed char need_locks;
+       int threads_minus_1;
+       size_t *auxv;
+       struct tls_module *tls_head;
+--- a/src/malloc/malloc.c
++++ b/src/malloc/malloc.c
+@@ -26,8 +26,11 @@ int __malloc_replaced;
+
+ static inline void lock(volatile int *lk)
+ {
+-      if (libc.threaded)
++      int need_locks = libc.need_locks;
++      if (need_locks) {
+               while(a_swap(lk, 1)) __wait(lk, lk+1, 1, 1);
++              if (need_locks < 0) libc.need_locks = 0;
++      }
+ }
+
+ static inline void unlock(volatile int *lk)
+--- a/src/thread/__lock.c
++++ b/src/thread/__lock.c
+@@ -18,9 +18,11 @@
+
+ void __lock(volatile int *l)
+ {
+-      if (!libc.threaded) return;
++      int need_locks = libc.need_locks;
++      if (!need_locks) return;
+       /* fast path: INT_MIN for the lock, +1 for the congestion */
+       int current = a_cas(l, 0, INT_MIN + 1);
++      if (need_locks < 0) libc.need_locks = 0;
+       if (!current) return;
+       /* A first spin loop, for medium congestion. */
+       for (unsigned i = 0; i < 10; ++i) {
+--- a/src/thread/pthread_create.c
++++ b/src/thread/pthread_create.c
+@@ -118,8 +118,8 @@ _Noreturn void __pthread_exit(void *resu
+        * until the lock is released, which only happens after SYS_exit
+        * has been called, via the exit futex address pointing at the lock.
+        * This needs to happen after any possible calls to LOCK() that might
+-       * skip locking if libc.threads_minus_1 is zero. */
+-      libc.threads_minus_1--;
++       * skip locking if process appears single-threaded. */
++      if (!--libc.threads_minus_1) libc.need_locks = -1;
+       self->next->prev = self->prev;
+       self->prev->next = self->next;
+       self->prev = self->next = self;
+@@ -339,7 +339,7 @@ int __pthread_create(pthread_t *restrict
+               ~(1UL<<((SIGCANCEL-1)%(8*sizeof(long))));
+
+       __tl_lock();
+-      libc.threads_minus_1++;
++      if (!libc.threads_minus_1++) libc.need_locks = 1;
+       ret = __clone((c11 ? start_c11 : start), stack, flags, args, &new->tid, TP_ADJ(new), &__thread_list_lock);
+
+       /* All clone failures translate to EAGAIN. If explicit scheduling
+@@ -363,7 +363,7 @@ int __pthread_create(pthread_t *restrict
+               new->next->prev = new;
+               new->prev->next = new;
+       } else {
+-              libc.threads_minus_1--;
++              if (!--libc.threads_minus_1) libc.need_locks = 0;
+       }
+       __tl_unlock();
+       __restore_sigs(&set);
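
Taken together, need_locks is a three-state flag: 1 means multithreaded,
take locks; -1 means the process just returned to single-threaded state,
so the next lock acquisition must still run once (it provides the barrier
that synchronizes with the exited thread) before downgrading the flag;
0 means single-threaded, skip locking. A self-contained model of that
protocol (illustration, not musl source; function names are invented):

#include <stdatomic.h>
#include <stdio.h>

static struct {
	volatile signed char need_locks;
	int threads_minus_1;
} libc;

static atomic_int some_lock;

static void lock(atomic_int *lk)
{
	signed char need_locks = libc.need_locks;	/* one load, reused below */
	if (need_locks) {
		while (atomic_exchange(lk, 1))
			;	/* real musl futex-waits here */
		/* Seeing -1 means we are the only thread left, and the
		 * acquisition above already synchronized with the last
		 * exiter, so locking can now be turned off. */
		if (need_locks < 0) libc.need_locks = 0;
	}
}

static void unlock(atomic_int *lk)
{
	/* keyed off the lock word itself, as in musl's malloc unlock, so
	 * a taken lock is still released even if need_locks was cleared
	 * between lock() and unlock() */
	if (atomic_load(lk)) atomic_store(lk, 0);
}

/* the transitions from pthread_create.c in patch 4 */
static void on_thread_create(void)
{
	if (!libc.threads_minus_1++) libc.need_locks = 1;
}

static void on_thread_exit(void)
{
	if (!--libc.threads_minus_1) libc.need_locks = -1;
}

int main(void)
{
	on_thread_create();	/* a second thread appears: need_locks = 1 */
	lock(&some_lock);	/* really locks */
	unlock(&some_lock);
	on_thread_exit();	/* back to one thread: need_locks = -1 */
	lock(&some_lock);	/* locks one last time: the barrier */
	unlock(&some_lock);
	printf("need_locks after barrier: %d\n", libc.need_locks);	/* 0 */
	lock(&some_lock);	/* skipped from here on */
	unlock(&some_lock);
	return 0;
}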