musl: fix mips vdso handling
[openwrt/staging/chunkeey.git] / toolchain / musl / patches / 000-update-to-git-2016-01-22.patch
1 --- a/.gitignore
2 +++ b/.gitignore
3 @@ -5,9 +5,6 @@
4 *.so.1
5 arch/*/bits/alltypes.h
6 config.mak
7 -include/bits
8 -tools/musl-gcc
9 -tools/musl-clang
10 -tools/ld.musl-clang
11 lib/musl-gcc.specs
12 src/internal/version.h
13 +/obj/
14 --- a/Makefile
15 +++ b/Makefile
16 @@ -8,6 +8,7 @@
17 # Do not make changes here.
18 #
19
20 +srcdir = .
21 exec_prefix = /usr/local
22 bindir = $(exec_prefix)/bin
23
24 @@ -16,31 +17,38 @@ includedir = $(prefix)/include
25 libdir = $(prefix)/lib
26 syslibdir = /lib
27
28 -SRCS = $(sort $(wildcard src/*/*.c arch/$(ARCH)/src/*.c))
29 -OBJS = $(SRCS:.c=.o)
30 +BASE_SRCS = $(sort $(wildcard $(srcdir)/src/*/*.c $(srcdir)/arch/$(ARCH)/src/*.[csS]))
31 +BASE_OBJS = $(patsubst $(srcdir)/%,%.o,$(basename $(BASE_SRCS)))
32 +ARCH_SRCS = $(wildcard $(srcdir)/src/*/$(ARCH)/*.[csS])
33 +ARCH_OBJS = $(patsubst $(srcdir)/%,%.o,$(basename $(ARCH_SRCS)))
34 +REPLACED_OBJS = $(sort $(subst /$(ARCH)/,/,$(ARCH_OBJS)))
35 +OBJS = $(addprefix obj/, $(filter-out $(REPLACED_OBJS), $(sort $(BASE_OBJS) $(ARCH_OBJS))))
36 LOBJS = $(OBJS:.o=.lo)
37 -GENH = include/bits/alltypes.h
38 -GENH_INT = src/internal/version.h
39 -IMPH = src/internal/stdio_impl.h src/internal/pthread_impl.h src/internal/libc.h
40 +GENH = obj/include/bits/alltypes.h
41 +GENH_INT = obj/src/internal/version.h
42 +IMPH = $(addprefix $(srcdir)/, src/internal/stdio_impl.h src/internal/pthread_impl.h src/internal/libc.h)
43
44 -LDFLAGS =
45 +LDFLAGS =
46 +LDFLAGS_AUTO =
47 LIBCC = -lgcc
48 CPPFLAGS =
49 -CFLAGS = -Os -pipe
50 +CFLAGS =
51 +CFLAGS_AUTO = -Os -pipe
52 CFLAGS_C99FSE = -std=c99 -ffreestanding -nostdinc
53
54 CFLAGS_ALL = $(CFLAGS_C99FSE)
55 -CFLAGS_ALL += -D_XOPEN_SOURCE=700 -I./arch/$(ARCH) -I./src/internal -I./include
56 -CFLAGS_ALL += $(CPPFLAGS) $(CFLAGS)
57 -CFLAGS_ALL_STATIC = $(CFLAGS_ALL)
58 -CFLAGS_ALL_SHARED = $(CFLAGS_ALL) -fPIC -DSHARED
59 +CFLAGS_ALL += -D_XOPEN_SOURCE=700 -I$(srcdir)/arch/$(ARCH) -Iobj/src/internal -I$(srcdir)/src/internal -Iobj/include -I$(srcdir)/include
60 +CFLAGS_ALL += $(CPPFLAGS) $(CFLAGS_AUTO) $(CFLAGS)
61 +
62 +LDFLAGS_ALL = $(LDFLAGS_AUTO) $(LDFLAGS)
63
64 AR = $(CROSS_COMPILE)ar
65 RANLIB = $(CROSS_COMPILE)ranlib
66 -INSTALL = ./tools/install.sh
67 +INSTALL = $(srcdir)/tools/install.sh
68
69 -ARCH_INCLUDES = $(wildcard arch/$(ARCH)/bits/*.h)
70 -ALL_INCLUDES = $(sort $(wildcard include/*.h include/*/*.h) $(GENH) $(ARCH_INCLUDES:arch/$(ARCH)/%=include/%))
71 +ARCH_INCLUDES = $(wildcard $(srcdir)/arch/$(ARCH)/bits/*.h)
72 +INCLUDES = $(wildcard $(srcdir)/include/*.h $(srcdir)/include/*/*.h)
73 +ALL_INCLUDES = $(sort $(INCLUDES:$(srcdir)/%=%) $(GENH:obj/%=%) $(ARCH_INCLUDES:$(srcdir)/arch/$(ARCH)/%=include/%))
74
75 EMPTY_LIB_NAMES = m rt pthread crypt util xnet resolv dl
76 EMPTY_LIBS = $(EMPTY_LIB_NAMES:%=lib/lib%.a)
77 @@ -49,7 +57,7 @@ STATIC_LIBS = lib/libc.a
78 SHARED_LIBS = lib/libc.so
79 TOOL_LIBS = lib/musl-gcc.specs
80 ALL_LIBS = $(CRT_LIBS) $(STATIC_LIBS) $(SHARED_LIBS) $(EMPTY_LIBS) $(TOOL_LIBS)
81 -ALL_TOOLS = tools/musl-gcc
82 +ALL_TOOLS = obj/musl-gcc
83
84 WRAPCC_GCC = gcc
85 WRAPCC_CLANG = clang
86 @@ -58,95 +66,93 @@ LDSO_PATHNAME = $(syslibdir)/ld-musl-$(A
87
88 -include config.mak
89
90 +ifeq ($(ARCH),)
91 +$(error Please set ARCH in config.mak before running make.)
92 +endif
93 +
94 all: $(ALL_LIBS) $(ALL_TOOLS)
95
96 +OBJ_DIRS = $(sort $(patsubst %/,%,$(dir $(ALL_LIBS) $(ALL_TOOLS) $(OBJS) $(GENH) $(GENH_INT))) $(addprefix obj/, crt crt/$(ARCH) include))
97 +
98 +$(ALL_LIBS) $(ALL_TOOLS) $(CRT_LIBS:lib/%=obj/crt/%) $(OBJS) $(LOBJS) $(GENH) $(GENH_INT): | $(OBJ_DIRS)
99 +
100 +$(OBJ_DIRS):
101 + mkdir -p $@
102 +
103 install: install-libs install-headers install-tools
104
105 clean:
106 - rm -f crt/*.o
107 - rm -f $(OBJS)
108 - rm -f $(LOBJS)
109 - rm -f $(ALL_LIBS) lib/*.[ao] lib/*.so
110 - rm -f $(ALL_TOOLS)
111 - rm -f $(GENH) $(GENH_INT)
112 - rm -f include/bits
113 + rm -rf obj lib
114
115 distclean: clean
116 rm -f config.mak
117
118 -include/bits:
119 - @test "$(ARCH)" || { echo "Please set ARCH in config.mak before running make." ; exit 1 ; }
120 - ln -sf ../arch/$(ARCH)/bits $@
121 +obj/include/bits/alltypes.h: $(srcdir)/arch/$(ARCH)/bits/alltypes.h.in $(srcdir)/include/alltypes.h.in $(srcdir)/tools/mkalltypes.sed
122 + sed -f $(srcdir)/tools/mkalltypes.sed $(srcdir)/arch/$(ARCH)/bits/alltypes.h.in $(srcdir)/include/alltypes.h.in > $@
123
124 -include/bits/alltypes.h.in: include/bits
125 +obj/src/internal/version.h: $(wildcard $(srcdir)/VERSION $(srcdir)/.git)
126 + printf '#define VERSION "%s"\n' "$$(cd $(srcdir); sh tools/version.sh)" > $@
127
128 -include/bits/alltypes.h: include/bits/alltypes.h.in include/alltypes.h.in tools/mkalltypes.sed
129 - sed -f tools/mkalltypes.sed include/bits/alltypes.h.in include/alltypes.h.in > $@
130 +obj/src/internal/version.o obj/src/internal/version.lo: obj/src/internal/version.h
131
132 -src/internal/version.h: $(wildcard VERSION .git)
133 - printf '#define VERSION "%s"\n' "$$(sh tools/version.sh)" > $@
134 +obj/crt/rcrt1.o obj/src/ldso/dlstart.lo obj/src/ldso/dynlink.lo: $(srcdir)/src/internal/dynlink.h $(srcdir)/arch/$(ARCH)/reloc.h
135
136 -src/internal/version.lo: src/internal/version.h
137 +obj/crt/crt1.o obj/crt/scrt1.o obj/crt/rcrt1.o obj/src/ldso/dlstart.lo: $(srcdir)/arch/$(ARCH)/crt_arch.h
138
139 -crt/rcrt1.o src/ldso/dlstart.lo src/ldso/dynlink.lo: src/internal/dynlink.h arch/$(ARCH)/reloc.h
140 +obj/crt/rcrt1.o: $(srcdir)/src/ldso/dlstart.c
141
142 -crt/crt1.o crt/Scrt1.o crt/rcrt1.o src/ldso/dlstart.lo: $(wildcard arch/$(ARCH)/crt_arch.h)
143 +obj/crt/Scrt1.o obj/crt/rcrt1.o: CFLAGS_ALL += -fPIC
144
145 -crt/rcrt1.o: src/ldso/dlstart.c
146 +obj/crt/$(ARCH)/crti.o: $(srcdir)/crt/$(ARCH)/crti.s
147
148 -crt/Scrt1.o crt/rcrt1.o: CFLAGS += -fPIC
149 +obj/crt/$(ARCH)/crtn.o: $(srcdir)/crt/$(ARCH)/crtn.s
150
151 -OPTIMIZE_SRCS = $(wildcard $(OPTIMIZE_GLOBS:%=src/%))
152 -$(OPTIMIZE_SRCS:%.c=%.o) $(OPTIMIZE_SRCS:%.c=%.lo): CFLAGS += -O3
153 +OPTIMIZE_SRCS = $(wildcard $(OPTIMIZE_GLOBS:%=$(srcdir)/src/%))
154 +$(OPTIMIZE_SRCS:$(srcdir)/%.c=obj/%.o) $(OPTIMIZE_SRCS:$(srcdir)/%.c=obj/%.lo): CFLAGS += -O3
155
156 MEMOPS_SRCS = src/string/memcpy.c src/string/memmove.c src/string/memcmp.c src/string/memset.c
157 -$(MEMOPS_SRCS:%.c=%.o) $(MEMOPS_SRCS:%.c=%.lo): CFLAGS += $(CFLAGS_MEMOPS)
158 +$(MEMOPS_SRCS:%.c=obj/%.o) $(MEMOPS_SRCS:%.c=obj/%.lo): CFLAGS_ALL += $(CFLAGS_MEMOPS)
159
160 NOSSP_SRCS = $(wildcard crt/*.c) \
161 src/env/__libc_start_main.c src/env/__init_tls.c \
162 src/thread/__set_thread_area.c src/env/__stack_chk_fail.c \
163 src/string/memset.c src/string/memcpy.c \
164 src/ldso/dlstart.c src/ldso/dynlink.c
165 -$(NOSSP_SRCS:%.c=%.o) $(NOSSP_SRCS:%.c=%.lo): CFLAGS += $(CFLAGS_NOSSP)
166 +$(NOSSP_SRCS:%.c=obj/%.o) $(NOSSP_SRCS:%.c=obj/%.lo): CFLAGS_ALL += $(CFLAGS_NOSSP)
167 +
168 +$(CRT_LIBS:lib/%=obj/crt/%): CFLAGS_ALL += -DCRT
169
170 -$(CRT_LIBS:lib/%=crt/%): CFLAGS += -DCRT
171 +$(LOBJS): CFLAGS_ALL += -fPIC -DSHARED
172
173 -# This incantation ensures that changes to any subarch asm files will
174 -# force the corresponding object file to be rebuilt, even if the implicit
175 -# rule below goes indirectly through a .sub file.
176 -define mkasmdep
177 -$(dir $(patsubst %/,%,$(dir $(1))))$(notdir $(1:.s=.o)): $(1)
178 -endef
179 -$(foreach s,$(wildcard src/*/$(ARCH)*/*.s),$(eval $(call mkasmdep,$(s))))
180 +CC_CMD = $(CC) $(CFLAGS_ALL) -c -o $@ $<
181
182 # Choose invocation of assembler to be used
183 -# $(1) is input file, $(2) is output file, $(3) is assembler flags
184 ifeq ($(ADD_CFI),yes)
185 - AS_CMD = LC_ALL=C awk -f tools/add-cfi.common.awk -f tools/add-cfi.$(ARCH).awk $< | $(CC) -x assembler -c -o $@ -
186 + AS_CMD = LC_ALL=C awk -f $(srcdir)/tools/add-cfi.common.awk -f $(srcdir)/tools/add-cfi.$(ARCH).awk $< | $(CC) $(CFLAGS_ALL) -x assembler -c -o $@ -
187 else
188 - AS_CMD = $(CC) -c -o $@ $<
189 + AS_CMD = $(CC_CMD)
190 endif
191
192 -%.o: $(ARCH)$(ASMSUBARCH)/%.sub
193 - $(CC) $(CFLAGS_ALL_STATIC) -c -o $@ $(dir $<)$(shell cat $<)
194 +obj/%.o: $(srcdir)/%.s
195 + $(AS_CMD)
196
197 -%.o: $(ARCH)/%.s
198 - $(AS_CMD) $(CFLAGS_ALL_STATIC)
199 +obj/%.o: $(srcdir)/%.S
200 + $(CC_CMD)
201
202 -%.o: %.c $(GENH) $(IMPH)
203 - $(CC) $(CFLAGS_ALL_STATIC) -c -o $@ $<
204 +obj/%.o: $(srcdir)/%.c $(GENH) $(IMPH)
205 + $(CC_CMD)
206
207 -%.lo: $(ARCH)$(ASMSUBARCH)/%.sub
208 - $(CC) $(CFLAGS_ALL_SHARED) -c -o $@ $(dir $<)$(shell cat $<)
209 +obj/%.lo: $(srcdir)/%.s
210 + $(AS_CMD)
211
212 -%.lo: $(ARCH)/%.s
213 - $(AS_CMD) $(CFLAGS_ALL_SHARED)
214 +obj/%.lo: $(srcdir)/%.S
215 + $(CC_CMD)
216
217 -%.lo: %.c $(GENH) $(IMPH)
218 - $(CC) $(CFLAGS_ALL_SHARED) -c -o $@ $<
219 +obj/%.lo: $(srcdir)/%.c $(GENH) $(IMPH)
220 + $(CC_CMD)
221
222 lib/libc.so: $(LOBJS)
223 - $(CC) $(CFLAGS_ALL_SHARED) $(LDFLAGS) -nostdlib -shared \
224 + $(CC) $(CFLAGS_ALL) $(LDFLAGS_ALL) -nostdlib -shared \
225 -Wl,-e,_dlstart -Wl,-Bsymbolic-functions \
226 -o $@ $(LOBJS) $(LIBCC)
227
228 @@ -159,21 +165,27 @@ $(EMPTY_LIBS):
229 rm -f $@
230 $(AR) rc $@
231
232 -lib/%.o: crt/%.o
233 +lib/%.o: obj/crt/%.o
234 cp $< $@
235
236 -lib/musl-gcc.specs: tools/musl-gcc.specs.sh config.mak
237 +lib/crti.o: obj/crt/$(ARCH)/crti.o
238 + cp $< $@
239 +
240 +lib/crtn.o: obj/crt/$(ARCH)/crtn.o
241 + cp $< $@
242 +
243 +lib/musl-gcc.specs: $(srcdir)/tools/musl-gcc.specs.sh config.mak
244 sh $< "$(includedir)" "$(libdir)" "$(LDSO_PATHNAME)" > $@
245
246 -tools/musl-gcc: config.mak
247 +obj/musl-gcc: config.mak
248 printf '#!/bin/sh\nexec "$${REALGCC:-$(WRAPCC_GCC)}" "$$@" -specs "%s/musl-gcc.specs"\n' "$(libdir)" > $@
249 chmod +x $@
250
251 -tools/%-clang: tools/%-clang.in config.mak
252 +obj/%-clang: $(srcdir)/tools/%-clang.in config.mak
253 sed -e 's!@CC@!$(WRAPCC_CLANG)!g' -e 's!@PREFIX@!$(prefix)!g' -e 's!@INCDIR@!$(includedir)!g' -e 's!@LIBDIR@!$(libdir)!g' -e 's!@LDSO@!$(LDSO_PATHNAME)!g' $< > $@
254 chmod +x $@
255
256 -$(DESTDIR)$(bindir)/%: tools/%
257 +$(DESTDIR)$(bindir)/%: obj/%
258 $(INSTALL) -D $< $@
259
260 $(DESTDIR)$(libdir)/%.so: lib/%.so
261 @@ -182,10 +194,13 @@ $(DESTDIR)$(libdir)/%.so: lib/%.so
262 $(DESTDIR)$(libdir)/%: lib/%
263 $(INSTALL) -D -m 644 $< $@
264
265 -$(DESTDIR)$(includedir)/bits/%: arch/$(ARCH)/bits/%
266 +$(DESTDIR)$(includedir)/bits/%: $(srcdir)/arch/$(ARCH)/bits/%
267 + $(INSTALL) -D -m 644 $< $@
268 +
269 +$(DESTDIR)$(includedir)/bits/%: obj/include/bits/%
270 $(INSTALL) -D -m 644 $< $@
271
272 -$(DESTDIR)$(includedir)/%: include/%
273 +$(DESTDIR)$(includedir)/%: $(srcdir)/include/%
274 $(INSTALL) -D -m 644 $< $@
275
276 $(DESTDIR)$(LDSO_PATHNAME): $(DESTDIR)$(libdir)/libc.so
277 @@ -195,12 +210,12 @@ install-libs: $(ALL_LIBS:lib/%=$(DESTDIR
278
279 install-headers: $(ALL_INCLUDES:include/%=$(DESTDIR)$(includedir)/%)
280
281 -install-tools: $(ALL_TOOLS:tools/%=$(DESTDIR)$(bindir)/%)
282 +install-tools: $(ALL_TOOLS:obj/%=$(DESTDIR)$(bindir)/%)
283
284 musl-git-%.tar.gz: .git
285 - git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ $(patsubst musl-git-%.tar.gz,%,$@)
286 + git --git-dir=$(srcdir)/.git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ $(patsubst musl-git-%.tar.gz,%,$@)
287
288 musl-%.tar.gz: .git
289 - git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ v$(patsubst musl-%.tar.gz,%,$@)
290 + git --git-dir=$(srcdir)/.git archive --format=tar.gz --prefix=$(patsubst %.tar.gz,%,$@)/ -o $@ v$(patsubst musl-%.tar.gz,%,$@)
291
292 .PHONY: all clean install install-libs install-headers install-tools
293 --- a/arch/aarch64/atomic.h
294 +++ /dev/null
295 @@ -1,206 +0,0 @@
296 -#ifndef _INTERNAL_ATOMIC_H
297 -#define _INTERNAL_ATOMIC_H
298 -
299 -#include <stdint.h>
300 -
301 -static inline int a_ctz_64(uint64_t x)
302 -{
303 - __asm__(
304 - " rbit %0, %1\n"
305 - " clz %0, %0\n"
306 - : "=r"(x) : "r"(x));
307 - return x;
308 -}
309 -
310 -static inline int a_ctz_l(unsigned long x)
311 -{
312 - return a_ctz_64(x);
313 -}
314 -
315 -static inline void a_barrier()
316 -{
317 - __asm__ __volatile__("dmb ish");
318 -}
319 -
320 -static inline void *a_cas_p(volatile void *p, void *t, void *s)
321 -{
322 - void *old;
323 - __asm__ __volatile__(
324 - " dmb ish\n"
325 - "1: ldxr %0,%3\n"
326 - " cmp %0,%1\n"
327 - " b.ne 1f\n"
328 - " stxr %w0,%2,%3\n"
329 - " cbnz %w0,1b\n"
330 - " mov %0,%1\n"
331 - "1: dmb ish\n"
332 - : "=&r"(old)
333 - : "r"(t), "r"(s), "Q"(*(long*)p)
334 - : "memory", "cc");
335 - return old;
336 -}
337 -
338 -static inline int a_cas(volatile int *p, int t, int s)
339 -{
340 - int old;
341 - __asm__ __volatile__(
342 - " dmb ish\n"
343 - "1: ldxr %w0,%3\n"
344 - " cmp %w0,%w1\n"
345 - " b.ne 1f\n"
346 - " stxr %w0,%w2,%3\n"
347 - " cbnz %w0,1b\n"
348 - " mov %w0,%w1\n"
349 - "1: dmb ish\n"
350 - : "=&r"(old)
351 - : "r"(t), "r"(s), "Q"(*p)
352 - : "memory", "cc");
353 - return old;
354 -}
355 -
356 -static inline int a_swap(volatile int *x, int v)
357 -{
358 - int old, tmp;
359 - __asm__ __volatile__(
360 - " dmb ish\n"
361 - "1: ldxr %w0,%3\n"
362 - " stxr %w1,%w2,%3\n"
363 - " cbnz %w1,1b\n"
364 - " dmb ish\n"
365 - : "=&r"(old), "=&r"(tmp)
366 - : "r"(v), "Q"(*x)
367 - : "memory", "cc" );
368 - return old;
369 -}
370 -
371 -static inline int a_fetch_add(volatile int *x, int v)
372 -{
373 - int old, tmp;
374 - __asm__ __volatile__(
375 - " dmb ish\n"
376 - "1: ldxr %w0,%3\n"
377 - " add %w0,%w0,%w2\n"
378 - " stxr %w1,%w0,%3\n"
379 - " cbnz %w1,1b\n"
380 - " dmb ish\n"
381 - : "=&r"(old), "=&r"(tmp)
382 - : "r"(v), "Q"(*x)
383 - : "memory", "cc" );
384 - return old-v;
385 -}
386 -
387 -static inline void a_inc(volatile int *x)
388 -{
389 - int tmp, tmp2;
390 - __asm__ __volatile__(
391 - " dmb ish\n"
392 - "1: ldxr %w0,%2\n"
393 - " add %w0,%w0,#1\n"
394 - " stxr %w1,%w0,%2\n"
395 - " cbnz %w1,1b\n"
396 - " dmb ish\n"
397 - : "=&r"(tmp), "=&r"(tmp2)
398 - : "Q"(*x)
399 - : "memory", "cc" );
400 -}
401 -
402 -static inline void a_dec(volatile int *x)
403 -{
404 - int tmp, tmp2;
405 - __asm__ __volatile__(
406 - " dmb ish\n"
407 - "1: ldxr %w0,%2\n"
408 - " sub %w0,%w0,#1\n"
409 - " stxr %w1,%w0,%2\n"
410 - " cbnz %w1,1b\n"
411 - " dmb ish\n"
412 - : "=&r"(tmp), "=&r"(tmp2)
413 - : "Q"(*x)
414 - : "memory", "cc" );
415 -}
416 -
417 -static inline void a_and_64(volatile uint64_t *p, uint64_t v)
418 -{
419 - int tmp, tmp2;
420 - __asm__ __volatile__(
421 - " dmb ish\n"
422 - "1: ldxr %0,%3\n"
423 - " and %0,%0,%2\n"
424 - " stxr %w1,%0,%3\n"
425 - " cbnz %w1,1b\n"
426 - " dmb ish\n"
427 - : "=&r"(tmp), "=&r"(tmp2)
428 - : "r"(v), "Q"(*p)
429 - : "memory", "cc" );
430 -}
431 -
432 -static inline void a_and(volatile int *p, int v)
433 -{
434 - int tmp, tmp2;
435 - __asm__ __volatile__(
436 - " dmb ish\n"
437 - "1: ldxr %w0,%3\n"
438 - " and %w0,%w0,%w2\n"
439 - " stxr %w1,%w0,%3\n"
440 - " cbnz %w1,1b\n"
441 - " dmb ish\n"
442 - : "=&r"(tmp), "=&r"(tmp2)
443 - : "r"(v), "Q"(*p)
444 - : "memory", "cc" );
445 -}
446 -
447 -static inline void a_or_64(volatile uint64_t *p, uint64_t v)
448 -{
449 - int tmp, tmp2;
450 - __asm__ __volatile__(
451 - " dmb ish\n"
452 - "1: ldxr %0,%3\n"
453 - " orr %0,%0,%2\n"
454 - " stxr %w1,%0,%3\n"
455 - " cbnz %w1,1b\n"
456 - " dmb ish\n"
457 - : "=&r"(tmp), "=&r"(tmp2)
458 - : "r"(v), "Q"(*p)
459 - : "memory", "cc" );
460 -}
461 -
462 -static inline void a_or_l(volatile void *p, long v)
463 -{
464 - return a_or_64(p, v);
465 -}
466 -
467 -static inline void a_or(volatile int *p, int v)
468 -{
469 - int tmp, tmp2;
470 - __asm__ __volatile__(
471 - " dmb ish\n"
472 - "1: ldxr %w0,%3\n"
473 - " orr %w0,%w0,%w2\n"
474 - " stxr %w1,%w0,%3\n"
475 - " cbnz %w1,1b\n"
476 - " dmb ish\n"
477 - : "=&r"(tmp), "=&r"(tmp2)
478 - : "r"(v), "Q"(*p)
479 - : "memory", "cc" );
480 -}
481 -
482 -static inline void a_store(volatile int *p, int x)
483 -{
484 - __asm__ __volatile__(
485 - " dmb ish\n"
486 - " str %w1,%0\n"
487 - " dmb ish\n"
488 - : "=m"(*p)
489 - : "r"(x)
490 - : "memory", "cc" );
491 -}
492 -
493 -#define a_spin a_barrier
494 -
495 -static inline void a_crash()
496 -{
497 - *(volatile char *)0=0;
498 -}
499 -
500 -
501 -#endif
502 --- /dev/null
503 +++ b/arch/aarch64/atomic_arch.h
504 @@ -0,0 +1,53 @@
505 +#define a_ll a_ll
506 +static inline int a_ll(volatile int *p)
507 +{
508 + int v;
509 + __asm__ __volatile__ ("ldxr %0, %1" : "=r"(v) : "Q"(*p));
510 + return v;
511 +}
512 +
513 +#define a_sc a_sc
514 +static inline int a_sc(volatile int *p, int v)
515 +{
516 + int r;
517 + __asm__ __volatile__ ("stxr %w0,%1,%2" : "=&r"(r) : "r"(v), "Q"(*p) : "memory");
518 + return !r;
519 +}
520 +
521 +#define a_barrier a_barrier
522 +static inline void a_barrier()
523 +{
524 + __asm__ __volatile__ ("dmb ish" : : : "memory");
525 +}
526 +
527 +#define a_pre_llsc a_barrier
528 +#define a_post_llsc a_barrier
529 +
530 +#define a_cas_p a_cas_p
531 +static inline void *a_cas_p(volatile void *p, void *t, void *s)
532 +{
533 + void *old;
534 + __asm__ __volatile__(
535 + " dmb ish\n"
536 + "1: ldxr %0,%3\n"
537 + " cmp %0,%1\n"
538 + " b.ne 1f\n"
539 + " stxr %w0,%2,%3\n"
540 + " cbnz %w0,1b\n"
541 + " mov %0,%1\n"
542 + "1: dmb ish\n"
543 + : "=&r"(old)
544 + : "r"(t), "r"(s), "Q"(*(void *volatile *)p)
545 + : "memory", "cc");
546 + return old;
547 +}
548 +
549 +#define a_ctz_64 a_ctz_64
550 +static inline int a_ctz_64(uint64_t x)
551 +{
552 + __asm__(
553 + " rbit %0, %1\n"
554 + " clz %0, %0\n"
555 + : "=r"(x) : "r"(x));
556 + return x;
557 +}
558 --- a/arch/aarch64/pthread_arch.h
559 +++ b/arch/aarch64/pthread_arch.h
560 @@ -8,4 +8,4 @@ static inline struct pthread *__pthread_
561 #define TLS_ABOVE_TP
562 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 16)
563
564 -#define CANCEL_REG_IP 33
565 +#define MC_PC pc
566 --- a/arch/arm/atomic.h
567 +++ /dev/null
568 @@ -1,261 +0,0 @@
569 -#ifndef _INTERNAL_ATOMIC_H
570 -#define _INTERNAL_ATOMIC_H
571 -
572 -#include <stdint.h>
573 -
574 -static inline int a_ctz_l(unsigned long x)
575 -{
576 - static const char debruijn32[32] = {
577 - 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
578 - 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
579 - };
580 - return debruijn32[(x&-x)*0x076be629 >> 27];
581 -}
582 -
583 -static inline int a_ctz_64(uint64_t x)
584 -{
585 - uint32_t y = x;
586 - if (!y) {
587 - y = x>>32;
588 - return 32 + a_ctz_l(y);
589 - }
590 - return a_ctz_l(y);
591 -}
592 -
593 -#if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
594 -
595 -static inline void a_barrier()
596 -{
597 - __asm__ __volatile__("dmb ish");
598 -}
599 -
600 -static inline int a_cas(volatile int *p, int t, int s)
601 -{
602 - int old;
603 - __asm__ __volatile__(
604 - " dmb ish\n"
605 - "1: ldrex %0,%3\n"
606 - " cmp %0,%1\n"
607 - " bne 1f\n"
608 - " strex %0,%2,%3\n"
609 - " cmp %0, #0\n"
610 - " bne 1b\n"
611 - " mov %0, %1\n"
612 - "1: dmb ish\n"
613 - : "=&r"(old)
614 - : "r"(t), "r"(s), "Q"(*p)
615 - : "memory", "cc" );
616 - return old;
617 -}
618 -
619 -static inline int a_swap(volatile int *x, int v)
620 -{
621 - int old, tmp;
622 - __asm__ __volatile__(
623 - " dmb ish\n"
624 - "1: ldrex %0,%3\n"
625 - " strex %1,%2,%3\n"
626 - " cmp %1, #0\n"
627 - " bne 1b\n"
628 - " dmb ish\n"
629 - : "=&r"(old), "=&r"(tmp)
630 - : "r"(v), "Q"(*x)
631 - : "memory", "cc" );
632 - return old;
633 -}
634 -
635 -static inline int a_fetch_add(volatile int *x, int v)
636 -{
637 - int old, tmp;
638 - __asm__ __volatile__(
639 - " dmb ish\n"
640 - "1: ldrex %0,%3\n"
641 - " add %0,%0,%2\n"
642 - " strex %1,%0,%3\n"
643 - " cmp %1, #0\n"
644 - " bne 1b\n"
645 - " dmb ish\n"
646 - : "=&r"(old), "=&r"(tmp)
647 - : "r"(v), "Q"(*x)
648 - : "memory", "cc" );
649 - return old-v;
650 -}
651 -
652 -static inline void a_inc(volatile int *x)
653 -{
654 - int tmp, tmp2;
655 - __asm__ __volatile__(
656 - " dmb ish\n"
657 - "1: ldrex %0,%2\n"
658 - " add %0,%0,#1\n"
659 - " strex %1,%0,%2\n"
660 - " cmp %1, #0\n"
661 - " bne 1b\n"
662 - " dmb ish\n"
663 - : "=&r"(tmp), "=&r"(tmp2)
664 - : "Q"(*x)
665 - : "memory", "cc" );
666 -}
667 -
668 -static inline void a_dec(volatile int *x)
669 -{
670 - int tmp, tmp2;
671 - __asm__ __volatile__(
672 - " dmb ish\n"
673 - "1: ldrex %0,%2\n"
674 - " sub %0,%0,#1\n"
675 - " strex %1,%0,%2\n"
676 - " cmp %1, #0\n"
677 - " bne 1b\n"
678 - " dmb ish\n"
679 - : "=&r"(tmp), "=&r"(tmp2)
680 - : "Q"(*x)
681 - : "memory", "cc" );
682 -}
683 -
684 -static inline void a_and(volatile int *x, int v)
685 -{
686 - int tmp, tmp2;
687 - __asm__ __volatile__(
688 - " dmb ish\n"
689 - "1: ldrex %0,%3\n"
690 - " and %0,%0,%2\n"
691 - " strex %1,%0,%3\n"
692 - " cmp %1, #0\n"
693 - " bne 1b\n"
694 - " dmb ish\n"
695 - : "=&r"(tmp), "=&r"(tmp2)
696 - : "r"(v), "Q"(*x)
697 - : "memory", "cc" );
698 -}
699 -
700 -static inline void a_or(volatile int *x, int v)
701 -{
702 - int tmp, tmp2;
703 - __asm__ __volatile__(
704 - " dmb ish\n"
705 - "1: ldrex %0,%3\n"
706 - " orr %0,%0,%2\n"
707 - " strex %1,%0,%3\n"
708 - " cmp %1, #0\n"
709 - " bne 1b\n"
710 - " dmb ish\n"
711 - : "=&r"(tmp), "=&r"(tmp2)
712 - : "r"(v), "Q"(*x)
713 - : "memory", "cc" );
714 -}
715 -
716 -static inline void a_store(volatile int *p, int x)
717 -{
718 - __asm__ __volatile__(
719 - " dmb ish\n"
720 - " str %1,%0\n"
721 - " dmb ish\n"
722 - : "=m"(*p)
723 - : "r"(x)
724 - : "memory", "cc" );
725 -}
726 -
727 -#else
728 -
729 -int __a_cas(int, int, volatile int *) __attribute__((__visibility__("hidden")));
730 -#define __k_cas __a_cas
731 -
732 -static inline void a_barrier()
733 -{
734 - __asm__ __volatile__("bl __a_barrier"
735 - : : : "memory", "cc", "ip", "lr" );
736 -}
737 -
738 -static inline int a_cas(volatile int *p, int t, int s)
739 -{
740 - int old;
741 - for (;;) {
742 - if (!__k_cas(t, s, p))
743 - return t;
744 - if ((old=*p) != t)
745 - return old;
746 - }
747 -}
748 -
749 -static inline int a_swap(volatile int *x, int v)
750 -{
751 - int old;
752 - do old = *x;
753 - while (__k_cas(old, v, x));
754 - return old;
755 -}
756 -
757 -static inline int a_fetch_add(volatile int *x, int v)
758 -{
759 - int old;
760 - do old = *x;
761 - while (__k_cas(old, old+v, x));
762 - return old;
763 -}
764 -
765 -static inline void a_inc(volatile int *x)
766 -{
767 - a_fetch_add(x, 1);
768 -}
769 -
770 -static inline void a_dec(volatile int *x)
771 -{
772 - a_fetch_add(x, -1);
773 -}
774 -
775 -static inline void a_store(volatile int *p, int x)
776 -{
777 - a_barrier();
778 - *p = x;
779 - a_barrier();
780 -}
781 -
782 -static inline void a_and(volatile int *p, int v)
783 -{
784 - int old;
785 - do old = *p;
786 - while (__k_cas(old, old&v, p));
787 -}
788 -
789 -static inline void a_or(volatile int *p, int v)
790 -{
791 - int old;
792 - do old = *p;
793 - while (__k_cas(old, old|v, p));
794 -}
795 -
796 -#endif
797 -
798 -static inline void *a_cas_p(volatile void *p, void *t, void *s)
799 -{
800 - return (void *)a_cas(p, (int)t, (int)s);
801 -}
802 -
803 -#define a_spin a_barrier
804 -
805 -static inline void a_crash()
806 -{
807 - *(volatile char *)0=0;
808 -}
809 -
810 -static inline void a_or_l(volatile void *p, long v)
811 -{
812 - a_or(p, v);
813 -}
814 -
815 -static inline void a_and_64(volatile uint64_t *p, uint64_t v)
816 -{
817 - union { uint64_t v; uint32_t r[2]; } u = { v };
818 - a_and((int *)p, u.r[0]);
819 - a_and((int *)p+1, u.r[1]);
820 -}
821 -
822 -static inline void a_or_64(volatile uint64_t *p, uint64_t v)
823 -{
824 - union { uint64_t v; uint32_t r[2]; } u = { v };
825 - a_or((int *)p, u.r[0]);
826 - a_or((int *)p+1, u.r[1]);
827 -}
828 -
829 -#endif
830 --- /dev/null
831 +++ b/arch/arm/atomic_arch.h
832 @@ -0,0 +1,64 @@
833 +__attribute__((__visibility__("hidden")))
834 +extern const void *__arm_atomics[3]; /* gettp, cas, barrier */
835 +
836 +#if ((__ARM_ARCH_6__ || __ARM_ARCH_6K__ || __ARM_ARCH_6ZK__) && !__thumb__) \
837 + || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
838 +
839 +#define a_ll a_ll
840 +static inline int a_ll(volatile int *p)
841 +{
842 + int v;
843 + __asm__ __volatile__ ("ldrex %0, %1" : "=r"(v) : "Q"(*p));
844 + return v;
845 +}
846 +
847 +#define a_sc a_sc
848 +static inline int a_sc(volatile int *p, int v)
849 +{
850 + int r;
851 + __asm__ __volatile__ ("strex %0,%1,%2" : "=&r"(r) : "r"(v), "Q"(*p) : "memory");
852 + return !r;
853 +}
854 +
855 +#if __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH >= 7
856 +
857 +#define a_barrier a_barrier
858 +static inline void a_barrier()
859 +{
860 + __asm__ __volatile__ ("dmb ish" : : : "memory");
861 +}
862 +
863 +#endif
864 +
865 +#define a_pre_llsc a_barrier
866 +#define a_post_llsc a_barrier
867 +
868 +#else
869 +
870 +#define a_cas a_cas
871 +static inline int a_cas(volatile int *p, int t, int s)
872 +{
873 + for (;;) {
874 + register int r0 __asm__("r0") = t;
875 + register int r1 __asm__("r1") = s;
876 + register volatile int *r2 __asm__("r2") = p;
877 + int old;
878 + __asm__ __volatile__ (
879 + "bl __a_cas"
880 + : "+r"(r0) : "r"(r1), "r"(r2)
881 + : "memory", "r3", "lr", "ip", "cc" );
882 + if (!r0) return t;
883 + if ((old=*p)!=t) return old;
884 + }
885 +}
886 +
887 +#endif
888 +
889 +#ifndef a_barrier
890 +#define a_barrier a_barrier
891 +static inline void a_barrier()
892 +{
893 + __asm__ __volatile__("bl __a_barrier"
894 + : : : "memory", "cc", "ip", "lr" );
895 +}
896 +#endif
897 --- a/arch/arm/pthread_arch.h
898 +++ b/arch/arm/pthread_arch.h
899 @@ -27,4 +27,4 @@ static inline pthread_t __pthread_self()
900 #define TLS_ABOVE_TP
901 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8)
902
903 -#define CANCEL_REG_IP 18
904 +#define MC_PC arm_pc
905 --- a/arch/arm/reloc.h
906 +++ b/arch/arm/reloc.h
907 @@ -6,10 +6,10 @@
908 #define ENDIAN_SUFFIX ""
909 #endif
910
911 -#if __SOFTFP__
912 -#define FP_SUFFIX ""
913 -#else
914 +#if __ARM_PCS_VFP
915 #define FP_SUFFIX "hf"
916 +#else
917 +#define FP_SUFFIX ""
918 #endif
919
920 #define LDSO_ARCH "arm" ENDIAN_SUFFIX FP_SUFFIX
921 @@ -28,10 +28,5 @@
922 #define REL_TPOFF R_ARM_TLS_TPOFF32
923 //#define REL_TLSDESC R_ARM_TLS_DESC
924
925 -#ifdef __thumb__
926 #define CRTJMP(pc,sp) __asm__ __volatile__( \
927 "mov sp,%1 ; bx %0" : : "r"(pc), "r"(sp) : "memory" )
928 -#else
929 -#define CRTJMP(pc,sp) __asm__ __volatile__( \
930 - "mov sp,%1 ; tst %0,#1 ; moveq pc,%0 ; bx %0" : : "r"(pc), "r"(sp) : "memory" )
931 -#endif
932 --- a/arch/arm/src/__aeabi_atexit.c
933 +++ /dev/null
934 @@ -1,6 +0,0 @@
935 -int __cxa_atexit(void (*func)(void *), void *arg, void *dso);
936 -
937 -int __aeabi_atexit (void *obj, void (*func) (void *), void *d)
938 -{
939 - return __cxa_atexit (func, obj, d);
940 -}
941 --- a/arch/arm/src/__aeabi_memclr.c
942 +++ /dev/null
943 @@ -1,9 +0,0 @@
944 -#include <string.h>
945 -#include "libc.h"
946 -
947 -void __aeabi_memclr(void *dest, size_t n)
948 -{
949 - memset(dest, 0, n);
950 -}
951 -weak_alias(__aeabi_memclr, __aeabi_memclr4);
952 -weak_alias(__aeabi_memclr, __aeabi_memclr8);
953 --- a/arch/arm/src/__aeabi_memcpy.c
954 +++ /dev/null
955 @@ -1,9 +0,0 @@
956 -#include <string.h>
957 -#include "libc.h"
958 -
959 -void __aeabi_memcpy(void *restrict dest, const void *restrict src, size_t n)
960 -{
961 - memcpy(dest, src, n);
962 -}
963 -weak_alias(__aeabi_memcpy, __aeabi_memcpy4);
964 -weak_alias(__aeabi_memcpy, __aeabi_memcpy8);
965 --- a/arch/arm/src/__aeabi_memmove.c
966 +++ /dev/null
967 @@ -1,9 +0,0 @@
968 -#include <string.h>
969 -#include "libc.h"
970 -
971 -void __aeabi_memmove(void *dest, const void *src, size_t n)
972 -{
973 - memmove(dest, src, n);
974 -}
975 -weak_alias(__aeabi_memmove, __aeabi_memmove4);
976 -weak_alias(__aeabi_memmove, __aeabi_memmove8);
977 --- a/arch/arm/src/__aeabi_memset.c
978 +++ /dev/null
979 @@ -1,9 +0,0 @@
980 -#include <string.h>
981 -#include "libc.h"
982 -
983 -void __aeabi_memset(void *dest, size_t n, int c)
984 -{
985 - memset(dest, c, n);
986 -}
987 -weak_alias(__aeabi_memset, __aeabi_memset4);
988 -weak_alias(__aeabi_memset, __aeabi_memset8);
989 --- a/arch/arm/src/__set_thread_area.c
990 +++ /dev/null
991 @@ -1,49 +0,0 @@
992 -#include <stdint.h>
993 -#include <elf.h>
994 -#include "pthread_impl.h"
995 -#include "libc.h"
996 -
997 -#define HWCAP_TLS (1 << 15)
998 -
999 -extern const unsigned char __attribute__((__visibility__("hidden")))
1000 - __a_barrier_dummy[], __a_barrier_oldkuser[],
1001 - __a_barrier_v6[], __a_barrier_v7[],
1002 - __a_cas_dummy[], __a_cas_v6[], __a_cas_v7[],
1003 - __a_gettp_dummy[];
1004 -
1005 -#define __a_barrier_kuser 0xffff0fa0
1006 -#define __a_cas_kuser 0xffff0fc0
1007 -#define __a_gettp_kuser 0xffff0fe0
1008 -
1009 -extern uintptr_t __attribute__((__visibility__("hidden")))
1010 - __a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr;
1011 -
1012 -#define SET(op,ver) (__a_##op##_ptr = \
1013 - (uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy)
1014 -
1015 -int __set_thread_area(void *p)
1016 -{
1017 -#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7
1018 - if (__hwcap & HWCAP_TLS) {
1019 - size_t *aux;
1020 - SET(cas, v7);
1021 - SET(barrier, v7);
1022 - for (aux=libc.auxv; *aux; aux+=2) {
1023 - if (*aux != AT_PLATFORM) continue;
1024 - const char *s = (void *)aux[1];
1025 - if (s[0]!='v' || s[1]!='6' || s[2]-'0'<10u) break;
1026 - SET(cas, v6);
1027 - SET(barrier, v6);
1028 - break;
1029 - }
1030 - } else {
1031 - int ver = *(int *)0xffff0ffc;
1032 - SET(gettp, kuser);
1033 - SET(cas, kuser);
1034 - SET(barrier, kuser);
1035 - if (ver < 2) a_crash();
1036 - if (ver < 3) SET(barrier, oldkuser);
1037 - }
1038 -#endif
1039 - return __syscall(0xf0005, p);
1040 -}
1041 --- a/arch/arm/src/arm/atomics.s
1042 +++ /dev/null
1043 @@ -1,116 +0,0 @@
1044 -.text
1045 -
1046 -.global __a_barrier
1047 -.hidden __a_barrier
1048 -.type __a_barrier,%function
1049 -__a_barrier:
1050 - ldr ip,1f
1051 - ldr ip,[pc,ip]
1052 - add pc,pc,ip
1053 -1: .word __a_barrier_ptr-1b
1054 -.global __a_barrier_dummy
1055 -.hidden __a_barrier_dummy
1056 -__a_barrier_dummy:
1057 - tst lr,#1
1058 - moveq pc,lr
1059 - bx lr
1060 -.global __a_barrier_oldkuser
1061 -.hidden __a_barrier_oldkuser
1062 -__a_barrier_oldkuser:
1063 - push {r0,r1,r2,r3,ip,lr}
1064 - mov r1,r0
1065 - mov r2,sp
1066 - ldr ip,=0xffff0fc0
1067 - mov lr,pc
1068 - mov pc,ip
1069 - pop {r0,r1,r2,r3,ip,lr}
1070 - tst lr,#1
1071 - moveq pc,lr
1072 - bx lr
1073 -.global __a_barrier_v6
1074 -.hidden __a_barrier_v6
1075 -__a_barrier_v6:
1076 - mcr p15,0,r0,c7,c10,5
1077 - bx lr
1078 -.global __a_barrier_v7
1079 -.hidden __a_barrier_v7
1080 -__a_barrier_v7:
1081 - .word 0xf57ff05b /* dmb ish */
1082 - bx lr
1083 -
1084 -.global __a_cas
1085 -.hidden __a_cas
1086 -.type __a_cas,%function
1087 -__a_cas:
1088 - ldr ip,1f
1089 - ldr ip,[pc,ip]
1090 - add pc,pc,ip
1091 -1: .word __a_cas_ptr-1b
1092 -.global __a_cas_dummy
1093 -.hidden __a_cas_dummy
1094 -__a_cas_dummy:
1095 - mov r3,r0
1096 - ldr r0,[r2]
1097 - subs r0,r3,r0
1098 - streq r1,[r2]
1099 - tst lr,#1
1100 - moveq pc,lr
1101 - bx lr
1102 -.global __a_cas_v6
1103 -.hidden __a_cas_v6
1104 -__a_cas_v6:
1105 - mov r3,r0
1106 - mcr p15,0,r0,c7,c10,5
1107 -1: .word 0xe1920f9f /* ldrex r0,[r2] */
1108 - subs r0,r3,r0
1109 - .word 0x01820f91 /* strexeq r0,r1,[r2] */
1110 - teqeq r0,#1
1111 - beq 1b
1112 - mcr p15,0,r0,c7,c10,5
1113 - bx lr
1114 -.global __a_cas_v7
1115 -.hidden __a_cas_v7
1116 -__a_cas_v7:
1117 - mov r3,r0
1118 - .word 0xf57ff05b /* dmb ish */
1119 -1: .word 0xe1920f9f /* ldrex r0,[r2] */
1120 - subs r0,r3,r0
1121 - .word 0x01820f91 /* strexeq r0,r1,[r2] */
1122 - teqeq r0,#1
1123 - beq 1b
1124 - .word 0xf57ff05b /* dmb ish */
1125 - bx lr
1126 -
1127 -.global __aeabi_read_tp
1128 -.type __aeabi_read_tp,%function
1129 -__aeabi_read_tp:
1130 -
1131 -.global __a_gettp
1132 -.hidden __a_gettp
1133 -.type __a_gettp,%function
1134 -__a_gettp:
1135 - ldr r0,1f
1136 - ldr r0,[pc,r0]
1137 - add pc,pc,r0
1138 -1: .word __a_gettp_ptr-1b
1139 -.global __a_gettp_dummy
1140 -.hidden __a_gettp_dummy
1141 -__a_gettp_dummy:
1142 - mrc p15,0,r0,c13,c0,3
1143 - bx lr
1144 -
1145 -.data
1146 -.global __a_barrier_ptr
1147 -.hidden __a_barrier_ptr
1148 -__a_barrier_ptr:
1149 - .word 0
1150 -
1151 -.global __a_cas_ptr
1152 -.hidden __a_cas_ptr
1153 -__a_cas_ptr:
1154 - .word 0
1155 -
1156 -.global __a_gettp_ptr
1157 -.hidden __a_gettp_ptr
1158 -__a_gettp_ptr:
1159 - .word 0
1160 --- a/arch/arm/src/find_exidx.c
1161 +++ /dev/null
1162 @@ -1,42 +0,0 @@
1163 -#define _GNU_SOURCE
1164 -#include <link.h>
1165 -#include <stdint.h>
1166 -
1167 -struct find_exidx_data {
1168 - uintptr_t pc, exidx_start;
1169 - int exidx_len;
1170 -};
1171 -
1172 -static int find_exidx(struct dl_phdr_info *info, size_t size, void *ptr)
1173 -{
1174 - struct find_exidx_data *data = ptr;
1175 - const ElfW(Phdr) *phdr = info->dlpi_phdr;
1176 - uintptr_t addr, exidx_start = 0;
1177 - int i, match = 0, exidx_len = 0;
1178 -
1179 - for (i = info->dlpi_phnum; i > 0; i--, phdr++) {
1180 - addr = info->dlpi_addr + phdr->p_vaddr;
1181 - switch (phdr->p_type) {
1182 - case PT_LOAD:
1183 - match |= data->pc >= addr && data->pc < addr + phdr->p_memsz;
1184 - break;
1185 - case PT_ARM_EXIDX:
1186 - exidx_start = addr;
1187 - exidx_len = phdr->p_memsz;
1188 - break;
1189 - }
1190 - }
1191 - data->exidx_start = exidx_start;
1192 - data->exidx_len = exidx_len;
1193 - return match;
1194 -}
1195 -
1196 -uintptr_t __gnu_Unwind_Find_exidx(uintptr_t pc, int *pcount)
1197 -{
1198 - struct find_exidx_data data;
1199 - data.pc = pc;
1200 - if (dl_iterate_phdr(find_exidx, &data) <= 0)
1201 - return 0;
1202 - *pcount = data.exidx_len / 8;
1203 - return data.exidx_start;
1204 -}
1205 --- a/arch/i386/atomic.h
1206 +++ /dev/null
1207 @@ -1,110 +0,0 @@
1208 -#ifndef _INTERNAL_ATOMIC_H
1209 -#define _INTERNAL_ATOMIC_H
1210 -
1211 -#include <stdint.h>
1212 -
1213 -static inline int a_ctz_64(uint64_t x)
1214 -{
1215 - int r;
1216 - __asm__( "bsf %1,%0 ; jnz 1f ; bsf %2,%0 ; addl $32,%0\n1:"
1217 - : "=&r"(r) : "r"((unsigned)x), "r"((unsigned)(x>>32)) );
1218 - return r;
1219 -}
1220 -
1221 -static inline int a_ctz_l(unsigned long x)
1222 -{
1223 - long r;
1224 - __asm__( "bsf %1,%0" : "=r"(r) : "r"(x) );
1225 - return r;
1226 -}
1227 -
1228 -static inline void a_and_64(volatile uint64_t *p, uint64_t v)
1229 -{
1230 - __asm__( "lock ; andl %1, (%0) ; lock ; andl %2, 4(%0)"
1231 - : : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" );
1232 -}
1233 -
1234 -static inline void a_or_64(volatile uint64_t *p, uint64_t v)
1235 -{
1236 - __asm__( "lock ; orl %1, (%0) ; lock ; orl %2, 4(%0)"
1237 - : : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" );
1238 -}
1239 -
1240 -static inline void a_or_l(volatile void *p, long v)
1241 -{
1242 - __asm__( "lock ; orl %1, %0"
1243 - : "=m"(*(long *)p) : "r"(v) : "memory" );
1244 -}
1245 -
1246 -static inline void *a_cas_p(volatile void *p, void *t, void *s)
1247 -{
1248 - __asm__( "lock ; cmpxchg %3, %1"
1249 - : "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" );
1250 - return t;
1251 -}
1252 -
1253 -static inline int a_cas(volatile int *p, int t, int s)
1254 -{
1255 - __asm__( "lock ; cmpxchg %3, %1"
1256 - : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
1257 - return t;
1258 -}
1259 -
1260 -static inline void a_or(volatile int *p, int v)
1261 -{
1262 - __asm__( "lock ; orl %1, %0"
1263 - : "=m"(*p) : "r"(v) : "memory" );
1264 -}
1265 -
1266 -static inline void a_and(volatile int *p, int v)
1267 -{
1268 - __asm__( "lock ; andl %1, %0"
1269 - : "=m"(*p) : "r"(v) : "memory" );
1270 -}
1271 -
1272 -static inline int a_swap(volatile int *x, int v)
1273 -{
1274 - __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
1275 - return v;
1276 -}
1277 -
1278 -#define a_xchg a_swap
1279 -
1280 -static inline int a_fetch_add(volatile int *x, int v)
1281 -{
1282 - __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
1283 - return v;
1284 -}
1285 -
1286 -static inline void a_inc(volatile int *x)
1287 -{
1288 - __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" );
1289 -}
1290 -
1291 -static inline void a_dec(volatile int *x)
1292 -{
1293 - __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" );
1294 -}
1295 -
1296 -static inline void a_store(volatile int *p, int x)
1297 -{
1298 - __asm__( "movl %1, %0 ; lock ; orl $0,(%%esp)" : "=m"(*p) : "r"(x) : "memory" );
1299 -}
1300 -
1301 -static inline void a_spin()
1302 -{
1303 - __asm__ __volatile__( "pause" : : : "memory" );
1304 -}
1305 -
1306 -static inline void a_barrier()
1307 -{
1308 - __asm__ __volatile__( "" : : : "memory" );
1309 -}
1310 -
1311 -static inline void a_crash()
1312 -{
1313 - __asm__ __volatile__( "hlt" : : : "memory" );
1314 -}
1315 -
1316 -
1317 -#endif
1318 --- /dev/null
1319 +++ b/arch/i386/atomic_arch.h
1320 @@ -0,0 +1,109 @@
1321 +#define a_ctz_64 a_ctz_64
1322 +static inline int a_ctz_64(uint64_t x)
1323 +{
1324 + int r;
1325 + __asm__( "bsf %1,%0 ; jnz 1f ; bsf %2,%0 ; addl $32,%0\n1:"
1326 + : "=&r"(r) : "r"((unsigned)x), "r"((unsigned)(x>>32)) );
1327 + return r;
1328 +}
1329 +
1330 +#define a_ctz_l a_ctz_l
1331 +static inline int a_ctz_l(unsigned long x)
1332 +{
1333 + long r;
1334 + __asm__( "bsf %1,%0" : "=r"(r) : "r"(x) );
1335 + return r;
1336 +}
1337 +
1338 +#define a_and_64 a_and_64
1339 +static inline void a_and_64(volatile uint64_t *p, uint64_t v)
1340 +{
1341 + __asm__( "lock ; andl %1, (%0) ; lock ; andl %2, 4(%0)"
1342 + : : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" );
1343 +}
1344 +
1345 +#define a_or_64 a_or_64
1346 +static inline void a_or_64(volatile uint64_t *p, uint64_t v)
1347 +{
1348 + __asm__( "lock ; orl %1, (%0) ; lock ; orl %2, 4(%0)"
1349 + : : "r"((long *)p), "r"((unsigned)v), "r"((unsigned)(v>>32)) : "memory" );
1350 +}
1351 +
1352 +#define a_or_l a_or_l
1353 +static inline void a_or_l(volatile void *p, long v)
1354 +{
1355 + __asm__( "lock ; orl %1, %0"
1356 + : "=m"(*(long *)p) : "r"(v) : "memory" );
1357 +}
1358 +
1359 +#define a_cas a_cas
1360 +static inline int a_cas(volatile int *p, int t, int s)
1361 +{
1362 + __asm__( "lock ; cmpxchg %3, %1"
1363 + : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
1364 + return t;
1365 +}
1366 +
1367 +#define a_or a_or
1368 +static inline void a_or(volatile int *p, int v)
1369 +{
1370 + __asm__( "lock ; orl %1, %0"
1371 + : "=m"(*p) : "r"(v) : "memory" );
1372 +}
1373 +
1374 +#define a_and a_and
1375 +static inline void a_and(volatile int *p, int v)
1376 +{
1377 + __asm__( "lock ; andl %1, %0"
1378 + : "=m"(*p) : "r"(v) : "memory" );
1379 +}
1380 +
1381 +#define a_swap a_swap
1382 +static inline int a_swap(volatile int *x, int v)
1383 +{
1384 + __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
1385 + return v;
1386 +}
1387 +
1388 +#define a_fetch_add a_fetch_add
1389 +static inline int a_fetch_add(volatile int *x, int v)
1390 +{
1391 + __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
1392 + return v;
1393 +}
1394 +
1395 +#define a_inc a_inc
1396 +static inline void a_inc(volatile int *x)
1397 +{
1398 + __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" );
1399 +}
1400 +
1401 +#define a_dec a_dec
1402 +static inline void a_dec(volatile int *x)
1403 +{
1404 + __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" );
1405 +}
1406 +
1407 +#define a_store a_store
1408 +static inline void a_store(volatile int *p, int x)
1409 +{
1410 + __asm__( "movl %1, %0 ; lock ; orl $0,(%%esp)" : "=m"(*p) : "r"(x) : "memory" );
1411 +}
1412 +
1413 +#define a_spin a_spin
1414 +static inline void a_spin()
1415 +{
1416 + __asm__ __volatile__( "pause" : : : "memory" );
1417 +}
1418 +
1419 +#define a_barrier a_barrier
1420 +static inline void a_barrier()
1421 +{
1422 + __asm__ __volatile__( "" : : : "memory" );
1423 +}
1424 +
1425 +#define a_crash a_crash
1426 +static inline void a_crash()
1427 +{
1428 + __asm__ __volatile__( "hlt" : : : "memory" );
1429 +}
1430 --- a/arch/i386/bits/alltypes.h.in
1431 +++ b/arch/i386/bits/alltypes.h.in
1432 @@ -26,10 +26,12 @@ TYPEDEF long double float_t;
1433 TYPEDEF long double double_t;
1434 #endif
1435
1436 -#ifdef __cplusplus
1437 -TYPEDEF struct { alignas(8) long long __ll; long double __ld; } max_align_t;
1438 -#else
1439 +#if !defined(__cplusplus)
1440 TYPEDEF struct { _Alignas(8) long long __ll; long double __ld; } max_align_t;
1441 +#elif defined(__GNUC__)
1442 +TYPEDEF struct { __attribute__((__aligned__(8))) long long __ll; long double __ld; } max_align_t;
1443 +#else
1444 +TYPEDEF struct { alignas(8) long long __ll; long double __ld; } max_align_t;
1445 #endif
1446
1447 TYPEDEF long time_t;
1448 --- a/arch/i386/pthread_arch.h
1449 +++ b/arch/i386/pthread_arch.h
1450 @@ -7,4 +7,4 @@ static inline struct pthread *__pthread_
1451
1452 #define TP_ADJ(p) (p)
1453
1454 -#define CANCEL_REG_IP 14
1455 +#define MC_PC gregs[REG_EIP]
1456 --- a/arch/microblaze/atomic.h
1457 +++ /dev/null
1458 @@ -1,143 +0,0 @@
1459 -#ifndef _INTERNAL_ATOMIC_H
1460 -#define _INTERNAL_ATOMIC_H
1461 -
1462 -#include <stdint.h>
1463 -
1464 -static inline int a_ctz_l(unsigned long x)
1465 -{
1466 - static const char debruijn32[32] = {
1467 - 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
1468 - 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
1469 - };
1470 - return debruijn32[(x&-x)*0x076be629 >> 27];
1471 -}
1472 -
1473 -static inline int a_ctz_64(uint64_t x)
1474 -{
1475 - uint32_t y = x;
1476 - if (!y) {
1477 - y = x>>32;
1478 - return 32 + a_ctz_l(y);
1479 - }
1480 - return a_ctz_l(y);
1481 -}
1482 -
1483 -static inline int a_cas(volatile int *p, int t, int s)
1484 -{
1485 - register int old, tmp;
1486 - __asm__ __volatile__ (
1487 - " addi %0, r0, 0\n"
1488 - "1: lwx %0, %2, r0\n"
1489 - " rsubk %1, %0, %3\n"
1490 - " bnei %1, 1f\n"
1491 - " swx %4, %2, r0\n"
1492 - " addic %1, r0, 0\n"
1493 - " bnei %1, 1b\n"
1494 - "1: "
1495 - : "=&r"(old), "=&r"(tmp)
1496 - : "r"(p), "r"(t), "r"(s)
1497 - : "cc", "memory" );
1498 - return old;
1499 -}
1500 -
1501 -static inline void *a_cas_p(volatile void *p, void *t, void *s)
1502 -{
1503 - return (void *)a_cas(p, (int)t, (int)s);
1504 -}
1505 -
1506 -static inline int a_swap(volatile int *x, int v)
1507 -{
1508 - register int old, tmp;
1509 - __asm__ __volatile__ (
1510 - " addi %0, r0, 0\n"
1511 - "1: lwx %0, %2, r0\n"
1512 - " swx %3, %2, r0\n"
1513 - " addic %1, r0, 0\n"
1514 - " bnei %1, 1b\n"
1515 - "1: "
1516 - : "=&r"(old), "=&r"(tmp)
1517 - : "r"(x), "r"(v)
1518 - : "cc", "memory" );
1519 - return old;
1520 -}
1521 -
1522 -static inline int a_fetch_add(volatile int *x, int v)
1523 -{
1524 - register int new, tmp;
1525 - __asm__ __volatile__ (
1526 - " addi %0, r0, 0\n"
1527 - "1: lwx %0, %2, r0\n"
1528 - " addk %0, %0, %3\n"
1529 - " swx %0, %2, r0\n"
1530 - " addic %1, r0, 0\n"
1531 - " bnei %1, 1b\n"
1532 - "1: "
1533 - : "=&r"(new), "=&r"(tmp)
1534 - : "r"(x), "r"(v)
1535 - : "cc", "memory" );
1536 - return new-v;
1537 -}
1538 -
1539 -static inline void a_inc(volatile int *x)
1540 -{
1541 - a_fetch_add(x, 1);
1542 -}
1543 -
1544 -static inline void a_dec(volatile int *x)
1545 -{
1546 - a_fetch_add(x, -1);
1547 -}
1548 -
1549 -static inline void a_store(volatile int *p, int x)
1550 -{
1551 - __asm__ __volatile__ (
1552 - "swi %1, %0"
1553 - : "=m"(*p) : "r"(x) : "memory" );
1554 -}
1555 -
1556 -#define a_spin a_barrier
1557 -
1558 -static inline void a_barrier()
1559 -{
1560 - a_cas(&(int){0}, 0, 0);
1561 -}
1562 -
1563 -static inline void a_crash()
1564 -{
1565 - *(volatile char *)0=0;
1566 -}
1567 -
1568 -static inline void a_and(volatile int *p, int v)
1569 -{
1570 - int old;
1571 - do old = *p;
1572 - while (a_cas(p, old, old&v) != old);
1573 -}
1574 -
1575 -static inline void a_or(volatile int *p, int v)
1576 -{
1577 - int old;
1578 - do old = *p;
1579 - while (a_cas(p, old, old|v) != old);
1580 -}
1581 -
1582 -static inline void a_or_l(volatile void *p, long v)
1583 -{
1584 - a_or(p, v);
1585 -}
1586 -
1587 -static inline void a_and_64(volatile uint64_t *p, uint64_t v)
1588 -{
1589 - union { uint64_t v; uint32_t r[2]; } u = { v };
1590 - a_and((int *)p, u.r[0]);
1591 - a_and((int *)p+1, u.r[1]);
1592 -}
1593 -
1594 -static inline void a_or_64(volatile uint64_t *p, uint64_t v)
1595 -{
1596 - union { uint64_t v; uint32_t r[2]; } u = { v };
1597 - a_or((int *)p, u.r[0]);
1598 - a_or((int *)p+1, u.r[1]);
1599 -}
1600 -
1601 -#endif
1602 --- /dev/null
1603 +++ b/arch/microblaze/atomic_arch.h
1604 @@ -0,0 +1,53 @@
1605 +#define a_cas a_cas
1606 +static inline int a_cas(volatile int *p, int t, int s)
1607 +{
1608 + register int old, tmp;
1609 + __asm__ __volatile__ (
1610 + " addi %0, r0, 0\n"
1611 + "1: lwx %0, %2, r0\n"
1612 + " rsubk %1, %0, %3\n"
1613 + " bnei %1, 1f\n"
1614 + " swx %4, %2, r0\n"
1615 + " addic %1, r0, 0\n"
1616 + " bnei %1, 1b\n"
1617 + "1: "
1618 + : "=&r"(old), "=&r"(tmp)
1619 + : "r"(p), "r"(t), "r"(s)
1620 + : "cc", "memory" );
1621 + return old;
1622 +}
1623 +
1624 +#define a_swap a_swap
1625 +static inline int a_swap(volatile int *x, int v)
1626 +{
1627 + register int old, tmp;
1628 + __asm__ __volatile__ (
1629 + " addi %0, r0, 0\n"
1630 + "1: lwx %0, %2, r0\n"
1631 + " swx %3, %2, r0\n"
1632 + " addic %1, r0, 0\n"
1633 + " bnei %1, 1b\n"
1634 + "1: "
1635 + : "=&r"(old), "=&r"(tmp)
1636 + : "r"(x), "r"(v)
1637 + : "cc", "memory" );
1638 + return old;
1639 +}
1640 +
1641 +#define a_fetch_add a_fetch_add
1642 +static inline int a_fetch_add(volatile int *x, int v)
1643 +{
1644 + register int new, tmp;
1645 + __asm__ __volatile__ (
1646 + " addi %0, r0, 0\n"
1647 + "1: lwx %0, %2, r0\n"
1648 + " addk %0, %0, %3\n"
1649 + " swx %0, %2, r0\n"
1650 + " addic %1, r0, 0\n"
1651 + " bnei %1, 1b\n"
1652 + "1: "
1653 + : "=&r"(new), "=&r"(tmp)
1654 + : "r"(x), "r"(v)
1655 + : "cc", "memory" );
1656 + return new-v;
1657 +}
1658 --- a/arch/microblaze/pthread_arch.h
1659 +++ b/arch/microblaze/pthread_arch.h
1660 @@ -7,4 +7,4 @@ static inline struct pthread *__pthread_
1661
1662 #define TP_ADJ(p) (p)
1663
1664 -#define CANCEL_REG_IP 32
1665 +#define MC_PC regs.pc
1666 --- a/arch/mips/atomic.h
1667 +++ /dev/null
1668 @@ -1,205 +0,0 @@
1669 -#ifndef _INTERNAL_ATOMIC_H
1670 -#define _INTERNAL_ATOMIC_H
1671 -
1672 -#include <stdint.h>
1673 -
1674 -static inline int a_ctz_l(unsigned long x)
1675 -{
1676 - static const char debruijn32[32] = {
1677 - 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
1678 - 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
1679 - };
1680 - return debruijn32[(x&-x)*0x076be629 >> 27];
1681 -}
1682 -
1683 -static inline int a_ctz_64(uint64_t x)
1684 -{
1685 - uint32_t y = x;
1686 - if (!y) {
1687 - y = x>>32;
1688 - return 32 + a_ctz_l(y);
1689 - }
1690 - return a_ctz_l(y);
1691 -}
1692 -
1693 -static inline int a_cas(volatile int *p, int t, int s)
1694 -{
1695 - int dummy;
1696 - __asm__ __volatile__(
1697 - ".set push\n"
1698 - ".set mips2\n"
1699 - ".set noreorder\n"
1700 - " sync\n"
1701 - "1: ll %0, %2\n"
1702 - " bne %0, %3, 1f\n"
1703 - " addu %1, %4, $0\n"
1704 - " sc %1, %2\n"
1705 - " beq %1, $0, 1b\n"
1706 - " nop\n"
1707 - " sync\n"
1708 - "1: \n"
1709 - ".set pop\n"
1710 - : "=&r"(t), "=&r"(dummy), "+m"(*p) : "r"(t), "r"(s) : "memory" );
1711 - return t;
1712 -}
1713 -
1714 -static inline void *a_cas_p(volatile void *p, void *t, void *s)
1715 -{
1716 - return (void *)a_cas(p, (int)t, (int)s);
1717 -}
1718 -
1719 -static inline int a_swap(volatile int *x, int v)
1720 -{
1721 - int old, dummy;
1722 - __asm__ __volatile__(
1723 - ".set push\n"
1724 - ".set mips2\n"
1725 - ".set noreorder\n"
1726 - " sync\n"
1727 - "1: ll %0, %2\n"
1728 - " addu %1, %3, $0\n"
1729 - " sc %1, %2\n"
1730 - " beq %1, $0, 1b\n"
1731 - " nop\n"
1732 - " sync\n"
1733 - ".set pop\n"
1734 - : "=&r"(old), "=&r"(dummy), "+m"(*x) : "r"(v) : "memory" );
1735 - return old;
1736 -}
1737 -
1738 -static inline int a_fetch_add(volatile int *x, int v)
1739 -{
1740 - int old, dummy;
1741 - __asm__ __volatile__(
1742 - ".set push\n"
1743 - ".set mips2\n"
1744 - ".set noreorder\n"
1745 - " sync\n"
1746 - "1: ll %0, %2\n"
1747 - " addu %1, %0, %3\n"
1748 - " sc %1, %2\n"
1749 - " beq %1, $0, 1b\n"
1750 - " nop\n"
1751 - " sync\n"
1752 - ".set pop\n"
1753 - : "=&r"(old), "=&r"(dummy), "+m"(*x) : "r"(v) : "memory" );
1754 - return old;
1755 -}
1756 -
1757 -static inline void a_inc(volatile int *x)
1758 -{
1759 - int dummy;
1760 - __asm__ __volatile__(
1761 - ".set push\n"
1762 - ".set mips2\n"
1763 - ".set noreorder\n"
1764 - " sync\n"
1765 - "1: ll %0, %1\n"
1766 - " addu %0, %0, 1\n"
1767 - " sc %0, %1\n"
1768 - " beq %0, $0, 1b\n"
1769 - " nop\n"
1770 - " sync\n"
1771 - ".set pop\n"
1772 - : "=&r"(dummy), "+m"(*x) : : "memory" );
1773 -}
1774 -
1775 -static inline void a_dec(volatile int *x)
1776 -{
1777 - int dummy;
1778 - __asm__ __volatile__(
1779 - ".set push\n"
1780 - ".set mips2\n"
1781 - ".set noreorder\n"
1782 - " sync\n"
1783 - "1: ll %0, %1\n"
1784 - " subu %0, %0, 1\n"
1785 - " sc %0, %1\n"
1786 - " beq %0, $0, 1b\n"
1787 - " nop\n"
1788 - " sync\n"
1789 - ".set pop\n"
1790 - : "=&r"(dummy), "+m"(*x) : : "memory" );
1791 -}
1792 -
1793 -static inline void a_store(volatile int *p, int x)
1794 -{
1795 - __asm__ __volatile__(
1796 - ".set push\n"
1797 - ".set mips2\n"
1798 - ".set noreorder\n"
1799 - " sync\n"
1800 - " sw %1, %0\n"
1801 - " sync\n"
1802 - ".set pop\n"
1803 - : "+m"(*p) : "r"(x) : "memory" );
1804 -}
1805 -
1806 -#define a_spin a_barrier
1807 -
1808 -static inline void a_barrier()
1809 -{
1810 - a_cas(&(int){0}, 0, 0);
1811 -}
1812 -
1813 -static inline void a_crash()
1814 -{
1815 - *(volatile char *)0=0;
1816 -}
1817 -
1818 -static inline void a_and(volatile int *p, int v)
1819 -{
1820 - int dummy;
1821 - __asm__ __volatile__(
1822 - ".set push\n"
1823 - ".set mips2\n"
1824 - ".set noreorder\n"
1825 - " sync\n"
1826 - "1: ll %0, %1\n"
1827 - " and %0, %0, %2\n"
1828 - " sc %0, %1\n"
1829 - " beq %0, $0, 1b\n"
1830 - " nop\n"
1831 - " sync\n"
1832 - ".set pop\n"
1833 - : "=&r"(dummy), "+m"(*p) : "r"(v) : "memory" );
1834 -}
1835 -
1836 -static inline void a_or(volatile int *p, int v)
1837 -{
1838 - int dummy;
1839 - __asm__ __volatile__(
1840 - ".set push\n"
1841 - ".set mips2\n"
1842 - ".set noreorder\n"
1843 - " sync\n"
1844 - "1: ll %0, %1\n"
1845 - " or %0, %0, %2\n"
1846 - " sc %0, %1\n"
1847 - " beq %0, $0, 1b\n"
1848 - " nop\n"
1849 - " sync\n"
1850 - ".set pop\n"
1851 - : "=&r"(dummy), "+m"(*p) : "r"(v) : "memory" );
1852 -}
1853 -
1854 -static inline void a_or_l(volatile void *p, long v)
1855 -{
1856 - a_or(p, v);
1857 -}
1858 -
1859 -static inline void a_and_64(volatile uint64_t *p, uint64_t v)
1860 -{
1861 - union { uint64_t v; uint32_t r[2]; } u = { v };
1862 - a_and((int *)p, u.r[0]);
1863 - a_and((int *)p+1, u.r[1]);
1864 -}
1865 -
1866 -static inline void a_or_64(volatile uint64_t *p, uint64_t v)
1867 -{
1868 - union { uint64_t v; uint32_t r[2]; } u = { v };
1869 - a_or((int *)p, u.r[0]);
1870 - a_or((int *)p+1, u.r[1]);
1871 -}
1872 -
1873 -#endif
1874 --- /dev/null
1875 +++ b/arch/mips/atomic_arch.h
1876 @@ -0,0 +1,39 @@
1877 +#define a_ll a_ll
1878 +static inline int a_ll(volatile int *p)
1879 +{
1880 + int v;
1881 + __asm__ __volatile__ (
1882 + ".set push ; .set mips2\n\t"
1883 + "ll %0, %1"
1884 + "\n\t.set pop"
1885 + : "=r"(v) : "m"(*p));
1886 + return v;
1887 +}
1888 +
1889 +#define a_sc a_sc
1890 +static inline int a_sc(volatile int *p, int v)
1891 +{
1892 + int r;
1893 + __asm__ __volatile__ (
1894 + ".set push ; .set mips2\n\t"
1895 + "sc %0, %1"
1896 + "\n\t.set pop"
1897 + : "=r"(r), "=m"(*p) : "0"(v) : "memory");
1898 + return r;
1899 +}
1900 +
1901 +#define a_barrier a_barrier
1902 +static inline void a_barrier()
1903 +{
1904 + /* mips2 sync, but using too many directives causes
1905 + * gcc not to inline it, so encode with .long instead. */
1906 + __asm__ __volatile__ (".long 0xf" : : : "memory");
1907 +#if 0
1908 + __asm__ __volatile__ (
1909 + ".set push ; .set mips2 ; sync ; .set pop"
1910 + : : : "memory");
1911 +#endif
1912 +}
1913 +
1914 +#define a_pre_llsc a_barrier
1915 +#define a_post_llsc a_barrier
1916 --- a/arch/mips/crt_arch.h
1917 +++ b/arch/mips/crt_arch.h
1918 @@ -4,13 +4,16 @@ __asm__(
1919 ".text \n"
1920 ".global _" START "\n"
1921 ".global " START "\n"
1922 +".global " START "_data\n"
1923 ".type _" START ", @function\n"
1924 ".type " START ", @function\n"
1925 +".type " START "_data, @function\n"
1926 "_" START ":\n"
1927 "" START ":\n"
1928 " bal 1f \n"
1929 " move $fp, $0 \n"
1930 -"2: .gpword 2b \n"
1931 +"" START "_data: \n"
1932 +" .gpword " START "_data \n"
1933 " .gpword " START "_c \n"
1934 ".weak _DYNAMIC \n"
1935 ".hidden _DYNAMIC \n"
1936 --- a/arch/mips/pthread_arch.h
1937 +++ b/arch/mips/pthread_arch.h
1938 @@ -16,4 +16,4 @@ static inline struct pthread *__pthread_
1939
1940 #define DTP_OFFSET 0x8000
1941
1942 -#define CANCEL_REG_IP (3-(union {int __i; char __b;}){1}.__b)
1943 +#define MC_PC pc
1944 --- a/arch/mips/syscall_arch.h
1945 +++ b/arch/mips/syscall_arch.h
1946 @@ -3,9 +3,7 @@
1947 ((union { long long ll; long l[2]; }){ .ll = x }).l[1]
1948 #define __SYSCALL_LL_O(x) 0, __SYSCALL_LL_E((x))
1949
1950 -#ifdef SHARED
1951 __attribute__((visibility("hidden")))
1952 -#endif
1953 long (__syscall)(long, ...);
1954
1955 #define SYSCALL_RLIM_INFINITY (-1UL/2)
1956 --- a/arch/or1k/atomic.h
1957 +++ /dev/null
1958 @@ -1,120 +0,0 @@
1959 -#ifndef _INTERNAL_ATOMIC_H
1960 -#define _INTERNAL_ATOMIC_H
1961 -
1962 -#include <stdint.h>
1963 -
1964 -static inline int a_ctz_l(unsigned long x)
1965 -{
1966 - static const char debruijn32[32] = {
1967 - 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
1968 - 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
1969 - };
1970 - return debruijn32[(x&-x)*0x076be629 >> 27];
1971 -}
1972 -
1973 -static inline int a_ctz_64(uint64_t x)
1974 -{
1975 - uint32_t y = x;
1976 - if (!y) {
1977 - y = x>>32;
1978 - return 32 + a_ctz_l(y);
1979 - }
1980 - return a_ctz_l(y);
1981 -}
1982 -
1983 -static inline int a_cas(volatile int *p, int t, int s)
1984 -{
1985 - __asm__("1: l.lwa %0, %1\n"
1986 - " l.sfeq %0, %2\n"
1987 - " l.bnf 1f\n"
1988 - " l.nop\n"
1989 - " l.swa %1, %3\n"
1990 - " l.bnf 1b\n"
1991 - " l.nop\n"
1992 - "1: \n"
1993 - : "=&r"(t), "+m"(*p) : "r"(t), "r"(s) : "cc", "memory" );
1994 - return t;
1995 -}
1996 -
1997 -static inline void *a_cas_p(volatile void *p, void *t, void *s)
1998 -{
1999 - return (void *)a_cas(p, (int)t, (int)s);
2000 -}
2001 -
2002 -static inline int a_swap(volatile int *x, int v)
2003 -{
2004 - int old;
2005 - do old = *x;
2006 - while (a_cas(x, old, v) != old);
2007 - return old;
2008 -}
2009 -
2010 -static inline int a_fetch_add(volatile int *x, int v)
2011 -{
2012 - int old;
2013 - do old = *x;
2014 - while (a_cas(x, old, old+v) != old);
2015 - return old;
2016 -}
2017 -
2018 -static inline void a_inc(volatile int *x)
2019 -{
2020 - a_fetch_add(x, 1);
2021 -}
2022 -
2023 -static inline void a_dec(volatile int *x)
2024 -{
2025 - a_fetch_add(x, -1);
2026 -}
2027 -
2028 -static inline void a_store(volatile int *p, int x)
2029 -{
2030 - a_swap(p, x);
2031 -}
2032 -
2033 -#define a_spin a_barrier
2034 -
2035 -static inline void a_barrier()
2036 -{
2037 - a_cas(&(int){0}, 0, 0);
2038 -}
2039 -
2040 -static inline void a_crash()
2041 -{
2042 - *(volatile char *)0=0;
2043 -}
2044 -
2045 -static inline void a_and(volatile int *p, int v)
2046 -{
2047 - int old;
2048 - do old = *p;
2049 - while (a_cas(p, old, old&v) != old);
2050 -}
2051 -
2052 -static inline void a_or(volatile int *p, int v)
2053 -{
2054 - int old;
2055 - do old = *p;
2056 - while (a_cas(p, old, old|v) != old);
2057 -}
2058 -
2059 -static inline void a_or_l(volatile void *p, long v)
2060 -{
2061 - a_or(p, v);
2062 -}
2063 -
2064 -static inline void a_and_64(volatile uint64_t *p, uint64_t v)
2065 -{
2066 - union { uint64_t v; uint32_t r[2]; } u = { v };
2067 - a_and((int *)p, u.r[0]);
2068 - a_and((int *)p+1, u.r[1]);
2069 -}
2070 -
2071 -static inline void a_or_64(volatile uint64_t *p, uint64_t v)
2072 -{
2073 - union { uint64_t v; uint32_t r[2]; } u = { v };
2074 - a_or((int *)p, u.r[0]);
2075 - a_or((int *)p+1, u.r[1]);
2076 -}
2077 -
2078 -#endif
2079 --- /dev/null
2080 +++ b/arch/or1k/atomic_arch.h
2081 @@ -0,0 +1,14 @@
2082 +#define a_cas a_cas
2083 +static inline int a_cas(volatile int *p, int t, int s)
2084 +{
2085 + __asm__("1: l.lwa %0, %1\n"
2086 + " l.sfeq %0, %2\n"
2087 + " l.bnf 1f\n"
2088 + " l.nop\n"
2089 + " l.swa %1, %3\n"
2090 + " l.bnf 1b\n"
2091 + " l.nop\n"
2092 + "1: \n"
2093 + : "=&r"(t), "+m"(*p) : "r"(t), "r"(s) : "cc", "memory" );
2094 + return t;
2095 +}
2096 --- a/arch/or1k/pthread_arch.h
2097 +++ b/arch/or1k/pthread_arch.h
2098 @@ -14,5 +14,4 @@ static inline struct pthread *__pthread_
2099 #define TLS_ABOVE_TP
2100 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread))
2101
2102 -/* word-offset to 'pc' in mcontext_t */
2103 -#define CANCEL_REG_IP 32
2104 +#define MC_PC regs.pc
2105 --- a/arch/powerpc/atomic.h
2106 +++ /dev/null
2107 @@ -1,126 +0,0 @@
2108 -#ifndef _INTERNAL_ATOMIC_H
2109 -#define _INTERNAL_ATOMIC_H
2110 -
2111 -#include <stdint.h>
2112 -#include <endian.h>
2113 -
2114 -static inline int a_ctz_l(unsigned long x)
2115 -{
2116 - static const char debruijn32[32] = {
2117 - 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
2118 - 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
2119 - };
2120 - return debruijn32[(x&-x)*0x076be629 >> 27];
2121 -}
2122 -
2123 -static inline int a_ctz_64(uint64_t x)
2124 -{
2125 - uint32_t y = x;
2126 - if (!y) {
2127 - y = x>>32;
2128 - return 32 + a_ctz_l(y);
2129 - }
2130 - return a_ctz_l(y);
2131 -}
2132 -
2133 -static inline int a_cas(volatile int *p, int t, int s)
2134 -{
2135 - __asm__("\n"
2136 - " sync\n"
2137 - "1: lwarx %0, 0, %4\n"
2138 - " cmpw %0, %2\n"
2139 - " bne 1f\n"
2140 - " stwcx. %3, 0, %4\n"
2141 - " bne- 1b\n"
2142 - " isync\n"
2143 - "1: \n"
2144 - : "=&r"(t), "+m"(*p) : "r"(t), "r"(s), "r"(p) : "cc", "memory" );
2145 - return t;
2146 -}
2147 -
2148 -static inline void *a_cas_p(volatile void *p, void *t, void *s)
2149 -{
2150 - return (void *)a_cas(p, (int)t, (int)s);
2151 -}
2152 -
2153 -static inline int a_swap(volatile int *x, int v)
2154 -{
2155 - int old;
2156 - do old = *x;
2157 - while (a_cas(x, old, v) != old);
2158 - return old;
2159 -}
2160 -
2161 -static inline int a_fetch_add(volatile int *x, int v)
2162 -{
2163 - int old;
2164 - do old = *x;
2165 - while (a_cas(x, old, old+v) != old);
2166 - return old;
2167 -}
2168 -
2169 -static inline void a_inc(volatile int *x)
2170 -{
2171 - a_fetch_add(x, 1);
2172 -}
2173 -
2174 -static inline void a_dec(volatile int *x)
2175 -{
2176 - a_fetch_add(x, -1);
2177 -}
2178 -
2179 -static inline void a_store(volatile int *p, int x)
2180 -{
2181 - __asm__ __volatile__ ("\n"
2182 - " sync\n"
2183 - " stw %1, %0\n"
2184 - " isync\n"
2185 - : "=m"(*p) : "r"(x) : "memory" );
2186 -}
2187 -
2188 -#define a_spin a_barrier
2189 -
2190 -static inline void a_barrier()
2191 -{
2192 - a_cas(&(int){0}, 0, 0);
2193 -}
2194 -
2195 -static inline void a_crash()
2196 -{
2197 - *(volatile char *)0=0;
2198 -}
2199 -
2200 -static inline void a_and(volatile int *p, int v)
2201 -{
2202 - int old;
2203 - do old = *p;
2204 - while (a_cas(p, old, old&v) != old);
2205 -}
2206 -
2207 -static inline void a_or(volatile int *p, int v)
2208 -{
2209 - int old;
2210 - do old = *p;
2211 - while (a_cas(p, old, old|v) != old);
2212 -}
2213 -
2214 -static inline void a_or_l(volatile void *p, long v)
2215 -{
2216 - a_or(p, v);
2217 -}
2218 -
2219 -static inline void a_and_64(volatile uint64_t *p, uint64_t v)
2220 -{
2221 - union { uint64_t v; uint32_t r[2]; } u = { v };
2222 - a_and((int *)p, u.r[0]);
2223 - a_and((int *)p+1, u.r[1]);
2224 -}
2225 -
2226 -static inline void a_or_64(volatile uint64_t *p, uint64_t v)
2227 -{
2228 - union { uint64_t v; uint32_t r[2]; } u = { v };
2229 - a_or((int *)p, u.r[0]);
2230 - a_or((int *)p+1, u.r[1]);
2231 -}
2232 -
2233 -#endif
2234 --- /dev/null
2235 +++ b/arch/powerpc/atomic_arch.h
2236 @@ -0,0 +1,15 @@
2237 +#define a_cas a_cas
2238 +static inline int a_cas(volatile int *p, int t, int s)
2239 +{
2240 + __asm__("\n"
2241 + " sync\n"
2242 + "1: lwarx %0, 0, %4\n"
2243 + " cmpw %0, %2\n"
2244 + " bne 1f\n"
2245 + " stwcx. %3, 0, %4\n"
2246 + " bne- 1b\n"
2247 + " isync\n"
2248 + "1: \n"
2249 + : "=&r"(t), "+m"(*p) : "r"(t), "r"(s), "r"(p) : "cc", "memory" );
2250 + return t;
2251 +}
2252 --- a/arch/powerpc/pthread_arch.h
2253 +++ b/arch/powerpc/pthread_arch.h
2254 @@ -15,9 +15,8 @@ static inline struct pthread *__pthread_
2255
2256 #define DTP_OFFSET 0x8000
2257
2258 -// offset of the PC register in mcontext_t, divided by the system wordsize
2259 // the kernel calls the ip "nip", it's the first saved value after the 32
2260 // GPRs.
2261 -#define CANCEL_REG_IP 32
2262 +#define MC_PC gregs[32]
2263
2264 #define CANARY canary_at_end
2265 --- a/arch/sh/atomic.h
2266 +++ /dev/null
2267 @@ -1,168 +0,0 @@
2268 -#ifndef _INTERNAL_ATOMIC_H
2269 -#define _INTERNAL_ATOMIC_H
2270 -
2271 -#include <stdint.h>
2272 -
2273 -static inline int a_ctz_l(unsigned long x)
2274 -{
2275 - static const char debruijn32[32] = {
2276 - 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
2277 - 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
2278 - };
2279 - return debruijn32[(x&-x)*0x076be629 >> 27];
2280 -}
2281 -
2282 -static inline int a_ctz_64(uint64_t x)
2283 -{
2284 - uint32_t y = x;
2285 - if (!y) {
2286 - y = x>>32;
2287 - return 32 + a_ctz_l(y);
2288 - }
2289 - return a_ctz_l(y);
2290 -}
2291 -
2292 -#define LLSC_CLOBBERS "r0", "t", "memory"
2293 -#define LLSC_START(mem) "synco\n" \
2294 - "0: movli.l @" mem ", r0\n"
2295 -#define LLSC_END(mem) \
2296 - "1: movco.l r0, @" mem "\n" \
2297 - " bf 0b\n" \
2298 - " synco\n"
2299 -
2300 -static inline int __sh_cas_llsc(volatile int *p, int t, int s)
2301 -{
2302 - int old;
2303 - __asm__ __volatile__(
2304 - LLSC_START("%1")
2305 - " mov r0, %0\n"
2306 - " cmp/eq %0, %2\n"
2307 - " bf 1f\n"
2308 - " mov %3, r0\n"
2309 - LLSC_END("%1")
2310 - : "=&r"(old) : "r"(p), "r"(t), "r"(s) : LLSC_CLOBBERS);
2311 - return old;
2312 -}
2313 -
2314 -static inline int __sh_swap_llsc(volatile int *x, int v)
2315 -{
2316 - int old;
2317 - __asm__ __volatile__(
2318 - LLSC_START("%1")
2319 - " mov r0, %0\n"
2320 - " mov %2, r0\n"
2321 - LLSC_END("%1")
2322 - : "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
2323 - return old;
2324 -}
2325 -
2326 -static inline int __sh_fetch_add_llsc(volatile int *x, int v)
2327 -{
2328 - int old;
2329 - __asm__ __volatile__(
2330 - LLSC_START("%1")
2331 - " mov r0, %0\n"
2332 - " add %2, r0\n"
2333 - LLSC_END("%1")
2334 - : "=&r"(old) : "r"(x), "r"(v) : LLSC_CLOBBERS);
2335 - return old;
2336 -}
2337 -
2338 -static inline void __sh_store_llsc(volatile int *p, int x)
2339 -{
2340 - __asm__ __volatile__(
2341 - " synco\n"
2342 - " mov.l %1, @%0\n"
2343 - " synco\n"
2344 - : : "r"(p), "r"(x) : "memory");
2345 -}
2346 -
2347 -static inline void __sh_and_llsc(volatile int *x, int v)
2348 -{
2349 - __asm__ __volatile__(
2350 - LLSC_START("%0")
2351 - " and %1, r0\n"
2352 - LLSC_END("%0")
2353 - : : "r"(x), "r"(v) : LLSC_CLOBBERS);
2354 -}
2355 -
2356 -static inline void __sh_or_llsc(volatile int *x, int v)
2357 -{
2358 - __asm__ __volatile__(
2359 - LLSC_START("%0")
2360 - " or %1, r0\n"
2361 - LLSC_END("%0")
2362 - : : "r"(x), "r"(v) : LLSC_CLOBBERS);
2363 -}
2364 -
2365 -#ifdef __SH4A__
2366 -#define a_cas(p,t,s) __sh_cas_llsc(p,t,s)
2367 -#define a_swap(x,v) __sh_swap_llsc(x,v)
2368 -#define a_fetch_add(x,v) __sh_fetch_add_llsc(x, v)
2369 -#define a_store(x,v) __sh_store_llsc(x, v)
2370 -#define a_and(x,v) __sh_and_llsc(x, v)
2371 -#define a_or(x,v) __sh_or_llsc(x, v)
2372 -#else
2373 -
2374 -int __sh_cas(volatile int *, int, int);
2375 -int __sh_swap(volatile int *, int);
2376 -int __sh_fetch_add(volatile int *, int);
2377 -void __sh_store(volatile int *, int);
2378 -void __sh_and(volatile int *, int);
2379 -void __sh_or(volatile int *, int);
2380 -
2381 -#define a_cas(p,t,s) __sh_cas(p,t,s)
2382 -#define a_swap(x,v) __sh_swap(x,v)
2383 -#define a_fetch_add(x,v) __sh_fetch_add(x, v)
2384 -#define a_store(x,v) __sh_store(x, v)
2385 -#define a_and(x,v) __sh_and(x, v)
2386 -#define a_or(x,v) __sh_or(x, v)
2387 -#endif
2388 -
2389 -static inline void *a_cas_p(volatile void *p, void *t, void *s)
2390 -{
2391 - return (void *)a_cas(p, (int)t, (int)s);
2392 -}
2393 -
2394 -static inline void a_inc(volatile int *x)
2395 -{
2396 - a_fetch_add(x, 1);
2397 -}
2398 -
2399 -static inline void a_dec(volatile int *x)
2400 -{
2401 - a_fetch_add(x, -1);
2402 -}
2403 -
2404 -#define a_spin a_barrier
2405 -
2406 -static inline void a_barrier()
2407 -{
2408 - a_cas(&(int){0}, 0, 0);
2409 -}
2410 -
2411 -static inline void a_crash()
2412 -{
2413 - *(volatile char *)0=0;
2414 -}
2415 -
2416 -static inline void a_or_l(volatile void *p, long v)
2417 -{
2418 - a_or(p, v);
2419 -}
2420 -
2421 -static inline void a_and_64(volatile uint64_t *p, uint64_t v)
2422 -{
2423 - union { uint64_t v; uint32_t r[2]; } u = { v };
2424 - a_and((int *)p, u.r[0]);
2425 - a_and((int *)p+1, u.r[1]);
2426 -}
2427 -
2428 -static inline void a_or_64(volatile uint64_t *p, uint64_t v)
2429 -{
2430 - union { uint64_t v; uint32_t r[2]; } u = { v };
2431 - a_or((int *)p, u.r[0]);
2432 - a_or((int *)p+1, u.r[1]);
2433 -}
2434 -
2435 -#endif
2436 --- /dev/null
2437 +++ b/arch/sh/atomic_arch.h
2438 @@ -0,0 +1,46 @@
2439 +#if defined(__SH4A__)
2440 +
2441 +#define a_ll a_ll
2442 +static inline int a_ll(volatile int *p)
2443 +{
2444 + int v;
2445 + __asm__ __volatile__ ("movli.l @%1, %0" : "=z"(v) : "r"(p), "m"(*p));
2446 + return v;
2447 +}
2448 +
2449 +#define a_sc a_sc
2450 +static inline int a_sc(volatile int *p, int v)
2451 +{
2452 + int r;
2453 + __asm__ __volatile__ (
2454 + "movco.l %2, @%3 ; movt %0"
2455 + : "=r"(r), "=m"(*p) : "z"(v), "r"(p) : "memory", "cc");
2456 + return r;
2457 +}
2458 +
2459 +#define a_barrier a_barrier
2460 +static inline void a_barrier()
2461 +{
2462 + __asm__ __volatile__ ("synco" : : "memory");
2463 +}
2464 +
2465 +#define a_pre_llsc a_barrier
2466 +#define a_post_llsc a_barrier
2467 +
2468 +#else
2469 +
2470 +#define a_cas a_cas
2471 +__attribute__((__visibility__("hidden"))) extern const void *__sh_cas_ptr;
2472 +static inline int a_cas(volatile int *p, int t, int s)
2473 +{
2474 + register int r1 __asm__("r1");
2475 + register int r2 __asm__("r2") = t;
2476 + register int r3 __asm__("r3") = s;
2477 + __asm__ __volatile__ (
2478 + "jsr @%4 ; nop"
2479 + : "=r"(r1), "+r"(r3) : "z"(p), "r"(r2), "r"(__sh_cas_ptr)
2480 + : "memory", "pr", "cc");
2481 + return r3;
2482 +}
2483 +
2484 +#endif
2485 --- a/arch/sh/crt_arch.h
2486 +++ b/arch/sh/crt_arch.h
2487 @@ -22,7 +22,8 @@ START ": \n"
2488 " mov.l 1f, r5 \n"
2489 " mov.l 1f+4, r6 \n"
2490 " add r0, r5 \n"
2491 -" bsr __fdpic_fixup \n"
2492 +" mov.l 4f, r1 \n"
2493 +"5: bsrf r1 \n"
2494 " add r0, r6 \n"
2495 " mov r0, r12 \n"
2496 #endif
2497 @@ -31,11 +32,16 @@ START ": \n"
2498 " mov.l r9, @-r15 \n"
2499 " mov.l r8, @-r15 \n"
2500 " mov #-16, r0 \n"
2501 -" bsr " START "_c \n"
2502 +" mov.l 2f, r1 \n"
2503 +"3: bsrf r1 \n"
2504 " and r0, r15 \n"
2505 ".align 2 \n"
2506 "1: .long __ROFIXUP_LIST__@PCREL \n"
2507 " .long __ROFIXUP_END__@PCREL + 4 \n"
2508 +"2: .long " START "_c@PCREL - (3b+4-.) \n"
2509 +#ifndef SHARED
2510 +"4: .long __fdpic_fixup@PCREL - (5b+4-.) \n"
2511 +#endif
2512 );
2513
2514 #ifndef SHARED
2515 @@ -53,13 +59,14 @@ START ": \n"
2516 " add r0, r5 \n"
2517 " mov r15, r4 \n"
2518 " mov #-16, r0 \n"
2519 -" and r0, r15 \n"
2520 -" bsr " START "_c \n"
2521 -" nop \n"
2522 +" mov.l 2f, r1 \n"
2523 +"3: bsrf r1 \n"
2524 +" and r0, r15 \n"
2525 ".align 2 \n"
2526 ".weak _DYNAMIC \n"
2527 ".hidden _DYNAMIC \n"
2528 "1: .long _DYNAMIC-. \n"
2529 +"2: .long " START "_c@PCREL - (3b+4-.) \n"
2530 );
2531
2532 #endif
2533 --- a/arch/sh/pthread_arch.h
2534 +++ b/arch/sh/pthread_arch.h
2535 @@ -8,4 +8,4 @@ static inline struct pthread *__pthread_
2536 #define TLS_ABOVE_TP
2537 #define TP_ADJ(p) ((char *)(p) + sizeof(struct pthread) - 8)
2538
2539 -#define CANCEL_REG_IP 17
2540 +#define MC_PC sc_pc
2541 --- a/arch/sh/reloc.h
2542 +++ b/arch/sh/reloc.h
2543 @@ -32,6 +32,8 @@
2544 #define REL_DTPOFF R_SH_TLS_DTPOFF32
2545 #define REL_TPOFF R_SH_TLS_TPOFF32
2546
2547 +#define DL_NOMMU_SUPPORT 1
2548 +
2549 #if __SH_FDPIC__
2550 #define REL_FUNCDESC R_SH_FUNCDESC
2551 #define REL_FUNCDESC_VAL R_SH_FUNCDESC_VALUE
2552 --- a/arch/sh/src/__set_thread_area.c
2553 +++ /dev/null
2554 @@ -1,34 +0,0 @@
2555 -#include "pthread_impl.h"
2556 -#include "libc.h"
2557 -#include "sh_atomic.h"
2558 -#include <elf.h>
2559 -
2560 -/* Also perform sh-specific init */
2561 -
2562 -#define CPU_HAS_LLSC 0x0040
2563 -
2564 -__attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model, __sh_nommu;
2565 -
2566 -int __set_thread_area(void *p)
2567 -{
2568 - size_t *aux;
2569 - __asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" );
2570 -#ifndef __SH4A__
2571 - if (__hwcap & CPU_HAS_LLSC) {
2572 - __sh_atomic_model = SH_A_LLSC;
2573 - return 0;
2574 - }
2575 -#if !defined(__SH3__) && !defined(__SH4__)
2576 - for (aux=libc.auxv; *aux; aux+=2) {
2577 - if (*aux != AT_PLATFORM) continue;
2578 - const char *s = (void *)aux[1];
2579 - if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break;
2580 - __sh_atomic_model = SH_A_IMASK;
2581 - __sh_nommu = 1;
2582 - return 0;
2583 - }
2584 -#endif
2585 - /* __sh_atomic_model = SH_A_GUSA; */ /* 0, default */
2586 -#endif
2587 - return 0;
2588 -}
2589 --- a/arch/sh/src/atomic.c
2590 +++ /dev/null
2591 @@ -1,158 +0,0 @@
2592 -#ifndef __SH4A__
2593 -
2594 -#include "sh_atomic.h"
2595 -#include "atomic.h"
2596 -#include "libc.h"
2597 -
2598 -static inline unsigned mask()
2599 -{
2600 - unsigned sr;
2601 - __asm__ __volatile__ ( "\n"
2602 - " stc sr,r0 \n"
2603 - " mov r0,%0 \n"
2604 - " or #0xf0,r0 \n"
2605 - " ldc r0,sr \n"
2606 - : "=&r"(sr) : : "memory", "r0" );
2607 - return sr;
2608 -}
2609 -
2610 -static inline void unmask(unsigned sr)
2611 -{
2612 - __asm__ __volatile__ ( "ldc %0,sr" : : "r"(sr) : "memory" );
2613 -}
2614 -
2615 -/* gusa is a hack in the kernel which lets you create a sequence of instructions
2616 - * which will be restarted if the process is preempted in the middle of the
2617 - * sequence. It will do for implementing atomics on non-smp systems. ABI is:
2618 - * r0 = address of first instruction after the atomic sequence
2619 - * r1 = original stack pointer
2620 - * r15 = -1 * length of atomic sequence in bytes
2621 - */
2622 -#define GUSA_CLOBBERS "r0", "r1", "memory"
2623 -#define GUSA_START(mem,old,nop) \
2624 - " .align 2\n" \
2625 - " mova 1f, r0\n" \
2626 - nop \
2627 - " mov r15, r1\n" \
2628 - " mov #(0f-1f), r15\n" \
2629 - "0: mov.l @" mem ", " old "\n"
2630 -/* the target of mova must be 4 byte aligned, so we may need a nop */
2631 -#define GUSA_START_ODD(mem,old) GUSA_START(mem,old,"")
2632 -#define GUSA_START_EVEN(mem,old) GUSA_START(mem,old,"\tnop\n")
2633 -#define GUSA_END(mem,new) \
2634 - " mov.l " new ", @" mem "\n" \
2635 - "1: mov r1, r15\n"
2636 -
2637 -int __sh_cas(volatile int *p, int t, int s)
2638 -{
2639 - if (__sh_atomic_model == SH_A_LLSC) return __sh_cas_llsc(p, t, s);
2640 -
2641 - if (__sh_atomic_model == SH_A_IMASK) {
2642 - unsigned sr = mask();
2643 - int old = *p;
2644 - if (old==t) *p = s;
2645 - unmask(sr);
2646 - return old;
2647 - }
2648 -
2649 - int old;
2650 - __asm__ __volatile__(
2651 - GUSA_START_EVEN("%1", "%0")
2652 - " cmp/eq %0, %2\n"
2653 - " bf 1f\n"
2654 - GUSA_END("%1", "%3")
2655 - : "=&r"(old) : "r"(p), "r"(t), "r"(s) : GUSA_CLOBBERS, "t");
2656 - return old;
2657 -}
2658 -
2659 -int __sh_swap(volatile int *x, int v)
2660 -{
2661 - if (__sh_atomic_model == SH_A_LLSC) return __sh_swap_llsc(x, v);
2662 -
2663 - if (__sh_atomic_model == SH_A_IMASK) {
2664 - unsigned sr = mask();
2665 - int old = *x;
2666 - *x = v;
2667 - unmask(sr);
2668 - return old;
2669 - }
2670 -
2671 - int old;
2672 - __asm__ __volatile__(
2673 - GUSA_START_EVEN("%1", "%0")
2674 - GUSA_END("%1", "%2")
2675 - : "=&r"(old) : "r"(x), "r"(v) : GUSA_CLOBBERS);
2676 - return old;
2677 -}
2678 -
2679 -int __sh_fetch_add(volatile int *x, int v)
2680 -{
2681 - if (__sh_atomic_model == SH_A_LLSC) return __sh_fetch_add_llsc(x, v);
2682 -
2683 - if (__sh_atomic_model == SH_A_IMASK) {
2684 - unsigned sr = mask();
2685 - int old = *x;
2686 - *x = old + v;
2687 - unmask(sr);
2688 - return old;
2689 - }
2690 -
2691 - int old, dummy;
2692 - __asm__ __volatile__(
2693 - GUSA_START_EVEN("%2", "%0")
2694 - " mov %0, %1\n"
2695 - " add %3, %1\n"
2696 - GUSA_END("%2", "%1")
2697 - : "=&r"(old), "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
2698 - return old;
2699 -}
2700 -
2701 -void __sh_store(volatile int *p, int x)
2702 -{
2703 - if (__sh_atomic_model == SH_A_LLSC) return __sh_store_llsc(p, x);
2704 - __asm__ __volatile__(
2705 - " mov.l %1, @%0\n"
2706 - : : "r"(p), "r"(x) : "memory");
2707 -}
2708 -
2709 -void __sh_and(volatile int *x, int v)
2710 -{
2711 - if (__sh_atomic_model == SH_A_LLSC) return __sh_and_llsc(x, v);
2712 -
2713 - if (__sh_atomic_model == SH_A_IMASK) {
2714 - unsigned sr = mask();
2715 - int old = *x;
2716 - *x = old & v;
2717 - unmask(sr);
2718 - return;
2719 - }
2720 -
2721 - int dummy;
2722 - __asm__ __volatile__(
2723 - GUSA_START_ODD("%1", "%0")
2724 - " and %2, %0\n"
2725 - GUSA_END("%1", "%0")
2726 - : "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
2727 -}
2728 -
2729 -void __sh_or(volatile int *x, int v)
2730 -{
2731 - if (__sh_atomic_model == SH_A_LLSC) return __sh_or_llsc(x, v);
2732 -
2733 - if (__sh_atomic_model == SH_A_IMASK) {
2734 - unsigned sr = mask();
2735 - int old = *x;
2736 - *x = old | v;
2737 - unmask(sr);
2738 - return;
2739 - }
2740 -
2741 - int dummy;
2742 - __asm__ __volatile__(
2743 - GUSA_START_ODD("%1", "%0")
2744 - " or %2, %0\n"
2745 - GUSA_END("%1", "%0")
2746 - : "=&r"(dummy) : "r"(x), "r"(v) : GUSA_CLOBBERS);
2747 -}
2748 -
2749 -#endif
2750 --- a/arch/sh/src/sh_atomic.h
2751 +++ /dev/null
2752 @@ -1,15 +0,0 @@
2753 -#ifndef _SH_ATOMIC_H
2754 -#define _SH_ATOMIC_H
2755 -
2756 -#define SH_A_GUSA 0
2757 -#define SH_A_LLSC 1
2758 -#define SH_A_CAS 2
2759 -#if !defined(__SH3__) && !defined(__SH4__)
2760 -#define SH_A_IMASK 3
2761 -#else
2762 -#define SH_A_IMASK -1LL /* unmatchable by unsigned int */
2763 -#endif
2764 -
2765 -extern __attribute__((__visibility__("hidden"))) unsigned __sh_atomic_model;
2766 -
2767 -#endif
2768 --- a/arch/x32/atomic.h
2769 +++ /dev/null
2770 @@ -1,105 +0,0 @@
2771 -#ifndef _INTERNAL_ATOMIC_H
2772 -#define _INTERNAL_ATOMIC_H
2773 -
2774 -#include <stdint.h>
2775 -
2776 -static inline int a_ctz_64(uint64_t x)
2777 -{
2778 - __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
2779 - return x;
2780 -}
2781 -
2782 -static inline int a_ctz_l(unsigned long x)
2783 -{
2784 - __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
2785 - return x;
2786 -}
2787 -
2788 -static inline void a_and_64(volatile uint64_t *p, uint64_t v)
2789 -{
2790 - __asm__( "lock ; and %1, %0"
2791 - : "=m"(*p) : "r"(v) : "memory" );
2792 -}
2793 -
2794 -static inline void a_or_64(volatile uint64_t *p, uint64_t v)
2795 -{
2796 - __asm__( "lock ; or %1, %0"
2797 - : "=m"(*p) : "r"(v) : "memory" );
2798 -}
2799 -
2800 -static inline void a_or_l(volatile void *p, long v)
2801 -{
2802 - __asm__( "lock ; or %1, %0"
2803 - : "=m"(*(long *)p) : "r"(v) : "memory" );
2804 -}
2805 -
2806 -static inline void *a_cas_p(volatile void *p, void *t, void *s)
2807 -{
2808 - __asm__( "lock ; cmpxchg %3, %1"
2809 - : "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" );
2810 - return t;
2811 -}
2812 -
2813 -static inline int a_cas(volatile int *p, int t, int s)
2814 -{
2815 - __asm__( "lock ; cmpxchg %3, %1"
2816 - : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
2817 - return t;
2818 -}
2819 -
2820 -static inline void a_or(volatile int *p, int v)
2821 -{
2822 - __asm__( "lock ; or %1, %0"
2823 - : "=m"(*p) : "r"(v) : "memory" );
2824 -}
2825 -
2826 -static inline void a_and(volatile int *p, int v)
2827 -{
2828 - __asm__( "lock ; and %1, %0"
2829 - : "=m"(*p) : "r"(v) : "memory" );
2830 -}
2831 -
2832 -static inline int a_swap(volatile int *x, int v)
2833 -{
2834 - __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
2835 - return v;
2836 -}
2837 -
2838 -static inline int a_fetch_add(volatile int *x, int v)
2839 -{
2840 - __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
2841 - return v;
2842 -}
2843 -
2844 -static inline void a_inc(volatile int *x)
2845 -{
2846 - __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" );
2847 -}
2848 -
2849 -static inline void a_dec(volatile int *x)
2850 -{
2851 - __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" );
2852 -}
2853 -
2854 -static inline void a_store(volatile int *p, int x)
2855 -{
2856 - __asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" );
2857 -}
2858 -
2859 -static inline void a_spin()
2860 -{
2861 - __asm__ __volatile__( "pause" : : : "memory" );
2862 -}
2863 -
2864 -static inline void a_barrier()
2865 -{
2866 - __asm__ __volatile__( "" : : : "memory" );
2867 -}
2868 -
2869 -static inline void a_crash()
2870 -{
2871 - __asm__ __volatile__( "hlt" : : : "memory" );
2872 -}
2873 -
2874 -
2875 -#endif
2876 --- /dev/null
2877 +++ b/arch/x32/atomic_arch.h
2878 @@ -0,0 +1,106 @@
2879 +#define a_ctz_64 a_ctz_64
2880 +static inline int a_ctz_64(uint64_t x)
2881 +{
2882 + __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
2883 + return x;
2884 +}
2885 +
2886 +#define a_ctz_l a_ctz_l
2887 +static inline int a_ctz_l(unsigned long x)
2888 +{
2889 + __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
2890 + return x;
2891 +}
2892 +
2893 +#define a_and_64 a_and_64
2894 +static inline void a_and_64(volatile uint64_t *p, uint64_t v)
2895 +{
2896 + __asm__( "lock ; and %1, %0"
2897 + : "=m"(*p) : "r"(v) : "memory" );
2898 +}
2899 +
2900 +#define a_or_64 a_or_64
2901 +static inline void a_or_64(volatile uint64_t *p, uint64_t v)
2902 +{
2903 + __asm__( "lock ; or %1, %0"
2904 + : "=m"(*p) : "r"(v) : "memory" );
2905 +}
2906 +
2907 +#define a_or_l a_or_l
2908 +static inline void a_or_l(volatile void *p, long v)
2909 +{
2910 + __asm__( "lock ; or %1, %0"
2911 + : "=m"(*(long *)p) : "r"(v) : "memory" );
2912 +}
2913 +
2914 +#define a_cas a_cas
2915 +static inline int a_cas(volatile int *p, int t, int s)
2916 +{
2917 + __asm__( "lock ; cmpxchg %3, %1"
2918 + : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
2919 + return t;
2920 +}
2921 +
2922 +#define a_or a_or
2923 +static inline void a_or(volatile int *p, int v)
2924 +{
2925 + __asm__( "lock ; or %1, %0"
2926 + : "=m"(*p) : "r"(v) : "memory" );
2927 +}
2928 +
2929 +#define a_and a_and
2930 +static inline void a_and(volatile int *p, int v)
2931 +{
2932 + __asm__( "lock ; and %1, %0"
2933 + : "=m"(*p) : "r"(v) : "memory" );
2934 +}
2935 +
2936 +#define a_swap a_swap
2937 +static inline int a_swap(volatile int *x, int v)
2938 +{
2939 + __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
2940 + return v;
2941 +}
2942 +
2943 +#define a_fetch_add a_fetch_add
2944 +static inline int a_fetch_add(volatile int *x, int v)
2945 +{
2946 + __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
2947 + return v;
2948 +}
2949 +
2950 +#define a_inc a_inc
2951 +static inline void a_inc(volatile int *x)
2952 +{
2953 + __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" );
2954 +}
2955 +
2956 +#define a_dec a_dec
2957 +static inline void a_dec(volatile int *x)
2958 +{
2959 + __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" );
2960 +}
2961 +
2962 +#define a_store a_store
2963 +static inline void a_store(volatile int *p, int x)
2964 +{
2965 + __asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" );
2966 +}
2967 +
2968 +#define a_spin a_spin
2969 +static inline void a_spin()
2970 +{
2971 + __asm__ __volatile__( "pause" : : : "memory" );
2972 +}
2973 +
2974 +#define a_barrier a_barrier
2975 +static inline void a_barrier()
2976 +{
2977 + __asm__ __volatile__( "" : : : "memory" );
2978 +}
2979 +
2980 +#define a_crash a_crash
2981 +static inline void a_crash()
2982 +{
2983 + __asm__ __volatile__( "hlt" : : : "memory" );
2984 +}
2985 --- a/arch/x32/pthread_arch.h
2986 +++ b/arch/x32/pthread_arch.h
2987 @@ -7,6 +7,6 @@ static inline struct pthread *__pthread_
2988
2989 #define TP_ADJ(p) (p)
2990
2991 -#define CANCEL_REG_IP 32
2992 +#define MC_PC gregs[REG_RIP]
2993
2994 #define CANARY canary2
2995 --- a/arch/x32/src/syscall_cp_fixup.c
2996 +++ b/arch/x32/src/syscall_cp_fixup.c
2997 @@ -1,8 +1,6 @@
2998 #include <sys/syscall.h>
2999
3000 -#ifdef SHARED
3001 __attribute__((__visibility__("hidden")))
3002 -#endif
3003 long __syscall_cp_internal(volatile void*, long long, long long, long long, long long,
3004 long long, long long, long long);
3005
3006 @@ -14,9 +12,7 @@ struct __timespec_kernel { long long tv_
3007 ts->tv_nsec = __tsc(X)->tv_nsec; \
3008 (X) = (unsigned long)ts; } } while(0)
3009
3010 -#ifdef SHARED
3011 __attribute__((__visibility__("hidden")))
3012 -#endif
3013 long __syscall_cp_asm (volatile void * foo, long long n, long long a1, long long a2, long long a3,
3014 long long a4, long long a5, long long a6)
3015 {
3016 --- a/arch/x86_64/atomic.h
3017 +++ /dev/null
3018 @@ -1,105 +0,0 @@
3019 -#ifndef _INTERNAL_ATOMIC_H
3020 -#define _INTERNAL_ATOMIC_H
3021 -
3022 -#include <stdint.h>
3023 -
3024 -static inline int a_ctz_64(uint64_t x)
3025 -{
3026 - __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
3027 - return x;
3028 -}
3029 -
3030 -static inline int a_ctz_l(unsigned long x)
3031 -{
3032 - __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
3033 - return x;
3034 -}
3035 -
3036 -static inline void a_and_64(volatile uint64_t *p, uint64_t v)
3037 -{
3038 - __asm__( "lock ; and %1, %0"
3039 - : "=m"(*p) : "r"(v) : "memory" );
3040 -}
3041 -
3042 -static inline void a_or_64(volatile uint64_t *p, uint64_t v)
3043 -{
3044 - __asm__( "lock ; or %1, %0"
3045 - : "=m"(*p) : "r"(v) : "memory" );
3046 -}
3047 -
3048 -static inline void a_or_l(volatile void *p, long v)
3049 -{
3050 - __asm__( "lock ; or %1, %0"
3051 - : "=m"(*(long *)p) : "r"(v) : "memory" );
3052 -}
3053 -
3054 -static inline void *a_cas_p(volatile void *p, void *t, void *s)
3055 -{
3056 - __asm__( "lock ; cmpxchg %3, %1"
3057 - : "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" );
3058 - return t;
3059 -}
3060 -
3061 -static inline int a_cas(volatile int *p, int t, int s)
3062 -{
3063 - __asm__( "lock ; cmpxchg %3, %1"
3064 - : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
3065 - return t;
3066 -}
3067 -
3068 -static inline void a_or(volatile int *p, int v)
3069 -{
3070 - __asm__( "lock ; or %1, %0"
3071 - : "=m"(*p) : "r"(v) : "memory" );
3072 -}
3073 -
3074 -static inline void a_and(volatile int *p, int v)
3075 -{
3076 - __asm__( "lock ; and %1, %0"
3077 - : "=m"(*p) : "r"(v) : "memory" );
3078 -}
3079 -
3080 -static inline int a_swap(volatile int *x, int v)
3081 -{
3082 - __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
3083 - return v;
3084 -}
3085 -
3086 -static inline int a_fetch_add(volatile int *x, int v)
3087 -{
3088 - __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
3089 - return v;
3090 -}
3091 -
3092 -static inline void a_inc(volatile int *x)
3093 -{
3094 - __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" );
3095 -}
3096 -
3097 -static inline void a_dec(volatile int *x)
3098 -{
3099 - __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" );
3100 -}
3101 -
3102 -static inline void a_store(volatile int *p, int x)
3103 -{
3104 - __asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" );
3105 -}
3106 -
3107 -static inline void a_spin()
3108 -{
3109 - __asm__ __volatile__( "pause" : : : "memory" );
3110 -}
3111 -
3112 -static inline void a_barrier()
3113 -{
3114 - __asm__ __volatile__( "" : : : "memory" );
3115 -}
3116 -
3117 -static inline void a_crash()
3118 -{
3119 - __asm__ __volatile__( "hlt" : : : "memory" );
3120 -}
3121 -
3122 -
3123 -#endif
3124 --- /dev/null
3125 +++ b/arch/x86_64/atomic_arch.h
3126 @@ -0,0 +1,107 @@
3127 +#define a_ctz_64 a_ctz_64
3128 +static inline int a_ctz_64(uint64_t x)
3129 +{
3130 + __asm__( "bsf %1,%0" : "=r"(x) : "r"(x) );
3131 + return x;
3132 +}
3133 +
3134 +#define a_and_64 a_and_64
3135 +static inline void a_and_64(volatile uint64_t *p, uint64_t v)
3136 +{
3137 + __asm__( "lock ; and %1, %0"
3138 + : "=m"(*p) : "r"(v) : "memory" );
3139 +}
3140 +
3141 +#define a_or_64 a_or_64
3142 +static inline void a_or_64(volatile uint64_t *p, uint64_t v)
3143 +{
3144 + __asm__( "lock ; or %1, %0"
3145 + : "=m"(*p) : "r"(v) : "memory" );
3146 +}
3147 +
3148 +#define a_or_l a_or_l
3149 +static inline void a_or_l(volatile void *p, long v)
3150 +{
3151 + __asm__( "lock ; or %1, %0"
3152 + : "=m"(*(long *)p) : "r"(v) : "memory" );
3153 +}
3154 +
3155 +#define a_cas_p a_cas_p
3156 +static inline void *a_cas_p(volatile void *p, void *t, void *s)
3157 +{
3158 + __asm__( "lock ; cmpxchg %3, %1"
3159 + : "=a"(t), "=m"(*(long *)p) : "a"(t), "r"(s) : "memory" );
3160 + return t;
3161 +}
3162 +
3163 +#define a_cas a_cas
3164 +static inline int a_cas(volatile int *p, int t, int s)
3165 +{
3166 + __asm__( "lock ; cmpxchg %3, %1"
3167 + : "=a"(t), "=m"(*p) : "a"(t), "r"(s) : "memory" );
3168 + return t;
3169 +}
3170 +
3171 +#define a_or a_or
3172 +static inline void a_or(volatile int *p, int v)
3173 +{
3174 + __asm__( "lock ; or %1, %0"
3175 + : "=m"(*p) : "r"(v) : "memory" );
3176 +}
3177 +
3178 +#define a_and a_and
3179 +static inline void a_and(volatile int *p, int v)
3180 +{
3181 + __asm__( "lock ; and %1, %0"
3182 + : "=m"(*p) : "r"(v) : "memory" );
3183 +}
3184 +
3185 +#define a_swap a_swap
3186 +static inline int a_swap(volatile int *x, int v)
3187 +{
3188 + __asm__( "xchg %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
3189 + return v;
3190 +}
3191 +
3192 +#define a_fetch_add a_fetch_add
3193 +static inline int a_fetch_add(volatile int *x, int v)
3194 +{
3195 + __asm__( "lock ; xadd %0, %1" : "=r"(v), "=m"(*x) : "0"(v) : "memory" );
3196 + return v;
3197 +}
3198 +
3199 +#define a_inc a_inc
3200 +static inline void a_inc(volatile int *x)
3201 +{
3202 + __asm__( "lock ; incl %0" : "=m"(*x) : "m"(*x) : "memory" );
3203 +}
3204 +
3205 +#define a_dec a_dec
3206 +static inline void a_dec(volatile int *x)
3207 +{
3208 + __asm__( "lock ; decl %0" : "=m"(*x) : "m"(*x) : "memory" );
3209 +}
3210 +
3211 +#define a_store a_store
3212 +static inline void a_store(volatile int *p, int x)
3213 +{
3214 + __asm__( "mov %1, %0 ; lock ; orl $0,(%%rsp)" : "=m"(*p) : "r"(x) : "memory" );
3215 +}
3216 +
3217 +#define a_spin a_spin
3218 +static inline void a_spin()
3219 +{
3220 + __asm__ __volatile__( "pause" : : : "memory" );
3221 +}
3222 +
3223 +#define a_barrier a_barrier
3224 +static inline void a_barrier()
3225 +{
3226 + __asm__ __volatile__( "" : : : "memory" );
3227 +}
3228 +
3229 +#define a_crash a_crash
3230 +static inline void a_crash()
3231 +{
3232 + __asm__ __volatile__( "hlt" : : : "memory" );
3233 +}
3234 --- a/arch/x86_64/pthread_arch.h
3235 +++ b/arch/x86_64/pthread_arch.h
3236 @@ -7,4 +7,4 @@ static inline struct pthread *__pthread_
3237
3238 #define TP_ADJ(p) (p)
3239
3240 -#define CANCEL_REG_IP 16
3241 +#define MC_PC gregs[REG_RIP]
3242 --- a/configure
3243 +++ b/configure
3244 @@ -9,6 +9,9 @@ VAR=VALUE. See below for descriptions o
3245
3246 Defaults for the options are specified in brackets.
3247
3248 +Configuration:
3249 + --srcdir=DIR source directory [detected]
3250 +
3251 Installation directories:
3252 --prefix=PREFIX main installation prefix [/usr/local/musl]
3253 --exec-prefix=EPREFIX installation prefix for executable files [PREFIX]
3254 @@ -117,6 +120,7 @@ CFLAGS_TRY=
3255 LDFLAGS_AUTO=
3256 LDFLAGS_TRY=
3257 OPTIMIZE_GLOBS=
3258 +srcdir=
3259 prefix=/usr/local/musl
3260 exec_prefix='$(prefix)'
3261 bindir='$(exec_prefix)/bin'
3262 @@ -139,6 +143,7 @@ clang_wrapper=no
3263 for arg ; do
3264 case "$arg" in
3265 --help) usage ;;
3266 +--srcdir=*) srcdir=${arg#*=} ;;
3267 --prefix=*) prefix=${arg#*=} ;;
3268 --exec-prefix=*) exec_prefix=${arg#*=} ;;
3269 --bindir=*) bindir=${arg#*=} ;;
3270 @@ -179,11 +184,23 @@ LIBCC=*) LIBCC=${arg#*=} ;;
3271 esac
3272 done
3273
3274 -for i in prefix exec_prefix bindir libdir includedir syslibdir ; do
3275 +for i in srcdir prefix exec_prefix bindir libdir includedir syslibdir ; do
3276 stripdir $i
3277 done
3278
3279 #
3280 +# Get the source dir for out-of-tree builds
3281 +#
3282 +if test -z "$srcdir" ; then
3283 +srcdir="${0%/configure}"
3284 +stripdir srcdir
3285 +fi
3286 +abs_builddir="$(pwd)" || fail "$0: cannot determine working directory"
3287 +abs_srcdir="$(cd $srcdir && pwd)" || fail "$0: invalid source directory $srcdir"
3288 +test "$abs_srcdir" = "$abs_builddir" && srcdir=.
3289 +test "$srcdir" != "." -a -f Makefile -a ! -h Makefile && fail "$0: Makefile already exists in the working directory"
3290 +
3291 +#
3292 # Get a temp filename we can use
3293 #
3294 i=0
3295 @@ -263,11 +280,11 @@ fi
3296 fi
3297
3298 if test "$gcc_wrapper" = yes ; then
3299 -tools="$tools tools/musl-gcc"
3300 +tools="$tools obj/musl-gcc"
3301 tool_libs="$tool_libs lib/musl-gcc.specs"
3302 fi
3303 if test "$clang_wrapper" = yes ; then
3304 -tools="$tools tools/musl-clang tools/ld.musl-clang"
3305 +tools="$tools obj/musl-clang obj/ld.musl-clang"
3306 fi
3307
3308 #
3309 @@ -321,7 +338,7 @@ __attribute__((__may_alias__))
3310 #endif
3311 x;
3312 EOF
3313 -if $CC $CFLAGS_C99FSE -I./arch/$ARCH -I./include $CPPFLAGS $CFLAGS \
3314 +if $CC $CFLAGS_C99FSE -I$srcdir/arch/$ARCH -I$srcdir/include $CPPFLAGS $CFLAGS \
3315 -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
3316 printf "no\n"
3317 else
3318 @@ -330,6 +347,13 @@ CFLAGS_C99FSE="$CFLAGS_C99FSE -D__may_al
3319 fi
3320
3321 #
3322 +# The GNU toolchain defaults to assuming unmarked files need an
3323 +# executable stack, potentially exposing vulnerabilities in programs
3324 +# linked with such object files. Fix this.
3325 +#
3326 +tryflag CFLAGS_C99FSE -Wa,--noexecstack
3327 +
3328 +#
3329 # Check for options to disable stack protector, which needs to be
3330 # disabled for a few early-bootstrap translation units. If not found,
3331 # this is not an error; we assume the toolchain does not do ssp.
3332 @@ -430,11 +454,15 @@ tryflag CFLAGS_AUTO -fno-unwind-tables
3333 tryflag CFLAGS_AUTO -fno-asynchronous-unwind-tables
3334
3335 #
3336 -# The GNU toolchain defaults to assuming unmarked files need an
3337 -# executable stack, potentially exposing vulnerabilities in programs
3338 -# linked with such object files. Fix this.
3339 +# Attempt to put each function and each data object in its own
3340 +# section. This both allows additional size optimizations at link
3341 +# time and works around a dangerous class of compiler/assembler bugs
3342 +# whereby relative address expressions are constant-folded by the
3343 +# assembler even when one or more of the symbols involved is
3344 +# replaceable. See gas pr 18561 and gcc pr 66609, 68178, etc.
3345 #
3346 -tryflag CFLAGS_AUTO -Wa,--noexecstack
3347 +tryflag CFLAGS_AUTO -ffunction-sections
3348 +tryflag CFLAGS_AUTO -fdata-sections
3349
3350 #
3351 # On x86, make sure we don't have incompatible instruction set
3352 @@ -489,7 +517,7 @@ int foo(void) { }
3353 int bar(void) { fp = foo; return foo(); }
3354 EOF
3355 if $CC $CFLAGS_C99FSE $CPPFLAGS $CFLAGS \
3356 - -DSHARED -fPIC -I./src/internal -include vis.h \
3357 + -DSHARED -fPIC -I$srcdir/src/internal -include vis.h \
3358 -nostdlib -shared -Wl,-Bsymbolic-functions \
3359 -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
3360 visibility=yes
3361 @@ -504,6 +532,16 @@ CFLAGS_AUTO="$CFLAGS_AUTO -include vis.h
3362 CFLAGS_AUTO="${CFLAGS_AUTO# }"
3363 fi
3364
3365 +# Reduce space lost to padding for alignment purposes by sorting data
3366 +# objects according to their alignment requirements. This approximates
3367 +# optimal packing.
3368 +tryldflag LDFLAGS_AUTO -Wl,--sort-section,alignment
3369 +tryldflag LDFLAGS_AUTO -Wl,--sort-common
3370 +
3371 +# When linking shared library, drop dummy weak definitions that were
3372 +# replaced by strong definitions from other translation units.
3373 +tryldflag LDFLAGS_AUTO -Wl,--gc-sections
3374 +
3375 # Some patched GCC builds have these defaults messed up...
3376 tryldflag LDFLAGS_AUTO -Wl,--hash-style=both
3377
3378 @@ -513,6 +551,11 @@ tryldflag LDFLAGS_AUTO -Wl,--hash-style=
3379 # runtime library; implementation error is also a possibility.
3380 tryldflag LDFLAGS_AUTO -Wl,--no-undefined
3381
3382 +# Avoid exporting symbols from compiler runtime libraries. They
3383 +# should be hidden anyway, but some toolchains including old gcc
3384 +# versions built without shared library support and pcc are broken.
3385 +tryldflag LDFLAGS_AUTO -Wl,--exclude-libs=ALL
3386 +
3387 test "$shared" = "no" || {
3388 # Disable dynamic linking if ld is broken and can't do -Bsymbolic-functions
3389 LDFLAGS_DUMMY=
3390 @@ -599,7 +642,7 @@ echo '#include <float.h>' > "$tmpc"
3391 echo '#if LDBL_MANT_DIG == 53' >> "$tmpc"
3392 echo 'typedef char ldcheck[9-(int)sizeof(long double)];' >> "$tmpc"
3393 echo '#endif' >> "$tmpc"
3394 -if $CC $CFLAGS_C99FSE -I./arch/$ARCH -I./include $CPPFLAGS $CFLAGS \
3395 +if $CC $CFLAGS_C99FSE -I$srcdir/arch/$ARCH -I$srcdir/include $CPPFLAGS $CFLAGS \
3396 -c -o /dev/null "$tmpc" >/dev/null 2>&1 ; then
3397 printf "yes\n"
3398 else
3399 @@ -622,6 +665,7 @@ cat << EOF
3400 ARCH = $ARCH
3401 SUBARCH = $SUBARCH
3402 ASMSUBARCH = $ASMSUBARCH
3403 +srcdir = $srcdir
3404 prefix = $prefix
3405 exec_prefix = $exec_prefix
3406 bindir = $bindir
3407 @@ -629,12 +673,14 @@ libdir = $libdir
3408 includedir = $includedir
3409 syslibdir = $syslibdir
3410 CC = $CC
3411 -CFLAGS = $CFLAGS_AUTO $CFLAGS
3412 +CFLAGS = $CFLAGS
3413 +CFLAGS_AUTO = $CFLAGS_AUTO
3414 CFLAGS_C99FSE = $CFLAGS_C99FSE
3415 CFLAGS_MEMOPS = $CFLAGS_MEMOPS
3416 CFLAGS_NOSSP = $CFLAGS_NOSSP
3417 CPPFLAGS = $CPPFLAGS
3418 -LDFLAGS = $LDFLAGS_AUTO $LDFLAGS
3419 +LDFLAGS = $LDFLAGS
3420 +LDFLAGS_AUTO = $LDFLAGS_AUTO
3421 CROSS_COMPILE = $CROSS_COMPILE
3422 LIBCC = $LIBCC
3423 OPTIMIZE_GLOBS = $OPTIMIZE_GLOBS
3424 @@ -648,4 +694,6 @@ test "x$cc_family" = xgcc && echo 'WRAPC
3425 test "x$cc_family" = xclang && echo 'WRAPCC_CLANG = $(CC)'
3426 exec 1>&3 3>&-
3427
3428 +test "$srcdir" = "." || ln -sf $srcdir/Makefile .
3429 +
3430 printf "done\n"
3431 --- a/crt/arm/crti.s
3432 +++ b/crt/arm/crti.s
3433 @@ -1,3 +1,5 @@
3434 +.syntax unified
3435 +
3436 .section .init
3437 .global _init
3438 .type _init,%function
3439 --- a/crt/arm/crtn.s
3440 +++ b/crt/arm/crtn.s
3441 @@ -1,11 +1,9 @@
3442 +.syntax unified
3443 +
3444 .section .init
3445 pop {r0,lr}
3446 - tst lr,#1
3447 - moveq pc,lr
3448 bx lr
3449
3450 .section .fini
3451 pop {r0,lr}
3452 - tst lr,#1
3453 - moveq pc,lr
3454 bx lr
3455 --- a/include/complex.h
3456 +++ b/include/complex.h
3457 @@ -116,7 +116,7 @@ long double creall(long double complex);
3458
3459 #if __STDC_VERSION__ >= 201112L
3460 #if defined(_Imaginary_I)
3461 -#define __CMPLX(x, y, t) ((t)(x) + _Imaginary_I*(t)(y)))
3462 +#define __CMPLX(x, y, t) ((t)(x) + _Imaginary_I*(t)(y))
3463 #elif defined(__clang__)
3464 #define __CMPLX(x, y, t) (+(_Complex t){ (t)(x), (t)(y) })
3465 #else
3466 --- a/include/netinet/tcp.h
3467 +++ b/include/netinet/tcp.h
3468 @@ -41,7 +41,20 @@
3469 #define TCP_CLOSING 11
3470
3471 #if defined(_GNU_SOURCE) || defined(_BSD_SOURCE)
3472 +#define TCPOPT_EOL 0
3473 +#define TCPOPT_NOP 1
3474 +#define TCPOPT_MAXSEG 2
3475 +#define TCPOPT_WINDOW 3
3476 +#define TCPOPT_SACK_PERMITTED 4
3477 +#define TCPOPT_SACK 5
3478 +#define TCPOPT_TIMESTAMP 8
3479 +#define TCPOLEN_SACK_PERMITTED 2
3480 +#define TCPOLEN_WINDOW 3
3481 +#define TCPOLEN_MAXSEG 4
3482 +#define TCPOLEN_TIMESTAMP 10
3483 +
3484 #define SOL_TCP 6
3485 +
3486 #include <sys/types.h>
3487 #include <sys/socket.h>
3488 #include <stdint.h>
3489 --- a/src/env/__init_tls.c
3490 +++ b/src/env/__init_tls.c
3491 @@ -8,9 +8,6 @@
3492 #include "atomic.h"
3493 #include "syscall.h"
3494
3495 -#ifndef SHARED
3496 -static
3497 -#endif
3498 int __init_tp(void *p)
3499 {
3500 pthread_t td = p;
3501 @@ -24,8 +21,6 @@ int __init_tp(void *p)
3502 return 0;
3503 }
3504
3505 -#ifndef SHARED
3506 -
3507 static struct builtin_tls {
3508 char c;
3509 struct pthread pt;
3510 @@ -33,33 +28,40 @@ static struct builtin_tls {
3511 } builtin_tls[1];
3512 #define MIN_TLS_ALIGN offsetof(struct builtin_tls, pt)
3513
3514 -struct tls_image {
3515 - void *image;
3516 - size_t len, size, align;
3517 -} __static_tls;
3518 -
3519 -#define T __static_tls
3520 +static struct tls_module main_tls;
3521
3522 void *__copy_tls(unsigned char *mem)
3523 {
3524 pthread_t td;
3525 - if (!T.image) return mem;
3526 - void **dtv = (void *)mem;
3527 - dtv[0] = (void *)1;
3528 + struct tls_module *p;
3529 + size_t i;
3530 + void **dtv;
3531 +
3532 #ifdef TLS_ABOVE_TP
3533 - mem += sizeof(void *) * 2;
3534 - mem += -((uintptr_t)mem + sizeof(struct pthread)) & (T.align-1);
3535 + dtv = (void **)(mem + libc.tls_size) - (libc.tls_cnt + 1);
3536 +
3537 + mem += -((uintptr_t)mem + sizeof(struct pthread)) & (libc.tls_align-1);
3538 td = (pthread_t)mem;
3539 mem += sizeof(struct pthread);
3540 +
3541 + for (i=1, p=libc.tls_head; p; i++, p=p->next) {
3542 + dtv[i] = mem + p->offset;
3543 + memcpy(dtv[i], p->image, p->len);
3544 + }
3545 #else
3546 + dtv = (void **)mem;
3547 +
3548 mem += libc.tls_size - sizeof(struct pthread);
3549 - mem -= (uintptr_t)mem & (T.align-1);
3550 + mem -= (uintptr_t)mem & (libc.tls_align-1);
3551 td = (pthread_t)mem;
3552 - mem -= T.size;
3553 +
3554 + for (i=1, p=libc.tls_head; p; i++, p=p->next) {
3555 + dtv[i] = mem - p->offset;
3556 + memcpy(dtv[i], p->image, p->len);
3557 + }
3558 #endif
3559 + dtv[0] = (void *)libc.tls_cnt;
3560 td->dtv = td->dtv_copy = dtv;
3561 - dtv[1] = mem;
3562 - memcpy(mem, T.image, T.len);
3563 return td;
3564 }
3565
3566 @@ -69,7 +71,7 @@ typedef Elf32_Phdr Phdr;
3567 typedef Elf64_Phdr Phdr;
3568 #endif
3569
3570 -void __init_tls(size_t *aux)
3571 +static void static_init_tls(size_t *aux)
3572 {
3573 unsigned char *p;
3574 size_t n;
3575 @@ -86,16 +88,24 @@ void __init_tls(size_t *aux)
3576 }
3577
3578 if (tls_phdr) {
3579 - T.image = (void *)(base + tls_phdr->p_vaddr);
3580 - T.len = tls_phdr->p_filesz;
3581 - T.size = tls_phdr->p_memsz;
3582 - T.align = tls_phdr->p_align;
3583 + main_tls.image = (void *)(base + tls_phdr->p_vaddr);
3584 + main_tls.len = tls_phdr->p_filesz;
3585 + main_tls.size = tls_phdr->p_memsz;
3586 + main_tls.align = tls_phdr->p_align;
3587 + libc.tls_cnt = 1;
3588 + libc.tls_head = &main_tls;
3589 }
3590
3591 - T.size += (-T.size - (uintptr_t)T.image) & (T.align-1);
3592 - if (T.align < MIN_TLS_ALIGN) T.align = MIN_TLS_ALIGN;
3593 + main_tls.size += (-main_tls.size - (uintptr_t)main_tls.image)
3594 + & (main_tls.align-1);
3595 + if (main_tls.align < MIN_TLS_ALIGN) main_tls.align = MIN_TLS_ALIGN;
3596 +#ifndef TLS_ABOVE_TP
3597 + main_tls.offset = main_tls.size;
3598 +#endif
3599
3600 - libc.tls_size = 2*sizeof(void *)+T.size+T.align+sizeof(struct pthread)
3601 + libc.tls_align = main_tls.align;
3602 + libc.tls_size = 2*sizeof(void *) + sizeof(struct pthread)
3603 + + main_tls.size + main_tls.align
3604 +		+ MIN_TLS_ALIGN-1 & -MIN_TLS_ALIGN;
3605
3606 if (libc.tls_size > sizeof builtin_tls) {
3607 @@ -117,6 +127,5 @@ void __init_tls(size_t *aux)
3608 if (__init_tp(__copy_tls(mem)) < 0)
3609 a_crash();
3610 }
3611 -#else
3612 -void __init_tls(size_t *auxv) { }
3613 -#endif
3614 +
3615 +weak_alias(static_init_tls, __init_tls);
3616 --- a/src/env/__libc_start_main.c
3617 +++ b/src/env/__libc_start_main.c
3618 @@ -8,21 +8,17 @@
3619
3620 void __init_tls(size_t *);
3621
3622 -#ifndef SHARED
3623 -static void dummy() {}
3624 +static void dummy(void) {}
3625 weak_alias(dummy, _init);
3626 -extern void (*const __init_array_start)() __attribute__((weak));
3627 -extern void (*const __init_array_end)() __attribute__((weak));
3628 -#endif
3629 +
3630 +__attribute__((__weak__, __visibility__("hidden")))
3631 +extern void (*const __init_array_start)(void), (*const __init_array_end)(void);
3632
3633 static void dummy1(void *p) {}
3634 weak_alias(dummy1, __init_ssp);
3635
3636 #define AUX_CNT 38
3637
3638 -#ifndef SHARED
3639 -static
3640 -#endif
3641 void __init_libc(char **envp, char *pn)
3642 {
3643 size_t i, *auxv, aux[AUX_CNT] = { 0 };
3644 @@ -57,20 +53,22 @@ void __init_libc(char **envp, char *pn)
3645 libc.secure = 1;
3646 }
3647
3648 -int __libc_start_main(int (*main)(int,char **,char **), int argc, char **argv)
3649 +static void libc_start_init(void)
3650 {
3651 - char **envp = argv+argc+1;
3652 -
3653 -#ifndef SHARED
3654 - __init_libc(envp, argv[0]);
3655 _init();
3656 uintptr_t a = (uintptr_t)&__init_array_start;
3657 for (; a<(uintptr_t)&__init_array_end; a+=sizeof(void(*)()))
3658 (*(void (**)())a)();
3659 -#else
3660 - void __libc_start_init(void);
3661 +}
3662 +
3663 +weak_alias(libc_start_init, __libc_start_init);
3664 +
3665 +int __libc_start_main(int (*main)(int,char **,char **), int argc, char **argv)
3666 +{
3667 + char **envp = argv+argc+1;
3668 +
3669 + __init_libc(envp, argv[0]);
3670 __libc_start_init();
3671 -#endif
3672
3673 /* Pass control to the application */
3674 exit(main(argc, argv, envp));
3675 --- a/src/env/__reset_tls.c
3676 +++ b/src/env/__reset_tls.c
3677 @@ -1,21 +1,16 @@
3678 -#ifndef SHARED
3679 -
3680 #include <string.h>
3681 #include "pthread_impl.h"
3682 -
3683 -extern struct tls_image {
3684 - void *image;
3685 - size_t len, size, align;
3686 -} __static_tls;
3687 -
3688 -#define T __static_tls
3689 +#include "libc.h"
3690
3691 void __reset_tls()
3692 {
3693 - if (!T.size) return;
3694 pthread_t self = __pthread_self();
3695 - memcpy(self->dtv[1], T.image, T.len);
3696 - memset((char *)self->dtv[1]+T.len, 0, T.size-T.len);
3697 + struct tls_module *p;
3698 + size_t i, n = (size_t)self->dtv[0];
3699 + if (n) for (p=libc.tls_head, i=1; i<=n; i++, p=p->next) {
3700 + if (!self->dtv[i]) continue;
3701 + memcpy(self->dtv[i], p->image, p->len);
3702 + memset((char *)self->dtv[i]+p->len, 0,
3703 + p->size - p->len);
3704 + }
3705 }
3706 -
3707 -#endif
3708 --- a/src/env/__stack_chk_fail.c
3709 +++ b/src/env/__stack_chk_fail.c
3710 @@ -17,16 +17,7 @@ void __stack_chk_fail(void)
3711 a_crash();
3712 }
3713
3714 -#ifdef SHARED
3715 -
3716 __attribute__((__visibility__("hidden")))
3717 -void __stack_chk_fail_local(void)
3718 -{
3719 - a_crash();
3720 -}
3721 -
3722 -#else
3723 +void __stack_chk_fail_local(void);
3724
3725 weak_alias(__stack_chk_fail, __stack_chk_fail_local);
3726 -
3727 -#endif
3728 --- /dev/null
3729 +++ b/src/exit/arm/__aeabi_atexit.c
3730 @@ -0,0 +1,6 @@
3731 +int __cxa_atexit(void (*func)(void *), void *arg, void *dso);
3732 +
3733 +int __aeabi_atexit (void *obj, void (*func) (void *), void *d)
3734 +{
3735 + return __cxa_atexit (func, obj, d);
3736 +}
3737 --- a/src/exit/exit.c
3738 +++ b/src/exit/exit.c
3739 @@ -10,25 +10,25 @@ static void dummy()
3740 * as a consequence of linking either __toread.c or __towrite.c. */
3741 weak_alias(dummy, __funcs_on_exit);
3742 weak_alias(dummy, __stdio_exit);
3743 -
3744 -#ifndef SHARED
3745 weak_alias(dummy, _fini);
3746 -extern void (*const __fini_array_start)() __attribute__((weak));
3747 -extern void (*const __fini_array_end)() __attribute__((weak));
3748 -#endif
3749
3750 -_Noreturn void exit(int code)
3751 -{
3752 - __funcs_on_exit();
3753 +__attribute__((__weak__, __visibility__("hidden")))
3754 +extern void (*const __fini_array_start)(void), (*const __fini_array_end)(void);
3755
3756 -#ifndef SHARED
3757 +static void libc_exit_fini(void)
3758 +{
3759 uintptr_t a = (uintptr_t)&__fini_array_end;
3760 for (; a>(uintptr_t)&__fini_array_start; a-=sizeof(void(*)()))
3761 (*(void (**)())(a-sizeof(void(*)())))();
3762 _fini();
3763 -#endif
3764 +}
3765
3766 - __stdio_exit();
3767 +weak_alias(libc_exit_fini, __libc_exit_fini);
3768
3769 +_Noreturn void exit(int code)
3770 +{
3771 + __funcs_on_exit();
3772 + __libc_exit_fini();
3773 + __stdio_exit();
3774 _Exit(code);
3775 }
3776 --- /dev/null
3777 +++ b/src/fenv/arm/fenv-hf.S
3778 @@ -0,0 +1,69 @@
3779 +#if __ARM_PCS_VFP
3780 +
3781 +.syntax unified
3782 +.fpu vfp
3783 +
3784 +.global fegetround
3785 +.type fegetround,%function
3786 +fegetround:
3787 + fmrx r0, fpscr
3788 + and r0, r0, #0xc00000
3789 + bx lr
3790 +
3791 +.global __fesetround
3792 +.type __fesetround,%function
3793 +__fesetround:
3794 + fmrx r3, fpscr
3795 + bic r3, r3, #0xc00000
3796 + orr r3, r3, r0
3797 + fmxr fpscr, r3
3798 + mov r0, #0
3799 + bx lr
3800 +
3801 +.global fetestexcept
3802 +.type fetestexcept,%function
3803 +fetestexcept:
3804 + and r0, r0, #0x1f
3805 + fmrx r3, fpscr
3806 + and r0, r0, r3
3807 + bx lr
3808 +
3809 +.global feclearexcept
3810 +.type feclearexcept,%function
3811 +feclearexcept:
3812 + and r0, r0, #0x1f
3813 + fmrx r3, fpscr
3814 + bic r3, r3, r0
3815 + fmxr fpscr, r3
3816 + mov r0, #0
3817 + bx lr
3818 +
3819 +.global feraiseexcept
3820 +.type feraiseexcept,%function
3821 +feraiseexcept:
3822 + and r0, r0, #0x1f
3823 + fmrx r3, fpscr
3824 + orr r3, r3, r0
3825 + fmxr fpscr, r3
3826 + mov r0, #0
3827 + bx lr
3828 +
3829 +.global fegetenv
3830 +.type fegetenv,%function
3831 +fegetenv:
3832 + fmrx r3, fpscr
3833 + str r3, [r0]
3834 + mov r0, #0
3835 + bx lr
3836 +
3837 +.global fesetenv
3838 +.type fesetenv,%function
3839 +fesetenv:
3840 + cmn r0, #1
3841 + moveq r3, #0
3842 + ldrne r3, [r0]
3843 + fmxr fpscr, r3
3844 + mov r0, #0
3845 + bx lr
3846 +
3847 +#endif
3848 --- /dev/null
3849 +++ b/src/fenv/arm/fenv.c
3850 @@ -0,0 +1,3 @@
3851 +#if !__ARM_PCS_VFP
3852 +#include "../fenv.c"
3853 +#endif
3854 --- a/src/fenv/armebhf/fenv.sub
3855 +++ /dev/null
3856 @@ -1 +0,0 @@
3857 -../armhf/fenv.s
3858 --- a/src/fenv/armhf/fenv.s
3859 +++ /dev/null
3860 @@ -1,64 +0,0 @@
3861 -.fpu vfp
3862 -
3863 -.global fegetround
3864 -.type fegetround,%function
3865 -fegetround:
3866 - mrc p10, 7, r0, cr1, cr0, 0
3867 - and r0, r0, #0xc00000
3868 - bx lr
3869 -
3870 -.global __fesetround
3871 -.type __fesetround,%function
3872 -__fesetround:
3873 - mrc p10, 7, r3, cr1, cr0, 0
3874 - bic r3, r3, #0xc00000
3875 - orr r3, r3, r0
3876 - mcr p10, 7, r3, cr1, cr0, 0
3877 - mov r0, #0
3878 - bx lr
3879 -
3880 -.global fetestexcept
3881 -.type fetestexcept,%function
3882 -fetestexcept:
3883 - and r0, r0, #0x1f
3884 - mrc p10, 7, r3, cr1, cr0, 0
3885 - and r0, r0, r3
3886 - bx lr
3887 -
3888 -.global feclearexcept
3889 -.type feclearexcept,%function
3890 -feclearexcept:
3891 - and r0, r0, #0x1f
3892 - mrc p10, 7, r3, cr1, cr0, 0
3893 - bic r3, r3, r0
3894 - mcr p10, 7, r3, cr1, cr0, 0
3895 - mov r0, #0
3896 - bx lr
3897 -
3898 -.global feraiseexcept
3899 -.type feraiseexcept,%function
3900 -feraiseexcept:
3901 - and r0, r0, #0x1f
3902 - mrc p10, 7, r3, cr1, cr0, 0
3903 - orr r3, r3, r0
3904 - mcr p10, 7, r3, cr1, cr0, 0
3905 - mov r0, #0
3906 - bx lr
3907 -
3908 -.global fegetenv
3909 -.type fegetenv,%function
3910 -fegetenv:
3911 - mrc p10, 7, r3, cr1, cr0, 0
3912 - str r3, [r0]
3913 - mov r0, #0
3914 - bx lr
3915 -
3916 -.global fesetenv
3917 -.type fesetenv,%function
3918 -fesetenv:
3919 - cmn r0, #1
3920 - moveq r3, #0
3921 - ldrne r3, [r0]
3922 - mcr p10, 7, r3, cr1, cr0, 0
3923 - mov r0, #0
3924 - bx lr
3925 --- a/src/fenv/armhf/fenv.sub
3926 +++ /dev/null
3927 @@ -1 +0,0 @@
3928 -fenv.s
3929 --- a/src/fenv/mips-sf/fenv.sub
3930 +++ /dev/null
3931 @@ -1 +0,0 @@
3932 -../fenv.c
3933 --- /dev/null
3934 +++ b/src/fenv/mips/fenv-sf.c
3935 @@ -0,0 +1,3 @@
3936 +#ifdef __mips_soft_float
3937 +#include "../fenv.c"
3938 +#endif
3939 --- /dev/null
3940 +++ b/src/fenv/mips/fenv.S
3941 @@ -0,0 +1,71 @@
3942 +#ifndef __mips_soft_float
3943 +
3944 +.set noreorder
3945 +
3946 +.global feclearexcept
3947 +.type feclearexcept,@function
3948 +feclearexcept:
3949 + and $4, $4, 0x7c
3950 + cfc1 $5, $31
3951 + or $5, $5, $4
3952 + xor $5, $5, $4
3953 + ctc1 $5, $31
3954 + jr $ra
3955 + li $2, 0
3956 +
3957 +.global feraiseexcept
3958 +.type feraiseexcept,@function
3959 +feraiseexcept:
3960 + and $4, $4, 0x7c
3961 + cfc1 $5, $31
3962 + or $5, $5, $4
3963 + ctc1 $5, $31
3964 + jr $ra
3965 + li $2, 0
3966 +
3967 +.global fetestexcept
3968 +.type fetestexcept,@function
3969 +fetestexcept:
3970 + and $4, $4, 0x7c
3971 + cfc1 $2, $31
3972 + jr $ra
3973 + and $2, $2, $4
3974 +
3975 +.global fegetround
3976 +.type fegetround,@function
3977 +fegetround:
3978 + cfc1 $2, $31
3979 + jr $ra
3980 + andi $2, $2, 3
3981 +
3982 +.global __fesetround
3983 +.type __fesetround,@function
3984 +__fesetround:
3985 + cfc1 $5, $31
3986 + li $6, -4
3987 + and $5, $5, $6
3988 + or $5, $5, $4
3989 + ctc1 $5, $31
3990 + jr $ra
3991 + li $2, 0
3992 +
3993 +.global fegetenv
3994 +.type fegetenv,@function
3995 +fegetenv:
3996 + cfc1 $5, $31
3997 + sw $5, 0($4)
3998 + jr $ra
3999 + li $2, 0
4000 +
4001 +.global fesetenv
4002 +.type fesetenv,@function
4003 +fesetenv:
4004 + addiu $5, $4, 1
4005 + beq $5, $0, 1f
4006 + nop
4007 + lw $5, 0($4)
4008 +1: ctc1 $5, $31
4009 + jr $ra
4010 + li $2, 0
4011 +
4012 +#endif
4013 --- a/src/fenv/mips/fenv.s
4014 +++ /dev/null
4015 @@ -1,67 +0,0 @@
4016 -.set noreorder
4017 -
4018 -.global feclearexcept
4019 -.type feclearexcept,@function
4020 -feclearexcept:
4021 - and $4, $4, 0x7c
4022 - cfc1 $5, $31
4023 - or $5, $5, $4
4024 - xor $5, $5, $4
4025 - ctc1 $5, $31
4026 - jr $ra
4027 - li $2, 0
4028 -
4029 -.global feraiseexcept
4030 -.type feraiseexcept,@function
4031 -feraiseexcept:
4032 - and $4, $4, 0x7c
4033 - cfc1 $5, $31
4034 - or $5, $5, $4
4035 - ctc1 $5, $31
4036 - jr $ra
4037 - li $2, 0
4038 -
4039 -.global fetestexcept
4040 -.type fetestexcept,@function
4041 -fetestexcept:
4042 - and $4, $4, 0x7c
4043 - cfc1 $2, $31
4044 - jr $ra
4045 - and $2, $2, $4
4046 -
4047 -.global fegetround
4048 -.type fegetround,@function
4049 -fegetround:
4050 - cfc1 $2, $31
4051 - jr $ra
4052 - andi $2, $2, 3
4053 -
4054 -.global __fesetround
4055 -.type __fesetround,@function
4056 -__fesetround:
4057 - cfc1 $5, $31
4058 - li $6, -4
4059 - and $5, $5, $6
4060 - or $5, $5, $4
4061 - ctc1 $5, $31
4062 - jr $ra
4063 - li $2, 0
4064 -
4065 -.global fegetenv
4066 -.type fegetenv,@function
4067 -fegetenv:
4068 - cfc1 $5, $31
4069 - sw $5, 0($4)
4070 - jr $ra
4071 - li $2, 0
4072 -
4073 -.global fesetenv
4074 -.type fesetenv,@function
4075 -fesetenv:
4076 - addiu $5, $4, 1
4077 - beq $5, $0, 1f
4078 - nop
4079 - lw $5, 0($4)
4080 -1: ctc1 $5, $31
4081 - jr $ra
4082 - li $2, 0
4083 --- a/src/fenv/mipsel-sf/fenv.sub
4084 +++ /dev/null
4085 @@ -1 +0,0 @@
4086 -../fenv.c
4087 --- a/src/fenv/sh-nofpu/fenv.sub
4088 +++ /dev/null
4089 @@ -1 +0,0 @@
4090 -../fenv.c
4091 --- /dev/null
4092 +++ b/src/fenv/sh/fenv-nofpu.c
4093 @@ -0,0 +1,3 @@
4094 +#if !__SH_FPU_ANY__ && !__SH4__
4095 +#include "../fenv.c"
4096 +#endif
4097 --- /dev/null
4098 +++ b/src/fenv/sh/fenv.S
4099 @@ -0,0 +1,78 @@
4100 +#if __SH_FPU_ANY__ || __SH4__
4101 +
4102 +.global fegetround
4103 +.type fegetround, @function
4104 +fegetround:
4105 + sts fpscr, r0
4106 + rts
4107 + and #3, r0
4108 +
4109 +.global __fesetround
4110 +.type __fesetround, @function
4111 +__fesetround:
4112 + sts fpscr, r0
4113 + or r4, r0
4114 + lds r0, fpscr
4115 + rts
4116 + mov #0, r0
4117 +
4118 +.global fetestexcept
4119 +.type fetestexcept, @function
4120 +fetestexcept:
4121 + sts fpscr, r0
4122 + and r4, r0
4123 + rts
4124 + and #0x7c, r0
4125 +
4126 +.global feclearexcept
4127 +.type feclearexcept, @function
4128 +feclearexcept:
4129 + mov r4, r0
4130 + and #0x7c, r0
4131 + not r0, r4
4132 + sts fpscr, r0
4133 + and r4, r0
4134 + lds r0, fpscr
4135 + rts
4136 + mov #0, r0
4137 +
4138 +.global feraiseexcept
4139 +.type feraiseexcept, @function
4140 +feraiseexcept:
4141 + mov r4, r0
4142 + and #0x7c, r0
4143 + sts fpscr, r4
4144 + or r4, r0
4145 + lds r0, fpscr
4146 + rts
4147 + mov #0, r0
4148 +
4149 +.global fegetenv
4150 +.type fegetenv, @function
4151 +fegetenv:
4152 + sts fpscr, r0
4153 + mov.l r0, @r4
4154 + rts
4155 + mov #0, r0
4156 +
4157 +.global fesetenv
4158 +.type fesetenv, @function
4159 +fesetenv:
4160 + mov r4, r0
4161 + cmp/eq #-1, r0
4162 + bf 1f
4163 +
4164 + ! the default environment is complicated by the fact that we need to
4165 + ! preserve the current precision bit, which we do not know a priori
4166 + sts fpscr, r0
4167 + mov #8, r1
4168 + swap.w r1, r1
4169 + bra 2f
4170 + and r1, r0
4171 +
4172 +1: mov.l @r4, r0 ! non-default environment
4173 +2: lds r0, fpscr
4174 + rts
4175 + mov #0, r0
4176 +
4177 +#endif
4178 --- a/src/fenv/sh/fenv.s
4179 +++ /dev/null
4180 @@ -1,74 +0,0 @@
4181 -.global fegetround
4182 -.type fegetround, @function
4183 -fegetround:
4184 - sts fpscr, r0
4185 - rts
4186 - and #3, r0
4187 -
4188 -.global __fesetround
4189 -.type __fesetround, @function
4190 -__fesetround:
4191 - sts fpscr, r0
4192 - or r4, r0
4193 - lds r0, fpscr
4194 - rts
4195 - mov #0, r0
4196 -
4197 -.global fetestexcept
4198 -.type fetestexcept, @function
4199 -fetestexcept:
4200 - sts fpscr, r0
4201 - and r4, r0
4202 - rts
4203 - and #0x7c, r0
4204 -
4205 -.global feclearexcept
4206 -.type feclearexcept, @function
4207 -feclearexcept:
4208 - mov r4, r0
4209 - and #0x7c, r0
4210 - not r0, r4
4211 - sts fpscr, r0
4212 - and r4, r0
4213 - lds r0, fpscr
4214 - rts
4215 - mov #0, r0
4216 -
4217 -.global feraiseexcept
4218 -.type feraiseexcept, @function
4219 -feraiseexcept:
4220 - mov r4, r0
4221 - and #0x7c, r0
4222 - sts fpscr, r4
4223 - or r4, r0
4224 - lds r0, fpscr
4225 - rts
4226 - mov #0, r0
4227 -
4228 -.global fegetenv
4229 -.type fegetenv, @function
4230 -fegetenv:
4231 - sts fpscr, r0
4232 - mov.l r0, @r4
4233 - rts
4234 - mov #0, r0
4235 -
4236 -.global fesetenv
4237 -.type fesetenv, @function
4238 -fesetenv:
4239 - mov r4, r0
4240 - cmp/eq #-1, r0
4241 - bf 1f
4242 -
4243 - ! the default environment is complicated by the fact that we need to
4244 - ! preserve the current precision bit, which we do not know a priori
4245 - sts fpscr, r0
4246 - mov #8, r1
4247 - swap.w r1, r1
4248 - bra 2f
4249 - and r1, r0
4250 -
4251 -1: mov.l @r4, r0 ! non-default environment
4252 -2: lds r0, fpscr
4253 - rts
4254 - mov #0, r0
4255 --- a/src/fenv/sheb-nofpu/fenv.sub
4256 +++ /dev/null
4257 @@ -1 +0,0 @@
4258 -../fenv.c
4259 --- a/src/internal/arm/syscall.s
4260 +++ b/src/internal/arm/syscall.s
4261 @@ -1,3 +1,4 @@
4262 +.syntax unified
4263 .global __syscall
4264 .hidden __syscall
4265 .type __syscall,%function
4266 @@ -11,6 +12,4 @@ __syscall:
4267 ldmfd ip,{r3,r4,r5,r6}
4268 svc 0
4269 ldmfd sp!,{r4,r5,r6,r7}
4270 - tst lr,#1
4271 - moveq pc,lr
4272 bx lr
4273 --- /dev/null
4274 +++ b/src/internal/atomic.h
4275 @@ -0,0 +1,275 @@
4276 +#ifndef _ATOMIC_H
4277 +#define _ATOMIC_H
4278 +
4279 +#include <stdint.h>
4280 +
4281 +#include "atomic_arch.h"
4282 +
4283 +#ifdef a_ll
4284 +
4285 +#ifndef a_pre_llsc
4286 +#define a_pre_llsc()
4287 +#endif
4288 +
4289 +#ifndef a_post_llsc
4290 +#define a_post_llsc()
4291 +#endif
4292 +
4293 +#ifndef a_cas
4294 +#define a_cas a_cas
4295 +static inline int a_cas(volatile int *p, int t, int s)
4296 +{
4297 + int old;
4298 + a_pre_llsc();
4299 + do old = a_ll(p);
4300 + while (old==t && !a_sc(p, s));
4301 + a_post_llsc();
4302 + return old;
4303 +}
4304 +#endif
4305 +
4306 +#ifndef a_swap
4307 +#define a_swap a_swap
4308 +static inline int a_swap(volatile int *p, int v)
4309 +{
4310 + int old;
4311 + a_pre_llsc();
4312 + do old = a_ll(p);
4313 + while (!a_sc(p, v));
4314 + a_post_llsc();
4315 + return old;
4316 +}
4317 +#endif
4318 +
4319 +#ifndef a_fetch_add
4320 +#define a_fetch_add a_fetch_add
4321 +static inline int a_fetch_add(volatile int *p, int v)
4322 +{
4323 + int old;
4324 + a_pre_llsc();
4325 + do old = a_ll(p);
4326 + while (!a_sc(p, (unsigned)old + v));
4327 + a_post_llsc();
4328 + return old;
4329 +}
4330 +#endif
4331 +
4332 +#ifndef a_fetch_and
4333 +#define a_fetch_and a_fetch_and
4334 +static inline int a_fetch_and(volatile int *p, int v)
4335 +{
4336 + int old;
4337 + a_pre_llsc();
4338 + do old = a_ll(p);
4339 + while (!a_sc(p, old & v));
4340 + a_post_llsc();
4341 + return old;
4342 +}
4343 +#endif
4344 +
4345 +#ifndef a_fetch_or
4346 +#define a_fetch_or a_fetch_or
4347 +static inline int a_fetch_or(volatile int *p, int v)
4348 +{
4349 + int old;
4350 + a_pre_llsc();
4351 + do old = a_ll(p);
4352 + while (!a_sc(p, old | v));
4353 + a_post_llsc();
4354 + return old;
4355 +}
4356 +#endif
4357 +
4358 +#endif
4359 +
4360 +#ifndef a_cas
4361 +#error missing definition of a_cas
4362 +#endif
4363 +
4364 +#ifndef a_swap
4365 +#define a_swap a_swap
4366 +static inline int a_swap(volatile int *p, int v)
4367 +{
4368 + int old;
4369 + do old = *p;
4370 + while (a_cas(p, old, v) != old);
4371 + return old;
4372 +}
4373 +#endif
4374 +
4375 +#ifndef a_fetch_add
4376 +#define a_fetch_add a_fetch_add
4377 +static inline int a_fetch_add(volatile int *p, int v)
4378 +{
4379 + int old;
4380 + do old = *p;
4381 + while (a_cas(p, old, (unsigned)old+v) != old);
4382 + return old;
4383 +}
4384 +#endif
4385 +
4386 +#ifndef a_fetch_and
4387 +#define a_fetch_and a_fetch_and
4388 +static inline int a_fetch_and(volatile int *p, int v)
4389 +{
4390 + int old;
4391 + do old = *p;
4392 + while (a_cas(p, old, old&v) != old);
4393 + return old;
4394 +}
4395 +#endif
4396 +#ifndef a_fetch_or
4397 +#define a_fetch_or a_fetch_or
4398 +static inline int a_fetch_or(volatile int *p, int v)
4399 +{
4400 + int old;
4401 + do old = *p;
4402 + while (a_cas(p, old, old|v) != old);
4403 + return old;
4404 +}
4405 +#endif
4406 +
4407 +#ifndef a_and
4408 +#define a_and a_and
4409 +static inline void a_and(volatile int *p, int v)
4410 +{
4411 + a_fetch_and(p, v);
4412 +}
4413 +#endif
4414 +
4415 +#ifndef a_or
4416 +#define a_or a_or
4417 +static inline void a_or(volatile int *p, int v)
4418 +{
4419 + a_fetch_or(p, v);
4420 +}
4421 +#endif
4422 +
4423 +#ifndef a_inc
4424 +#define a_inc a_inc
4425 +static inline void a_inc(volatile int *p)
4426 +{
4427 + a_fetch_add(p, 1);
4428 +}
4429 +#endif
4430 +
4431 +#ifndef a_dec
4432 +#define a_dec a_dec
4433 +static inline void a_dec(volatile int *p)
4434 +{
4435 + a_fetch_add(p, -1);
4436 +}
4437 +#endif
4438 +
4439 +#ifndef a_store
4440 +#define a_store a_store
4441 +static inline void a_store(volatile int *p, int v)
4442 +{
4443 +#ifdef a_barrier
4444 + a_barrier();
4445 + *p = v;
4446 + a_barrier();
4447 +#else
4448 + a_swap(p, v);
4449 +#endif
4450 +}
4451 +#endif
4452 +
4453 +#ifndef a_barrier
4454 +#define a_barrier a_barrier
4455 +static inline void a_barrier()
4456 +{
4457 + volatile int tmp = 0;
4458 + a_cas(&tmp, 0, 0);
4459 +}
4460 +#endif
4461 +
4462 +#ifndef a_spin
4463 +#define a_spin a_barrier
4464 +#endif
4465 +
4466 +#ifndef a_and_64
4467 +#define a_and_64 a_and_64
4468 +static inline void a_and_64(volatile uint64_t *p, uint64_t v)
4469 +{
4470 + union { uint64_t v; uint32_t r[2]; } u = { v };
4471 + if (u.r[0]+1) a_and((int *)p, u.r[0]);
4472 + if (u.r[1]+1) a_and((int *)p+1, u.r[1]);
4473 +}
4474 +#endif
4475 +
4476 +#ifndef a_or_64
4477 +#define a_or_64 a_or_64
4478 +static inline void a_or_64(volatile uint64_t *p, uint64_t v)
4479 +{
4480 + union { uint64_t v; uint32_t r[2]; } u = { v };
4481 + if (u.r[0]) a_or((int *)p, u.r[0]);
4482 + if (u.r[1]) a_or((int *)p+1, u.r[1]);
4483 +}
4484 +#endif
4485 +
4486 +#ifndef a_cas_p
4487 +#define a_cas_p a_cas_p
4488 +static inline void *a_cas_p(volatile void *p, void *t, void *s)
4489 +{
4490 + return (void *)a_cas((volatile int *)p, (int)t, (int)s);
4491 +}
4492 +#endif
4493 +
4494 +#ifndef a_or_l
4495 +#define a_or_l a_or_l
4496 +static inline void a_or_l(volatile void *p, long v)
4497 +{
4498 + if (sizeof(long) == sizeof(int)) a_or(p, v);
4499 + else a_or_64(p, v);
4500 +}
4501 +#endif
4502 +
4503 +#ifndef a_crash
4504 +#define a_crash a_crash
4505 +static inline void a_crash()
4506 +{
4507 + *(volatile char *)0=0;
4508 +}
4509 +#endif
4510 +
4511 +#ifndef a_ctz_64
4512 +#define a_ctz_64 a_ctz_64
4513 +static inline int a_ctz_64(uint64_t x)
4514 +{
4515 + static const char debruijn64[64] = {
4516 + 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28,
4517 + 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11,
4518 + 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10,
4519 + 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12
4520 + };
4521 + static const char debruijn32[32] = {
4522 + 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
4523 + 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
4524 + };
4525 + if (sizeof(long) < 8) {
4526 + uint32_t y = x;
4527 + if (!y) {
4528 + y = x>>32;
4529 + return 32 + debruijn32[(y&-y)*0x076be629 >> 27];
4530 + }
4531 + return debruijn32[(y&-y)*0x076be629 >> 27];
4532 + }
4533 + return debruijn64[(x&-x)*0x022fdd63cc95386dull >> 58];
4534 +}
4535 +#endif
4536 +
4537 +#ifndef a_ctz_l
4538 +#define a_ctz_l a_ctz_l
4539 +static inline int a_ctz_l(unsigned long x)
4540 +{
4541 + static const char debruijn32[32] = {
4542 + 0, 1, 23, 2, 29, 24, 19, 3, 30, 27, 25, 11, 20, 8, 4, 13,
4543 + 31, 22, 28, 18, 26, 10, 7, 12, 21, 17, 9, 6, 16, 5, 15, 14
4544 + };
4545 + if (sizeof(long) == 8) return a_ctz_64(x);
4546 + return debruijn32[(x&-x)*0x076be629 >> 27];
4547 +}
4548 +#endif
4549 +
4550 +#endif
4551 --- a/src/internal/dynlink.h
4552 +++ b/src/internal/dynlink.h
4553 @@ -64,6 +64,10 @@ struct fdpic_dummy_loadmap {
4554 #define DL_FDPIC 0
4555 #endif
4556
4557 +#ifndef DL_NOMMU_SUPPORT
4558 +#define DL_NOMMU_SUPPORT 0
4559 +#endif
4560 +
4561 #if !DL_FDPIC
4562 #define IS_RELATIVE(x,s) ( \
4563 (R_TYPE(x) == REL_RELATIVE) || \
4564 --- a/src/internal/libc.h
4565 +++ b/src/internal/libc.h
4566 @@ -11,13 +11,20 @@ struct __locale_struct {
4567 const struct __locale_map *volatile cat[6];
4568 };
4569
4570 +struct tls_module {
4571 + struct tls_module *next;
4572 + void *image;
4573 + size_t len, size, align, offset;
4574 +};
4575 +
4576 struct __libc {
4577 int can_do_threads;
4578 int threaded;
4579 int secure;
4580 volatile int threads_minus_1;
4581 size_t *auxv;
4582 - size_t tls_size;
4583 + struct tls_module *tls_head;
4584 + size_t tls_size, tls_align, tls_cnt;
4585 size_t page_size;
4586 struct __locale_struct global_locale;
4587 };
4588 --- a/src/internal/syscall.h
4589 +++ b/src/internal/syscall.h
4590 @@ -17,9 +17,7 @@
4591 typedef long syscall_arg_t;
4592 #endif
4593
4594 -#ifdef SHARED
4595 __attribute__((visibility("hidden")))
4596 -#endif
4597 long __syscall_ret(unsigned long), __syscall(syscall_arg_t, ...),
4598 __syscall_cp(syscall_arg_t, syscall_arg_t, syscall_arg_t, syscall_arg_t,
4599 syscall_arg_t, syscall_arg_t, syscall_arg_t);
4600 --- a/src/internal/version.c
4601 +++ b/src/internal/version.c
4602 @@ -1,12 +1,9 @@
4603 -#ifdef SHARED
4604 -
4605 #include "version.h"
4606
4607 static const char version[] = VERSION;
4608
4609 +__attribute__((__visibility__("hidden")))
4610 const char *__libc_get_version()
4611 {
4612 return version;
4613 }
4614 -
4615 -#endif
4616 --- a/src/internal/vis.h
4617 +++ b/src/internal/vis.h
4618 @@ -4,10 +4,9 @@
4619 * override default visibilities to reduce the size and performance costs
4620 * of position-independent code. */
4621
4622 -#ifndef CRT
4623 -#ifdef SHARED
4624 +#if !defined(CRT) && !defined(__ASSEMBLER__)
4625
4626 -/* For shared libc.so, all symbols should be protected, but some toolchains
4627 +/* Conceptually, all symbols should be protected, but some toolchains
4628 * fail to support copy relocations for protected data, so exclude all
4629 * exported data symbols. */
4630
4631 @@ -25,16 +24,4 @@ extern char *optarg, **environ, **__envi
4632
4633 #pragma GCC visibility push(protected)
4634
4635 -#elif defined(__PIC__)
4636 -
4637 -/* If building static libc.a as position-independent code, try to make
4638 - * everything hidden except possibly-undefined weak references. */
4639 -
4640 -__attribute__((__visibility__("default")))
4641 -extern void (*const __init_array_start)(), (*const __init_array_end)(),
4642 - (*const __fini_array_start)(), (*const __fini_array_end)();
4643 -
4644 -#pragma GCC visibility push(hidden)
4645 -
4646 -#endif
4647 #endif
4648 --- a/src/ldso/arm/dlsym.s
4649 +++ b/src/ldso/arm/dlsym.s
4650 @@ -1,3 +1,4 @@
4651 +.syntax unified
4652 .text
4653 .global dlsym
4654 .hidden __dlsym
4655 --- /dev/null
4656 +++ b/src/ldso/arm/find_exidx.c
4657 @@ -0,0 +1,42 @@
4658 +#define _GNU_SOURCE
4659 +#include <link.h>
4660 +#include <stdint.h>
4661 +
4662 +struct find_exidx_data {
4663 + uintptr_t pc, exidx_start;
4664 + int exidx_len;
4665 +};
4666 +
4667 +static int find_exidx(struct dl_phdr_info *info, size_t size, void *ptr)
4668 +{
4669 + struct find_exidx_data *data = ptr;
4670 + const ElfW(Phdr) *phdr = info->dlpi_phdr;
4671 + uintptr_t addr, exidx_start = 0;
4672 + int i, match = 0, exidx_len = 0;
4673 +
4674 + for (i = info->dlpi_phnum; i > 0; i--, phdr++) {
4675 + addr = info->dlpi_addr + phdr->p_vaddr;
4676 + switch (phdr->p_type) {
4677 + case PT_LOAD:
4678 + match |= data->pc >= addr && data->pc < addr + phdr->p_memsz;
4679 + break;
4680 + case PT_ARM_EXIDX:
4681 + exidx_start = addr;
4682 + exidx_len = phdr->p_memsz;
4683 + break;
4684 + }
4685 + }
4686 + data->exidx_start = exidx_start;
4687 + data->exidx_len = exidx_len;
4688 + return match;
4689 +}
4690 +
4691 +uintptr_t __gnu_Unwind_Find_exidx(uintptr_t pc, int *pcount)
4692 +{
4693 + struct find_exidx_data data;
4694 + data.pc = pc;
4695 + if (dl_iterate_phdr(find_exidx, &data) <= 0)
4696 + return 0;
4697 + *pcount = data.exidx_len / 8;
4698 + return data.exidx_start;
4699 +}
4700 --- a/src/ldso/dynlink.c
4701 +++ b/src/ldso/dynlink.c
4702 @@ -70,8 +70,8 @@ struct dso {
4703 char kernel_mapped;
4704 struct dso **deps, *needed_by;
4705 char *rpath_orig, *rpath;
4706 - void *tls_image;
4707 - size_t tls_len, tls_size, tls_align, tls_id, tls_offset;
4708 + struct tls_module tls;
4709 + size_t tls_id;
4710 size_t relro_start, relro_end;
4711 void **new_dtv;
4712 unsigned char *new_tls;
4713 @@ -99,7 +99,9 @@ struct symdef {
4714
4715 int __init_tp(void *);
4716 void __init_libc(char **, char *);
4717 +void *__copy_tls(unsigned char *);
4718
4719 +__attribute__((__visibility__("hidden")))
4720 const char *__libc_get_version(void);
4721
4722 static struct builtin_tls {
4723 @@ -123,6 +125,7 @@ static int noload;
4724 static jmp_buf *rtld_fail;
4725 static pthread_rwlock_t lock;
4726 static struct debug debug;
4727 +static struct tls_module *tls_tail;
4728 static size_t tls_cnt, tls_offset, tls_align = MIN_TLS_ALIGN;
4729 static size_t static_tls_cnt;
4730 static pthread_mutex_t init_fini_lock = { ._m_type = PTHREAD_MUTEX_RECURSIVE };
4731 @@ -131,6 +134,15 @@ static struct fdpic_dummy_loadmap app_du
4732
4733 struct debug *_dl_debug_addr = &debug;
4734
4735 +__attribute__((__visibility__("hidden")))
4736 +void (*const __init_array_start)(void)=0, (*const __fini_array_start)(void)=0;
4737 +
4738 +__attribute__((__visibility__("hidden")))
4739 +extern void (*const __init_array_end)(void), (*const __fini_array_end)(void);
4740 +
4741 +weak_alias(__init_array_start, __init_array_end);
4742 +weak_alias(__fini_array_start, __fini_array_end);
4743 +
4744 static int dl_strcmp(const char *l, const char *r)
4745 {
4746 for (; *l==*r && *l; l++, r++);
4747 @@ -397,14 +409,14 @@ static void do_relocs(struct dso *dso, s
4748 break;
4749 #ifdef TLS_ABOVE_TP
4750 case REL_TPOFF:
4751 - *reloc_addr = tls_val + def.dso->tls_offset + TPOFF_K + addend;
4752 + *reloc_addr = tls_val + def.dso->tls.offset + TPOFF_K + addend;
4753 break;
4754 #else
4755 case REL_TPOFF:
4756 - *reloc_addr = tls_val - def.dso->tls_offset + addend;
4757 + *reloc_addr = tls_val - def.dso->tls.offset + addend;
4758 break;
4759 case REL_TPOFF_NEG:
4760 - *reloc_addr = def.dso->tls_offset - tls_val + addend;
4761 + *reloc_addr = def.dso->tls.offset - tls_val + addend;
4762 break;
4763 #endif
4764 case REL_TLSDESC:
4765 @@ -426,10 +438,10 @@ static void do_relocs(struct dso *dso, s
4766 } else {
4767 reloc_addr[0] = (size_t)__tlsdesc_static;
4768 #ifdef TLS_ABOVE_TP
4769 - reloc_addr[1] = tls_val + def.dso->tls_offset
4770 + reloc_addr[1] = tls_val + def.dso->tls.offset
4771 + TPOFF_K + addend;
4772 #else
4773 - reloc_addr[1] = tls_val - def.dso->tls_offset
4774 + reloc_addr[1] = tls_val - def.dso->tls.offset
4775 + addend;
4776 #endif
4777 }
4778 @@ -482,8 +494,14 @@ static void reclaim_gaps(struct dso *dso
4779
4780 static void *mmap_fixed(void *p, size_t n, int prot, int flags, int fd, off_t off)
4781 {
4782 - char *q = mmap(p, n, prot, flags, fd, off);
4783 - if (q != MAP_FAILED || errno != EINVAL) return q;
4784 + static int no_map_fixed;
4785 + char *q;
4786 + if (!no_map_fixed) {
4787 + q = mmap(p, n, prot, flags|MAP_FIXED, fd, off);
4788 + if (!DL_NOMMU_SUPPORT || q != MAP_FAILED || errno != EINVAL)
4789 + return q;
4790 + no_map_fixed = 1;
4791 + }
4792 /* Fallbacks for MAP_FIXED failure on NOMMU kernels. */
4793 if (flags & MAP_ANONYMOUS) {
4794 memset(p, 0, n);
4795 @@ -561,9 +579,9 @@ static void *map_library(int fd, struct
4796 dyn = ph->p_vaddr;
4797 } else if (ph->p_type == PT_TLS) {
4798 tls_image = ph->p_vaddr;
4799 - dso->tls_align = ph->p_align;
4800 - dso->tls_len = ph->p_filesz;
4801 - dso->tls_size = ph->p_memsz;
4802 + dso->tls.align = ph->p_align;
4803 + dso->tls.len = ph->p_filesz;
4804 + dso->tls.size = ph->p_memsz;
4805 } else if (ph->p_type == PT_GNU_RELRO) {
4806 dso->relro_start = ph->p_vaddr & -PAGE_SIZE;
4807 dso->relro_end = (ph->p_vaddr + ph->p_memsz) & -PAGE_SIZE;
4808 @@ -593,7 +611,7 @@ static void *map_library(int fd, struct
4809 ((ph->p_flags&PF_W) ? PROT_WRITE: 0) |
4810 ((ph->p_flags&PF_X) ? PROT_EXEC : 0));
4811 map = mmap(0, ph->p_memsz + (ph->p_vaddr & PAGE_SIZE-1),
4812 - prot, (prot&PROT_WRITE) ? MAP_PRIVATE : MAP_SHARED,
4813 + prot, MAP_PRIVATE,
4814 fd, ph->p_offset & -PAGE_SIZE);
4815 if (map == MAP_FAILED) {
4816 unmap_library(dso);
4817 @@ -604,6 +622,19 @@ static void *map_library(int fd, struct
4818 dso->loadmap->segs[i].p_vaddr = ph->p_vaddr;
4819 dso->loadmap->segs[i].p_memsz = ph->p_memsz;
4820 i++;
4821 + if (prot & PROT_WRITE) {
4822 + size_t brk = (ph->p_vaddr & PAGE_SIZE-1)
4823 + + ph->p_filesz;
4824 + size_t pgbrk = brk + PAGE_SIZE-1 & -PAGE_SIZE;
4825 + size_t pgend = brk + ph->p_memsz - ph->p_filesz
4826 + + PAGE_SIZE-1 & -PAGE_SIZE;
4827 + if (pgend > pgbrk && mmap_fixed(map+pgbrk,
4828 + pgend-pgbrk, prot,
4829 + MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS,
4830 + -1, off_start) == MAP_FAILED)
4831 + goto error;
4832 + memset(map + brk, 0, pgbrk-brk);
4833 + }
4834 }
4835 map = (void *)dso->loadmap->segs[0].addr;
4836 map_len = 0;
4837 @@ -618,7 +649,11 @@ static void *map_library(int fd, struct
4838 * the length of the file. This is okay because we will not
4839 * use the invalid part; we just need to reserve the right
4840 * amount of virtual address space to map over later. */
4841 - map = mmap((void *)addr_min, map_len, prot, MAP_PRIVATE, fd, off_start);
4842 + map = DL_NOMMU_SUPPORT
4843 + ? mmap((void *)addr_min, map_len, PROT_READ|PROT_WRITE|PROT_EXEC,
4844 + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
4845 + : mmap((void *)addr_min, map_len, prot,
4846 + MAP_PRIVATE, fd, off_start);
4847 if (map==MAP_FAILED) goto error;
4848 dso->map = map;
4849 dso->map_len = map_len;
4850 @@ -643,7 +678,8 @@ static void *map_library(int fd, struct
4851 dso->phentsize = eh->e_phentsize;
4852 }
4853 /* Reuse the existing mapping for the lowest-address LOAD */
4854 - if ((ph->p_vaddr & -PAGE_SIZE) == addr_min) continue;
4855 + if ((ph->p_vaddr & -PAGE_SIZE) == addr_min && !DL_NOMMU_SUPPORT)
4856 + continue;
4857 this_min = ph->p_vaddr & -PAGE_SIZE;
4858 this_max = ph->p_vaddr+ph->p_memsz+PAGE_SIZE-1 & -PAGE_SIZE;
4859 off_start = ph->p_offset & -PAGE_SIZE;
4860 @@ -670,7 +706,7 @@ static void *map_library(int fd, struct
4861 done_mapping:
4862 dso->base = base;
4863 dso->dynv = laddr(dso, dyn);
4864 - if (dso->tls_size) dso->tls_image = laddr(dso, tls_image);
4865 + if (dso->tls.size) dso->tls.image = laddr(dso, tls_image);
4866 if (!runtime) reclaim_gaps(dso);
4867 free(allocated_buf);
4868 return map;
4869 @@ -987,8 +1023,8 @@ static struct dso *load_library(const ch
4870 * extended DTV capable of storing an additional slot for
4871 * the newly-loaded DSO. */
4872 alloc_size = sizeof *p + strlen(pathname) + 1;
4873 - if (runtime && temp_dso.tls_image) {
4874 - size_t per_th = temp_dso.tls_size + temp_dso.tls_align
4875 + if (runtime && temp_dso.tls.image) {
4876 + size_t per_th = temp_dso.tls.size + temp_dso.tls.align
4877 + sizeof(void *) * (tls_cnt+3);
4878 n_th = libc.threads_minus_1 + 1;
4879 if (n_th > SSIZE_MAX / per_th) alloc_size = SIZE_MAX;
4880 @@ -1009,22 +1045,25 @@ static struct dso *load_library(const ch
4881 strcpy(p->name, pathname);
4882 /* Add a shortname only if name arg was not an explicit pathname. */
4883 if (pathname != name) p->shortname = strrchr(p->name, '/')+1;
4884 - if (p->tls_image) {
4885 + if (p->tls.image) {
4886 p->tls_id = ++tls_cnt;
4887 - tls_align = MAXP2(tls_align, p->tls_align);
4888 + tls_align = MAXP2(tls_align, p->tls.align);
4889 #ifdef TLS_ABOVE_TP
4890 - p->tls_offset = tls_offset + ( (tls_align-1) &
4891 - -(tls_offset + (uintptr_t)p->tls_image) );
4892 - tls_offset += p->tls_size;
4893 + p->tls.offset = tls_offset + ( (tls_align-1) &
4894 + -(tls_offset + (uintptr_t)p->tls.image) );
4895 + tls_offset += p->tls.size;
4896 #else
4897 - tls_offset += p->tls_size + p->tls_align - 1;
4898 - tls_offset -= (tls_offset + (uintptr_t)p->tls_image)
4899 - & (p->tls_align-1);
4900 - p->tls_offset = tls_offset;
4901 + tls_offset += p->tls.size + p->tls.align - 1;
4902 + tls_offset -= (tls_offset + (uintptr_t)p->tls.image)
4903 + & (p->tls.align-1);
4904 + p->tls.offset = tls_offset;
4905 #endif
4906 p->new_dtv = (void *)(-sizeof(size_t) &
4907 (uintptr_t)(p->name+strlen(p->name)+sizeof(size_t)));
4908 p->new_tls = (void *)(p->new_dtv + n_th*(tls_cnt+1));
4909 + if (tls_tail) tls_tail->next = &p->tls;
4910 + else libc.tls_head = &p->tls;
4911 + tls_tail = &p->tls;
4912 }
4913
4914 tail->next = p;
4915 @@ -1151,7 +1190,7 @@ static void kernel_mapped_dso(struct dso
4916 p->kernel_mapped = 1;
4917 }
4918
4919 -static void do_fini()
4920 +void __libc_exit_fini()
4921 {
4922 struct dso *p;
4923 size_t dyn[DYN_CNT];
4924 @@ -1214,53 +1253,8 @@ static void dl_debug_state(void)
4925
4926 weak_alias(dl_debug_state, _dl_debug_state);
4927
4928 -void __reset_tls()
4929 +void __init_tls(size_t *auxv)
4930 {
4931 - pthread_t self = __pthread_self();
4932 - struct dso *p;
4933 - for (p=head; p; p=p->next) {
4934 - if (!p->tls_id || !self->dtv[p->tls_id]) continue;
4935 - memcpy(self->dtv[p->tls_id], p->tls_image, p->tls_len);
4936 - memset((char *)self->dtv[p->tls_id]+p->tls_len, 0,
4937 - p->tls_size - p->tls_len);
4938 - if (p->tls_id == (size_t)self->dtv[0]) break;
4939 - }
4940 -}
4941 -
4942 -void *__copy_tls(unsigned char *mem)
4943 -{
4944 - pthread_t td;
4945 - struct dso *p;
4946 - void **dtv;
4947 -
4948 -#ifdef TLS_ABOVE_TP
4949 - dtv = (void **)(mem + libc.tls_size) - (tls_cnt + 1);
4950 -
4951 - mem += -((uintptr_t)mem + sizeof(struct pthread)) & (tls_align-1);
4952 - td = (pthread_t)mem;
4953 - mem += sizeof(struct pthread);
4954 -
4955 - for (p=head; p; p=p->next) {
4956 - if (!p->tls_id) continue;
4957 - dtv[p->tls_id] = mem + p->tls_offset;
4958 - memcpy(dtv[p->tls_id], p->tls_image, p->tls_len);
4959 - }
4960 -#else
4961 - dtv = (void **)mem;
4962 -
4963 - mem += libc.tls_size - sizeof(struct pthread);
4964 - mem -= (uintptr_t)mem & (tls_align-1);
4965 - td = (pthread_t)mem;
4966 -
4967 - for (p=head; p; p=p->next) {
4968 - if (!p->tls_id) continue;
4969 - dtv[p->tls_id] = mem - p->tls_offset;
4970 - memcpy(dtv[p->tls_id], p->tls_image, p->tls_len);
4971 - }
4972 -#endif
4973 - dtv[0] = (void *)tls_cnt;
4974 - td->dtv = td->dtv_copy = dtv;
4975 - return td;
4976 }
4977
4978 __attribute__((__visibility__("hidden")))
4979 @@ -1286,7 +1280,7 @@ void *__tls_get_new(size_t *v)
4980 /* Get new DTV space from new DSO if needed */
4981 if (v[0] > (size_t)self->dtv[0]) {
4982 void **newdtv = p->new_dtv +
4983 - (v[0]+1)*sizeof(void *)*a_fetch_add(&p->new_dtv_idx,1);
4984 + (v[0]+1)*a_fetch_add(&p->new_dtv_idx,1);
4985 memcpy(newdtv, self->dtv,
4986 ((size_t)self->dtv[0]+1) * sizeof(void *));
4987 newdtv[0] = (void *)v[0];
4988 @@ -1297,12 +1291,12 @@ void *__tls_get_new(size_t *v)
4989 unsigned char *mem;
4990 for (p=head; ; p=p->next) {
4991 if (!p->tls_id || self->dtv[p->tls_id]) continue;
4992 - mem = p->new_tls + (p->tls_size + p->tls_align)
4993 + mem = p->new_tls + (p->tls.size + p->tls.align)
4994 * a_fetch_add(&p->new_tls_idx,1);
4995 - mem += ((uintptr_t)p->tls_image - (uintptr_t)mem)
4996 - & (p->tls_align-1);
4997 + mem += ((uintptr_t)p->tls.image - (uintptr_t)mem)
4998 + & (p->tls.align-1);
4999 self->dtv[p->tls_id] = mem;
5000 - memcpy(mem, p->tls_image, p->tls_len);
5001 + memcpy(mem, p->tls.image, p->tls.len);
5002 if (p->tls_id == v[0]) break;
5003 }
5004 __restore_sigs(&set);
5005 @@ -1311,6 +1305,8 @@ void *__tls_get_new(size_t *v)
5006
5007 static void update_tls_size()
5008 {
5009 + libc.tls_cnt = tls_cnt;
5010 + libc.tls_align = tls_align;
5011 libc.tls_size = ALIGN(
5012 (1+tls_cnt) * sizeof(void *) +
5013 tls_offset +
5014 @@ -1421,6 +1417,7 @@ _Noreturn void __dls3(size_t *sp)
5015 * use during dynamic linking. If possible it will also serve as the
5016 * thread pointer at runtime. */
5017 libc.tls_size = sizeof builtin_tls;
5018 + libc.tls_align = tls_align;
5019 if (__init_tp(__copy_tls((void *)builtin_tls)) < 0) {
5020 a_crash();
5021 }
5022 @@ -1448,13 +1445,13 @@ _Noreturn void __dls3(size_t *sp)
5023 interp_off = (size_t)phdr->p_vaddr;
5024 else if (phdr->p_type == PT_TLS) {
5025 tls_image = phdr->p_vaddr;
5026 - app.tls_len = phdr->p_filesz;
5027 - app.tls_size = phdr->p_memsz;
5028 - app.tls_align = phdr->p_align;
5029 + app.tls.len = phdr->p_filesz;
5030 + app.tls.size = phdr->p_memsz;
5031 + app.tls.align = phdr->p_align;
5032 }
5033 }
5034 if (DL_FDPIC) app.loadmap = app_loadmap;
5035 - if (app.tls_size) app.tls_image = laddr(&app, tls_image);
5036 + if (app.tls.size) app.tls.image = laddr(&app, tls_image);
5037 if (interp_off) ldso.name = laddr(&app, interp_off);
5038 if ((aux[0] & (1UL<<AT_EXECFN))
5039 && strncmp((char *)aux[AT_EXECFN], "/proc/", 6))
5040 @@ -1523,19 +1520,20 @@ _Noreturn void __dls3(size_t *sp)
5041 dprintf(1, "\t%s (%p)\n", ldso.name, ldso.base);
5042 }
5043 }
5044 - if (app.tls_size) {
5045 + if (app.tls.size) {
5046 + libc.tls_head = &app.tls;
5047 app.tls_id = tls_cnt = 1;
5048 #ifdef TLS_ABOVE_TP
5049 - app.tls_offset = 0;
5050 - tls_offset = app.tls_size
5051 - + ( -((uintptr_t)app.tls_image + app.tls_size)
5052 - & (app.tls_align-1) );
5053 + app.tls.offset = 0;
5054 + tls_offset = app.tls.size
5055 + + ( -((uintptr_t)app.tls.image + app.tls.size)
5056 + & (app.tls.align-1) );
5057 #else
5058 - tls_offset = app.tls_offset = app.tls_size
5059 - + ( -((uintptr_t)app.tls_image + app.tls_size)
5060 - & (app.tls_align-1) );
5061 + tls_offset = app.tls.offset = app.tls.size
5062 + + ( -((uintptr_t)app.tls.image + app.tls.size)
5063 + & (app.tls.align-1) );
5064 #endif
5065 - tls_align = MAXP2(tls_align, app.tls_align);
5066 + tls_align = MAXP2(tls_align, app.tls.align);
5067 }
5068 app.global = 1;
5069 decode_dyn(&app);
5070 @@ -1635,8 +1633,6 @@ _Noreturn void __dls3(size_t *sp)
5071 debug.state = 0;
5072 _dl_debug_state();
5073
5074 - __init_libc(envp, argv[0]);
5075 - atexit(do_fini);
5076 errno = 0;
5077
5078 CRTJMP((void *)aux[AT_ENTRY], argv-1);
5079 @@ -1646,6 +1642,7 @@ _Noreturn void __dls3(size_t *sp)
5080 void *dlopen(const char *file, int mode)
5081 {
5082 struct dso *volatile p, *orig_tail, *next;
5083 + struct tls_module *orig_tls_tail;
5084 size_t orig_tls_cnt, orig_tls_offset, orig_tls_align;
5085 size_t i;
5086 int cs;
5087 @@ -1658,6 +1655,7 @@ void *dlopen(const char *file, int mode)
5088 __inhibit_ptc();
5089
5090 p = 0;
5091 + orig_tls_tail = tls_tail;
5092 orig_tls_cnt = tls_cnt;
5093 orig_tls_offset = tls_offset;
5094 orig_tls_align = tls_align;
5095 @@ -1684,6 +1682,8 @@ void *dlopen(const char *file, int mode)
5096 unmap_library(p);
5097 free(p);
5098 }
5099 + if (!orig_tls_tail) libc.tls_head = 0;
5100 + tls_tail = orig_tls_tail;
5101 tls_cnt = orig_tls_cnt;
5102 tls_offset = orig_tls_offset;
5103 tls_align = orig_tls_align;
5104 @@ -1900,7 +1900,7 @@ int dl_iterate_phdr(int(*callback)(struc
5105 info.dlpi_adds = gencnt;
5106 info.dlpi_subs = 0;
5107 info.dlpi_tls_modid = current->tls_id;
5108 - info.dlpi_tls_data = current->tls_image;
5109 + info.dlpi_tls_data = current->tls.image;
5110
5111 ret = (callback)(&info, sizeof (info), data);
5112
5113 --- a/src/locale/langinfo.c
5114 +++ b/src/locale/langinfo.c
5115 @@ -37,23 +37,23 @@ char *__nl_langinfo_l(nl_item item, loca
5116
5117 switch (cat) {
5118 case LC_NUMERIC:
5119 - if (idx > 1) return NULL;
5120 + if (idx > 1) return "";
5121 str = c_numeric;
5122 break;
5123 case LC_TIME:
5124 - if (idx > 0x31) return NULL;
5125 + if (idx > 0x31) return "";
5126 str = c_time;
5127 break;
5128 case LC_MONETARY:
5129 - if (idx > 0) return NULL;
5130 + if (idx > 0) return "";
5131 str = "";
5132 break;
5133 case LC_MESSAGES:
5134 - if (idx > 3) return NULL;
5135 + if (idx > 3) return "";
5136 str = c_messages;
5137 break;
5138 default:
5139 - return NULL;
5140 + return "";
5141 }
5142
5143 for (; idx; idx--, str++) for (; *str; str++);
5144 --- a/src/malloc/lite_malloc.c
5145 +++ b/src/malloc/lite_malloc.c
5146 @@ -8,7 +8,7 @@
5147
5148 void *__expand_heap(size_t *);
5149
5150 -void *__simple_malloc(size_t n)
5151 +static void *__simple_malloc(size_t n)
5152 {
5153 static char *cur, *end;
5154 static volatile int lock[2];
5155 --- a/src/math/__rem_pio2.c
5156 +++ b/src/math/__rem_pio2.c
5157 @@ -118,7 +118,7 @@ int __rem_pio2(double x, double *y)
5158 if (ix < 0x413921fb) { /* |x| ~< 2^20*(pi/2), medium size */
5159 medium:
5160 /* rint(x/(pi/2)), Assume round-to-nearest. */
5161 - fn = x*invpio2 + toint - toint;
5162 + fn = (double_t)x*invpio2 + toint - toint;
5163 n = (int32_t)fn;
5164 r = x - fn*pio2_1;
5165 w = fn*pio2_1t; /* 1st round, good to 85 bits */
5166 --- a/src/math/__rem_pio2f.c
5167 +++ b/src/math/__rem_pio2f.c
5168 @@ -51,7 +51,7 @@ int __rem_pio2f(float x, double *y)
5169 /* 25+53 bit pi is good enough for medium size */
5170 if (ix < 0x4dc90fdb) { /* |x| ~< 2^28*(pi/2), medium size */
5171 /* Use a specialized rint() to get fn. Assume round-to-nearest. */
5172 - fn = x*invpio2 + toint - toint;
5173 + fn = (double_t)x*invpio2 + toint - toint;
5174 n = (int32_t)fn;
5175 *y = x - fn*pio2_1 - fn*pio2_1t;
5176 return n;
5177 --- /dev/null
5178 +++ b/src/math/arm/fabs.c
5179 @@ -0,0 +1,15 @@
5180 +#include <math.h>
5181 +
5182 +#if __ARM_PCS_VFP
5183 +
5184 +double fabs(double x)
5185 +{
5186 + __asm__ ("vabs.f64 %P0, %P1" : "=w"(x) : "w"(x));
5187 + return x;
5188 +}
5189 +
5190 +#else
5191 +
5192 +#include "../fabs.c"
5193 +
5194 +#endif
5195 --- /dev/null
5196 +++ b/src/math/arm/fabsf.c
5197 @@ -0,0 +1,15 @@
5198 +#include <math.h>
5199 +
5200 +#if __ARM_PCS_VFP
5201 +
5202 +float fabsf(float x)
5203 +{
5204 + __asm__ ("vabs.f32 %0, %1" : "=t"(x) : "t"(x));
5205 + return x;
5206 +}
5207 +
5208 +#else
5209 +
5210 +#include "../fabsf.c"
5211 +
5212 +#endif
5213 --- /dev/null
5214 +++ b/src/math/arm/sqrt.c
5215 @@ -0,0 +1,15 @@
5216 +#include <math.h>
5217 +
5218 +#if __VFP_FP__ && !__SOFTFP__
5219 +
5220 +double sqrt(double x)
5221 +{
5222 + __asm__ ("vsqrt.f64 %P0, %P1" : "=w"(x) : "w"(x));
5223 + return x;
5224 +}
5225 +
5226 +#else
5227 +
5228 +#include "../sqrt.c"
5229 +
5230 +#endif
5231 --- /dev/null
5232 +++ b/src/math/arm/sqrtf.c
5233 @@ -0,0 +1,15 @@
5234 +#include <math.h>
5235 +
5236 +#if __VFP_FP__ && !__SOFTFP__
5237 +
5238 +float sqrtf(float x)
5239 +{
5240 + __asm__ ("vsqrt.f32 %0, %1" : "=t"(x) : "t"(x));
5241 + return x;
5242 +}
5243 +
5244 +#else
5245 +
5246 +#include "../sqrtf.c"
5247 +
5248 +#endif
5249 --- a/src/math/armebhf/fabs.sub
5250 +++ /dev/null
5251 @@ -1 +0,0 @@
5252 -../armhf/fabs.s
5253 --- a/src/math/armebhf/fabsf.sub
5254 +++ /dev/null
5255 @@ -1 +0,0 @@
5256 -../armhf/fabsf.s
5257 --- a/src/math/armebhf/sqrt.sub
5258 +++ /dev/null
5259 @@ -1 +0,0 @@
5260 -../armhf/sqrt.s
5261 --- a/src/math/armebhf/sqrtf.sub
5262 +++ /dev/null
5263 @@ -1 +0,0 @@
5264 -../armhf/sqrtf.s
5265 --- a/src/math/armhf/fabs.s
5266 +++ /dev/null
5267 @@ -1,7 +0,0 @@
5268 -.fpu vfp
5269 -.text
5270 -.global fabs
5271 -.type fabs,%function
5272 -fabs:
5273 - vabs.f64 d0, d0
5274 - bx lr
5275 --- a/src/math/armhf/fabs.sub
5276 +++ /dev/null
5277 @@ -1 +0,0 @@
5278 -fabs.s
5279 --- a/src/math/armhf/fabsf.s
5280 +++ /dev/null
5281 @@ -1,7 +0,0 @@
5282 -.fpu vfp
5283 -.text
5284 -.global fabsf
5285 -.type fabsf,%function
5286 -fabsf:
5287 - vabs.f32 s0, s0
5288 - bx lr
5289 --- a/src/math/armhf/fabsf.sub
5290 +++ /dev/null
5291 @@ -1 +0,0 @@
5292 -fabsf.s
5293 --- a/src/math/armhf/sqrt.s
5294 +++ /dev/null
5295 @@ -1,7 +0,0 @@
5296 -.fpu vfp
5297 -.text
5298 -.global sqrt
5299 -.type sqrt,%function
5300 -sqrt:
5301 - vsqrt.f64 d0, d0
5302 - bx lr
5303 --- a/src/math/armhf/sqrt.sub
5304 +++ /dev/null
5305 @@ -1 +0,0 @@
5306 -sqrt.s
5307 --- a/src/math/armhf/sqrtf.s
5308 +++ /dev/null
5309 @@ -1,7 +0,0 @@
5310 -.fpu vfp
5311 -.text
5312 -.global sqrtf
5313 -.type sqrtf,%function
5314 -sqrtf:
5315 - vsqrt.f32 s0, s0
5316 - bx lr
5317 --- a/src/math/armhf/sqrtf.sub
5318 +++ /dev/null
5319 @@ -1 +0,0 @@
5320 -sqrtf.s
5321 --- a/src/math/hypot.c
5322 +++ b/src/math/hypot.c
5323 @@ -12,10 +12,10 @@ static void sq(double_t *hi, double_t *l
5324 {
5325 double_t xh, xl, xc;
5326
5327 - xc = x*SPLIT;
5328 + xc = (double_t)x*SPLIT;
5329 xh = x - xc + xc;
5330 xl = x - xh;
5331 - *hi = x*x;
5332 + *hi = (double_t)x*x;
5333 *lo = xh*xh - *hi + 2*xh*xl + xl*xl;
5334 }
5335
5336 --- a/src/mman/mremap.c
5337 +++ b/src/mman/mremap.c
5338 @@ -1,17 +1,31 @@
5339 +#define _GNU_SOURCE
5340 #include <unistd.h>
5341 #include <sys/mman.h>
5342 +#include <errno.h>
5343 +#include <stdint.h>
5344 #include <stdarg.h>
5345 #include "syscall.h"
5346 #include "libc.h"
5347
5348 +static void dummy(void) { }
5349 +weak_alias(dummy, __vm_wait);
5350 +
5351 void *__mremap(void *old_addr, size_t old_len, size_t new_len, int flags, ...)
5352 {
5353 va_list ap;
5354 - void *new_addr;
5355 -
5356 - va_start(ap, flags);
5357 - new_addr = va_arg(ap, void *);
5358 - va_end(ap);
5359 + void *new_addr = 0;
5360 +
5361 + if (new_len >= PTRDIFF_MAX) {
5362 + errno = ENOMEM;
5363 + return MAP_FAILED;
5364 + }
5365 +
5366 + if (flags & MREMAP_FIXED) {
5367 + __vm_wait();
5368 + va_start(ap, flags);
5369 + new_addr = va_arg(ap, void *);
5370 + va_end(ap);
5371 + }
5372
5373 return (void *)syscall(SYS_mremap, old_addr, old_len, new_len, flags, new_addr);
5374 }
5375 --- a/src/network/getifaddrs.c
5376 +++ b/src/network/getifaddrs.c
5377 @@ -162,13 +162,26 @@ static int netlink_msg_to_ifaddr(void *p
5378 for (rta = NLMSG_RTA(h, sizeof(*ifa)); NLMSG_RTAOK(rta, h); rta = RTA_NEXT(rta)) {
5379 switch (rta->rta_type) {
5380 case IFA_ADDRESS:
5381 - copy_addr(&ifs->ifa.ifa_addr, ifa->ifa_family, &ifs->addr, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index);
5382 + /* If ifa_addr is already set, we received an IFA_LOCAL before
5383 + * so treat this as destination address */
5384 + if (ifs->ifa.ifa_addr)
5385 + copy_addr(&ifs->ifa.ifa_dstaddr, ifa->ifa_family, &ifs->ifu, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index);
5386 + else
5387 + copy_addr(&ifs->ifa.ifa_addr, ifa->ifa_family, &ifs->addr, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index);
5388 break;
5389 case IFA_BROADCAST:
5390 - /* For point-to-point links this is peer, but ifa_broadaddr
5391 - * and ifa_dstaddr are union, so this works for both. */
5392 copy_addr(&ifs->ifa.ifa_broadaddr, ifa->ifa_family, &ifs->ifu, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index);
5393 break;
5394 + case IFA_LOCAL:
5395 + /* If ifa_addr is set and we get IFA_LOCAL, assume we have
5396 + * a point-to-point network. Move address to correct field. */
5397 + if (ifs->ifa.ifa_addr) {
5398 + ifs->ifu = ifs->addr;
5399 + ifs->ifa.ifa_dstaddr = &ifs->ifu.sa;
5400 + memset(&ifs->addr, 0, sizeof(ifs->addr));
5401 + }
5402 + copy_addr(&ifs->ifa.ifa_addr, ifa->ifa_family, &ifs->addr, RTA_DATA(rta), RTA_DATALEN(rta), ifa->ifa_index);
5403 + break;
5404 case IFA_LABEL:
5405 if (RTA_DATALEN(rta) < sizeof(ifs->name)) {
5406 memcpy(ifs->name, RTA_DATA(rta), RTA_DATALEN(rta));
5407 --- a/src/network/getnameinfo.c
5408 +++ b/src/network/getnameinfo.c
5409 @@ -135,13 +135,13 @@ int getnameinfo(const struct sockaddr *r
5410 switch (af) {
5411 case AF_INET:
5412 a = (void *)&((struct sockaddr_in *)sa)->sin_addr;
5413 - if (sl != sizeof(struct sockaddr_in)) return EAI_FAMILY;
5414 + if (sl < sizeof(struct sockaddr_in)) return EAI_FAMILY;
5415 mkptr4(ptr, a);
5416 scopeid = 0;
5417 break;
5418 case AF_INET6:
5419 a = (void *)&((struct sockaddr_in6 *)sa)->sin6_addr;
5420 - if (sl != sizeof(struct sockaddr_in6)) return EAI_FAMILY;
5421 + if (sl < sizeof(struct sockaddr_in6)) return EAI_FAMILY;
5422 if (memcmp(a, "\0\0\0\0\0\0\0\0\0\0\xff\xff", 12))
5423 mkptr6(ptr, a);
5424 else
5425 --- a/src/network/if_nametoindex.c
5426 +++ b/src/network/if_nametoindex.c
5427 @@ -10,7 +10,7 @@ unsigned if_nametoindex(const char *name
5428 struct ifreq ifr;
5429 int fd, r;
5430
5431 - if ((fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0)) < 0) return -1;
5432 + if ((fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0)) < 0) return 0;
5433 strncpy(ifr.ifr_name, name, sizeof ifr.ifr_name);
5434 r = ioctl(fd, SIOCGIFINDEX, &ifr);
5435 __syscall(SYS_close, fd);
5436 --- a/src/network/lookup_name.c
5437 +++ b/src/network/lookup_name.c
5438 @@ -9,6 +9,7 @@
5439 #include <fcntl.h>
5440 #include <unistd.h>
5441 #include <pthread.h>
5442 +#include <errno.h>
5443 #include "lookup.h"
5444 #include "stdio_impl.h"
5445 #include "syscall.h"
5446 @@ -51,7 +52,14 @@ static int name_from_hosts(struct addres
5447 int cnt = 0;
5448 unsigned char _buf[1032];
5449 FILE _f, *f = __fopen_rb_ca("/etc/hosts", &_f, _buf, sizeof _buf);
5450 - if (!f) return 0;
5451 + if (!f) switch (errno) {
5452 + case ENOENT:
5453 + case ENOTDIR:
5454 + case EACCES:
5455 + return 0;
5456 + default:
5457 + return EAI_SYSTEM;
5458 + }
5459 while (fgets(line, sizeof line, f) && cnt < MAXADDRS) {
5460 char *p, *z;
5461
5462 --- a/src/network/lookup_serv.c
5463 +++ b/src/network/lookup_serv.c
5464 @@ -4,6 +4,7 @@
5465 #include <ctype.h>
5466 #include <string.h>
5467 #include <fcntl.h>
5468 +#include <errno.h>
5469 #include "lookup.h"
5470 #include "stdio_impl.h"
5471
5472 @@ -69,7 +70,14 @@ int __lookup_serv(struct service buf[sta
5473
5474 unsigned char _buf[1032];
5475 FILE _f, *f = __fopen_rb_ca("/etc/services", &_f, _buf, sizeof _buf);
5476 - if (!f) return EAI_SERVICE;
5477 + if (!f) switch (errno) {
5478 + case ENOENT:
5479 + case ENOTDIR:
5480 + case EACCES:
5481 + return EAI_SERVICE;
5482 + default:
5483 + return EAI_SYSTEM;
5484 + }
5485
5486 while (fgets(line, sizeof line, f) && cnt < MAXSERVS) {
5487 if ((p=strchr(line, '#'))) *p++='\n', *p=0;
5488 --- a/src/network/proto.c
5489 +++ b/src/network/proto.c
5490 @@ -9,21 +9,36 @@ static const unsigned char protos[] = {
5491 "\001icmp\0"
5492 "\002igmp\0"
5493 "\003ggp\0"
5494 + "\004ipencap\0"
5495 + "\005st\0"
5496 "\006tcp\0"
5497 + "\010egp\0"
5498 "\014pup\0"
5499 "\021udp\0"
5500 - "\026idp\0"
5501 + "\024hmp\0"
5502 + "\026xns-idp\0"
5503 + "\033rdp\0"
5504 + "\035iso-tp4\0"
5505 + "\044xtp\0"
5506 + "\045ddp\0"
5507 + "\046idpr-cmtp\0"
5508 "\051ipv6\0"
5509 "\053ipv6-route\0"
5510 "\054ipv6-frag\0"
5511 + "\055idrp\0"
5512 + "\056rsvp\0"
5513 "\057gre\0"
5514 "\062esp\0"
5515 "\063ah\0"
5516 + "\071skip\0"
5517 "\072ipv6-icmp\0"
5518 "\073ipv6-nonxt\0"
5519 "\074ipv6-opts\0"
5520 + "\111rspf\0"
5521 + "\121vmtp\0"
5522 "\131ospf\0"
5523 "\136ipip\0"
5524 + "\142encap\0"
5525 "\147pim\0"
5526 "\377raw"
5527 };
5528 --- a/src/network/res_msend.c
5529 +++ b/src/network/res_msend.c
5530 @@ -54,7 +54,15 @@ int __res_msend(int nqueries, const unsi
5531
5532 /* Get nameservers from resolv.conf, fallback to localhost */
5533 f = __fopen_rb_ca("/etc/resolv.conf", &_f, _buf, sizeof _buf);
5534 - if (f) for (nns=0; nns<3 && fgets(line, sizeof line, f); ) {
5535 + if (!f) switch (errno) {
5536 + case ENOENT:
5537 + case ENOTDIR:
5538 + case EACCES:
5539 + goto no_resolv_conf;
5540 + default:
5541 + return -1;
5542 + }
5543 + for (nns=0; nns<3 && fgets(line, sizeof line, f); ) {
5544 if (!strncmp(line, "options", 7) && isspace(line[7])) {
5545 unsigned long x;
5546 char *p, *z;
5547 @@ -92,7 +100,8 @@ int __res_msend(int nqueries, const unsi
5548 }
5549 }
5550 }
5551 - if (f) __fclose_ca(f);
5552 + __fclose_ca(f);
5553 +no_resolv_conf:
5554 if (!nns) {
5555 ns[0].sin.sin_family = AF_INET;
5556 ns[0].sin.sin_port = htons(53);
5557 --- a/src/search/tsearch_avl.c
5558 +++ b/src/search/tsearch_avl.c
5559 @@ -77,38 +77,45 @@ static struct node *find(struct node *n,
5560 return find(n->right, k, cmp);
5561 }
5562
5563 -static struct node *insert(struct node **n, const void *k,
5564 - int (*cmp)(const void *, const void *), int *new)
5565 +static struct node *insert(struct node *n, const void *k,
5566 + int (*cmp)(const void *, const void *), struct node **found)
5567 {
5568 - struct node *r = *n;
5569 + struct node *r;
5570 int c;
5571
5572 - if (!r) {
5573 - *n = r = malloc(sizeof **n);
5574 - if (r) {
5575 - r->key = k;
5576 - r->left = r->right = 0;
5577 - r->height = 1;
5578 + if (!n) {
5579 + n = malloc(sizeof *n);
5580 + if (n) {
5581 + n->key = k;
5582 + n->left = n->right = 0;
5583 + n->height = 1;
5584 }
5585 - *new = 1;
5586 - return r;
5587 + *found = n;
5588 + return n;
5589 + }
5590 + c = cmp(k, n->key);
5591 + if (c == 0) {
5592 + *found = n;
5593 + return 0;
5594 + }
5595 + r = insert(c < 0 ? n->left : n->right, k, cmp, found);
5596 + if (r) {
5597 + if (c < 0)
5598 + n->left = r;
5599 + else
5600 + n->right = r;
5601 + r = balance(n);
5602 }
5603 - c = cmp(k, r->key);
5604 - if (c == 0)
5605 - return r;
5606 - if (c < 0)
5607 - r = insert(&r->left, k, cmp, new);
5608 - else
5609 - r = insert(&r->right, k, cmp, new);
5610 - if (*new)
5611 - *n = balance(*n);
5612 return r;
5613 }
5614
5615 -static struct node *movr(struct node *n, struct node *r) {
5616 - if (!n)
5617 - return r;
5618 - n->right = movr(n->right, r);
5619 +static struct node *remove_rightmost(struct node *n, struct node **rightmost)
5620 +{
5621 + if (!n->right) {
5622 + *rightmost = n;
5623 + return n->left;
5624 + }
5625 + n->right = remove_rightmost(n->right, rightmost);
5626 return balance(n);
5627 }
5628
5629 @@ -122,7 +129,13 @@ static struct node *remove(struct node *
5630 c = cmp(k, (*n)->key);
5631 if (c == 0) {
5632 struct node *r = *n;
5633 - *n = movr(r->left, r->right);
5634 + if (r->left) {
5635 + r->left = remove_rightmost(r->left, n);
5636 + (*n)->left = r->left;
5637 + (*n)->right = r->right;
5638 + *n = balance(*n);
5639 + } else
5640 + *n = r->right;
5641 free(r);
5642 return parent;
5643 }
5644 @@ -138,6 +151,8 @@ static struct node *remove(struct node *
5645 void *tdelete(const void *restrict key, void **restrict rootp,
5646 int(*compar)(const void *, const void *))
5647 {
5648 + if (!rootp)
5649 + return 0;
5650 struct node *n = *rootp;
5651 struct node *ret;
5652 /* last argument is arbitrary non-null pointer
5653 @@ -150,17 +165,21 @@ void *tdelete(const void *restrict key,
5654 void *tfind(const void *key, void *const *rootp,
5655 int(*compar)(const void *, const void *))
5656 {
5657 + if (!rootp)
5658 + return 0;
5659 return find(*rootp, key, compar);
5660 }
5661
5662 void *tsearch(const void *key, void **rootp,
5663 int (*compar)(const void *, const void *))
5664 {
5665 - int new = 0;
5666 - struct node *n = *rootp;
5667 + struct node *update;
5668 struct node *ret;
5669 - ret = insert(&n, key, compar, &new);
5670 - *rootp = n;
5671 + if (!rootp)
5672 + return 0;
5673 + update = insert(*rootp, key, compar, &ret);
5674 + if (update)
5675 + *rootp = update;
5676 return ret;
5677 }
5678
5679 --- a/src/setjmp/arm/longjmp.s
5680 +++ b/src/setjmp/arm/longjmp.s
5681 @@ -1,3 +1,4 @@
5682 +.syntax unified
5683 .global _longjmp
5684 .global longjmp
5685 .type _longjmp,%function
5686 @@ -20,7 +21,11 @@ longjmp:
5687 ldc p2, cr4, [ip], #48
5688 2: tst r1,#0x40
5689 beq 2f
5690 - .word 0xecbc8b10 /* vldmia ip!, {d8-d15} */
5691 + .fpu vfp
5692 + vldmia ip!, {d8-d15}
5693 + .fpu softvfp
5694 + .eabi_attribute 10, 0
5695 + .eabi_attribute 27, 0
5696 2: tst r1,#0x200
5697 beq 3f
5698 ldcl p1, cr10, [ip], #8
5699 @@ -29,9 +34,7 @@ longjmp:
5700 ldcl p1, cr13, [ip], #8
5701 ldcl p1, cr14, [ip], #8
5702 ldcl p1, cr15, [ip], #8
5703 -3: tst lr,#1
5704 - moveq pc,lr
5705 - bx lr
5706 +3: bx lr
5707
5708 .hidden __hwcap
5709 1: .word __hwcap-1b
5710 --- a/src/setjmp/arm/setjmp.s
5711 +++ b/src/setjmp/arm/setjmp.s
5712 @@ -1,3 +1,4 @@
5713 +.syntax unified
5714 .global __setjmp
5715 .global _setjmp
5716 .global setjmp
5717 @@ -22,7 +23,11 @@ setjmp:
5718 stc p2, cr4, [ip], #48
5719 2: tst r1,#0x40
5720 beq 2f
5721 - .word 0xecac8b10 /* vstmia ip!, {d8-d15} */
5722 + .fpu vfp
5723 + vstmia ip!, {d8-d15}
5724 + .fpu softvfp
5725 + .eabi_attribute 10, 0
5726 + .eabi_attribute 27, 0
5727 2: tst r1,#0x200
5728 beq 3f
5729 stcl p1, cr10, [ip], #8
5730 @@ -31,9 +36,7 @@ setjmp:
5731 stcl p1, cr13, [ip], #8
5732 stcl p1, cr14, [ip], #8
5733 stcl p1, cr15, [ip], #8
5734 -3: tst lr,#1
5735 - moveq pc,lr
5736 - bx lr
5737 +3: bx lr
5738
5739 .hidden __hwcap
5740 1: .word __hwcap-1b
5741 --- a/src/setjmp/mips-sf/longjmp.s
5742 +++ /dev/null
5743 @@ -1,25 +0,0 @@
5744 -.set noreorder
5745 -
5746 -.global _longjmp
5747 -.global longjmp
5748 -.type _longjmp,@function
5749 -.type longjmp,@function
5750 -_longjmp:
5751 -longjmp:
5752 - move $2, $5
5753 - bne $2, $0, 1f
5754 - nop
5755 - addu $2, $2, 1
5756 -1: lw $ra, 0($4)
5757 - lw $sp, 4($4)
5758 - lw $16, 8($4)
5759 - lw $17, 12($4)
5760 - lw $18, 16($4)
5761 - lw $19, 20($4)
5762 - lw $20, 24($4)
5763 - lw $21, 28($4)
5764 - lw $22, 32($4)
5765 - lw $23, 36($4)
5766 - lw $30, 40($4)
5767 - jr $ra
5768 - lw $28, 44($4)
5769 --- a/src/setjmp/mips-sf/longjmp.sub
5770 +++ /dev/null
5771 @@ -1 +0,0 @@
5772 -longjmp.s
5773 --- a/src/setjmp/mips-sf/setjmp.s
5774 +++ /dev/null
5775 @@ -1,25 +0,0 @@
5776 -.set noreorder
5777 -
5778 -.global __setjmp
5779 -.global _setjmp
5780 -.global setjmp
5781 -.type __setjmp,@function
5782 -.type _setjmp,@function
5783 -.type setjmp,@function
5784 -__setjmp:
5785 -_setjmp:
5786 -setjmp:
5787 - sw $ra, 0($4)
5788 - sw $sp, 4($4)
5789 - sw $16, 8($4)
5790 - sw $17, 12($4)
5791 - sw $18, 16($4)
5792 - sw $19, 20($4)
5793 - sw $20, 24($4)
5794 - sw $21, 28($4)
5795 - sw $22, 32($4)
5796 - sw $23, 36($4)
5797 - sw $30, 40($4)
5798 - sw $28, 44($4)
5799 - jr $ra
5800 - li $2, 0
5801 --- a/src/setjmp/mips-sf/setjmp.sub
5802 +++ /dev/null
5803 @@ -1 +0,0 @@
5804 -setjmp.s
5805 --- /dev/null
5806 +++ b/src/setjmp/mips/longjmp.S
5807 @@ -0,0 +1,40 @@
5808 +.set noreorder
5809 +
5810 +.global _longjmp
5811 +.global longjmp
5812 +.type _longjmp,@function
5813 +.type longjmp,@function
5814 +_longjmp:
5815 +longjmp:
5816 + move $2, $5
5817 + bne $2, $0, 1f
5818 + nop
5819 + addu $2, $2, 1
5820 +1:
5821 +#ifndef __mips_soft_float
5822 + lwc1 $20, 56($4)
5823 + lwc1 $21, 60($4)
5824 + lwc1 $22, 64($4)
5825 + lwc1 $23, 68($4)
5826 + lwc1 $24, 72($4)
5827 + lwc1 $25, 76($4)
5828 + lwc1 $26, 80($4)
5829 + lwc1 $27, 84($4)
5830 + lwc1 $28, 88($4)
5831 + lwc1 $29, 92($4)
5832 + lwc1 $30, 96($4)
5833 + lwc1 $31, 100($4)
5834 +#endif
5835 + lw $ra, 0($4)
5836 + lw $sp, 4($4)
5837 + lw $16, 8($4)
5838 + lw $17, 12($4)
5839 + lw $18, 16($4)
5840 + lw $19, 20($4)
5841 + lw $20, 24($4)
5842 + lw $21, 28($4)
5843 + lw $22, 32($4)
5844 + lw $23, 36($4)
5845 + lw $30, 40($4)
5846 + jr $ra
5847 + lw $28, 44($4)
5848 --- a/src/setjmp/mips/longjmp.s
5849 +++ /dev/null
5850 @@ -1,37 +0,0 @@
5851 -.set noreorder
5852 -
5853 -.global _longjmp
5854 -.global longjmp
5855 -.type _longjmp,@function
5856 -.type longjmp,@function
5857 -_longjmp:
5858 -longjmp:
5859 - move $2, $5
5860 - bne $2, $0, 1f
5861 - nop
5862 - addu $2, $2, 1
5863 -1: lwc1 $20, 56($4)
5864 - lwc1 $21, 60($4)
5865 - lwc1 $22, 64($4)
5866 - lwc1 $23, 68($4)
5867 - lwc1 $24, 72($4)
5868 - lwc1 $25, 76($4)
5869 - lwc1 $26, 80($4)
5870 - lwc1 $27, 84($4)
5871 - lwc1 $28, 88($4)
5872 - lwc1 $29, 92($4)
5873 - lwc1 $30, 96($4)
5874 - lwc1 $31, 100($4)
5875 - lw $ra, 0($4)
5876 - lw $sp, 4($4)
5877 - lw $16, 8($4)
5878 - lw $17, 12($4)
5879 - lw $18, 16($4)
5880 - lw $19, 20($4)
5881 - lw $20, 24($4)
5882 - lw $21, 28($4)
5883 - lw $22, 32($4)
5884 - lw $23, 36($4)
5885 - lw $30, 40($4)
5886 - jr $ra
5887 - lw $28, 44($4)
5888 --- /dev/null
5889 +++ b/src/setjmp/mips/setjmp.S
5890 @@ -0,0 +1,39 @@
5891 +.set noreorder
5892 +
5893 +.global __setjmp
5894 +.global _setjmp
5895 +.global setjmp
5896 +.type __setjmp,@function
5897 +.type _setjmp,@function
5898 +.type setjmp,@function
5899 +__setjmp:
5900 +_setjmp:
5901 +setjmp:
5902 + sw $ra, 0($4)
5903 + sw $sp, 4($4)
5904 + sw $16, 8($4)
5905 + sw $17, 12($4)
5906 + sw $18, 16($4)
5907 + sw $19, 20($4)
5908 + sw $20, 24($4)
5909 + sw $21, 28($4)
5910 + sw $22, 32($4)
5911 + sw $23, 36($4)
5912 + sw $30, 40($4)
5913 + sw $28, 44($4)
5914 +#ifndef __mips_soft_float
5915 + swc1 $20, 56($4)
5916 + swc1 $21, 60($4)
5917 + swc1 $22, 64($4)
5918 + swc1 $23, 68($4)
5919 + swc1 $24, 72($4)
5920 + swc1 $25, 76($4)
5921 + swc1 $26, 80($4)
5922 + swc1 $27, 84($4)
5923 + swc1 $28, 88($4)
5924 + swc1 $29, 92($4)
5925 + swc1 $30, 96($4)
5926 + swc1 $31, 100($4)
5927 +#endif
5928 + jr $ra
5929 + li $2, 0
5930 --- a/src/setjmp/mips/setjmp.s
5931 +++ /dev/null
5932 @@ -1,37 +0,0 @@
5933 -.set noreorder
5934 -
5935 -.global __setjmp
5936 -.global _setjmp
5937 -.global setjmp
5938 -.type __setjmp,@function
5939 -.type _setjmp,@function
5940 -.type setjmp,@function
5941 -__setjmp:
5942 -_setjmp:
5943 -setjmp:
5944 - sw $ra, 0($4)
5945 - sw $sp, 4($4)
5946 - sw $16, 8($4)
5947 - sw $17, 12($4)
5948 - sw $18, 16($4)
5949 - sw $19, 20($4)
5950 - sw $20, 24($4)
5951 - sw $21, 28($4)
5952 - sw $22, 32($4)
5953 - sw $23, 36($4)
5954 - sw $30, 40($4)
5955 - sw $28, 44($4)
5956 - swc1 $20, 56($4)
5957 - swc1 $21, 60($4)
5958 - swc1 $22, 64($4)
5959 - swc1 $23, 68($4)
5960 - swc1 $24, 72($4)
5961 - swc1 $25, 76($4)
5962 - swc1 $26, 80($4)
5963 - swc1 $27, 84($4)
5964 - swc1 $28, 88($4)
5965 - swc1 $29, 92($4)
5966 - swc1 $30, 96($4)
5967 - swc1 $31, 100($4)
5968 - jr $ra
5969 - li $2, 0
5970 --- a/src/setjmp/mipsel-sf/longjmp.sub
5971 +++ /dev/null
5972 @@ -1 +0,0 @@
5973 -../mips-sf/longjmp.s
5974 --- a/src/setjmp/mipsel-sf/setjmp.sub
5975 +++ /dev/null
5976 @@ -1 +0,0 @@
5977 -../mips-sf/setjmp.s
5978 --- a/src/setjmp/sh-nofpu/longjmp.s
5979 +++ /dev/null
5980 @@ -1,22 +0,0 @@
5981 -.global _longjmp
5982 -.global longjmp
5983 -.type _longjmp, @function
5984 -.type longjmp, @function
5985 -_longjmp:
5986 -longjmp:
5987 - mov.l @r4+, r8
5988 - mov.l @r4+, r9
5989 - mov.l @r4+, r10
5990 - mov.l @r4+, r11
5991 - mov.l @r4+, r12
5992 - mov.l @r4+, r13
5993 - mov.l @r4+, r14
5994 - mov.l @r4+, r15
5995 - lds.l @r4+, pr
5996 -
5997 - tst r5, r5
5998 - movt r0
5999 - add r5, r0
6000 -
6001 - rts
6002 - nop
6003 --- a/src/setjmp/sh-nofpu/longjmp.sub
6004 +++ /dev/null
6005 @@ -1 +0,0 @@
6006 -longjmp.s
6007 --- a/src/setjmp/sh-nofpu/setjmp.s
6008 +++ /dev/null
6009 @@ -1,24 +0,0 @@
6010 -.global ___setjmp
6011 -.hidden ___setjmp
6012 -.global __setjmp
6013 -.global _setjmp
6014 -.global setjmp
6015 -.type __setjmp, @function
6016 -.type _setjmp, @function
6017 -.type setjmp, @function
6018 -___setjmp:
6019 -__setjmp:
6020 -_setjmp:
6021 -setjmp:
6022 - add #36, r4
6023 - sts.l pr, @-r4
6024 - mov.l r15 @-r4
6025 - mov.l r14, @-r4
6026 - mov.l r13, @-r4
6027 - mov.l r12, @-r4
6028 - mov.l r11, @-r4
6029 - mov.l r10, @-r4
6030 - mov.l r9, @-r4
6031 - mov.l r8, @-r4
6032 - rts
6033 - mov #0, r0
6034 --- a/src/setjmp/sh-nofpu/setjmp.sub
6035 +++ /dev/null
6036 @@ -1 +0,0 @@
6037 -setjmp.s
6038 --- /dev/null
6039 +++ b/src/setjmp/sh/longjmp.S
6040 @@ -0,0 +1,28 @@
6041 +.global _longjmp
6042 +.global longjmp
6043 +.type _longjmp, @function
6044 +.type longjmp, @function
6045 +_longjmp:
6046 +longjmp:
6047 + mov.l @r4+, r8
6048 + mov.l @r4+, r9
6049 + mov.l @r4+, r10
6050 + mov.l @r4+, r11
6051 + mov.l @r4+, r12
6052 + mov.l @r4+, r13
6053 + mov.l @r4+, r14
6054 + mov.l @r4+, r15
6055 + lds.l @r4+, pr
6056 +#if __SH_FPU_ANY__ || __SH4__
6057 + fmov.s @r4+, fr12
6058 + fmov.s @r4+, fr13
6059 + fmov.s @r4+, fr14
6060 + fmov.s @r4+, fr15
6061 +#endif
6062 +
6063 + tst r5, r5
6064 + movt r0
6065 + add r5, r0
6066 +
6067 + rts
6068 + nop
6069 --- a/src/setjmp/sh/longjmp.s
6070 +++ /dev/null
6071 @@ -1,26 +0,0 @@
6072 -.global _longjmp
6073 -.global longjmp
6074 -.type _longjmp, @function
6075 -.type longjmp, @function
6076 -_longjmp:
6077 -longjmp:
6078 - mov.l @r4+, r8
6079 - mov.l @r4+, r9
6080 - mov.l @r4+, r10
6081 - mov.l @r4+, r11
6082 - mov.l @r4+, r12
6083 - mov.l @r4+, r13
6084 - mov.l @r4+, r14
6085 - mov.l @r4+, r15
6086 - lds.l @r4+, pr
6087 - fmov.s @r4+, fr12
6088 - fmov.s @r4+, fr13
6089 - fmov.s @r4+, fr14
6090 - fmov.s @r4+, fr15
6091 -
6092 - tst r5, r5
6093 - movt r0
6094 - add r5, r0
6095 -
6096 - rts
6097 - nop
6098 --- /dev/null
6099 +++ b/src/setjmp/sh/setjmp.S
6100 @@ -0,0 +1,32 @@
6101 +.global ___setjmp
6102 +.hidden ___setjmp
6103 +.global __setjmp
6104 +.global _setjmp
6105 +.global setjmp
6106 +.type __setjmp, @function
6107 +.type _setjmp, @function
6108 +.type setjmp, @function
6109 +___setjmp:
6110 +__setjmp:
6111 +_setjmp:
6112 +setjmp:
6113 +#if __SH_FPU_ANY__ || __SH4__
6114 + add #52, r4
6115 + fmov.s fr15, @-r4
6116 + fmov.s fr14, @-r4
6117 + fmov.s fr13, @-r4
6118 + fmov.s fr12, @-r4
6119 +#else
6120 + add #36, r4
6121 +#endif
6122 + sts.l pr, @-r4
6123 + mov.l r15, @-r4
6124 + mov.l r14, @-r4
6125 + mov.l r13, @-r4
6126 + mov.l r12, @-r4
6127 + mov.l r11, @-r4
6128 + mov.l r10, @-r4
6129 + mov.l r9, @-r4
6130 + mov.l r8, @-r4
6131 + rts
6132 + mov #0, r0
6133 --- a/src/setjmp/sh/setjmp.s
6134 +++ /dev/null
6135 @@ -1,28 +0,0 @@
6136 -.global ___setjmp
6137 -.hidden ___setjmp
6138 -.global __setjmp
6139 -.global _setjmp
6140 -.global setjmp
6141 -.type __setjmp, @function
6142 -.type _setjmp, @function
6143 -.type setjmp, @function
6144 -___setjmp:
6145 -__setjmp:
6146 -_setjmp:
6147 -setjmp:
6148 - add #52, r4
6149 - fmov.s fr15, @-r4
6150 - fmov.s fr14, @-r4
6151 - fmov.s fr13, @-r4
6152 - fmov.s fr12, @-r4
6153 - sts.l pr, @-r4
6154 - mov.l r15, @-r4
6155 - mov.l r14, @-r4
6156 - mov.l r13, @-r4
6157 - mov.l r12, @-r4
6158 - mov.l r11, @-r4
6159 - mov.l r10, @-r4
6160 - mov.l r9, @-r4
6161 - mov.l r8, @-r4
6162 - rts
6163 - mov #0, r0
6164 --- a/src/setjmp/sheb-nofpu/longjmp.sub
6165 +++ /dev/null
6166 @@ -1 +0,0 @@
6167 -../sh-nofpu/longjmp.s
6168 --- a/src/setjmp/sheb-nofpu/setjmp.sub
6169 +++ /dev/null
6170 @@ -1 +0,0 @@
6171 -../sh-nofpu/setjmp.s
6172 --- a/src/signal/arm/restore.s
6173 +++ b/src/signal/arm/restore.s
6174 @@ -1,3 +1,5 @@
6175 +.syntax unified
6176 +
6177 .global __restore
6178 .type __restore,%function
6179 __restore:
6180 --- a/src/signal/arm/sigsetjmp.s
6181 +++ b/src/signal/arm/sigsetjmp.s
6182 @@ -1,3 +1,4 @@
6183 +.syntax unified
6184 .global sigsetjmp
6185 .global __sigsetjmp
6186 .type sigsetjmp,%function
6187 --- a/src/signal/sigaction.c
6188 +++ b/src/signal/sigaction.c
6189 @@ -17,10 +17,6 @@ void __get_handler_set(sigset_t *set)
6190 int __libc_sigaction(int sig, const struct sigaction *restrict sa, struct sigaction *restrict old)
6191 {
6192 struct k_sigaction ksa, ksa_old;
6193 - if (sig >= (unsigned)_NSIG) {
6194 - errno = EINVAL;
6195 - return -1;
6196 - }
6197 if (sa) {
6198 if ((uintptr_t)sa->sa_handler > 1UL) {
6199 a_or_l(handler_set+(sig-1)/(8*sizeof(long)),
6200 @@ -57,7 +53,7 @@ int __libc_sigaction(int sig, const stru
6201
6202 int __sigaction(int sig, const struct sigaction *restrict sa, struct sigaction *restrict old)
6203 {
6204 - if (sig-32U < 3) {
6205 + if (sig-32U < 3 || sig-1U >= _NSIG-1) {
6206 errno = EINVAL;
6207 return -1;
6208 }
6209 --- a/src/signal/sigsetjmp_tail.c
6210 +++ b/src/signal/sigsetjmp_tail.c
6211 @@ -2,9 +2,7 @@
6212 #include <signal.h>
6213 #include "syscall.h"
6214
6215 -#ifdef SHARED
6216 __attribute__((__visibility__("hidden")))
6217 -#endif
6218 int __sigsetjmp_tail(sigjmp_buf jb, int ret)
6219 {
6220 void *p = jb->__ss;
6221 --- a/src/stdio/getdelim.c
6222 +++ b/src/stdio/getdelim.c
6223 @@ -27,17 +27,18 @@ ssize_t getdelim(char **restrict s, size
6224 for (;;) {
6225 z = memchr(f->rpos, delim, f->rend - f->rpos);
6226 k = z ? z - f->rpos + 1 : f->rend - f->rpos;
6227 - if (i+k >= *n) {
6228 + if (i+k+1 >= *n) {
6229 if (k >= SIZE_MAX/2-i) goto oom;
6230 - *n = i+k+2;
6231 - if (*n < SIZE_MAX/4) *n *= 2;
6232 - tmp = realloc(*s, *n);
6233 + size_t m = i+k+2;
6234 + if (!z && m < SIZE_MAX/4) m += m/2;
6235 + tmp = realloc(*s, m);
6236 if (!tmp) {
6237 - *n = i+k+2;
6238 - tmp = realloc(*s, *n);
6239 + m = i+k+2;
6240 + tmp = realloc(*s, m);
6241 if (!tmp) goto oom;
6242 }
6243 *s = tmp;
6244 + *n = m;
6245 }
6246 memcpy(*s+i, f->rpos, k);
6247 f->rpos += k;
6248 --- /dev/null
6249 +++ b/src/string/arm/__aeabi_memclr.c
6250 @@ -0,0 +1,9 @@
6251 +#include <string.h>
6252 +#include "libc.h"
6253 +
6254 +void __aeabi_memclr(void *dest, size_t n)
6255 +{
6256 + memset(dest, 0, n);
6257 +}
6258 +weak_alias(__aeabi_memclr, __aeabi_memclr4);
6259 +weak_alias(__aeabi_memclr, __aeabi_memclr8);
6260 --- /dev/null
6261 +++ b/src/string/arm/__aeabi_memcpy.c
6262 @@ -0,0 +1,9 @@
6263 +#include <string.h>
6264 +#include "libc.h"
6265 +
6266 +void __aeabi_memcpy(void *restrict dest, const void *restrict src, size_t n)
6267 +{
6268 + memcpy(dest, src, n);
6269 +}
6270 +weak_alias(__aeabi_memcpy, __aeabi_memcpy4);
6271 +weak_alias(__aeabi_memcpy, __aeabi_memcpy8);
6272 --- /dev/null
6273 +++ b/src/string/arm/__aeabi_memmove.c
6274 @@ -0,0 +1,9 @@
6275 +#include <string.h>
6276 +#include "libc.h"
6277 +
6278 +void __aeabi_memmove(void *dest, const void *src, size_t n)
6279 +{
6280 + memmove(dest, src, n);
6281 +}
6282 +weak_alias(__aeabi_memmove, __aeabi_memmove4);
6283 +weak_alias(__aeabi_memmove, __aeabi_memmove8);
6284 --- /dev/null
6285 +++ b/src/string/arm/__aeabi_memset.c
6286 @@ -0,0 +1,9 @@
6287 +#include <string.h>
6288 +#include "libc.h"
6289 +
6290 +void __aeabi_memset(void *dest, size_t n, int c)
6291 +{
6292 + memset(dest, c, n);
6293 +}
6294 +weak_alias(__aeabi_memset, __aeabi_memset4);
6295 +weak_alias(__aeabi_memset, __aeabi_memset8);
6296 --- /dev/null
6297 +++ b/src/string/arm/memcpy.c
6298 @@ -0,0 +1,3 @@
6299 +#if __ARMEB__
6300 +#include "../memcpy.c"
6301 +#endif
6302 --- /dev/null
6303 +++ b/src/string/arm/memcpy_le.S
6304 @@ -0,0 +1,383 @@
6305 +#ifndef __ARMEB__
6306 +
6307 +/*
6308 + * Copyright (C) 2008 The Android Open Source Project
6309 + * All rights reserved.
6310 + *
6311 + * Redistribution and use in source and binary forms, with or without
6312 + * modification, are permitted provided that the following conditions
6313 + * are met:
6314 + * * Redistributions of source code must retain the above copyright
6315 + * notice, this list of conditions and the following disclaimer.
6316 + * * Redistributions in binary form must reproduce the above copyright
6317 + * notice, this list of conditions and the following disclaimer in
6318 + * the documentation and/or other materials provided with the
6319 + * distribution.
6320 + *
6321 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
6322 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
6323 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
6324 + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
6325 + * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
6326 + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
6327 + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
6328 + * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
6329 + * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
6330 + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
6331 + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
6332 + * SUCH DAMAGE.
6333 + */
6334 +
6335 +
6336 +/*
6337 + * Optimized memcpy() for ARM.
6338 + *
6339 + * note that memcpy() always returns the destination pointer,
6340 + * so we have to preserve R0.
6341 + */
6342 +
6343 +/*
6344 + * This file has been modified from the original for use in musl libc.
6345 + * The main changes are: addition of .type memcpy,%function to make the
6346 + * code safely callable from thumb mode, adjusting the return
6347 + * instructions to be compatible with pre-thumb ARM cpus, and removal
6348 + * of prefetch code that is not compatible with older cpus.
6349 + */
6350 +
6351 +.syntax unified
6352 +
6353 +.global memcpy
6354 +.type memcpy,%function
6355 +memcpy:
6356 + /* The stack must always be 64-bits aligned to be compliant with the
6357 + * ARM ABI. Since we have to save R0, we might as well save R4
6358 + * which we can use for better pipelining of the reads below
6359 + */
6360 + .fnstart
6361 + .save {r0, r4, lr}
6362 + stmfd sp!, {r0, r4, lr}
6363 + /* Making room for r5-r11 which will be spilled later */
6364 + .pad #28
6365 + sub sp, sp, #28
6366 +
6367 + /* it simplifies things to take care of len<4 early */
6368 + cmp r2, #4
6369 + blo copy_last_3_and_return
6370 +
6371 + /* compute the offset to align the source
6372 + * offset = (4-(src&3))&3 = -src & 3
6373 + */
6374 + rsb r3, r1, #0
6375 + ands r3, r3, #3
6376 + beq src_aligned
6377 +
6378 + /* align source to 32 bits. We need to insert 2 instructions between
6379 + * a ldr[b|h] and str[b|h] because byte and half-word instructions
6380 + * stall 2 cycles.
6381 + */
6382 + movs r12, r3, lsl #31
6383 + sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */
6384 + ldrbmi r3, [r1], #1
6385 + ldrbcs r4, [r1], #1
6386 + ldrbcs r12,[r1], #1
6387 + strbmi r3, [r0], #1
6388 + strbcs r4, [r0], #1
6389 + strbcs r12,[r0], #1
6390 +
6391 +src_aligned:
6392 +
6393 + /* see if src and dst are aligned together (congruent) */
6394 + eor r12, r0, r1
6395 + tst r12, #3
6396 + bne non_congruent
6397 +
6398 + /* Use post-incriment mode for stm to spill r5-r11 to reserved stack
6399 + * frame. Don't update sp.
6400 + */
6401 + stmea sp, {r5-r11}
6402 +
6403 + /* align the destination to a cache-line */
6404 + rsb r3, r0, #0
6405 + ands r3, r3, #0x1C
6406 + beq congruent_aligned32
6407 + cmp r3, r2
6408 + andhi r3, r2, #0x1C
6409 +
6410 + /* conditionnaly copies 0 to 7 words (length in r3) */
6411 + movs r12, r3, lsl #28
6412 + ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */
6413 + ldmmi r1!, {r8, r9} /* 8 bytes */
6414 + stmcs r0!, {r4, r5, r6, r7}
6415 + stmmi r0!, {r8, r9}
6416 + tst r3, #0x4
6417 + ldrne r10,[r1], #4 /* 4 bytes */
6418 + strne r10,[r0], #4
6419 + sub r2, r2, r3
6420 +
6421 +congruent_aligned32:
6422 + /*
6423 + * here source is aligned to 32 bytes.
6424 + */
6425 +
6426 +cached_aligned32:
6427 + subs r2, r2, #32
6428 + blo less_than_32_left
6429 +
6430 + /*
6431 + * We preload a cache-line up to 64 bytes ahead. On the 926, this will
6432 + * stall only until the requested world is fetched, but the linefill
6433 + * continues in the the background.
6434 + * While the linefill is going, we write our previous cache-line
6435 + * into the write-buffer (which should have some free space).
6436 + * When the linefill is done, the writebuffer will
6437 + * start dumping its content into memory
6438 + *
6439 + * While all this is going, we then load a full cache line into
6440 + * 8 registers, this cache line should be in the cache by now
6441 + * (or partly in the cache).
6442 + *
6443 + * This code should work well regardless of the source/dest alignment.
6444 + *
6445 + */
6446 +
6447 + /* Align the preload register to a cache-line because the cpu does
6448 + * "critical word first" (the first word requested is loaded first).
6449 + */
6450 + @ bic r12, r1, #0x1F
6451 + @ add r12, r12, #64
6452 +
6453 +1: ldmia r1!, { r4-r11 }
6454 + subs r2, r2, #32
6455 +
6456 + /*
6457 + * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
6458 + * for ARM9 preload will not be safely guarded by the preceding subs.
6459 + * When it is safely guarded the only possibility to have SIGSEGV here
6460 + * is because the caller overstates the length.
6461 + */
6462 + @ ldrhi r3, [r12], #32 /* cheap ARM9 preload */
6463 + stmia r0!, { r4-r11 }
6464 + bhs 1b
6465 +
6466 + add r2, r2, #32
6467 +
6468 +less_than_32_left:
6469 + /*
6470 + * less than 32 bytes left at this point (length in r2)
6471 + */
6472 +
6473 + /* skip all this if there is nothing to do, which should
6474 + * be a common case (if not executed the code below takes
6475 + * about 16 cycles)
6476 + */
6477 + tst r2, #0x1F
6478 + beq 1f
6479 +
6480 + /* conditionnaly copies 0 to 31 bytes */
6481 + movs r12, r2, lsl #28
6482 + ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */
6483 + ldmmi r1!, {r8, r9} /* 8 bytes */
6484 + stmcs r0!, {r4, r5, r6, r7}
6485 + stmmi r0!, {r8, r9}
6486 + movs r12, r2, lsl #30
6487 + ldrcs r3, [r1], #4 /* 4 bytes */
6488 + ldrhmi r4, [r1], #2 /* 2 bytes */
6489 + strcs r3, [r0], #4
6490 + strhmi r4, [r0], #2
6491 + tst r2, #0x1
6492 + ldrbne r3, [r1] /* last byte */
6493 + strbne r3, [r0]
6494 +
6495 + /* we're done! restore everything and return */
6496 +1: ldmfd sp!, {r5-r11}
6497 + ldmfd sp!, {r0, r4, lr}
6498 + bx lr
6499 +
6500 + /********************************************************************/
6501 +
6502 +non_congruent:
6503 + /*
6504 + * here source is aligned to 4 bytes
6505 + * but destination is not.
6506 + *
6507 + * in the code below r2 is the number of bytes read
6508 + * (the number of bytes written is always smaller, because we have
6509 + * partial words in the shift queue)
6510 + */
6511 + cmp r2, #4
6512 + blo copy_last_3_and_return
6513 +
6514 + /* Use post-incriment mode for stm to spill r5-r11 to reserved stack
6515 + * frame. Don't update sp.
6516 + */
6517 + stmea sp, {r5-r11}
6518 +
6519 + /* compute shifts needed to align src to dest */
6520 + rsb r5, r0, #0
6521 + and r5, r5, #3 /* r5 = # bytes in partial words */
6522 + mov r12, r5, lsl #3 /* r12 = right */
6523 + rsb lr, r12, #32 /* lr = left */
6524 +
6525 + /* read the first word */
6526 + ldr r3, [r1], #4
6527 + sub r2, r2, #4
6528 +
6529 + /* write a partial word (0 to 3 bytes), such that destination
6530 + * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
6531 + */
6532 + movs r5, r5, lsl #31
6533 + strbmi r3, [r0], #1
6534 + movmi r3, r3, lsr #8
6535 + strbcs r3, [r0], #1
6536 + movcs r3, r3, lsr #8
6537 + strbcs r3, [r0], #1
6538 + movcs r3, r3, lsr #8
6539 +
6540 + cmp r2, #4
6541 + blo partial_word_tail
6542 +
6543 + /* Align destination to 32 bytes (cache line boundary) */
6544 +1: tst r0, #0x1c
6545 + beq 2f
6546 + ldr r5, [r1], #4
6547 + sub r2, r2, #4
6548 + orr r4, r3, r5, lsl lr
6549 + mov r3, r5, lsr r12
6550 + str r4, [r0], #4
6551 + cmp r2, #4
6552 + bhs 1b
6553 + blo partial_word_tail
6554 +
6555 + /* copy 32 bytes at a time */
6556 +2: subs r2, r2, #32
6557 + blo less_than_thirtytwo
6558 +
6559 + /* Use immediate mode for the shifts, because there is an extra cycle
6560 + * for register shifts, which could account for up to 50% of
6561 + * performance hit.
6562 + */
6563 +
6564 + cmp r12, #24
6565 + beq loop24
6566 + cmp r12, #8
6567 + beq loop8
6568 +
6569 +loop16:
6570 + ldr r12, [r1], #4
6571 +1: mov r4, r12
6572 + ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
6573 + subs r2, r2, #32
6574 + ldrhs r12, [r1], #4
6575 + orr r3, r3, r4, lsl #16
6576 + mov r4, r4, lsr #16
6577 + orr r4, r4, r5, lsl #16
6578 + mov r5, r5, lsr #16
6579 + orr r5, r5, r6, lsl #16
6580 + mov r6, r6, lsr #16
6581 + orr r6, r6, r7, lsl #16
6582 + mov r7, r7, lsr #16
6583 + orr r7, r7, r8, lsl #16
6584 + mov r8, r8, lsr #16
6585 + orr r8, r8, r9, lsl #16
6586 + mov r9, r9, lsr #16
6587 + orr r9, r9, r10, lsl #16
6588 + mov r10, r10, lsr #16
6589 + orr r10, r10, r11, lsl #16
6590 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
6591 + mov r3, r11, lsr #16
6592 + bhs 1b
6593 + b less_than_thirtytwo
6594 +
6595 +loop8:
6596 + ldr r12, [r1], #4
6597 +1: mov r4, r12
6598 + ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
6599 + subs r2, r2, #32
6600 + ldrhs r12, [r1], #4
6601 + orr r3, r3, r4, lsl #24
6602 + mov r4, r4, lsr #8
6603 + orr r4, r4, r5, lsl #24
6604 + mov r5, r5, lsr #8
6605 + orr r5, r5, r6, lsl #24
6606 + mov r6, r6, lsr #8
6607 + orr r6, r6, r7, lsl #24
6608 + mov r7, r7, lsr #8
6609 + orr r7, r7, r8, lsl #24
6610 + mov r8, r8, lsr #8
6611 + orr r8, r8, r9, lsl #24
6612 + mov r9, r9, lsr #8
6613 + orr r9, r9, r10, lsl #24
6614 + mov r10, r10, lsr #8
6615 + orr r10, r10, r11, lsl #24
6616 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
6617 + mov r3, r11, lsr #8
6618 + bhs 1b
6619 + b less_than_thirtytwo
6620 +
6621 +loop24:
6622 + ldr r12, [r1], #4
6623 +1: mov r4, r12
6624 + ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
6625 + subs r2, r2, #32
6626 + ldrhs r12, [r1], #4
6627 + orr r3, r3, r4, lsl #8
6628 + mov r4, r4, lsr #24
6629 + orr r4, r4, r5, lsl #8
6630 + mov r5, r5, lsr #24
6631 + orr r5, r5, r6, lsl #8
6632 + mov r6, r6, lsr #24
6633 + orr r6, r6, r7, lsl #8
6634 + mov r7, r7, lsr #24
6635 + orr r7, r7, r8, lsl #8
6636 + mov r8, r8, lsr #24
6637 + orr r8, r8, r9, lsl #8
6638 + mov r9, r9, lsr #24
6639 + orr r9, r9, r10, lsl #8
6640 + mov r10, r10, lsr #24
6641 + orr r10, r10, r11, lsl #8
6642 + stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
6643 + mov r3, r11, lsr #24
6644 + bhs 1b
6645 +
6646 +less_than_thirtytwo:
6647 + /* copy the last 0 to 31 bytes of the source */
6648 + rsb r12, lr, #32 /* we corrupted r12, recompute it */
6649 + add r2, r2, #32
6650 + cmp r2, #4
6651 + blo partial_word_tail
6652 +
6653 +1: ldr r5, [r1], #4
6654 + sub r2, r2, #4
6655 + orr r4, r3, r5, lsl lr
6656 + mov r3, r5, lsr r12
6657 + str r4, [r0], #4
6658 + cmp r2, #4
6659 + bhs 1b
6660 +
6661 +partial_word_tail:
6662 + /* we have a partial word in the input buffer */
6663 + movs r5, lr, lsl #(31-3)
6664 + strbmi r3, [r0], #1
6665 + movmi r3, r3, lsr #8
6666 + strbcs r3, [r0], #1
6667 + movcs r3, r3, lsr #8
6668 + strbcs r3, [r0], #1
6669 +
6670 + /* Refill spilled registers from the stack. Don't update sp. */
6671 + ldmfd sp, {r5-r11}
6672 +
6673 +copy_last_3_and_return:
6674 + movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */
6675 + ldrbmi r2, [r1], #1
6676 + ldrbcs r3, [r1], #1
6677 + ldrbcs r12,[r1]
6678 + strbmi r2, [r0], #1
6679 + strbcs r3, [r0], #1
6680 + strbcs r12,[r0]
6681 +
6682 + /* we're done! restore sp and spilled registers and return */
6683 + add sp, sp, #28
6684 + ldmfd sp!, {r0, r4, lr}
6685 + bx lr
6686 +
6687 +#endif
6688 --- a/src/string/armel/memcpy.s
6689 +++ /dev/null
6690 @@ -1,381 +0,0 @@
6691 -/*
6692 - * Copyright (C) 2008 The Android Open Source Project
6693 - * All rights reserved.
6694 - *
6695 - * Redistribution and use in source and binary forms, with or without
6696 - * modification, are permitted provided that the following conditions
6697 - * are met:
6698 - * * Redistributions of source code must retain the above copyright
6699 - * notice, this list of conditions and the following disclaimer.
6700 - * * Redistributions in binary form must reproduce the above copyright
6701 - * notice, this list of conditions and the following disclaimer in
6702 - * the documentation and/or other materials provided with the
6703 - * distribution.
6704 - *
6705 - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
6706 - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
6707 - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
6708 - * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
6709 - * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
6710 - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
6711 - * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
6712 - * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
6713 - * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
6714 - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
6715 - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
6716 - * SUCH DAMAGE.
6717 - */
6718 -
6719 -
6720 -/*
6721 - * Optimized memcpy() for ARM.
6722 - *
6723 - * note that memcpy() always returns the destination pointer,
6724 - * so we have to preserve R0.
6725 - */
6726 -
6727 -/*
6728 - * This file has been modified from the original for use in musl libc.
6729 - * The main changes are: addition of .type memcpy,%function to make the
6730 - * code safely callable from thumb mode, adjusting the return
6731 - * instructions to be compatible with pre-thumb ARM cpus, and removal
6732 - * of prefetch code that is not compatible with older cpus.
6733 - */
6734 -
6735 -.global memcpy
6736 -.type memcpy,%function
6737 -memcpy:
6738 - /* The stack must always be 64-bits aligned to be compliant with the
6739 - * ARM ABI. Since we have to save R0, we might as well save R4
6740 - * which we can use for better pipelining of the reads below
6741 - */
6742 - .fnstart
6743 - .save {r0, r4, lr}
6744 - stmfd sp!, {r0, r4, lr}
6745 - /* Making room for r5-r11 which will be spilled later */
6746 - .pad #28
6747 - sub sp, sp, #28
6748 -
6749 - /* it simplifies things to take care of len<4 early */
6750 - cmp r2, #4
6751 - blo copy_last_3_and_return
6752 -
6753 - /* compute the offset to align the source
6754 - * offset = (4-(src&3))&3 = -src & 3
6755 - */
6756 - rsb r3, r1, #0
6757 - ands r3, r3, #3
6758 - beq src_aligned
6759 -
6760 - /* align source to 32 bits. We need to insert 2 instructions between
6761 - * a ldr[b|h] and str[b|h] because byte and half-word instructions
6762 - * stall 2 cycles.
6763 - */
6764 - movs r12, r3, lsl #31
6765 - sub r2, r2, r3 /* we know that r3 <= r2 because r2 >= 4 */
6766 - .word 0x44d13001 /* ldrbmi r3, [r1], #1 */
6767 - .word 0x24d14001 /* ldrbcs r4, [r1], #1 */
6768 - .word 0x24d1c001 /* ldrbcs r12,[r1], #1 */
6769 - .word 0x44c03001 /* strbmi r3, [r0], #1 */
6770 - .word 0x24c04001 /* strbcs r4, [r0], #1 */
6771 - .word 0x24c0c001 /* strbcs r12,[r0], #1 */
6772 -
6773 -src_aligned:
6774 -
6775 - /* see if src and dst are aligned together (congruent) */
6776 - eor r12, r0, r1
6777 - tst r12, #3
6778 - bne non_congruent
6779 -
6780 - /* Use post-incriment mode for stm to spill r5-r11 to reserved stack
6781 - * frame. Don't update sp.
6782 - */
6783 - stmea sp, {r5-r11}
6784 -
6785 - /* align the destination to a cache-line */
6786 - rsb r3, r0, #0
6787 - ands r3, r3, #0x1C
6788 - beq congruent_aligned32
6789 - cmp r3, r2
6790 - andhi r3, r2, #0x1C
6791 -
6792 - /* conditionnaly copies 0 to 7 words (length in r3) */
6793 - movs r12, r3, lsl #28
6794 - ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */
6795 - ldmmi r1!, {r8, r9} /* 8 bytes */
6796 - stmcs r0!, {r4, r5, r6, r7}
6797 - stmmi r0!, {r8, r9}
6798 - tst r3, #0x4
6799 - ldrne r10,[r1], #4 /* 4 bytes */
6800 - strne r10,[r0], #4
6801 - sub r2, r2, r3
6802 -
6803 -congruent_aligned32:
6804 - /*
6805 - * here source is aligned to 32 bytes.
6806 - */
6807 -
6808 -cached_aligned32:
6809 - subs r2, r2, #32
6810 - blo less_than_32_left
6811 -
6812 - /*
6813 - * We preload a cache-line up to 64 bytes ahead. On the 926, this will
6814 - * stall only until the requested world is fetched, but the linefill
6815 - * continues in the the background.
6816 - * While the linefill is going, we write our previous cache-line
6817 - * into the write-buffer (which should have some free space).
6818 - * When the linefill is done, the writebuffer will
6819 - * start dumping its content into memory
6820 - *
6821 - * While all this is going, we then load a full cache line into
6822 - * 8 registers, this cache line should be in the cache by now
6823 - * (or partly in the cache).
6824 - *
6825 - * This code should work well regardless of the source/dest alignment.
6826 - *
6827 - */
6828 -
6829 - /* Align the preload register to a cache-line because the cpu does
6830 - * "critical word first" (the first word requested is loaded first).
6831 - */
6832 - @ bic r12, r1, #0x1F
6833 - @ add r12, r12, #64
6834 -
6835 -1: ldmia r1!, { r4-r11 }
6836 - subs r2, r2, #32
6837 -
6838 - /*
6839 - * NOTE: if r12 is more than 64 ahead of r1, the following ldrhi
6840 - * for ARM9 preload will not be safely guarded by the preceding subs.
6841 - * When it is safely guarded the only possibility to have SIGSEGV here
6842 - * is because the caller overstates the length.
6843 - */
6844 - @ ldrhi r3, [r12], #32 /* cheap ARM9 preload */
6845 - stmia r0!, { r4-r11 }
6846 - bhs 1b
6847 -
6848 - add r2, r2, #32
6849 -
6850 -less_than_32_left:
6851 - /*
6852 - * less than 32 bytes left at this point (length in r2)
6853 - */
6854 -
6855 - /* skip all this if there is nothing to do, which should
6856 - * be a common case (if not executed the code below takes
6857 - * about 16 cycles)
6858 - */
6859 - tst r2, #0x1F
6860 - beq 1f
6861 -
6862 - /* conditionnaly copies 0 to 31 bytes */
6863 - movs r12, r2, lsl #28
6864 - ldmcs r1!, {r4, r5, r6, r7} /* 16 bytes */
6865 - ldmmi r1!, {r8, r9} /* 8 bytes */
6866 - stmcs r0!, {r4, r5, r6, r7}
6867 - stmmi r0!, {r8, r9}
6868 - movs r12, r2, lsl #30
6869 - ldrcs r3, [r1], #4 /* 4 bytes */
6870 - .word 0x40d140b2 /* ldrhmi r4, [r1], #2 */ /* 2 bytes */
6871 - strcs r3, [r0], #4
6872 - .word 0x40c040b2 /* strhmi r4, [r0], #2 */
6873 - tst r2, #0x1
6874 - .word 0x15d13000 /* ldrbne r3, [r1] */ /* last byte */
6875 - .word 0x15c03000 /* strbne r3, [r0] */
6876 -
6877 - /* we're done! restore everything and return */
6878 -1: ldmfd sp!, {r5-r11}
6879 - ldmfd sp!, {r0, r4, lr}
6880 - tst lr, #1
6881 - moveq pc, lr
6882 - bx lr
6883 -
6884 - /********************************************************************/
6885 -
6886 -non_congruent:
6887 - /*
6888 - * here source is aligned to 4 bytes
6889 - * but destination is not.
6890 - *
6891 - * in the code below r2 is the number of bytes read
6892 - * (the number of bytes written is always smaller, because we have
6893 - * partial words in the shift queue)
6894 - */
6895 - cmp r2, #4
6896 - blo copy_last_3_and_return
6897 -
6898 - /* Use post-incriment mode for stm to spill r5-r11 to reserved stack
6899 - * frame. Don't update sp.
6900 - */
6901 - stmea sp, {r5-r11}
6902 -
6903 - /* compute shifts needed to align src to dest */
6904 - rsb r5, r0, #0
6905 - and r5, r5, #3 /* r5 = # bytes in partial words */
6906 - mov r12, r5, lsl #3 /* r12 = right */
6907 - rsb lr, r12, #32 /* lr = left */
6908 -
6909 - /* read the first word */
6910 - ldr r3, [r1], #4
6911 - sub r2, r2, #4
6912 -
6913 - /* write a partial word (0 to 3 bytes), such that destination
6914 - * becomes aligned to 32 bits (r5 = nb of words to copy for alignment)
6915 - */
6916 - movs r5, r5, lsl #31
6917 - .word 0x44c03001 /* strbmi r3, [r0], #1 */
6918 - movmi r3, r3, lsr #8
6919 - .word 0x24c03001 /* strbcs r3, [r0], #1 */
6920 - movcs r3, r3, lsr #8
6921 - .word 0x24c03001 /* strbcs r3, [r0], #1 */
6922 - movcs r3, r3, lsr #8
6923 -
6924 - cmp r2, #4
6925 - blo partial_word_tail
6926 -
6927 - /* Align destination to 32 bytes (cache line boundary) */
6928 -1: tst r0, #0x1c
6929 - beq 2f
6930 - ldr r5, [r1], #4
6931 - sub r2, r2, #4
6932 - orr r4, r3, r5, lsl lr
6933 - mov r3, r5, lsr r12
6934 - str r4, [r0], #4
6935 - cmp r2, #4
6936 - bhs 1b
6937 - blo partial_word_tail
6938 -
6939 - /* copy 32 bytes at a time */
6940 -2: subs r2, r2, #32
6941 - blo less_than_thirtytwo
6942 -
6943 - /* Use immediate mode for the shifts, because there is an extra cycle
6944 - * for register shifts, which could account for up to 50% of
6945 - * performance hit.
6946 - */
6947 -
6948 - cmp r12, #24
6949 - beq loop24
6950 - cmp r12, #8
6951 - beq loop8
6952 -
6953 -loop16:
6954 - ldr r12, [r1], #4
6955 -1: mov r4, r12
6956 - ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
6957 - subs r2, r2, #32
6958 - ldrhs r12, [r1], #4
6959 - orr r3, r3, r4, lsl #16
6960 - mov r4, r4, lsr #16
6961 - orr r4, r4, r5, lsl #16
6962 - mov r5, r5, lsr #16
6963 - orr r5, r5, r6, lsl #16
6964 - mov r6, r6, lsr #16
6965 - orr r6, r6, r7, lsl #16
6966 - mov r7, r7, lsr #16
6967 - orr r7, r7, r8, lsl #16
6968 - mov r8, r8, lsr #16
6969 - orr r8, r8, r9, lsl #16
6970 - mov r9, r9, lsr #16
6971 - orr r9, r9, r10, lsl #16
6972 - mov r10, r10, lsr #16
6973 - orr r10, r10, r11, lsl #16
6974 - stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
6975 - mov r3, r11, lsr #16
6976 - bhs 1b
6977 - b less_than_thirtytwo
6978 -
6979 -loop8:
6980 - ldr r12, [r1], #4
6981 -1: mov r4, r12
6982 - ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
6983 - subs r2, r2, #32
6984 - ldrhs r12, [r1], #4
6985 - orr r3, r3, r4, lsl #24
6986 - mov r4, r4, lsr #8
6987 - orr r4, r4, r5, lsl #24
6988 - mov r5, r5, lsr #8
6989 - orr r5, r5, r6, lsl #24
6990 - mov r6, r6, lsr #8
6991 - orr r6, r6, r7, lsl #24
6992 - mov r7, r7, lsr #8
6993 - orr r7, r7, r8, lsl #24
6994 - mov r8, r8, lsr #8
6995 - orr r8, r8, r9, lsl #24
6996 - mov r9, r9, lsr #8
6997 - orr r9, r9, r10, lsl #24
6998 - mov r10, r10, lsr #8
6999 - orr r10, r10, r11, lsl #24
7000 - stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
7001 - mov r3, r11, lsr #8
7002 - bhs 1b
7003 - b less_than_thirtytwo
7004 -
7005 -loop24:
7006 - ldr r12, [r1], #4
7007 -1: mov r4, r12
7008 - ldmia r1!, { r5,r6,r7, r8,r9,r10,r11}
7009 - subs r2, r2, #32
7010 - ldrhs r12, [r1], #4
7011 - orr r3, r3, r4, lsl #8
7012 - mov r4, r4, lsr #24
7013 - orr r4, r4, r5, lsl #8
7014 - mov r5, r5, lsr #24
7015 - orr r5, r5, r6, lsl #8
7016 - mov r6, r6, lsr #24
7017 - orr r6, r6, r7, lsl #8
7018 - mov r7, r7, lsr #24
7019 - orr r7, r7, r8, lsl #8
7020 - mov r8, r8, lsr #24
7021 - orr r8, r8, r9, lsl #8
7022 - mov r9, r9, lsr #24
7023 - orr r9, r9, r10, lsl #8
7024 - mov r10, r10, lsr #24
7025 - orr r10, r10, r11, lsl #8
7026 - stmia r0!, {r3,r4,r5,r6, r7,r8,r9,r10}
7027 - mov r3, r11, lsr #24
7028 - bhs 1b
7029 -
7030 -less_than_thirtytwo:
7031 - /* copy the last 0 to 31 bytes of the source */
7032 - rsb r12, lr, #32 /* we corrupted r12, recompute it */
7033 - add r2, r2, #32
7034 - cmp r2, #4
7035 - blo partial_word_tail
7036 -
7037 -1: ldr r5, [r1], #4
7038 - sub r2, r2, #4
7039 - orr r4, r3, r5, lsl lr
7040 - mov r3, r5, lsr r12
7041 - str r4, [r0], #4
7042 - cmp r2, #4
7043 - bhs 1b
7044 -
7045 -partial_word_tail:
7046 - /* we have a partial word in the input buffer */
7047 - movs r5, lr, lsl #(31-3)
7048 - .word 0x44c03001 /* strbmi r3, [r0], #1 */
7049 - movmi r3, r3, lsr #8
7050 - .word 0x24c03001 /* strbcs r3, [r0], #1 */
7051 - movcs r3, r3, lsr #8
7052 - .word 0x24c03001 /* strbcs r3, [r0], #1 */
7053 -
7054 - /* Refill spilled registers from the stack. Don't update sp. */
7055 - ldmfd sp, {r5-r11}
7056 -
7057 -copy_last_3_and_return:
7058 - movs r2, r2, lsl #31 /* copy remaining 0, 1, 2 or 3 bytes */
7059 - .word 0x44d12001 /* ldrbmi r2, [r1], #1 */
7060 - .word 0x24d13001 /* ldrbcs r3, [r1], #1 */
7061 - .word 0x25d1c000 /* ldrbcs r12,[r1] */
7062 - .word 0x44c02001 /* strbmi r2, [r0], #1 */
7063 - .word 0x24c03001 /* strbcs r3, [r0], #1 */
7064 - .word 0x25c0c000 /* strbcs r12,[r0] */
7065 -
7066 - /* we're done! restore sp and spilled registers and return */
7067 - add sp, sp, #28
7068 - ldmfd sp!, {r0, r4, lr}
7069 - tst lr, #1
7070 - moveq pc, lr
7071 - bx lr
7072 --- a/src/string/armel/memcpy.sub
7073 +++ /dev/null
7074 @@ -1 +0,0 @@
7075 -memcpy.s
7076 --- a/src/string/armhf/memcpy.sub
7077 +++ /dev/null
7078 @@ -1 +0,0 @@
7079 -../armel/memcpy.s
7080 --- a/src/thread/__syscall_cp.c
7081 +++ b/src/thread/__syscall_cp.c
7082 @@ -1,9 +1,7 @@
7083 #include "pthread_impl.h"
7084 #include "syscall.h"
7085
7086 -#ifdef SHARED
7087 __attribute__((__visibility__("hidden")))
7088 -#endif
7089 long __syscall_cp_c();
7090
7091 static long sccp(syscall_arg_t nr,
7092 --- a/src/thread/__tls_get_addr.c
7093 +++ b/src/thread/__tls_get_addr.c
7094 @@ -1,16 +1,16 @@
7095 #include <stddef.h>
7096 #include "pthread_impl.h"
7097 +#include "libc.h"
7098 +
7099 +__attribute__((__visibility__("hidden")))
7100 +void *__tls_get_new(size_t *);
7101
7102 void *__tls_get_addr(size_t *v)
7103 {
7104 pthread_t self = __pthread_self();
7105 -#ifdef SHARED
7106 - __attribute__((__visibility__("hidden")))
7107 - void *__tls_get_new(size_t *);
7108 if (v[0]<=(size_t)self->dtv[0])
7109 return (char *)self->dtv[v[0]]+v[1]+DTP_OFFSET;
7110 return __tls_get_new(v);
7111 -#else
7112 - return (char *)self->dtv[1]+v[1]+DTP_OFFSET;
7113 -#endif
7114 }
7115 +
7116 +weak_alias(__tls_get_addr, __tls_get_new);
7117 --- a/src/thread/aarch64/syscall_cp.s
7118 +++ b/src/thread/aarch64/syscall_cp.s
7119 @@ -17,7 +17,7 @@
7120 __syscall_cp_asm:
7121 __cp_begin:
7122 ldr w0,[x0]
7123 - cbnz w0,1f
7124 + cbnz w0,__cp_cancel
7125 mov x8,x1
7126 mov x0,x2
7127 mov x1,x3
7128 @@ -28,6 +28,5 @@ __cp_begin:
7129 svc 0
7130 __cp_end:
7131 ret
7132 -
7133 - // cbnz might not be able to jump far enough
7134 -1: b __cancel
7135 +__cp_cancel:
7136 + b __cancel
7137 --- /dev/null
7138 +++ b/src/thread/arm/__set_thread_area.c
7139 @@ -0,0 +1,49 @@
7140 +#include <stdint.h>
7141 +#include <elf.h>
7142 +#include "pthread_impl.h"
7143 +#include "libc.h"
7144 +
7145 +#define HWCAP_TLS (1 << 15)
7146 +
7147 +extern const unsigned char __attribute__((__visibility__("hidden")))
7148 + __a_barrier_dummy[], __a_barrier_oldkuser[],
7149 + __a_barrier_v6[], __a_barrier_v7[],
7150 + __a_cas_dummy[], __a_cas_v6[], __a_cas_v7[],
7151 + __a_gettp_dummy[];
7152 +
7153 +#define __a_barrier_kuser 0xffff0fa0
7154 +#define __a_cas_kuser 0xffff0fc0
7155 +#define __a_gettp_kuser 0xffff0fe0
7156 +
7157 +extern uintptr_t __attribute__((__visibility__("hidden")))
7158 + __a_barrier_ptr, __a_cas_ptr, __a_gettp_ptr;
7159 +
7160 +#define SET(op,ver) (__a_##op##_ptr = \
7161 + (uintptr_t)__a_##op##_##ver - (uintptr_t)__a_##op##_dummy)
7162 +
7163 +int __set_thread_area(void *p)
7164 +{
7165 +#if !__ARM_ARCH_7A__ && !__ARM_ARCH_7R__ && __ARM_ARCH < 7
7166 + if (__hwcap & HWCAP_TLS) {
7167 + size_t *aux;
7168 + SET(cas, v7);
7169 + SET(barrier, v7);
7170 + for (aux=libc.auxv; *aux; aux+=2) {
7171 + if (*aux != AT_PLATFORM) continue;
7172 + const char *s = (void *)aux[1];
7173 + if (s[0]!='v' || s[1]!='6' || s[2]-'0'<10u) break;
7174 + SET(cas, v6);
7175 + SET(barrier, v6);
7176 + break;
7177 + }
7178 + } else {
7179 + int ver = *(int *)0xffff0ffc;
7180 + SET(gettp, kuser);
7181 + SET(cas, kuser);
7182 + SET(barrier, kuser);
7183 + if (ver < 2) a_crash();
7184 + if (ver < 3) SET(barrier, oldkuser);
7185 + }
7186 +#endif
7187 + return __syscall(0xf0005, p);
7188 +}
7189 --- a/src/thread/arm/__set_thread_area.s
7190 +++ /dev/null
7191 @@ -1 +0,0 @@
7192 -/* Replaced by C code in arch/arm/src */
7193 --- a/src/thread/arm/__unmapself.s
7194 +++ b/src/thread/arm/__unmapself.s
7195 @@ -1,3 +1,4 @@
7196 +.syntax unified
7197 .text
7198 .global __unmapself
7199 .type __unmapself,%function
7200 --- /dev/null
7201 +++ b/src/thread/arm/atomics.s
7202 @@ -0,0 +1,111 @@
7203 +.syntax unified
7204 +.text
7205 +
7206 +.global __a_barrier
7207 +.hidden __a_barrier
7208 +.type __a_barrier,%function
7209 +__a_barrier:
7210 + ldr ip,1f
7211 + ldr ip,[pc,ip]
7212 + add pc,pc,ip
7213 +1: .word __a_barrier_ptr-1b
7214 +.global __a_barrier_dummy
7215 +.hidden __a_barrier_dummy
7216 +__a_barrier_dummy:
7217 + bx lr
7218 +.global __a_barrier_oldkuser
7219 +.hidden __a_barrier_oldkuser
7220 +__a_barrier_oldkuser:
7221 + push {r0,r1,r2,r3,ip,lr}
7222 + mov r1,r0
7223 + mov r2,sp
7224 + ldr ip,=0xffff0fc0
7225 + mov lr,pc
7226 + mov pc,ip
7227 + pop {r0,r1,r2,r3,ip,lr}
7228 + bx lr
7229 +.global __a_barrier_v6
7230 +.hidden __a_barrier_v6
7231 +__a_barrier_v6:
7232 + mcr p15,0,r0,c7,c10,5
7233 + bx lr
7234 +.global __a_barrier_v7
7235 +.hidden __a_barrier_v7
7236 +__a_barrier_v7:
7237 + .word 0xf57ff05b /* dmb ish */
7238 + bx lr
7239 +
7240 +.global __a_cas
7241 +.hidden __a_cas
7242 +.type __a_cas,%function
7243 +__a_cas:
7244 + ldr ip,1f
7245 + ldr ip,[pc,ip]
7246 + add pc,pc,ip
7247 +1: .word __a_cas_ptr-1b
7248 +.global __a_cas_dummy
7249 +.hidden __a_cas_dummy
7250 +__a_cas_dummy:
7251 + mov r3,r0
7252 + ldr r0,[r2]
7253 + subs r0,r3,r0
7254 + streq r1,[r2]
7255 + bx lr
7256 +.global __a_cas_v6
7257 +.hidden __a_cas_v6
7258 +__a_cas_v6:
7259 + mov r3,r0
7260 + mcr p15,0,r0,c7,c10,5
7261 +1: .word 0xe1920f9f /* ldrex r0,[r2] */
7262 + subs r0,r3,r0
7263 + .word 0x01820f91 /* strexeq r0,r1,[r2] */
7264 + teqeq r0,#1
7265 + beq 1b
7266 + mcr p15,0,r0,c7,c10,5
7267 + bx lr
7268 +.global __a_cas_v7
7269 +.hidden __a_cas_v7
7270 +__a_cas_v7:
7271 + mov r3,r0
7272 + .word 0xf57ff05b /* dmb ish */
7273 +1: .word 0xe1920f9f /* ldrex r0,[r2] */
7274 + subs r0,r3,r0
7275 + .word 0x01820f91 /* strexeq r0,r1,[r2] */
7276 + teqeq r0,#1
7277 + beq 1b
7278 + .word 0xf57ff05b /* dmb ish */
7279 + bx lr
7280 +
7281 +.global __aeabi_read_tp
7282 +.type __aeabi_read_tp,%function
7283 +__aeabi_read_tp:
7284 +
7285 +.global __a_gettp
7286 +.hidden __a_gettp
7287 +.type __a_gettp,%function
7288 +__a_gettp:
7289 + ldr r0,1f
7290 + ldr r0,[pc,r0]
7291 + add pc,pc,r0
7292 +1: .word __a_gettp_ptr-1b
7293 +.global __a_gettp_dummy
7294 +.hidden __a_gettp_dummy
7295 +__a_gettp_dummy:
7296 + mrc p15,0,r0,c13,c0,3
7297 + bx lr
7298 +
7299 +.data
7300 +.global __a_barrier_ptr
7301 +.hidden __a_barrier_ptr
7302 +__a_barrier_ptr:
7303 + .word 0
7304 +
7305 +.global __a_cas_ptr
7306 +.hidden __a_cas_ptr
7307 +__a_cas_ptr:
7308 + .word 0
7309 +
7310 +.global __a_gettp_ptr
7311 +.hidden __a_gettp_ptr
7312 +__a_gettp_ptr:
7313 + .word 0
7314 --- a/src/thread/arm/clone.s
7315 +++ b/src/thread/arm/clone.s
7316 @@ -1,3 +1,4 @@
7317 +.syntax unified
7318 .text
7319 .global __clone
7320 .type __clone,%function
7321 @@ -15,8 +16,6 @@ __clone:
7322 tst r0,r0
7323 beq 1f
7324 ldmfd sp!,{r4,r5,r6,r7}
7325 - tst lr,#1
7326 - moveq pc,lr
7327 bx lr
7328
7329 1: mov r0,r6
7330 --- a/src/thread/arm/syscall_cp.s
7331 +++ b/src/thread/arm/syscall_cp.s
7332 @@ -1,3 +1,4 @@
7333 +.syntax unified
7334 .global __cp_begin
7335 .hidden __cp_begin
7336 .global __cp_end
7337 @@ -22,8 +23,6 @@ __cp_begin:
7338 svc 0
7339 __cp_end:
7340 ldmfd sp!,{r4,r5,r6,r7,lr}
7341 - tst lr,#1
7342 - moveq pc,lr
7343 bx lr
7344 __cp_cancel:
7345 ldmfd sp!,{r4,r5,r6,r7,lr}
7346 --- a/src/thread/microblaze/syscall_cp.s
7347 +++ b/src/thread/microblaze/syscall_cp.s
7348 @@ -11,7 +11,7 @@
7349 __syscall_cp_asm:
7350 __cp_begin:
7351 lwi r5, r5, 0
7352 - bnei r5, __cancel
7353 + bnei r5, __cp_cancel
7354 addi r12, r6, 0
7355 add r5, r7, r0
7356 add r6, r8, r0
7357 @@ -23,3 +23,5 @@ __cp_begin:
7358 __cp_end:
7359 rtsd r15, 8
7360 nop
7361 +__cp_cancel:
7362 + bri __cancel
7363 --- a/src/thread/or1k/syscall_cp.s
7364 +++ b/src/thread/or1k/syscall_cp.s
7365 @@ -12,7 +12,7 @@ __syscall_cp_asm:
7366 __cp_begin:
7367 l.lwz r3, 0(r3)
7368 l.sfeqi r3, 0
7369 - l.bnf __cancel
7370 + l.bnf __cp_cancel
7371 l.ori r11, r4, 0
7372 l.ori r3, r5, 0
7373 l.ori r4, r6, 0
7374 @@ -24,3 +24,6 @@ __cp_begin:
7375 __cp_end:
7376 l.jr r9
7377 l.nop
7378 +__cp_cancel:
7379 + l.j __cancel
7380 + l.nop
7381 --- a/src/thread/powerpc/syscall_cp.s
7382 +++ b/src/thread/powerpc/syscall_cp.s
7383 @@ -38,7 +38,7 @@ __cp_begin:
7384 cmpwi cr7, 0, 0 #compare r0 with 0, store result in cr7.
7385 beq+ cr7, 1f #jump to label 1 if r0 was 0
7386
7387 - b __cancel #else call cancel
7388 + b __cp_cancel #else call cancel
7389 1:
7390 #ok, the cancel flag was not set
7391 # syscall: number goes to r0, the rest 3-8
7392 @@ -55,3 +55,5 @@ __cp_end:
7393 #else negate result.
7394 neg 3, 3
7395 blr
7396 +__cp_cancel:
7397 + b __cancel
7398 --- a/src/thread/pthread_cancel.c
7399 +++ b/src/thread/pthread_cancel.c
7400 @@ -1,12 +1,11 @@
7401 +#define _GNU_SOURCE
7402 #include <string.h>
7403 #include "pthread_impl.h"
7404 #include "syscall.h"
7405 #include "libc.h"
7406
7407 -#ifdef SHARED
7408 __attribute__((__visibility__("hidden")))
7409 -#endif
7410 -long __cancel(), __cp_cancel(), __syscall_cp_asm(), __syscall_cp_c();
7411 +long __cancel(), __syscall_cp_asm(), __syscall_cp_c();
7412
7413 long __cancel()
7414 {
7415 @@ -17,12 +16,6 @@ long __cancel()
7416 return -ECANCELED;
7417 }
7418
7419 -/* If __syscall_cp_asm has adjusted the stack pointer, it must provide a
7420 - * definition of __cp_cancel to undo those adjustments and call __cancel.
7421 - * Otherwise, __cancel provides a definition for __cp_cancel. */
7422 -
7423 -weak_alias(__cancel, __cp_cancel);
7424 -
7425 long __syscall_cp_asm(volatile void *, syscall_arg_t,
7426 syscall_arg_t, syscall_arg_t, syscall_arg_t,
7427 syscall_arg_t, syscall_arg_t, syscall_arg_t);
7428 @@ -52,24 +45,22 @@ static void _sigaddset(sigset_t *set, in
7429 set->__bits[s/8/sizeof *set->__bits] |= 1UL<<(s&8*sizeof *set->__bits-1);
7430 }
7431
7432 -#ifdef SHARED
7433 __attribute__((__visibility__("hidden")))
7434 -#endif
7435 -extern const char __cp_begin[1], __cp_end[1];
7436 +extern const char __cp_begin[1], __cp_end[1], __cp_cancel[1];
7437
7438 static void cancel_handler(int sig, siginfo_t *si, void *ctx)
7439 {
7440 pthread_t self = __pthread_self();
7441 ucontext_t *uc = ctx;
7442 - const char *ip = ((char **)&uc->uc_mcontext)[CANCEL_REG_IP];
7443 + uintptr_t pc = uc->uc_mcontext.MC_PC;
7444
7445 a_barrier();
7446 if (!self->cancel || self->canceldisable == PTHREAD_CANCEL_DISABLE) return;
7447
7448 _sigaddset(&uc->uc_sigmask, SIGCANCEL);
7449
7450 - if (self->cancelasync || ip >= __cp_begin && ip < __cp_end) {
7451 - ((char **)&uc->uc_mcontext)[CANCEL_REG_IP] = (char *)__cp_cancel;
7452 + if (self->cancelasync || pc >= (uintptr_t)__cp_begin && pc < (uintptr_t)__cp_end) {
7453 + uc->uc_mcontext.MC_PC = (uintptr_t)__cp_cancel;
7454 return;
7455 }
7456
7457 --- /dev/null
7458 +++ b/src/thread/sh/__set_thread_area.c
7459 @@ -0,0 +1,40 @@
7460 +#include "pthread_impl.h"
7461 +#include "libc.h"
7462 +#include <elf.h>
7463 +
7464 +/* Also perform sh-specific init */
7465 +
7466 +#define CPU_HAS_LLSC 0x0040
7467 +#define CPU_HAS_CAS_L 0x0400
7468 +
7469 +__attribute__((__visibility__("hidden")))
7470 +extern const char __sh_cas_gusa[], __sh_cas_llsc[], __sh_cas_imask[], __sh_cas_cas_l[];
7471 +
7472 +__attribute__((__visibility__("hidden")))
7473 +const void *__sh_cas_ptr;
7474 +
7475 +__attribute__((__visibility__("hidden")))
7476 +unsigned __sh_nommu;
7477 +
7478 +int __set_thread_area(void *p)
7479 +{
7480 + size_t *aux;
7481 + __asm__ __volatile__ ( "ldc %0, gbr" : : "r"(p) : "memory" );
7482 +#ifndef __SH4A__
7483 + __sh_cas_ptr = __sh_cas_gusa;
7484 +#if !defined(__SH3__) && !defined(__SH4__)
7485 + for (aux=libc.auxv; *aux; aux+=2) {
7486 + if (*aux != AT_PLATFORM) continue;
7487 + const char *s = (void *)aux[1];
7488 + if (s[0]!='s' || s[1]!='h' || s[2]!='2' || s[3]-'0'<10u) break;
7489 + __sh_cas_ptr = __sh_cas_imask;
7490 + __sh_nommu = 1;
7491 + }
7492 +#endif
7493 + if (__hwcap & CPU_HAS_CAS_L)
7494 + __sh_cas_ptr = __sh_cas_cas_l;
7495 + else if (__hwcap & CPU_HAS_LLSC)
7496 + __sh_cas_ptr = __sh_cas_llsc;
7497 +#endif
7498 + return 0;
7499 +}
7500 --- /dev/null
7501 +++ b/src/thread/sh/atomics.s
7502 @@ -0,0 +1,65 @@
7503 +/* Contract for all versions is same as cas.l r2,r3,@r0
7504 + * pr and r1 are also clobbered (by jsr & r1 as temp).
7505 + * r0,r2,r4-r15 must be preserved.
7506 + * r3 contains result (==r2 iff cas succeeded). */
7507 +
7508 + .align 2
7509 +.global __sh_cas_gusa
7510 +.hidden __sh_cas_gusa
7511 +__sh_cas_gusa:
7512 + mov.l r5,@-r15
7513 + mov.l r4,@-r15
7514 + mov r0,r4
7515 + mova 1f,r0
7516 + mov r15,r1
7517 + mov #(0f-1f),r15
7518 +0: mov.l @r4,r5
7519 + cmp/eq r5,r2
7520 + bf 1f
7521 + mov.l r3,@r4
7522 +1: mov r1,r15
7523 + mov r5,r3
7524 + mov r4,r0
7525 + mov.l @r15+,r4
7526 + rts
7527 + mov.l @r15+,r5
7528 +
7529 +.global __sh_cas_llsc
7530 +.hidden __sh_cas_llsc
7531 +__sh_cas_llsc:
7532 + mov r0,r1
7533 + synco
7534 +0: movli.l @r1,r0
7535 + cmp/eq r0,r2
7536 + bf 1f
7537 + mov r3,r0
7538 + movco.l r0,@r1
7539 + bf 0b
7540 + mov r2,r0
7541 +1: synco
7542 + mov r0,r3
7543 + rts
7544 + mov r1,r0
7545 +
7546 +.global __sh_cas_imask
7547 +.hidden __sh_cas_imask
7548 +__sh_cas_imask:
7549 + mov r0,r1
7550 + stc sr,r0
7551 + mov.l r0,@-r15
7552 + or #0xf0,r0
7553 + ldc r0,sr
7554 + mov.l @r1,r0
7555 + cmp/eq r0,r2
7556 + bf 1f
7557 + mov.l r3,@r1
7558 +1: ldc.l @r15+,sr
7559 + mov r0,r3
7560 + rts
7561 + mov r1,r0
7562 +
7563 +.global __sh_cas_cas_l
7564 +.hidden __sh_cas_cas_l
7565 +__sh_cas_cas_l:
7566 + rts
7567 + .word 0x2323 /* cas.l r2,r3,@r0 */
7568 --- a/src/thread/sh/syscall_cp.s
7569 +++ b/src/thread/sh/syscall_cp.s
7570 @@ -14,17 +14,8 @@ __syscall_cp_asm:
7571 __cp_begin:
7572 mov.l @r4, r4
7573 tst r4, r4
7574 - bt 2f
7575 -
7576 - mov.l L1, r0
7577 - braf r0
7578 - nop
7579 -1:
7580 -
7581 -.align 2
7582 -L1: .long __cancel@PLT-(1b-.)
7583 -
7584 -2: mov r5, r3
7585 + bf __cp_cancel
7586 + mov r5, r3
7587 mov r6, r4
7588 mov r7, r5
7589 mov.l @r15, r6
7590 @@ -43,3 +34,12 @@ __cp_end:
7591
7592 rts
7593 nop
7594 +
7595 +__cp_cancel:
7596 + mov.l 2f, r0
7597 + braf r0
7598 + nop
7599 +1:
7600 +
7601 +.align 2
7602 +2: .long __cancel@PCREL-(1b-.)
7603 --- a/src/thread/x32/syscall_cp.s
7604 +++ b/src/thread/x32/syscall_cp.s
7605 @@ -14,7 +14,7 @@ __syscall_cp_internal:
7606 __cp_begin:
7607 mov (%rdi),%eax
7608 test %eax,%eax
7609 - jnz __cancel
7610 + jnz __cp_cancel
7611 mov %rdi,%r11
7612 mov %rsi,%rax
7613 mov %rdx,%rdi
7614 @@ -27,3 +27,5 @@ __cp_begin:
7615 syscall
7616 __cp_end:
7617 ret
7618 +__cp_cancel:
7619 + jmp __cancel
7620 --- a/src/thread/x86_64/syscall_cp.s
7621 +++ b/src/thread/x86_64/syscall_cp.s
7622 @@ -14,7 +14,7 @@ __syscall_cp_asm:
7623 __cp_begin:
7624 mov (%rdi),%eax
7625 test %eax,%eax
7626 - jnz __cancel
7627 + jnz __cp_cancel
7628 mov %rdi,%r11
7629 mov %rsi,%rax
7630 mov %rdx,%rdi
7631 @@ -27,3 +27,5 @@ __cp_begin:
7632 syscall
7633 __cp_end:
7634 ret
7635 +__cp_cancel:
7636 + jmp __cancel