head	1.7;
access;
symbols
	netbsd-11-0-RC3:1.7
	netbsd-11-0-RC2:1.7
	netbsd-11-0-RC1:1.7
	perseant-exfatfs-base-20250801:1.7
	netbsd-11:1.7.0.2
	netbsd-11-base:1.7
	netbsd-10-1-RELEASE:1.5.10.2
	perseant-exfatfs-base-20240630:1.6
	perseant-exfatfs:1.6.0.2
	perseant-exfatfs-base:1.6
	netbsd-8-3-RELEASE:1.4
	netbsd-9-4-RELEASE:1.5.2.1
	netbsd-10-0-RELEASE:1.5.10.1
	netbsd-10-0-RC6:1.5.10.1
	netbsd-10-0-RC5:1.5.10.1
	netbsd-10-0-RC4:1.5.10.1
	netbsd-10-0-RC3:1.5.10.1
	netbsd-10-0-RC2:1.5.10.1
	netbsd-10-0-RC1:1.5.10.1
	netbsd-10:1.5.0.10
	netbsd-10-base:1.5
	netbsd-9-3-RELEASE:1.5
	cjep_sun2x-base1:1.5
	cjep_sun2x:1.5.0.8
	cjep_sun2x-base:1.5
	cjep_staticlib_x-base1:1.5
	netbsd-9-2-RELEASE:1.5
	cjep_staticlib_x:1.5.0.6
	cjep_staticlib_x-base:1.5
	netbsd-9-1-RELEASE:1.5
	phil-wifi-20200421:1.5
	phil-wifi-20200411:1.5
	is-mlppp:1.5.0.4
	is-mlppp-base:1.5
	phil-wifi-20200406:1.5
	netbsd-8-2-RELEASE:1.4
	netbsd-9-0-RELEASE:1.5
	netbsd-9-0-RC2:1.5
	netbsd-9-0-RC1:1.5
	phil-wifi-20191119:1.5
	netbsd-9:1.5.0.2
	netbsd-9-base:1.5
	phil-wifi-20190609:1.5
	netbsd-8-1-RELEASE:1.4
	netbsd-8-1-RC1:1.4
	pgoyette-compat-merge-20190127:1.4.42.1
	pgoyette-compat-20190127:1.5
	pgoyette-compat-20190118:1.5
	pgoyette-compat-1226:1.5
	pgoyette-compat-1126:1.4
	pgoyette-compat-1020:1.4
	pgoyette-compat-0930:1.4
	pgoyette-compat-0906:1.4
	netbsd-7-2-RELEASE:1.4
	pgoyette-compat-0728:1.4
	netbsd-8-0-RELEASE:1.4
	phil-wifi:1.4.0.44
	phil-wifi-base:1.4
	pgoyette-compat-0625:1.4
	netbsd-8-0-RC2:1.4
	pgoyette-compat-0521:1.4
	pgoyette-compat-0502:1.4
	pgoyette-compat-0422:1.4
	netbsd-8-0-RC1:1.4
	pgoyette-compat-0415:1.4
	pgoyette-compat-0407:1.4
	pgoyette-compat-0330:1.4
	pgoyette-compat-0322:1.4
	pgoyette-compat-0315:1.4
	netbsd-7-1-2-RELEASE:1.4
	pgoyette-compat:1.4.0.42
	pgoyette-compat-base:1.4
	netbsd-7-1-1-RELEASE:1.4
	matt-nb8-mediatek:1.4.0.40
	matt-nb8-mediatek-base:1.4
	perseant-stdc-iso10646:1.4.0.38
	perseant-stdc-iso10646-base:1.4
	netbsd-8:1.4.0.36
	netbsd-8-base:1.4
	prg-localcount2-base3:1.4
	prg-localcount2-base2:1.4
	prg-localcount2-base1:1.4
	prg-localcount2:1.4.0.34
	prg-localcount2-base:1.4
	pgoyette-localcount-20170426:1.4
	bouyer-socketcan-base1:1.4
	pgoyette-localcount-20170320:1.4
	netbsd-7-1:1.4.0.32
	netbsd-7-1-RELEASE:1.4
	netbsd-7-1-RC2:1.4
	netbsd-7-nhusb-base-20170116:1.4
	bouyer-socketcan:1.4.0.30
	bouyer-socketcan-base:1.4
	pgoyette-localcount-20170107:1.4
	netbsd-7-1-RC1:1.4
	pgoyette-localcount-20161104:1.4
	netbsd-7-0-2-RELEASE:1.4
	localcount-20160914:1.4
	netbsd-7-nhusb:1.4.0.28
	netbsd-7-nhusb-base:1.4
	pgoyette-localcount-20160806:1.4
	pgoyette-localcount-20160726:1.4
	pgoyette-localcount:1.4.0.26
	pgoyette-localcount-base:1.4
	netbsd-7-0-1-RELEASE:1.4
	netbsd-7-0:1.4.0.24
	netbsd-7-0-RELEASE:1.4
	netbsd-7-0-RC3:1.4
	netbsd-7-0-RC2:1.4
	netbsd-7-0-RC1:1.4
	netbsd-6-0-6-RELEASE:1.4
	netbsd-6-1-5-RELEASE:1.4
	netbsd-7:1.4.0.22
	netbsd-7-base:1.4
	yamt-pagecache-base9:1.4
	yamt-pagecache-tag8:1.4
	netbsd-6-1-4-RELEASE:1.4
	netbsd-6-0-5-RELEASE:1.4
	tls-earlyentropy:1.4.0.20
	tls-earlyentropy-base:1.4
	riastradh-xf86-video-intel-2-7-1-pre-2-21-15:1.4
	riastradh-drm2-base3:1.4
	netbsd-6-1-3-RELEASE:1.4
	netbsd-6-0-4-RELEASE:1.4
	netbsd-6-1-2-RELEASE:1.4
	netbsd-6-0-3-RELEASE:1.4
	netbsd-6-1-1-RELEASE:1.4
	riastradh-drm2-base2:1.4
	riastradh-drm2-base1:1.4
	riastradh-drm2:1.4.0.14
	riastradh-drm2-base:1.4
	netbsd-6-1:1.4.0.18
	netbsd-6-0-2-RELEASE:1.4
	netbsd-6-1-RELEASE:1.4
	netbsd-6-1-RC4:1.4
	netbsd-6-1-RC3:1.4
	agc-symver:1.4.0.16
	agc-symver-base:1.4
	netbsd-6-1-RC2:1.4
	netbsd-6-1-RC1:1.4
	yamt-pagecache-base8:1.4
	netbsd-6-0-1-RELEASE:1.4
	yamt-pagecache-base7:1.4
	matt-nb6-plus-nbase:1.4
	yamt-pagecache-base6:1.4
	netbsd-6-0:1.4.0.12
	netbsd-6-0-RELEASE:1.4
	netbsd-6-0-RC2:1.4
	tls-maxphys:1.4.0.10
	tls-maxphys-base:1.4
	matt-nb6-plus:1.4.0.8
	matt-nb6-plus-base:1.4
	netbsd-6-0-RC1:1.4
	yamt-pagecache-base5:1.4
	yamt-pagecache-base4:1.4
	netbsd-6:1.4.0.6
	netbsd-6-base:1.4
	yamt-pagecache-base3:1.4
	yamt-pagecache-base2:1.4
	yamt-pagecache:1.4.0.4
	yamt-pagecache-base:1.4
	cherry-xenmp:1.4.0.2
	cherry-xenmp-base:1.4;
locks; strict;
comment	@# @;


1.7
date	2024.07.22.23.14.25;	author riastradh;	state Exp;
branches;
next	1.6;
commitid	jOzzf8S2YZYPdTiF;

1.6
date	2023.06.04.01.24.56;	author joerg;	state Exp;
branches
	1.6.2.1;
next	1.5;
commitid	jBAtDE6SYslzbzrE;

1.5
date	2018.12.01.13.01.57;	author skrll;	state Exp;
branches
	1.5.2.1
	1.5.10.1;
next	1.4;
commitid	u4az0fzUYak4362B;

1.4
date	2011.03.10.17.38.30;	author joerg;	state Exp;
branches
	1.4.42.1
	1.4.44.1;
next	1.3;

1.3
date	2011.03.10.17.22.17;	author joerg;	state Exp;
branches;
next	1.2;

1.2
date	2011.03.10.14.27.31;	author joerg;	state Exp;
branches;
next	1.1;

1.1
date	2011.03.09.23.10.07;	author joerg;	state Exp;
branches;
next	;

1.6.2.1
date	2025.08.02.05.55.01;	author perseant;	state Exp;
branches;
next	;
commitid	23j6GFaDws3O875G;

1.5.2.1
date	2023.08.04.12.55.45;	author martin;	state Exp;
branches;
next	1.5.2.2;
commitid	WQUk0iGBkKS39tzE;

1.5.2.2
date	2024.08.07.11.01.57;	author martin;	state Exp;
branches;
next	;
commitid	bcLmwfKbV11GESkF;

1.5.10.1
date	2023.08.01.16.34.56;	author martin;	state Exp;
branches;
next	1.5.10.2;
commitid	ygiHGMga8HEgs6zE;

1.5.10.2
date	2024.08.07.11.00.12;	author martin;	state Exp;
branches;
next	;
commitid	jRVMntqupzW4ESkF;

1.4.42.1
date	2018.12.26.14.01.28;	author pgoyette;	state Exp;
branches;
next	;
commitid	xUhK8IAeBM1azj5B;

1.4.44.1
date	2019.06.10.22.05.29;	author christos;	state Exp;
branches;
next	;
commitid	jtc8rnCzWiEEHGqB;


desc
@@


1.7
log
@ld.elf_so: Sprinkle comments and references for thread-local storage.

Maybe this will help the TLS business to be less mysterious to the
next traveller to pass by here.

Prompted by PR lib/58154.
@
text
@Thread-local storage.

Each thread has a thread control block, or TCB.  The TCB is a
variable-size structure headed by `struct tls_tcb' from <sys/tls.h>,
with:

(a) static thread-local storage for the TLS data of initial objects,
    i.e., those loaded at startup rather than those dynamically loaded
    by dlopen

(b) a pointer to a dynamic thread vector (DTV) for the TLS data
    pointers of objects that use global-dynamic or local-dynamic models
    (typically shared libraries or dlopenable modules)

(c) the pthread_t pointer

The per-thread lwp private pointer, also sometimes called TP (thread
pointer), managed by the _lwp_getprivate and _lwp_setprivate syscalls,
either points at the TCB directly, or, on some architectures, points at

	tp = tcb + sizeof(struct tls_tcb) + TLS_TP_OFFSET.

This bias is chosen for architectures where signed displacements from
TP enable twice the range of static TLS offsets when biased like this.
Architectures with such a tp/tcb offset must provide

void *__lwp_gettcb_fast(void);

in machine/mcontext.h and must define __HAVE___LWP_GETTCB_FAST in
machine/types.h to reflect this; otherwise they must provide
__lwp_getprivate_fast to return the TCB pointer.

Each architecture has one of two TLS variants, variant I or variant II.
Variant I places the static thread-local storage _after_ the fixed
content of the TCB, at increasing addresses (increasing addresses grow
down in diagram):

	+---------------+
	| dtv pointer   |       tcb points here (struct tls_tcb)
	+---------------+
	| pthread_t     |
	+---------------+
	| obj0 tls      |       obj0->tlsoffset = 0
	|               |
	|               |
	+---------------+
	| obj1 tls      |       obj1->tlsoffset = 3
	+---------------+
	| obj2 tls      |       obj2->tlsoffset = 4
	|               |
	.		.
	.		.
	.		.
	|               |
	+---------------+
	| objN tls      |       objN->tlsoffset = k
	+---------------+

Variant II places the static thread-local storage _before_ the fixed
content of the TCB, at decreasing addresses:

	+---------------+
	| objN tls      |       objN->tlsoffset = k
	+---------------+
	| obj(N-1) tls  |       obj(N-1)->tlsoffset = k - 1
	.               .
	.               .
	.               .
	|               |
	+---------------+
	| obj2 tls      |       obj2->tlsoffset = 4
	+---------------+
	| obj1 tls      |       obj1->tlsoffset = 3
	+---------------+
	| obj0 tls      |       obj0->tlsoffset = 0
	|               |
	|               |
	+---------------+
	| tcb pointer   |       tcb points here (struct tls_tcb)
	+---------------+
	| dtv pointer   |
	+---------------+
	| pthread_t     |
	+---------------+

See [ELFTLS] Sec. 3 `Run-Time Handling of TLS', Figs 1 and 2, for
bigger pictures including the DTV and dynamically allocated TLS blocks.

Each architecture also has its own ELF ABI processor supplement with
the architecture-specific relocations and TLS details.

References:

	[ELFTLS] Ulrich Drepper, `ELF Handling For Thread-Local
	Storage', Version 0.21, 2023-08-22.
	https://akkadia.org/drepper/tls.pdf
	https://web.archive.org/web/20240718081934/https://akkadia.org/drepper/tls.pdf

Steps for adding TLS support for a new platform:

(1) Declare TLS variant in machine/types.h by defining either
__HAVE_TLS_VARIANT_I or __HAVE_TLS_VARIANT_II.

(2) _lwp_makecontext has to set the reserved register or kernel
transfer variable in uc_mcontext according to the provided value of
`private'.  Note that _lwp_makecontext takes tcb, not tp, as an
argument, so make sure to adjust it if needed for the tp/tcb offset.
See src/lib/libc/arch/$PLATFORM/gen/_lwp.c.

This is not possible on the VAX as there is no free space in ucontext_t.
This requires either a special version of _lwp_create or versioning
everything using ucontext_t. Debug support depends on getting the data from
ucontext_t, so the second option is possibly required.

(3) _lwp_setprivate(2) has to update the same register as
_lwp_makecontext uses for the private area pointer. Normally
cpu_lwp_setprivate is provided by MD to reflect the kernel view and
enabled by defining __HAVE_CPU_LWP_SETPRIVATE in machine/types.h.
cpu_setmcontext is responsible for keeping the MI l_private field
synchronised by calling lwp_setprivate as needed.

cpu_switchto has to update the mapping.

_lwp_setprivate is used for the initial thread; all other threads
created by libpthread use _lwp_makecontext for this purpose.

(4) Provide __tls_get_addr and possibly other MD functions for dynamic
TLS offset computation. If such alternative entry points exist (currently
only i386), also add a weak reference to 0 in src/lib/libc/tls/tls.c.

The generic implementation can be found in tls.c and is used with
__HAVE_COMMON___TLS_GET_ADDR. It depends on __lwp_getprivate_fast
(see below).

(5) Implement the necessary relocation records in mdreloc.c.  There are
typically three relocation types found in dynamic binaries:

(a) R_TYPE(TLS_DTPOFF): Offset inside the module.  The common TLS code
ensures that the DTV vector points to offset 0 inside the module TLS block.
This is normally def->st_value + rela->r_addend.

(b) R_TYPE(TLS_DTPMOD): Module index.

(c) R_TYPE(TLS_TPOFF): Static TLS offset.  The code has to check whether
the static TLS offset for this module has been allocated
(defobj->tls_static) and otherwise call _rtld_tls_offset_allocate().  This
may fail if no static space is available and the object has been pulled
in via dlopen(3). It can also fail if the TLS area has already been used
via a global-dynamic allocation.

For TLS Variant I, this is typically:

def->st_value + rela->r_addend + defobj->tlsoffset + sizeof(struct tls_tcb)

i.e., the relocation doesn't include the fixed TCB.

For TLS Variant II, this is typically:

def->st_value - defobj->tlsoffset + rela->r_addend

i.e., the starting offset counts down from the TCB.

(6) If there is a tp/tcb offset, implement

	__lwp_gettcb_fast()
	__lwp_settcb()

in machine/mcontext.h and set

	__HAVE___LWP_GETTCB_FAST
	__HAVE___LWP_SETTCB

in machine/types.h.

Otherwise, implement __lwp_getprivate_fast() in machine/mcontext.h and
set __HAVE___LWP_GETPRIVATE_FAST in machine/types.h.

(7) Test using src/tests/lib/libc/tls and src/tests/libexec/ld.elf_so.
Make sure with "objdump -R" that t_tls_dynamic has two TPOFF
relocations and h_tls_dlopen.so.1 and libh_tls_dynamic.so.1 both have
two DTPMOD and DTPOFF relocations.
@


1.6
log
@Fix interactions of initial-exec TLS model and dlopen

(1) If an initial-exec relocation was used for a non-local symbol
(i.e. the definition of the symbol is in a different DSO), the
computation of the static TLS offset used the wrong DSO.
This would effectively mean the wrong address was computed
(PR toolchain/50277, PR pkg/57445).

Fix this by forcing the computation of the correct DSO (the one defining
the symbol).

This code uses __UNCONST to avoid the vast interface changes for this
special case.

(2) If symbols from a DSO loaded via dlopen are used with both
global-dynamic/local-dynamic and initial-exec relocations AND
a initial-exec relocation was resolved first in a thread, a split brain
situation could exist where the dynamic relocations would use one memory
block (separate allocation) and the initial-exec relocations the static
per-thread TLS space.

(3) If the initial-exec relocation in (2) is seen after any thread has
already used a GD/LD allocation, bail out. Since IE relocations are used
only in the GOT, this will prevent the dlopen. This is a bit more
aggressive than necessary, but a full blown reference counting doesn't
seem to be justified.
@
text
@d1 98
d104 5
a108 3
(2) _lwp_makecontext has to set the reserved register or kernel transfer
variable in uc_mcontext to the provided value of 'private'. See
src/lib/libc/arch/$PLATFORM/gen/_lwp.c.
d163 14
a176 2
(6) Implement __lwp_getprivate_fast() in machine/mcontext.h and set
__HAVE___LWP_GETPRIVATE_FAST in machine/types.h.
d178 4
a181 3
(7) Test using src/tests/lib/libc/tls.  Make sure with "objdump -R" that
t_tls_dynamic has two TPOFF relocations and h_tls_dlopen.so.1 and
libh_tls_dynamic.so.1 have both two DTPMOD and DTPOFF relocations.
@


1.6.2.1
log
@Sync with HEAD
@
text
@a0 98
Thread-local storage.

Each thread has a thread control block, or TCB.  The TCB is a
variable-size structure headed by `struct tls_tcb' from <sys/tls.h>,
with:

(a) static thread-local storage for the TLS data of initial objects,
    i.e., those loaded at startup rather than those dynamically loaded
    by dlopen

(b) a pointer to a dynamic thread vector (DTV) for the TLS data
    pointers of objects that use global-dynamic or local-dynamic models
    (typically shared libraries or dlopenable modules)

(c) the pthread_t pointer

The per-thread lwp private pointer, also sometimes called TP (thread
pointer), managed by the _lwp_getprivate and _lwp_setprivate syscalls,
either points at the TCB directly, or, on some architectures, points at

	tp = tcb + sizeof(struct tls_tcb) + TLS_TP_OFFSET.

This bias is chosen for architectures where signed displacements from
TP enable twice the range of static TLS offsets when biased like this.
Architectures with such a tp/tcb offset must provide

void *__lwp_gettcb_fast(void);

in machine/mcontext.h and must define __HAVE___LWP_GETTCB_FAST in
machine/types.h to reflect this; otherwise they must provide
__lwp_getprivate_fast to return the TCB pointer.

Each architecture has one of two TLS variants, variant I or variant II.
Variant I places the static thread-local storage _after_ the fixed
content of the TCB, at increasing addresses (increasing addresses grow
down in diagram):

	+---------------+
	| dtv pointer   |       tcb points here (struct tls_tcb)
	+---------------+
	| pthread_t     |
	+---------------+
	| obj0 tls      |       obj0->tlsoffset = 0
	|               |
	|               |
	+---------------+
	| obj1 tls      |       obj1->tlsoffset = 3
	+---------------+
	| obj2 tls      |       obj2->tlsoffset = 4
	|               |
	.		.
	.		.
	.		.
	|               |
	+---------------+
	| objN tls      |       objN->tlsoffset = k
	+---------------+

Variant II places the static thread-local storage _before_ the fixed
content of the TCB, at decreasing addresses:

	+---------------+
	| objN tls      |       objN->tlsoffset = k
	+---------------+
	| obj(N-1) tls  |       obj(N-1)->tlsoffset = k - 1
	.               .
	.               .
	.               .
	|               |
	+---------------+
	| obj2 tls      |       obj2->tlsoffset = 4
	+---------------+
	| obj1 tls      |       obj1->tlsoffset = 3
	+---------------+
	| obj0 tls      |       obj0->tlsoffset = 0
	|               |
	|               |
	+---------------+
	| tcb pointer   |       tcb points here (struct tls_tcb)
	+---------------+
	| dtv pointer   |
	+---------------+
	| pthread_t     |
	+---------------+

See [ELFTLS] Sec. 3 `Run-Time Handling of TLS', Figs 1 and 2, for
bigger pictures including the DTV and dynamically allocated TLS blocks.

Each architecture also has its own ELF ABI processor supplement with
the architecture-specific relocations and TLS details.

References:

	[ELFTLS] Ulrich Drepper, `ELF Handling For Thread-Local
	Storage', Version 0.21, 2023-08-22.
	https://akkadia.org/drepper/tls.pdf
	https://web.archive.org/web/20240718081934/https://akkadia.org/drepper/tls.pdf

d6 3
a8 5
(2) _lwp_makecontext has to set the reserved register or kernel
transfer variable in uc_mcontext according to the provided value of
`private'.  Note that _lwp_makecontext takes tcb, not tp, as an
argument, so make sure to adjust it if needed for the tp/tcb offset.
See src/lib/libc/arch/$PLATFORM/gen/_lwp.c.
d63 2
a64 14
(6) If there is a tp/tcb offset, implement

	__lwp_gettcb_fast()
	__lwp_settcb()

in machine/mcontext.h and set

	__HAVE___LWP_GETTCB_FAST
	__HAVE___LWP_SETTCB

in machine/types.h.

Otherwise, implement __lwp_getprivate_fast() in machine/mcontext.h and
set __HAVE___LWP_GETPRIVATE_FAST in machine/types.h.
d66 3
a68 4
(7) Test using src/tests/lib/libc/tls and src/tests/libexec/ld.elf_so.
Make sure with "objdump -R" that t_tls_dynamic has two TPOFF
relocations and h_tls_dlopen.so.1 and libh_tls_dynamic.so.1 have both
two DTPMOD and DTPOFF relocations.
@


1.5
log
@Fix typos
@
text
@d46 1
a46 1
(defobj->tls_done) and otherwise call _rtld_tls_offset_allocate().  This
d48 2
a49 1
in via dlopen(3).
@


1.5.2.1
log
@Pull up following revision(s), all via patch,
(requested by riastradh in ticket #1699):

	distrib/sets/lists/tests/shl.mi: revision 1.14
	distrib/sets/lists/tests/shl.mi: revision 1.15
	distrib/sets/lists/tests/shl.mi: revision 1.16
	tests/libexec/ld.elf_so/helper_def_static/h_def_static.c: revision 1.1
	tests/libexec/ld.elf_so/helper_def_dynamic/Makefile: revision 1.1
	tests/libexec/ld.elf_so/helper_def_dynamic/Makefile: revision 1.2
	tests/libexec/ld.elf_so/helper_onlyuse_static/Makefile: revision 1.1
	tests/libexec/ld.elf_so/helper_onlyuse_static/Makefile: revision 1.2
	libexec/ld.elf_so/arch/mips/mips_reloc.c: revision 1.75
	distrib/sets/lists/tests/mi: revision 1.1265
	libexec/ld.elf_so/arch/sh3/mdreloc.c: revision 1.36
	libexec/ld.elf_so/rtld.c: revision 1.214
	tests/libexec/ld.elf_so/helper_onlydef_static/Makefile: revision 1.1
	distrib/sets/lists/debug/mi: revision 1.400
	tests/libexec/ld.elf_so/helper_onlydef_static/Makefile: revision 1.2
	distrib/sets/lists/debug/mi: revision 1.401
	distrib/sets/lists/debug/mi: revision 1.402
	tests/libexec/ld.elf_so/helper_dso2/Makefile: revision 1.2
	distrib/sets/lists/debug/mi: revision 1.403
	tests/libexec/ld.elf_so/helper_symver_dso0/Makefile: revision 1.2
	libexec/ld.elf_so/arch/x86_64/mdreloc.c: revision 1.48
	distrib/sets/lists/debug/mi: revision 1.406
	tests/libexec/ld.elf_so/helper_use_dynamic/Makefile: revision 1.1
	tests/libexec/ld.elf_so/helper_use_dynamic/Makefile: revision 1.2
	tests/libexec/ld.elf_so/helper_ifunc_dso/Makefile: revision 1.2
	libexec/ld.elf_so/arch/sparc64/mdreloc.c: revision 1.70
	libexec/ld.elf_so/arch/aarch64/mdreloc.c: revision 1.18
	tests/libexec/ld.elf_so/helper_abuse_dynamic/Makefile: revision 1.1
	tests/libexec/ld.elf_so/helper_abuse_dynamic/Makefile: revision 1.2
	tests/libexec/ld.elf_so/Makefile: revision 1.13
	libexec/ld.elf_so/arch/arm/mdreloc.c: revision 1.46
	libexec/ld.elf_so/rtld.h: revision 1.146
	tests/libexec/ld.elf_so/Makefile: revision 1.14
	distrib/sets/lists/debug/shl.mi: revision 1.306
	tests/libexec/ld.elf_so/Makefile: revision 1.15
	tests/libexec/ld.elf_so/helper_abuse_static/Makefile: revision 1.1
	distrib/sets/lists/debug/shl.mi: revision 1.307
	tests/libexec/ld.elf_so/Makefile: revision 1.16
	tests/libexec/ld.elf_so/helper_abuse_static/Makefile: revision 1.2
	distrib/sets/lists/debug/shl.mi: revision 1.308
	tests/libexec/ld.elf_so/Makefile: revision 1.17
	distrib/sets/lists/debug/shl.mi: revision 1.309
	tests/libexec/ld.elf_so/Makefile: revision 1.18
	tests/libexec/ld.elf_so/Makefile: revision 1.19
	libexec/ld.elf_so/tls.c: revision 1.16
	libexec/ld.elf_so/tls.c: revision 1.17
	libexec/ld.elf_so/tls.c: revision 1.18
	libexec/ld.elf_so/tls.c: revision 1.19
	tests/libexec/ld.elf_so/helper_onlydef_static/h_onlydef_static.c: revision 1.1
	tests/libexec/ld.elf_so/helper_use_static/h_use_static.c: revision 1.1
	tests/libexec/ld.elf_so/helper_use_static/h_use_static.c: revision 1.2
	tests/libexec/ld.elf_so/helper_def_static/Makefile: revision 1.1
	tests/libexec/ld.elf_so/helper_def_static/Makefile: revision 1.2
	libexec/ld.elf_so/arch/hppa/hppa_reloc.c: revision 1.50
	distrib/sets/lists/debug/shl.mi: revision 1.310
	libexec/ld.elf_so/README.TLS: revision 1.6
	distrib/sets/lists/debug/shl.mi: revision 1.311
	distrib/sets/lists/debug/shl.mi: revision 1.314
	tests/libexec/ld.elf_so/helper_dso3/Makefile: revision 1.2
	tests/libexec/ld.elf_so/helper_symver_dso1/Makefile: revision 1.4
	libexec/ld.elf_so/arch/powerpc/ppc_reloc.c: revision 1.63
	tests/libexec/ld.elf_so/helper_def_dynamic/h_def_dynamic.c: revision 1.1
	tests/libexec/ld.elf_so/helper_onlydef/Makefile: revision 1.1
	tests/libexec/ld.elf_so/helper_onlydef/Makefile: revision 1.2
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.10
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.11
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.12
	libexec/ld.elf_so/map_object.c: revision 1.66
	tests/libexec/ld.elf_so/helper.mk: revision 1.1
	libexec/ld.elf_so/arch/sparc/mdreloc.c: revision 1.57
	libexec/ld.elf_so/map_object.c: revision 1.67
	tests/libexec/ld.elf_so/helper_onlydef/h_onlydef.c: revision 1.1
	tests/libexec/ld.elf_so/helper_symver_dso2/Makefile: revision 1.4
	tests/libexec/ld.elf_so/helper_use_static/Makefile: revision 1.1
	tests/libexec/ld.elf_so/helper_use_static/Makefile: revision 1.2
	tests/libexec/ld.elf_so/helper_use_static/Makefile: revision 1.3
	tests/libexec/ld.elf_so/helper_use_dynamic/h_use_dynamic.c: revision 1.1
	tests/libexec/ld.elf_so/helper_abuse_static/h_abuse_static.c: revision 1.1
	libexec/ld.elf_so/arch/riscv/mdreloc.c: revision 1.9
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.1
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.2
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.3
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.4
	tests/libexec/ld.elf_so/helper_onlyctor_dynamic/Makefile: revision 1.1
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.5
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.6
	libexec/ld.elf_so/arch/m68k/mdreloc.c: revision 1.34
	tests/libexec/ld.elf_so/helper_onlyctor_dynamic/Makefile: revision 1.2
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.7
	libexec/ld.elf_so/arch/i386/mdreloc.c: revision 1.42
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.8
	libexec/ld.elf_so/arch/i386/mdreloc.c: revision 1.43
	libexec/ld.elf_so/arch/or1k/mdreloc.c: revision 1.4
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.9
	tests/libexec/ld.elf_so/helper_onlyuse_dynamic/Makefile: revision 1.1
	tests/libexec/ld.elf_so/helper_onlyuse_dynamic/Makefile: revision 1.2
	tests/libexec/ld.elf_so/helper_abuse_dynamic/h_abuse_dynamic.c: revision 1.1
	tests/libexec/ld.elf_so/helper_onlyctor_dynamic/h_onlyctor_dynamic.c: revision 1.1
	tests/libexec/ld.elf_so/helper_onlyuse_static/h_onlyuse_static.c: revision 1.1
	tests/libexec/ld.elf_so/helper_onlyuse_dynamic/h_onlyuse_dynamic.c: revision 1.1
	tests/libexec/ld.elf_so/helper_dso1/Makefile: revision 1.2
	distrib/sets/lists/tests/shl.mi: revision 1.12
	distrib/sets/lists/tests/shl.mi: revision 1.13
	libexec/ld.elf_so/arch/alpha/alpha_reloc.c: revision 1.44

ld.elf_so: New test for extern initial-exec TLS, PR toolchain/50277.

ld.elf_so: Fix extern TLS test to match PR toolchain/50277.
Now it's actually testing the problem.
ld.elf_so: Nix inadvertently committed private test program.
ld.elf_so: Fix set lists for MKDEBUG=yes builds with t_tls_extern.

ld.elf_so: Sprinkle tls debug messages.

ld.elf_so: Make tls alloc debug messages more detailed and greppable.

ld.elf_so: Test variations on PR toolchain/50277.

ld.elf_so: Test extern dynamic TLS too.

ld.elf_so: Factor out logic in TLS tests to make writing more easier.
No functional change intended.

ld.elf_so: Test TLS abuse of static def, dynamic use and vice versa.

ld.elf_so: Shorter test names.
No functional non-cosmetic change intended.

ld.elf_so: Separately test eager and lazy resolution of def tls ptr.
eager: before loading use library
lazy: after loading use library

Add recent ld.elf_so test helpers debug info
ld.elf_so: Add new files to debug/shl.mi.

ld.elf_so: tls_extern dynamic_defabuse_eager must xfail differently.
If a symbol has already been resolved as dynamic TLS, any library
that tries to use it as static TLS cannot be dlopened.

ld.elf_so: Test another edge case of mismatched TLS models.
One library defines a symbol and _doesn't_ use it, so it has no
indication of whether the symbol is for static TLS or dynamic TLS,
and then two other libraries use it in different ways.

ld.elf_so: Test dynamic-then-static abuse via ctor.

ld.elf_so: Fix missing tab in debug/shl.mi in last change.

Fix interactions of initial-exec TLS model and dlopen

(1) If an initial-exec relocation was used for a non-local symbol
(i.e. the definition of the symbol is in a different DSO), the
computation of the static TLS offset used the wrong DSO.

This would effectively mean the wrong address was computed
(PR toolchain/50277, PR pkg/57445).
Fix this by forcing the computation of the correct DSO (the one defining
the symbol).
This code uses __UNCONST to avoid the vast interface changes for this
special case.

(2) If symbols from a DSO loaded via dlopen are used with both
global-dynamic/local-dynamic and initial-exec relocations AND
a initial-exec relocation was resolved first in a thread, a split brain
situation could exist where the dynamic relocations would use one memory
block (separate allocation) and the initial-exec relocations the static
per-thread TLS space.

(3) If the initial-exec relocation in (2) is seen after any thread has
already used a GD/LD allocation, bail out. Since IE relocations are used
only in the GOT, this will prevent the dlopen. This is a bit more
aggressive than necessary, but a full blown reference counting doesn't
seem to be justified.
Avoid using uninitialized variable "symnum" when building with DEBUG
enabled by borrowing the rdbg_symname() macro from arch/x86_64.
ld.elf_so: Sprinkle more debug messages on dlopen and error.

PR pkg/57445

Fix MKDEBUGLIB build by adding these installed files to the debug
set list.

One could argue that these files are not of any use, so why install
them?  I don't have a good argument either way, and this is (for
now) a simple work-around for PR bin/57455   Please feel free to
commit a different fix to avoid installing these files at all.

Fix markup of libh_ MKDEBUGLIB=yes only files

TLS variant I archs need to fudge the offset by the size of the TCB.
tests/libexec/ld.elf_so: Fix helper library makefiles.
1. Consolidate logic into a single helper.mk to reduce duplication.
2. Set NO* variables, not MK* variables which are reserved for user.
3. Avoid eager X!= in favour of lazy ${X:sh}.
4. Mark _g.a set list entries obsolete.  Never should've been built!
PR misc/57462
@
text
@d46 1
a46 1
(defobj->tls_static) and otherwise call _rtld_tls_offset_allocate().  This
d48 1
a48 2
in via dlopen(3). It can also fail if the TLS area has already been used
via a global-dynamic allocation.
@


1.5.2.2
log
@Pull up following revision(s) (requested by riastradh in ticket #1864):

	libexec/ld.elf_so/tls.c: revision 1.15
	libexec/ld.elf_so/arch/aarch64/rtld_start.S: revision 1.6
	libexec/ld.elf_so/arch/aarch64/rtld_start.S: revision 1.7
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.15
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.16
	libexec/ld.elf_so/README.TLS: revision 1.7
	libexec/ld.elf_so/tls.c: revision 1.20
	libexec/ld.elf_so/tls.c: revision 1.21

Alignment. NFCI.

ld.elf_so: Sprinkle comments and references for thread-local storage.

Maybe this will help the TLS business to be less mysterious to the
next traveller to pass by here.
Prompted by PR lib/58154.

ld.elf_so: Add comments explaining DTV allocation size.
Patch by pho@@ for PR lib/58154.

tests/libexec/ld.elf_so/t_tls_extern: Test PR lib/58154.

ld.elf_so aarch64/rtld_start.S: Sprinkle comments.
No functional change intended.
Prompted by PR lib/58154.

ld.elf_so aarch64/rtld_start.S: Fix dynamic TLS fast path branch.
Bug found and patch prepared by pho@@.
PR lib/58154
@
text
@a0 98
Thread-local storage.

Each thread has a thread control block, or TCB.  The TCB is a
variable-size structure headed by `struct tls_tcb' from <sys/tls.h>,
with:

(a) static thread-local storage for the TLS data of initial objects,
    i.e., those loaded at startup rather than those dynamically loaded
    by dlopen

(b) a pointer to a dynamic thread vector (DTV) for the TLS data
    pointers of objects that use global-dynamic or local-dynamic models
    (typically shared libraries or dlopenable modules)

(c) the pthread_t pointer

The per-thread lwp private pointer, also sometimes called TP (thread
pointer), managed by the _lwp_getprivate and _lwp_setprivate syscalls,
either points at the TCB directly, or, on some architectures, points at

	tp = tcb + sizeof(struct tls_tcb) + TLS_TP_OFFSET.

This bias is chosen for architectures where signed displacements from
TP enable twice the range of static TLS offsets when biased like this.
Architectures with such a tp/tcb offset must provide

void *__lwp_gettcb_fast(void);

in machine/mcontext.h and must define __HAVE___LWP_GETTCB_FAST in
machine/types.h to reflect this; otherwise they must provide
__lwp_getprivate_fast to return the TCB pointer.

Each architecture has one of two TLS variants, variant I or variant II.
Variant I places the static thread-local storage _after_ the fixed
content of the TCB, at increasing addresses (increasing addresses grow
down in diagram):

	+---------------+
	| dtv pointer   |       tcb points here (struct tls_tcb)
	+---------------+
	| pthread_t     |
	+---------------+
	| obj0 tls      |       obj0->tlsoffset = 0
	|               |
	|               |
	+---------------+
	| obj1 tls      |       obj1->tlsoffset = 3
	+---------------+
	| obj2 tls      |       obj2->tlsoffset = 4
	|               |
	.		.
	.		.
	.		.
	|               |
	+---------------+
	| objN tls      |       objN->tlsoffset = k
	+---------------+

Variant II places the static thread-local storage _before_ the fixed
content of the TCB, at decreasing addresses:

	+---------------+
	| objN tls      |       objN->tlsoffset = k
	+---------------+
	| obj(N-1) tls  |       obj(N-1)->tlsoffset = k - 1
	.               .
	.               .
	.               .
	|               |
	+---------------+
	| obj2 tls      |       obj2->tlsoffset = 4
	+---------------+
	| obj1 tls      |       obj1->tlsoffset = 3
	+---------------+
	| obj0 tls      |       obj0->tlsoffset = 0
	|               |
	|               |
	+---------------+
	| tcb pointer   |       tcb points here (struct tls_tcb)
	+---------------+
	| dtv pointer   |
	+---------------+
	| pthread_t     |
	+---------------+

See [ELFTLS] Sec. 3 `Run-Time Handling of TLS', Figs 1 and 2, for
bigger pictures including the DTV and dynamically allocated TLS blocks.

Each architecture also has its own ELF ABI processor supplement with
the architecture-specific relocations and TLS details.

References:

	[ELFTLS] Ulrich Drepper, `ELF Handling For Thread-Local
	Storage', Version 0.21, 2023-08-22.
	https://akkadia.org/drepper/tls.pdf
	https://web.archive.org/web/20240718081934/https://akkadia.org/drepper/tls.pdf

d6 3
a8 5
(2) _lwp_makecontext has to set the reserved register or kernel
transfer variable in uc_mcontext according to the provided value of
`private'.  Note that _lwp_makecontext takes tcb, not tp, as an
argument, so make sure to adjust it if needed for the tp/tcb offset.
See src/lib/libc/arch/$PLATFORM/gen/_lwp.c.
d63 2
a64 14
(6) If there is a tp/tcb offset, implement

	__lwp_gettcb_fast()
	__lwp_settcb()

in machine/mcontext.h and set

	__HAVE___LWP_GETTCB_FAST
	__HAVE___LWP_SETTCB

in machine/types.h.

Otherwise, implement __lwp_getprivate_fast() in machine/mcontext.h and
set __HAVE___LWP_GETPRIVATE_FAST in machine/types.h.
d66 3
a68 4
(7) Test using src/tests/lib/libc/tls and src/tests/libexec/ld.elf_so.
Make sure with "objdump -R" that t_tls_dynamic has two TPOFF
relocations and h_tls_dlopen.so.1 and libh_tls_dynamic.so.1 have both
two DTPMOD and DTPOFF relocations.
@


1.5.10.1
log
@Pull up following revision(s) (requested by riastradh in ticket #297):

	distrib/sets/lists/tests/shl.mi: revision 1.14
	distrib/sets/lists/tests/shl.mi: revision 1.15
	distrib/sets/lists/tests/shl.mi: revision 1.16
	tests/libexec/ld.elf_so/helper_def_static/h_def_static.c: revision 1.1
	tests/libexec/ld.elf_so/helper_def_dynamic/Makefile: revision 1.1
	tests/libexec/ld.elf_so/helper_def_dynamic/Makefile: revision 1.2
	tests/libexec/ld.elf_so/helper_onlyuse_static/Makefile: revision 1.1
	tests/libexec/ld.elf_so/helper_onlyuse_static/Makefile: revision 1.2
	libexec/ld.elf_so/arch/mips/mips_reloc.c: revision 1.75
	distrib/sets/lists/tests/mi: revision 1.1265
	libexec/ld.elf_so/arch/sh3/mdreloc.c: revision 1.36
	libexec/ld.elf_so/rtld.c: revision 1.214
	tests/libexec/ld.elf_so/helper_onlydef_static/Makefile: revision 1.1
	distrib/sets/lists/debug/mi: revision 1.400
	tests/libexec/ld.elf_so/helper_onlydef_static/Makefile: revision 1.2
	distrib/sets/lists/debug/mi: revision 1.401
	distrib/sets/lists/debug/mi: revision 1.402
	tests/libexec/ld.elf_so/helper_dso2/Makefile: revision 1.2
	distrib/sets/lists/debug/mi: revision 1.403
	tests/libexec/ld.elf_so/helper_symver_dso0/Makefile: revision 1.2
	libexec/ld.elf_so/arch/x86_64/mdreloc.c: revision 1.48
	distrib/sets/lists/debug/mi: revision 1.406
	tests/libexec/ld.elf_so/helper_use_dynamic/Makefile: revision 1.1
	tests/libexec/ld.elf_so/helper_use_dynamic/Makefile: revision 1.2
	tests/libexec/ld.elf_so/helper_ifunc_dso/Makefile: revision 1.2
	libexec/ld.elf_so/arch/sparc64/mdreloc.c: revision 1.70
	libexec/ld.elf_so/arch/aarch64/mdreloc.c: revision 1.18
	tests/libexec/ld.elf_so/helper_abuse_dynamic/Makefile: revision 1.1
	tests/libexec/ld.elf_so/helper_abuse_dynamic/Makefile: revision 1.2
	tests/libexec/ld.elf_so/Makefile: revision 1.13
	libexec/ld.elf_so/arch/arm/mdreloc.c: revision 1.46
	libexec/ld.elf_so/rtld.h: revision 1.146
	tests/libexec/ld.elf_so/Makefile: revision 1.14
	distrib/sets/lists/debug/shl.mi: revision 1.306
	tests/libexec/ld.elf_so/Makefile: revision 1.15
	tests/libexec/ld.elf_so/helper_abuse_static/Makefile: revision 1.1
	distrib/sets/lists/debug/shl.mi: revision 1.307
	tests/libexec/ld.elf_so/Makefile: revision 1.16
	tests/libexec/ld.elf_so/helper_abuse_static/Makefile: revision 1.2
	distrib/sets/lists/debug/shl.mi: revision 1.308
	tests/libexec/ld.elf_so/Makefile: revision 1.17
	distrib/sets/lists/debug/shl.mi: revision 1.309
	tests/libexec/ld.elf_so/Makefile: revision 1.18
	tests/libexec/ld.elf_so/Makefile: revision 1.19
	libexec/ld.elf_so/tls.c: revision 1.16
	libexec/ld.elf_so/tls.c: revision 1.17
	libexec/ld.elf_so/tls.c: revision 1.18
	libexec/ld.elf_so/tls.c: revision 1.19
	tests/libexec/ld.elf_so/helper_onlydef_static/h_onlydef_static.c: revision 1.1
	tests/libexec/ld.elf_so/helper_use_static/h_use_static.c: revision 1.1
	tests/libexec/ld.elf_so/helper_use_static/h_use_static.c: revision 1.2
	tests/libexec/ld.elf_so/helper_def_static/Makefile: revision 1.1
	tests/libexec/ld.elf_so/helper_def_static/Makefile: revision 1.2
	libexec/ld.elf_so/arch/hppa/hppa_reloc.c: revision 1.50
	distrib/sets/lists/debug/shl.mi: revision 1.310
	libexec/ld.elf_so/README.TLS: revision 1.6
	distrib/sets/lists/debug/shl.mi: revision 1.311
	distrib/sets/lists/debug/shl.mi: revision 1.314
	tests/libexec/ld.elf_so/helper_dso3/Makefile: revision 1.2
	tests/libexec/ld.elf_so/helper_symver_dso1/Makefile: revision 1.4
	libexec/ld.elf_so/arch/powerpc/ppc_reloc.c: revision 1.63
	tests/libexec/ld.elf_so/helper_def_dynamic/h_def_dynamic.c: revision 1.1
	tests/libexec/ld.elf_so/helper_onlydef/Makefile: revision 1.1
	tests/libexec/ld.elf_so/helper_onlydef/Makefile: revision 1.2
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.10
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.11
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.12
	libexec/ld.elf_so/map_object.c: revision 1.66
	tests/libexec/ld.elf_so/helper.mk: revision 1.1
	libexec/ld.elf_so/arch/sparc/mdreloc.c: revision 1.57
	libexec/ld.elf_so/map_object.c: revision 1.67
	tests/libexec/ld.elf_so/helper_onlydef/h_onlydef.c: revision 1.1
	tests/libexec/ld.elf_so/helper_symver_dso2/Makefile: revision 1.4
	tests/libexec/ld.elf_so/helper_use_static/Makefile: revision 1.1
	tests/libexec/ld.elf_so/helper_use_static/Makefile: revision 1.2
	tests/libexec/ld.elf_so/helper_use_static/Makefile: revision 1.3
	tests/libexec/ld.elf_so/helper_use_dynamic/h_use_dynamic.c: revision 1.1
	tests/libexec/ld.elf_so/helper_abuse_static/h_abuse_static.c: revision 1.1
	libexec/ld.elf_so/arch/riscv/mdreloc.c: revision 1.9
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.1
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.2
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.3
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.4
	tests/libexec/ld.elf_so/helper_onlyctor_dynamic/Makefile: revision 1.1
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.5
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.6
	libexec/ld.elf_so/arch/m68k/mdreloc.c: revision 1.34
	tests/libexec/ld.elf_so/helper_onlyctor_dynamic/Makefile: revision 1.2
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.7
	libexec/ld.elf_so/arch/i386/mdreloc.c: revision 1.42
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.8
	libexec/ld.elf_so/arch/i386/mdreloc.c: revision 1.43
	libexec/ld.elf_so/arch/or1k/mdreloc.c: revision 1.4
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.9
	tests/libexec/ld.elf_so/helper_onlyuse_dynamic/Makefile: revision 1.1
	tests/libexec/ld.elf_so/helper_onlyuse_dynamic/Makefile: revision 1.2
	tests/libexec/ld.elf_so/helper_abuse_dynamic/h_abuse_dynamic.c: revision 1.1
	tests/libexec/ld.elf_so/helper_onlyctor_dynamic/h_onlyctor_dynamic.c: revision 1.1
	tests/libexec/ld.elf_so/helper_onlyuse_static/h_onlyuse_static.c: revision 1.1
	tests/libexec/ld.elf_so/helper_onlyuse_dynamic/h_onlyuse_dynamic.c: revision 1.1
	tests/libexec/ld.elf_so/helper_dso1/Makefile: revision 1.2
	distrib/sets/lists/tests/shl.mi: revision 1.12
	distrib/sets/lists/tests/shl.mi: revision 1.13
	libexec/ld.elf_so/arch/alpha/alpha_reloc.c: revision 1.44
	(all via patch)

ld.elf_so: New test for extern initial-exec TLS, PR toolchain/50277.

ld.elf_so: Fix extern TLS test to match PR toolchain/50277.
Now it's actually testing the problem.
ld.elf_so: Nix inadvertently committed private test program.
ld.elf_so: Fix set lists for MKDEBUG=yes builds with t_tls_extern.

ld.elf_so: Sprinkle tls debug messages.

ld.elf_so: Make tls alloc debug messages more detailed and greppable.

ld.elf_so: Test variations on PR toolchain/50277.

ld.elf_so: Test extern dynamic TLS too.

ld.elf_so: Factor out logic in TLS tests to make writing more easier.
No functional change intended.

ld.elf_so: Test TLS abuse of static def, dynamic use and vice versa.

ld.elf_so: Shorter test names.
No functional non-cosmetic change intended.

ld.elf_so: Separately test eager and lazy resolution of def tls ptr.
eager: before loading use library
lazy: after loading use library

Add recent ld.elf_so test helpers debug info
ld.elf_so: Add new files to debug/shl.mi.

ld.elf_so: tls_extern dynamic_defabuse_eager must xfail differently.
If a symbol has already been resolved as dynamic TLS, any library
that tries to use it as static TLS cannot be dlopened.

ld.elf_so: Test another edge case of mismatched TLS models.
One library defines a symbol and _doesn't_ use it, so it has no
indication of whether the symbol is for static TLS or dynamic TLS,
and then two other libraries use it in different ways.

ld.elf_so: Test dynamic-then-static abuse via ctor.

ld.elf_so: Fix missing tab in debug/shl.mi in last change.

Fix interactions of initial-exec TLS model and dlopen
(1) If an initial-exec relocation was used for a non-local symbol
(i.e. the definition of the symbol is in a different DSO), the
computation of the static TLS offset used the wrong DSO.
This would effectively mean the wrong address was computed
(PR toolchain/50277, PR pkg/57445).
Fix this by forcing the computation of the correct DSO (the one defining
the symbol).
This code uses __UNCONST to avoid the vast interface changes for this
special case.
(2) If symbols from a DSO loaded via dlopen are used with both
global-dynamic/local-dynamic and initial-exec relocations AND
a initial-exec relocation was resolved first in a thread, a split brain
situation could exist where the dynamic relocations would use one memory
block (separate allocation) and the initial-exec relocations the static
per-thread TLS space.
(3) If the initial-exec relocation in (2) is seen after any thread has
already used a GD/LD allocation, bail out. Since IE relocations are used
only in the GOT, this will prevent the dlopen. This is a bit more
aggressive than necessary, but a full blown reference counting doesn't
seem to be justified.
Avoid using uninitialized variable "symnum" when building with DEBUG
enabled by borrowing the rdbg_symname() macro from arch/x86_64.
ld.elf_so: Sprinkle more debug messages on dlopen and error.
PR pkg/57445
Fix MKDEBUGLIB build by adding these installed files to the debug
set list.
XXX
One could argue that these files are not of any use, so why install
them?  I don't have a good argument either way, and this is (for
now) a simple work-around for PR bin/57455   Please feel free to
commit a different fix to avoid installing these files at all.
Fix markup of libh_ MKDEBUGLIB=yes only files
TLS variant I archs need to fudge the offset by the size of the TCB.
tests/libexec/ld.elf_so: Fix helper library makefiles.
1. Consolidate logic into a single helper.mk to reduce duplication.
2. Set NO* variables, not MK* variables which are reserved for user.
3. Avoid eager X!= in favour of lazy ${X:sh}.
4. Mark _g.a set list entries obsolete.  Never should've been built!
PR misc/57462
@
text
@d46 1
a46 1
(defobj->tls_static) and otherwise call _rtld_tls_offset_allocate().  This
d48 1
a48 2
in via dlopen(3). It can also fail if the TLS area has already been used
via a global-dynamic allocation.
@


1.5.10.2
log
@Pull up following revision(s) (requested by riastradh in ticket #777):

	libexec/ld.elf_so/tls.c: revision 1.15
	libexec/ld.elf_so/arch/aarch64/rtld_start.S: revision 1.6
	libexec/ld.elf_so/arch/aarch64/rtld_start.S: revision 1.7
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.15
	tests/libexec/ld.elf_so/t_tls_extern.c: revision 1.16
	libexec/ld.elf_so/README.TLS: revision 1.7
	libexec/ld.elf_so/tls.c: revision 1.20
	libexec/ld.elf_so/tls.c: revision 1.21

Alignment. NFCI.

ld.elf_so: Sprinkle comments and references for thread-local storage.

Maybe this will help the TLS business to be less mysterious to the
next traveller to pass by here.
Prompted by PR lib/58154.

ld.elf_so: Add comments explaining DTV allocation size.
Patch by pho@@ for PR lib/58154.

tests/libexec/ld.elf_so/t_tls_extern: Test PR lib/58154.

ld.elf_so aarch64/rtld_start.S: Sprinkle comments.
No functional change intended.
Prompted by PR lib/58154.

ld.elf_so aarch64/rtld_start.S: Fix dynamic TLS fast path branch.
Bug found and patch prepared by pho@@.
PR lib/58154
@
text
@a0 98
Thread-local storage.

Each thread has a thread control block, or TCB.  The TCB is a
variable-size structure headed by `struct tls_tcb' from <sys/tls.h>,
with:

(a) static thread-local storage for the TLS data of initial objects,
    i.e., those loaded at startup rather than those dynamically loaded
    by dlopen

(b) a pointer to a dynamic thread vector (DTV) for the TLS data
    pointers of objects that use global-dynamic or local-dynamic models
    (typically shared libraries or dlopenable modules)

(c) the pthread_t pointer

The per-thread lwp private pointer, also sometimes called TP (thread
pointer), managed by the _lwp_getprivate and _lwp_setprivate syscalls,
either points at the TCB directly, or, on some architectures, points at

	tp = tcb + sizeof(struct tls_tcb) + TLS_TP_OFFSET.

This bias is chosen for architectures where signed displacements from
TP enable twice the range of static TLS offsets when biased like this.
Architectures with such a tp/tcb offset must provide

void *__lwp_gettcb_fast(void);

in machine/mcontext.h and must define __HAVE___LWP_GETTCB_FAST in
machine/types.h to reflect this; otherwise they must provide
__lwp_getprivate_fast to return the TCB pointer.

Each architecture has one of two TLS variants, variant I or variant II.
Variant I places the static thread-local storage _after_ the fixed
content of the TCB, at increasing addresses (increasing addresses grow
down in diagram):

	+---------------+
	| dtv pointer   |       tcb points here (struct tls_tcb)
	+---------------+
	| pthread_t     |
	+---------------+
	| obj0 tls      |       obj0->tlsoffset = 0
	|               |
	|               |
	+---------------+
	| obj1 tls      |       obj1->tlsoffset = 3
	+---------------+
	| obj2 tls      |       obj2->tlsoffset = 4
	|               |
	.		.
	.		.
	.		.
	|               |
	+---------------+
	| objN tls      |       objN->tlsoffset = k
	+---------------+

Variant II places the static thread-local storage _before_ the fixed
content of the TCB, at decreasing addresses:

	+---------------+
	| objN tls      |       objN->tlsoffset = k
	+---------------+
	| obj(N-1) tls  |       obj(N-1)->tlsoffset = k - 1
	.               .
	.               .
	.               .
	|               |
	+---------------+
	| obj2 tls      |       obj2->tlsoffset = 4
	+---------------+
	| obj1 tls      |       obj1->tlsoffset = 3
	+---------------+
	| obj0 tls      |       obj0->tlsoffset = 0
	|               |
	|               |
	+---------------+
	| tcb pointer   |       tcb points here (struct tls_tcb)
	+---------------+
	| dtv pointer   |
	+---------------+
	| pthread_t     |
	+---------------+

See [ELFTLS] Sec. 3 `Run-Time Handling of TLS', Figs 1 and 2, for
bigger pictures including the DTV and dynamically allocated TLS blocks.

Each architecture also has its own ELF ABI processor supplement with
the architecture-specific relocations and TLS details.

References:

	[ELFTLS] Ulrich Drepper, `ELF Handling For Thread-Local
	Storage', Version 0.21, 2013-08-22.
	https://akkadia.org/drepper/tls.pdf
	https://web.archive.org/web/20240718081934/https://akkadia.org/drepper/tls.pdf

d6 3
a8 5
(2) _lwp_makecontext has to set the reserved register or kernel
transfer variable in uc_mcontext according to the provided value of
`private'.  Note that _lwp_makecontext takes tcb, not tp, as an
argument, so make sure to adjust it if needed for the tp/tcb offset.
See src/lib/libc/arch/$PLATFORM/gen/_lwp.c.
d63 2
a64 14
(6) If there is a tp/tcb offset, implement

	__lwp_gettcb_fast()
	__lwp_settcb()

in machine/mcontext.h and set

	__HAVE___LWP_GETTCB_FAST
	__HAVE___LWP_SETTCB

in machine/types.h.

Otherwise, implement __lwp_getprivate_fast() in machine/mcontext.h and
set __HAVE___LWP_GETPRIVATE_FAST in machine/types.h.
d66 3
a68 4
(7) Test using src/tests/lib/libc/tls and src/tests/libexec/ld.elf_so.
Make sure with "objdump -R" that t_tls_dynamic has two TPOFF
relocations and h_tls_dlopen.so.1 and libh_tls_dynamic.so.1 both have
two DTPMOD and DTPOFF relocations.
@


1.4
log
@Add some clarifications
@
text
@d32 1
a32 1
__HAVE_COMMON___TLS_GET_ADDR. It depends on ___lwp_getprivate_fast
d62 1
a62 1
(6) Implement _lwp_getprivate_fast() in machine/mcontext.h and set
@


1.4.44.1
log
@Sync with HEAD
@
text
@d32 1
a32 1
__HAVE_COMMON___TLS_GET_ADDR. It depends on __lwp_getprivate_fast
d62 1
a62 1
(6) Implement __lwp_getprivate_fast() in machine/mcontext.h and set
@


1.4.42.1
log
@Sync with HEAD, resolve a few conflicts
@
text
@d32 1
a32 1
__HAVE_COMMON___TLS_GET_ADDR. It depends on __lwp_getprivate_fast
d62 1
a62 1
(6) Implement __lwp_getprivate_fast() in machine/mcontext.h and set
@


1.3
log
@Add source reference for _lwp_makecontext and
__HAVE___LWP_GETPRIVATE_FAST. Fix enumeration.
@
text
@d16 7
a22 2
_lwp_makecontext. cpu_lwp_setprivate has to call _lwp_setprivate(2) to
reflect the kernel view. cpu_switch has to update the mapping.
d32 2
a33 1
__HAVE_COMMON___TLS_GET_ADDR. It depends on ___lwp_getprivate_fast.
@


1.2
log
@Fix prototype for __tls_get_addr. Add a generic implementation of it
using __tls_get_addr. Update TLS notes.
@
text
@d7 2
a8 1
variable in uc_mcontext to the provided value of 'private'.
d57 1
a57 1
__HAVE___LWP_GETPRIVATE_FAST.
d59 1
a59 1
(8) Test using src/tests/lib/libc/tls.  Make sure with "objdump -R" that
@


1.1
log
@Add TLS support infrastructure. For dynamic binaries, ld.elf_so exports
_rtld_tls_allocate and _rtld_tls_free. libpthread uses this functions to
setup the thread private area of all new threads. ld.elf_so is
responsible for setting up the private area for the initial thread.
Similar functions are called from _libc_init for static binaries, using
dl_iterate_phdr to access the ELF Program Header.

Add test cases to exercise the different TLS storage models. Test cases
are compiled and installed on all platforms, but are skipped on
platforms not marked for TLS support.

This material is based upon work partially supported by
The NetBSD Foundation under a contract with Joerg Sonnenberger.

It is inspired by the TLS support in FreeBSD by Doug Rabson and the
clean ups of the DragonFly port of the original FreeBSD modifications.
@
text
@d6 1
a6 5
(2) crt0.o has to call _rtld_tls_static_setup() if _DYNAMIC == NULL.
This part is already done if the new src/lib/csu/arch layout is used
by the architecture.

(3) _lwp_makecontext has to set the reserved register or kernel transfer
d14 1
a14 1
(4) _lwp_setprivate(2) has to update the same register as
d21 1
a21 1
(5) Provide __tls_get_addr and possible other MD functions for dynamic
d25 2
a26 19
The generic implementation is:

#include <sys/cdefs.h>
#include <sys/tls.h>
#include <lwp.h>

/* Weak entry is overridden by ld.elf_so for dynamic linkage */
weak_alias(__tls_get_addr, __libc__tls_get_addr)

void *
__libc__tls_get_addr(size_t idx[2])
{
	struct tls_tcb *tcb;

	tcb = _lwp_getprivate();
	return _rtld_tls_get_addr(tcb, idx[0], idx[1]);
}

XXX Document optimisations based on idx[0]
d28 1
a28 1
(6) Implement the necessary relocation records in mdreloc.c.  There are
d55 1
a55 1
(7) Implement _lwp_getprivate_fast() in machine/mcontext.h and set
@

