Hi all, After a long hiatus, I've returned to address review comments on the Windows TLS patch. Attached here is the final patch from this effort. Ok for merge? Will need help from Windows maintainers to commit once this is approved.
best regards, Julian >From 8a89e2a0eb070fbcac5892839253e484a7188eba Mon Sep 17 00:00:00 2001 From: Julian Waters <tanksherma...@gmail.com> Date: Tue, 15 Oct 2024 20:56:22 +0800 Subject: [PATCH] Implement Windows TLS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch implements native Thread Local Storage access on Windows, as motivated by PR80881. Currently, Thread Local Storage accesses on Windows relies on emulation, which is detrimental to performance in certain applications, notably the Python Interpreter and the gcc port of the Java Virtual Machine. This patch was heavily inspired by Daniel Green's original work on native Windows Thread Local Storage from over a decade ago, which can be found at https://github.com/venix1/MinGW-GDC/blob/master/patches/mingw-tls-gcc-4.8.patch as a reference. gcc/ChangeLog: * config/i386/i386.cc (ix86_legitimate_constant_p): Handle new UNSPEC. (legitimate_pic_operand_p): Handle new UNSPEC. (legitimate_pic_address_disp_p): Handle new UNSPEC. (ix86_legitimate_address_p): Handle new UNSPEC. (ix86_tls_index_symbol): New symbol for _tls_index. (ix86_tls_index): Handle creation of _tls_index symbol. (legitimize_tls_address): Create thread local access sequence. (output_pic_addr_const): Handle new UNSPEC. (i386_output_dwarf_dtprel): Handle new UNSPEC. (i386_asm_output_addr_const_extra): Handle new UNSPEC. * config/i386/i386.h (TARGET_WIN32_TLS): Define. * config/i386/i386.md: New UNSPEC. * config/i386/predicates.md: Handle new UNSPEC. * config/mingw/mingw32.h (TARGET_WIN32_TLS): Define. (TARGET_ASM_SELECT_SECTION): Define. (DEFAULT_TLS_SEG_REG): Define. * config/mingw/winnt.cc (mingw_pe_select_section): Select proper TLS section. (mingw_pe_unique_section): Handle TLS section. * config/mingw/winnt.h (mingw_pe_select_section): Declare. * configure: Regenerate. * configure.ac: New check for broken linker thread local support Co-authored-by: Eric Botcazou <botca...@adacore.com> Co-authored-by: Uroš Bizjak <ubiz...@gmail.com> Co-authored-by: Liu Hao <lh_mo...@126.com> Signed-off-by: Julian Waters <tanksherma...@gmail.com> --- gcc/config/i386/i386.cc | 61 ++++++++++++++++++++++++++++++++++- gcc/config/i386/i386.h | 1 + gcc/config/i386/i386.md | 1 + gcc/config/i386/predicates.md | 1 + gcc/config/mingw/mingw32.h | 9 ++++++ gcc/config/mingw/winnt.cc | 14 ++++++++ gcc/config/mingw/winnt.h | 1 + gcc/configure | 29 +++++++++++++++++ gcc/configure.ac | 29 +++++++++++++++++ 9 files changed, 145 insertions(+), 1 deletion(-) diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 2f840338138..2b40a203a61 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -11363,6 +11363,9 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x) x = XVECEXP (x, 0, 0); return (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); + case UNSPEC_SECREL32: + x = XVECEXP (x, 0, 0); + return GET_CODE (x) == SYMBOL_REF; default: return false; } @@ -11499,6 +11502,9 @@ legitimate_pic_operand_p (rtx x) x = XVECEXP (inner, 0, 0); return (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); + case UNSPEC_SECREL32: + x = XVECEXP (inner, 0, 0); + return GET_CODE (x) == SYMBOL_REF; case UNSPEC_MACHOPIC_OFFSET: return legitimate_pic_address_disp_p (x); default: @@ -11679,6 +11685,9 @@ legitimate_pic_address_disp_p (rtx disp) disp = XVECEXP (disp, 0, 0); return (GET_CODE (disp) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); + case UNSPEC_SECREL32: + disp = XVECEXP (disp, 0, 0); + return GET_CODE (disp) == SYMBOL_REF; } return false; @@ -11956,6 +11965,7 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict, case UNSPEC_INDNTPOFF: case UNSPEC_NTPOFF: case UNSPEC_DTPOFF: + case UNSPEC_SECREL32: break; default: @@ -11981,7 +11991,8 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict, || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC || !CONST_INT_P (XEXP (XEXP (disp, 0), 1)) || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF - && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) + && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF + && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_SECREL32)) /* Non-constant pic memory reference. */ return false; } @@ -12305,6 +12316,22 @@ get_thread_pointer (machine_mode tp_mode, bool to_reg) return tp; } +/* Construct the SYMBOL_REF for the _tls_index symbol. */ + +static GTY(()) rtx ix86_tls_index_symbol; + +static rtx +ix86_tls_index (void) +{ + if (!ix86_tls_index_symbol) + ix86_tls_index_symbol = gen_rtx_SYMBOL_REF (SImode, "_tls_index"); + + if (flag_pic) + return gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_index_symbol), UNSPEC_PCREL)); + else + return ix86_tls_index_symbol; +} + /* Construct the SYMBOL_REF for the tls_get_addr function. */ static GTY(()) rtx ix86_tls_symbol; @@ -12363,6 +12390,26 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) machine_mode tp_mode = Pmode; int type; +#if TARGET_WIN32_TLS + off = gen_const_mem (SImode, ix86_tls_index ()); + set_mem_alias_set (off, GOT_ALIAS_SET); + + tp = gen_const_mem (Pmode, GEN_INT (TARGET_64BIT ? 88 : 44)); + set_mem_addr_space (tp, DEFAULT_TLS_SEG_REG); + + if (TARGET_64BIT) + off = convert_to_mode (Pmode, off, 1); + + base = force_reg (Pmode, off); + tp = copy_to_mode_reg (Pmode, tp); + + tp = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, tp, gen_rtx_MULT (Pmode, base, GEN_INT (UNITS_PER_WORD)))); + set_mem_alias_set (tp, GOT_ALIAS_SET); + + base = force_reg (Pmode, tp); + + return gen_rtx_PLUS (Pmode, base, gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_SECREL32))); +#else /* Fall back to global dynamic model if tool chain cannot support local dynamic. */ if (TARGET_SUN_TLS && !TARGET_64BIT @@ -12585,6 +12632,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov) } return dest; +#endif } /* Return true if the TLS address requires insn using integer registers. @@ -13054,6 +13102,9 @@ output_pic_addr_const (FILE *file, rtx x, int code) case UNSPEC_INDNTPOFF: fputs ("@indntpoff", file); break; + case UNSPEC_SECREL32: + fputs ("@secrel32", file); + break; #if TARGET_MACHO case UNSPEC_MACHOPIC_OFFSET: putc ('-', file); @@ -13079,7 +13130,11 @@ i386_output_dwarf_dtprel (FILE *file, int size, rtx x) { fputs (ASM_LONG, file); output_addr_const (file, x); +#if TARGET_WIN32_TLS + fputs ("@secrel32", file); +#else fputs ("@dtpoff", file); +#endif switch (size) { case 4: @@ -14832,6 +14887,10 @@ i386_asm_output_addr_const_extra (FILE *file, rtx x) output_addr_const (file, op); fputs ("@indntpoff", file); break; + case UNSPEC_SECREL32: + output_addr_const (file, op); + fputs ("@secrel32", file); + break; #if TARGET_MACHO case UNSPEC_MACHOPIC_OFFSET: output_addr_const (file, op); diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 18aa42da3be..02bf357d776 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -531,6 +531,7 @@ extern unsigned char ix86_prefetch_sse; #define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2) #define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS) #define TARGET_SUN_TLS 0 +#define TARGET_WIN32_TLS 0 #ifndef TARGET_64BIT_DEFAULT #define TARGET_64BIT_DEFAULT 0 diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 44ee94a3e41..7ee6ed60801 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -79,6 +79,7 @@ UNSPEC_MACHOPIC_OFFSET UNSPEC_PCREL UNSPEC_SIZEOF + UNSPEC_SECREL32 ;; Prologue support UNSPEC_STACK_ALLOC diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 4b23e18eaf4..fbf2dd9fc51 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -218,6 +218,7 @@ case UNSPEC_DTPOFF: case UNSPEC_GOTNTPOFF: case UNSPEC_NTPOFF: + case UNSPEC_SECREL32: return true; default: break; diff --git a/gcc/config/mingw/mingw32.h b/gcc/config/mingw/mingw32.h index 10bcd293527..be2461f4db8 100644 --- a/gcc/config/mingw/mingw32.h +++ b/gcc/config/mingw/mingw32.h @@ -308,6 +308,15 @@ do { \ #undef TARGET_N_FORMAT_TYPES #define TARGET_N_FORMAT_TYPES 3 +#undef TARGET_WIN32_TLS +#define TARGET_WIN32_TLS 1 + +#undef TARGET_ASM_SELECT_SECTION +#define TARGET_ASM_SELECT_SECTION mingw_pe_select_section + +#undef DEFAULT_TLS_SEG_REG +#define DEFAULT_TLS_SEG_REG (TARGET_64BIT ? ADDR_SPACE_SEG_GS : ADDR_SPACE_SEG_FS) + #define HAVE_ENABLE_EXECUTE_STACK #undef CHECK_EXECUTE_STACK_ENABLED #define CHECK_EXECUTE_STACK_ENABLED flag_setstackexecutable diff --git a/gcc/config/mingw/winnt.cc b/gcc/config/mingw/winnt.cc index 08a761d3a6d..f22496615ed 100644 --- a/gcc/config/mingw/winnt.cc +++ b/gcc/config/mingw/winnt.cc @@ -391,6 +391,15 @@ i386_pe_strip_name_encoding_full (const char *str) return name; } +section * +mingw_pe_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align) +{ + if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl)) + return get_named_section (decl, ".tls$", reloc); + else + return default_select_section (decl, reloc, align); +} + void mingw_pe_unique_section (tree decl, int reloc) { @@ -415,6 +424,8 @@ mingw_pe_unique_section (tree decl, int reloc) prefix = ".text$"; else if (decl_readonly_section (decl, reloc)) prefix = ".rdata$"; + else if (DECL_THREAD_LOCAL_P (decl)) + prefix = ".tls$"; else prefix = ".data$"; len = strlen (name) + strlen (prefix); @@ -489,6 +500,9 @@ mingw_pe_asm_named_section (const char *name, unsigned int flags, *f++ = 'e'; #endif + if (strcmp (name, ".tls$") == 0) + *f++ = 'd'; + if ((flags & (SECTION_CODE | SECTION_WRITE)) == 0) /* readonly data */ { diff --git a/gcc/config/mingw/winnt.h b/gcc/config/mingw/winnt.h index fa2d6c09ab7..23f4dc94ec5 100644 --- a/gcc/config/mingw/winnt.h +++ b/gcc/config/mingw/winnt.h @@ -31,6 +31,7 @@ extern void mingw_pe_file_end (void); extern void mingw_pe_maybe_record_exported_symbol (tree, const char *, int); extern void mingw_pe_record_stub (const char *, bool); extern unsigned int mingw_pe_section_type_flags (tree, const char *, int); +extern section *mingw_pe_select_section (tree, int, unsigned HOST_WIDE_INT); extern void mingw_pe_unique_section (tree, int); extern bool mingw_pe_valid_dllimport_attribute_p (const_tree); diff --git a/gcc/configure b/gcc/configure index 16965953f05..776b0628c60 100755 --- a/gcc/configure +++ b/gcc/configure @@ -27864,6 +27864,35 @@ if test $gcc_cv_as_tls = yes; then fi fi +case $target_os in + win32 | pe | cygwin* | mingw32*) + if test $set_have_as_tls = yes; then + # Hack to check whether ld breaks on @secrel32 for Windows + if test $in_tree_ld = yes; then + if test "$gcc_cv_gld_major_version" -eq 2 -a "$gcc_cv_gld_minor_version" -ge 44 -o "$gcc_cv_gld_major_version" -gt 2; then + : # ld support for @secrel32 was fixed in this version + else + as_fn_error $? "ld version is known to have broken secrel32 relocations, configure without --enable-tls or with --disable-tls to remove this error. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80881 for more information." "$LINENO" 5 + fi + elif test x$gcc_cv_as != x -a x$gcc_cv_ld != x -a x$gcc_cv_objdump != x; then + echo '.text' > conftest.s + echo 'foo: nop' >> conftest.s + echo '.data' >> conftest.s + echo '.secrel32 foo' >> conftest.s + if $gcc_cv_as -o conftest.o conftest.s > /dev/null 2>&1 && $gcc_cv_ld -o conftest.exe conftest.o > /dev/null; then + if $gcc_cv_objdump -h conftest.exe | grep '\.reloc\>' > /dev/null; then + as_fn_error $? "ld has broken secrel32 relocations, configure without --enable-tls or with --disable-tls to remove this error. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80881 for more information." "$LINENO" 5 + fi + else + as_fn_error $? "Error occurred while checking for broken secrel32 relocations. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80881 for more information." "$LINENO" 5 + fi + rm -f conftest.s conftest.o conftest.exe + else + as_fn_error $? "Cannot check for broken secrel32 relocations to determine --enable-tls support. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80881 for more information." "$LINENO" 5 + fi + fi + ;; +esac if test $set_have_as_tls = yes ; then $as_echo "#define HAVE_AS_TLS 1" >>confdefs.h diff --git a/gcc/configure.ac b/gcc/configure.ac index 9f67e62950a..b6db9edfc83 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -4207,6 +4207,35 @@ else [$tls_as_opt], [$conftest_s],, [set_have_as_tls=yes]) fi +case $target_os in + win32 | pe | cygwin* | mingw32*) + if test $set_have_as_tls = yes; then + # Hack to check whether ld breaks on @secrel32 for Windows + if test $in_tree_ld = yes; then + if test "$gcc_cv_gld_major_version" -eq 2 -a "$gcc_cv_gld_minor_version" -ge 44 -o "$gcc_cv_gld_major_version" -gt 2; then + : # ld support for @secrel32 was fixed in this version + else + AC_MSG_ERROR([ld version is known to have broken secrel32 relocations, configure without --enable-tls or with --disable-tls to remove this error. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80881 for more information.]) + fi + elif test x$gcc_cv_as != x -a x$gcc_cv_ld != x -a x$gcc_cv_objdump != x; then + echo '.text' > conftest.s + echo 'foo: nop' >> conftest.s + echo '.data' >> conftest.s + echo '.secrel32 foo' >> conftest.s + if $gcc_cv_as -o conftest.o conftest.s > /dev/null 2>&1 && $gcc_cv_ld -o conftest.exe conftest.o > /dev/null; then + if $gcc_cv_objdump -h conftest.exe | grep '\.reloc\>' > /dev/null; then + AC_MSG_ERROR([ld has broken secrel32 relocations, configure without --enable-tls or with --disable-tls to remove this error. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80881 for more information.]) + fi + else + AC_MSG_ERROR([Error occurred while checking for broken secrel32 relocations. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80881 for more information.]) + fi + rm -f conftest.s conftest.o conftest.exe + else + AC_MSG_ERROR([Cannot check for broken secrel32 relocations to determine --enable-tls support. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80881 for more information.]) + fi + fi + ;; +esac if test $set_have_as_tls = yes ; then AC_DEFINE(HAVE_AS_TLS, 1, [Define if your assembler and linker support thread-local storage.]) -- 2.45.2