Hello everyone,

This patch is an initial implementation of native Thread Local Storage on
Windows, where GCC currently emulates TLS via emutls. It is heavily based on
Daniel Green's original work on Windows TLS from a decade ago, so credit
should be attributed to him as well (https://github.com/venix1, with the
original implementation being
https://github.com/venix1/MinGW-GDC/blob/master/patches/mingw-tls-gcc-4.8.patch).
TLS support still requires a bug in ld to be fixed, and the work for that is
currently underway (with thanks to Jan Beulich). Note that native TLS is still
disabled by default for Windows, and has to be explicitly enabled via the
--enable-tls switch at configure time. There are some issues with this
implementation, namely: (1) the TLS section is only emitted with the "w"
section flag, without the "d" flag alongside it (I am unsure whether the
assembler (as) requires the "d" flag or not); (2) the TLS init method being
emitted has not yet been rewritten to work on Windows (I do not know how to do
this); and (3) the last step of the TLS access is inefficient, because the
patch zero-extends the TLS symbol, which causes an extra instruction to be
emitted. This is unfortunate, but I could not find a way to implement this
without the zero extension, as all other alternatives would crash when trying
to compile libgcc or libgomp. If anyone has suggestions for removing this
extra instruction, or for fixing the other issues with the implementation, I
would be more than happy to apply the changes to the patch. As always, I do
not have write access to gcc, so once the green light is given for this patch
I will need help committing it. The patch is attached at the very end of this
mail.

best regards,
Julian

P.S. The extra, unnecessary instruction is demonstrated below by comparing the
code generated by gcc with that generated by clang (both at -O3):

thread_local int local = 2;

int main() {
    local = 7;
}

clang:
mov     eax, dword ptr [rip + _tls_index]
mov     rcx, qword ptr gs:[88]
mov     rsi, qword ptr [rcx + 8*rax]
mov     dword ptr [rsi + local@SECREL32], 7 <------ Notice how clang moves 7 into the calculated TLS address in one step

gcc:

mov     eax, DWORD PTR [rip+_tls_index]
mov     rdx, QWORD PTR gs:[88]
mov     rax, QWORD PTR [rdx+rax*8]
lea     edx, local@secrel32 <------ gcc first loads the TLS offset
mov     DWORD PTR [rdx+rax], 7 <------ Then adds it to the thread pointer before storing, which is not necessary

gcc/config/i386/ChangeLog:

        * i386.cc (mingw_w64_pe_select_section): New function.
        (ix86_legitimate_constant_p): Handle new relocation.
        (legitimate_pic_operand_p): Handle new relocation.
        (legitimate_pic_address_disp_p): Handle new relocation.
        (ix86_legitimate_address_p): Handle new relocation.
        (legitimize_tls_address): Handle new Thread Local Storage model.
        (output_pic_addr_const): Handle new relocation.
        (i386_output_dwarf_dtprel): Handle new relocation.
        (i386_asm_output_addr_const_extra): Handle new relocation.

        * i386.h: New TARGET_WIN32_TLS flag.

        * i386.md: Define UNSPEC_SECREL32 and UNSPEC_TLS_WIN32, and add a new
        insn pattern for UNSPEC_TLS_WIN32.

        * mingw-w64.h: Define TARGET_ASM_SELECT_SECTION and TARGET_WIN32_TLS.

        * predicates.md: Handle new relocation.

gcc/config/mingw/ChangeLog:

        * winnt.cc (mingw_pe_unique_section): Emit new TLS section.

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 45320124b91..c1e6760a073 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -789,6 +789,20 @@ x86_64_elf_select_section (tree decl, int reloc,
   return default_elf_select_section (decl, reloc, align);
 }
 
+ATTRIBUTE_UNUSED static section *
+mingw_w64_pe_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
+{
+  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
+    {
+      if (!DECL_P (decl))
+       decl = NULL_TREE;
+
+      return get_named_section (decl, ".tls$", reloc);
+    }
+  else
+    return default_select_section (decl, reloc, align);
+}
+
 /* Select a set of attributes for section NAME based on the properties
    of DECL and whether or not RELOC indicates that DECL's initializer
    might contain runtime relocations.  */
@@ -11170,6 +11184,9 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x)
            x = XVECEXP (x, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
+         case UNSPEC_SECREL32:
+           x = XVECEXP (x, 0, 0);
+           return GET_CODE (x) == SYMBOL_REF;
          default:
            return false;
          }
@@ -11306,6 +11323,9 @@ legitimate_pic_operand_p (rtx x)
            x = XVECEXP (inner, 0, 0);
            return (GET_CODE (x) == SYMBOL_REF
                    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
+         case UNSPEC_SECREL32:
+           x = XVECEXP (inner, 0, 0);
+           return GET_CODE (x) == SYMBOL_REF;
          case UNSPEC_MACHOPIC_OFFSET:
            return legitimate_pic_address_disp_p (x);
          default:
@@ -11486,6 +11506,9 @@ legitimate_pic_address_disp_p (rtx disp)
       disp = XVECEXP (disp, 0, 0);
       return (GET_CODE (disp) == SYMBOL_REF
              && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
+    case UNSPEC_SECREL32:
+      disp = XVECEXP (disp, 0, 0);
+      return GET_CODE (disp) == SYMBOL_REF;
     }
 
   return false;
@@ -11763,6 +11786,7 @@ ix86_legitimate_address_p (machine_mode, rtx addr, bool strict,
          case UNSPEC_INDNTPOFF:
          case UNSPEC_NTPOFF:
          case UNSPEC_DTPOFF:
+         case UNSPEC_SECREL32:
            break;
 
          default:
@@ -12165,6 +12189,14 @@ ix86_tls_module_base (void)
 rtx
 legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
 {
+#if TARGET_WIN32_TLS
+  rtx base = gen_reg_rtx (Pmode);
+
+  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, gen_rtx_SET (base, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLS_WIN32)), gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
+
+  // Only 64-bit is supported
+  return gen_rtx_PLUS (Pmode, base, gen_rtx_ZERO_EXTEND (Pmode, gen_rtx_CONST (SImode, gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_SECREL32))));
+#else
   rtx dest, base, off;
   rtx pic = NULL_RTX, tp = NULL_RTX;
   machine_mode tp_mode = Pmode;
@@ -12403,6 +12435,7 @@ legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
     }
 
   return dest;
+#endif
 }
 
 /* Return true if the TLS address requires insn using integer registers.
@@ -12865,6 +12898,9 @@ output_pic_addr_const (FILE *file, rtx x, int code)
        case UNSPEC_INDNTPOFF:
          fputs ("@indntpoff", file);
          break;
+       case UNSPEC_SECREL32:
+         fputs ("@secrel32", file);
+         break;
 #if TARGET_MACHO
        case UNSPEC_MACHOPIC_OFFSET:
          putc ('-', file);
@@ -12890,7 +12926,11 @@ i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
 {
   fputs (ASM_LONG, file);
   output_addr_const (file, x);
+#if TARGET_WIN32_TLS
+  fputs ("@secrel32", file);
+#else
   fputs ("@dtpoff", file);
+#endif
   switch (size)
     {
     case 4:
@@ -14643,6 +14683,10 @@ i386_asm_output_addr_const_extra (FILE *file, rtx x)
       output_addr_const (file, op);
       fputs ("@indntpoff", file);
       break;
+    case UNSPEC_SECREL32:
+      output_addr_const (file, op);
+      fputs ("@secrel32", file);
+      break;
 #if TARGET_MACHO
     case UNSPEC_MACHOPIC_OFFSET:
       output_addr_const (file, op);
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index c1ec92ffb15..ae12304fe06 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -506,6 +506,7 @@ extern unsigned char ix86_prefetch_sse;
 #define TARGET_GNU2_TLS                (ix86_tls_dialect == TLS_DIALECT_GNU2)
 #define TARGET_ANY_GNU_TLS     (TARGET_GNU_TLS || TARGET_GNU2_TLS)
 #define TARGET_SUN_TLS         0
+#define TARGET_WIN32_TLS       0
 
 #ifndef TARGET_64BIT_DEFAULT
 #define TARGET_64BIT_DEFAULT 0
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8d269feee83..00de288ce98 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -79,6 +79,7 @@
   UNSPEC_MACHOPIC_OFFSET
   UNSPEC_PCREL
   UNSPEC_SIZEOF
+  UNSPEC_SECREL32
 
   ;; Prologue support
   UNSPEC_STACK_ALLOC
@@ -94,6 +95,7 @@
   UNSPEC_TLS_LD_BASE
   UNSPEC_TLSDESC
   UNSPEC_TLS_IE_SUN
+  UNSPEC_TLS_WIN32
 
   ;; Other random patterns
   UNSPEC_SCAS
@@ -22709,6 +22711,14 @@
       (clobber (match_dup 5))
       (clobber (reg:CC FLAGS_REG))])])
 
+(define_insn ""
+  [(set (match_operand:P 0 "register_operand" "=r")
+       (unspec:P [(const_int 0)] UNSPEC_TLS_WIN32))
+       (clobber (match_scratch:P 1 "=r"))]
+  "TARGET_WIN32_TLS"
+  "mov{l}\t{_tls_index(%%rip), %k0|%k0, DWORD PTR [rip+_tls_index]}\;mov{q}\t{%%gs:88, %1|%1, QWORD PTR gs:[88]}\;mov{q}\t{(%1,%0,8), %0|%0, QWORD PTR [%1+%0*8]}"
+  [(set_attr "type" "multi")])
+
 ;; Load and add the thread base pointer from %<tp_seg>:0.
 (define_expand "get_thread_pointer<mode>"
   [(set (match_operand:PTR 0 "register_operand")
diff --git a/gcc/config/i386/mingw-w64.h b/gcc/config/i386/mingw-w64.h
index 0a9986c44d4..47c76eb85a2 100644
--- a/gcc/config/i386/mingw-w64.h
+++ b/gcc/config/i386/mingw-w64.h
@@ -135,3 +135,9 @@ along with GCC; see the file COPYING3.  If not see
    original mingw32.  */
 #undef TARGET_LIBC_HAS_FUNCTION
 #define TARGET_LIBC_HAS_FUNCTION gnu_libc_has_function
+
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION mingw_w64_pe_select_section
+
+#undef TARGET_WIN32_TLS
+#define TARGET_WIN32_TLS 1
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 053312bbe27..d83b27355cc 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -218,6 +218,7 @@
          case UNSPEC_DTPOFF:
          case UNSPEC_GOTNTPOFF:
          case UNSPEC_NTPOFF:
+         case UNSPEC_SECREL32:
            return true;
          default:
            break;
diff --git a/gcc/config/mingw/winnt.cc b/gcc/config/mingw/winnt.cc
index 803e5f5ec85..5721298b224 100644
--- a/gcc/config/mingw/winnt.cc
+++ b/gcc/config/mingw/winnt.cc
@@ -415,6 +415,8 @@ mingw_pe_unique_section (tree decl, int reloc)
     prefix = ".text$";
   else if (decl_readonly_section (decl, reloc))
     prefix = ".rdata$";
+  else if (DECL_THREAD_LOCAL_P (decl))
+    prefix = ".tls$";
   else
     prefix = ".data$";
   len = strlen (name) + strlen (prefix);

Reply via email to