On Wed, Mar 12, 2014 at 10:52 PM, Wei Mi <w...@google.com> wrote:
> I saw the problem the last patch had on ia32. Without an explicit call in the
> rtl template, the scheduler may schedule the sp adjusting insn across the tls
> descriptor and break the alignment assumption.
> I am testing the updated patch on x86_64.
>
> Can we combine the last two patches, both adding the call explicitly in the
> rtl template for tls_local_dynamic_base_32/tls_global_dynamic_32, and
> set ix86_tls_descriptor_calls_expanded_in_cfun to true only after
> reload completes?
>
My ia32 change generates much worse code:

[hjl@gnu-6 gcc]$ cat /tmp/c.i
static __thread char ccc, bbb;
int __cxa_get_globals() { return &ccc - &bbb; }
[hjl@gnu-6 gcc]$ ./xgcc -B./ -S -O2 -fPIC /tmp/c.i
[hjl@gnu-6 gcc]$ cat c.s
	.file "c.i"
	.section .text.unlikely,"ax",@progbits
.LCOLDB0:
	.text
.LHOTB0:
	.p2align 4,,15
	.globl __cxa_get_globals
	.type __cxa_get_globals, @function
__cxa_get_globals:
.LFB0:
	.cfi_startproc
	subq $8, %rsp
	.cfi_def_cfa_offset 16
	leaq ccc@tlsld(%rip), %rdi
	call __tls_get_addr@PLT
	addq $8, %rsp
	.cfi_def_cfa_offset 8
	leaq ccc@dtpoff(%rax), %rcx
	leaq bbb@dtpoff(%rax), %rdx
	movq %rcx, %rax
	subq %rdx, %rax
	ret
	.cfi_endproc
.LFE0:
	.size __cxa_get_globals, .-__cxa_get_globals
	.section .text.unlikely
.LCOLDE0:
	.text
.LHOTE0:
	.section .tbss,"awT",@nobits
	.type bbb, @object
	.size bbb, 1
bbb:
	.zero 1
	.type ccc, @object
	.size ccc, 1
ccc:
	.zero 1
	.ident "GCC: (GNU) 4.9.0 20140312 (experimental)"
	.section .note.GNU-stack,"",@progbits
[hjl@gnu-6 gcc]$ cat /tmp/c.i
static __thread char ccc, bbb;
int __cxa_get_globals() { return &ccc - &bbb; }
[hjl@gnu-6 gcc]$ ./xgcc -B./ -S -O2 -fPIC /tmp/c.i -m32
[hjl@gnu-6 gcc]$ cat c.s
	.file "c.i"
	.section .text.unlikely,"ax",@progbits
.LCOLDB0:
	.text
.LHOTB0:
	.p2align 4,,15
	.globl __cxa_get_globals
	.type __cxa_get_globals, @function
__cxa_get_globals:
.LFB0:
	.cfi_startproc
	pushl %esi
	.cfi_def_cfa_offset 8
	.cfi_offset 6, -8
	pushl %ebx
	.cfi_def_cfa_offset 12
	.cfi_offset 3, -12
	call __x86.get_pc_thunk.bx
	addl $_GLOBAL_OFFSET_TABLE_, %ebx
	subl $4, %esp
	.cfi_def_cfa_offset 16
	leal ccc@tlsldm(%ebx), %eax
	call ___tls_get_addr@PLT
	leal ccc@dtpoff(%eax), %esi
	leal ccc@tlsldm(%ebx), %eax
	call ___tls_get_addr@PLT
	addl $4, %esp
	.cfi_def_cfa_offset 12
	leal bbb@dtpoff(%eax), %eax
	popl %ebx
	.cfi_restore 3
	.cfi_def_cfa_offset 8
	subl %eax, %esi
	movl %esi, %eax
	popl %esi
	.cfi_restore 6
	.cfi_def_cfa_offset 4
	ret
	.cfi_endproc

Maybe we should keep the original patterns and split them to add CALL.

--
H.J.