> Can we combine the last two patches, both of which add the call explicitly
> to the RTL templates for tls_local_dynamic_base_32/tls_global_dynamic_32,
> and set ix86_tls_descriptor_calls_expanded_in_cfun to true only after
> reload has completed?
>
Hi H.J.,

I have attached a patch that combines your two patches with the fix in legitimize_tls_address. I tried pr58066.c and c.i on ia32/x32/x86_64, and the generated code looked fine. Do you think it is OK?

Thanks,
Wei.
Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 208410) +++ config/i386/i386.c (working copy) @@ -9082,7 +9082,7 @@ ix86_frame_pointer_required (void) we've not got a leaf function. */ if (TARGET_OMIT_LEAF_FRAME_POINTER && (!crtl->is_leaf - || ix86_current_function_calls_tls_descriptor)) + || ix86_tls_descriptor_calls_expanded_in_cfun)) return true; if (crtl->profile && !flag_fentry) @@ -9331,7 +9331,7 @@ ix86_select_alt_pic_regnum (void) { if (crtl->is_leaf && !crtl->profile - && !ix86_current_function_calls_tls_descriptor) + && !ix86_tls_descriptor_calls_expanded_in_cfun) { int i, drap; /* Can't use the same register for both PIC and DRAP. */ @@ -9490,20 +9490,28 @@ ix86_compute_frame_layout (struct ix86_f frame->nregs = ix86_nsaved_regs (); frame->nsseregs = ix86_nsaved_sseregs (); - stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; - preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; - /* 64-bit MS ABI seem to require stack alignment to be always 16 except for function prologues and leaf. */ - if ((TARGET_64BIT_MS_ABI && preferred_alignment < 16) + if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128) && (!crtl->is_leaf || cfun->calls_alloca != 0 - || ix86_current_function_calls_tls_descriptor)) + || ix86_tls_descriptor_calls_expanded_in_cfun)) { - preferred_alignment = 16; - stack_alignment_needed = 16; crtl->preferred_stack_boundary = 128; crtl->stack_alignment_needed = 128; } + /* preferred_stack_boundary is never updated for call expanded from + tls descriptor. Update it here. We don't update it in expand stage + because tls calls may be optimized away. 
*/ + else if (ix86_tls_descriptor_calls_expanded_in_cfun + && crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY) + { + crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY; + if (crtl->stack_alignment_needed < PREFERRED_STACK_BOUNDARY) + crtl->stack_alignment_needed = PREFERRED_STACK_BOUNDARY; + } + + stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; + preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; gcc_assert (!size || stack_alignment_needed); gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); @@ -9608,7 +9616,7 @@ ix86_compute_frame_layout (struct ix86_f || size != 0 || !crtl->is_leaf || cfun->calls_alloca - || ix86_current_function_calls_tls_descriptor) + || ix86_tls_descriptor_calls_expanded_in_cfun) offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed; /* Frame pointer points here. */ @@ -9623,7 +9631,7 @@ ix86_compute_frame_layout (struct ix86_f of stack frame are unused. */ if (ACCUMULATE_OUTGOING_ARGS && (!crtl->is_leaf || cfun->calls_alloca - || ix86_current_function_calls_tls_descriptor)) + || ix86_tls_descriptor_calls_expanded_in_cfun)) { offset += crtl->outgoing_args_size; frame->outgoing_arguments_size = crtl->outgoing_args_size; @@ -9634,7 +9642,7 @@ ix86_compute_frame_layout (struct ix86_f /* Align stack boundary. Only needed if we're calling another function or using alloca. */ if (!crtl->is_leaf || cfun->calls_alloca - || ix86_current_function_calls_tls_descriptor) + || ix86_tls_descriptor_calls_expanded_in_cfun) offset = (offset + preferred_alignment - 1) & -preferred_alignment; /* We've reached end of stack frame. 
*/ @@ -9650,7 +9658,7 @@ ix86_compute_frame_layout (struct ix86_f if (ix86_using_red_zone () && crtl->sp_is_unchanging && crtl->is_leaf - && !ix86_current_function_calls_tls_descriptor) + && !ix86_tls_descriptor_calls_expanded_in_cfun) { frame->red_zone_size = to_allocate; if (frame->save_regs_using_mov) @@ -10623,7 +10631,7 @@ ix86_finalize_stack_realign_flags (void) && crtl->is_leaf && flag_omit_frame_pointer && crtl->sp_is_unchanging - && !ix86_current_function_calls_tls_descriptor + && !ix86_tls_descriptor_calls_expanded_in_cfun && !crtl->accesses_prior_frames && !cfun->calls_alloca && !crtl->calls_eh_return @@ -13437,26 +13445,25 @@ legitimize_tls_address (rtx x, enum tls_ else { rtx caddr = ix86_tls_get_addr (); + rtx ax = gen_rtx_REG (Pmode, AX_REG); + rtx insns; + start_sequence (); if (TARGET_64BIT) - { - rtx rax = gen_rtx_REG (Pmode, AX_REG); - rtx insns; + emit_call_insn + (ix86_gen_tls_global_dynamic_64 (ax, x, caddr)); + else + emit_call_insn + (gen_tls_global_dynamic_32 (ax, x, pic, caddr)); - start_sequence (); - emit_call_insn - (ix86_gen_tls_global_dynamic_64 (rax, x, caddr)); - insns = get_insns (); - end_sequence (); + insns = get_insns (); + end_sequence (); - if (GET_MODE (x) != Pmode) - x = gen_rtx_ZERO_EXTEND (Pmode, x); + if (GET_MODE (x) != Pmode) + x = gen_rtx_ZERO_EXTEND (Pmode, x); - RTL_CONST_CALL_P (insns) = 1; - emit_libcall_block (insns, dest, rax, x); - } - else - emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr)); + RTL_CONST_CALL_P (insns) = 1; + emit_libcall_block (insns, dest, ax, x); } break; @@ -13490,28 +13497,28 @@ legitimize_tls_address (rtx x, enum tls_ else { rtx caddr = ix86_tls_get_addr (); + rtx ax = gen_rtx_REG (Pmode, AX_REG); + rtx insns, eqv; + + start_sequence (); if (TARGET_64BIT) - { - rtx rax = gen_rtx_REG (Pmode, AX_REG); - rtx insns, eqv; + emit_call_insn + (ix86_gen_tls_local_dynamic_base_64 (ax, caddr)); + else + emit_call_insn + (gen_tls_local_dynamic_base_32 (ax, pic, caddr)); - start_sequence 
(); - emit_call_insn - (ix86_gen_tls_local_dynamic_base_64 (rax, caddr)); - insns = get_insns (); - end_sequence (); - - /* Attach a unique REG_EQUAL, to allow the RTL optimizers to - share the LD_BASE result with other LD model accesses. */ - eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), - UNSPEC_TLS_LD_BASE); + insns = get_insns (); + end_sequence (); - RTL_CONST_CALL_P (insns) = 1; - emit_libcall_block (insns, base, rax, eqv); - } - else - emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr)); + /* Attach a unique REG_EQUAL, to allow the RTL optimizers to + share the LD_BASE result with other LD model accesses. */ + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_TLS_LD_BASE); + + RTL_CONST_CALL_P (insns) = 1; + emit_libcall_block (insns, base, ax, eqv); } off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); Index: config/i386/i386.h =================================================================== --- config/i386/i386.h (revision 208410) +++ config/i386/i386.h (working copy) @@ -2451,13 +2451,7 @@ struct GTY(()) machine_function { #define ix86_current_function_needs_cld (cfun->machine->needs_cld) #define ix86_tls_descriptor_calls_expanded_in_cfun \ (cfun->machine->tls_descriptor_call_expanded_p) -/* Since tls_descriptor_call_expanded is not cleared, even if all TLS - calls are optimized away, we try to detect cases in which it was - optimized away. Since such instructions (use (reg REG_SP)), we can - verify whether there's any such instruction live by testing that - REG_SP is live. */ -#define ix86_current_function_calls_tls_descriptor \ - (ix86_tls_descriptor_calls_expanded_in_cfun && df_regs_ever_live_p (SP_REG)) + #define ix86_static_chain_on_stack (cfun->machine->static_chain_on_stack) /* Control behavior of x86_file_start. 
*/ Index: config/i386/i386.md =================================================================== --- config/i386/i386.md (revision 208410) +++ config/i386/i386.md (working copy) @@ -12857,15 +12857,16 @@ ;; Note that these code sequences must appear exactly as shown ;; in order to allow linker relaxation. -(define_insn "*tls_global_dynamic_32_gnu" +(define_insn_and_split "*tls_global_dynamic_32_gnu" [(set (match_operand:SI 0 "register_operand" "=a") - (unspec:SI - [(match_operand:SI 1 "register_operand" "b") - (match_operand 2 "tls_symbolic_operand") - (match_operand 3 "constant_call_address_operand" "z")] - UNSPEC_TLS_GD)) - (clobber (match_scratch:SI 4 "=d")) - (clobber (match_scratch:SI 5 "=c")) + (call:SI + (mem:QI (match_operand 3 "constant_call_address_operand" "z")) + (match_operand 4))) + (unspec:SI [(match_operand:SI 1 "register_operand" "b") + (match_operand 2 "tls_symbolic_operand")] + UNSPEC_TLS_GD) + (clobber (match_scratch:SI 5 "=d")) + (clobber (match_scratch:SI 6 "=c")) (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && TARGET_GNU_TLS" { @@ -12879,21 +12880,38 @@ #endif return "call\t%P3"; } + "reload_completed" + [(parallel + [(set (match_dup 0) + (call:SI + (mem:QI (match_dup 3)) + (match_dup 4))) + (unspec:SI [(match_dup 1) + (match_dup 2)] + UNSPEC_TLS_GD) + (clobber (match_dup 5)) + (clobber (match_dup 6)) + (clobber (reg:CC FLAGS_REG))])] +{ + ix86_tls_descriptor_calls_expanded_in_cfun = true; +} [(set_attr "type" "multi") (set_attr "length" "12")]) (define_expand "tls_global_dynamic_32" [(parallel [(set (match_operand:SI 0 "register_operand") - (unspec:SI [(match_operand:SI 2 "register_operand") - (match_operand 1 "tls_symbolic_operand") - (match_operand 3 "constant_call_address_operand")] - UNSPEC_TLS_GD)) + (call:SI + (mem:QI (match_operand 3 "constant_call_address_operand")) + (const_int 0))) + (unspec:SI [(match_operand:SI 2 "register_operand") + (match_operand 1 "tls_symbolic_operand")] + UNSPEC_TLS_GD) (clobber (match_scratch:SI 4)) 
(clobber (match_scratch:SI 5)) (clobber (reg:CC FLAGS_REG))])]) -(define_insn "*tls_global_dynamic_64_<mode>" +(define_insn_and_split "*tls_global_dynamic_64_<mode>" [(set (match_operand:P 0 "register_operand" "=a") (call:P (mem:QI (match_operand 2 "constant_call_address_operand" "z")) @@ -12912,11 +12930,22 @@ return "call\t%p2@plt"; return "call\t%P2"; } + "reload_completed" + [(parallel + [(set (match_dup 0) + (call:P + (mem:QI (match_dup 2)) + (match_dup 3))) + (unspec:P [(match_dup 1)] + UNSPEC_TLS_GD)])] +{ + ix86_tls_descriptor_calls_expanded_in_cfun = true; +} [(set_attr "type" "multi") (set (attr "length") (symbol_ref "TARGET_X32 ? 15 : 16"))]) -(define_insn "*tls_global_dynamic_64_largepic" +(define_insn_and_split "*tls_global_dynamic_64_largepic" [(set (match_operand:DI 0 "register_operand" "=a") (call:DI (mem:QI (plus:DI (match_operand:DI 2 "register_operand" "b") @@ -12935,6 +12964,18 @@ output_asm_insn ("add{q}\t{%2, %%rax|rax, %2}", operands); return "call\t{*%%rax|rax}"; } + "reload_completed" + [(parallel + [(set (match_dup 0) + (call:P + (mem:QI (plus:DI (match_dup 2) + (match_dup 3))) + (match_operand 4))) + (unspec:P [(match_dup 1)] + UNSPEC_TLS_GD)])] +{ + ix86_tls_descriptor_calls_expanded_in_cfun = true; +} [(set_attr "type" "multi") (set_attr "length" "22")]) @@ -12948,14 +12989,15 @@ UNSPEC_TLS_GD)])] "TARGET_64BIT") -(define_insn "*tls_local_dynamic_base_32_gnu" +(define_insn_and_split "*tls_local_dynamic_base_32_gnu" [(set (match_operand:SI 0 "register_operand" "=a") - (unspec:SI - [(match_operand:SI 1 "register_operand" "b") - (match_operand 2 "constant_call_address_operand" "z")] - UNSPEC_TLS_LD_BASE)) - (clobber (match_scratch:SI 3 "=d")) - (clobber (match_scratch:SI 4 "=c")) + (call:SI + (mem:QI (match_operand 2 "constant_call_address_operand" "z")) + (match_operand 3))) + (unspec:SI [(match_operand:SI 1 "register_operand" "b")] + UNSPEC_TLS_LD_BASE) + (clobber (match_scratch:SI 4 "=d")) + (clobber (match_scratch:SI 5 "=c")) (clobber 
(reg:CC FLAGS_REG))] "!TARGET_64BIT && TARGET_GNU_TLS" { @@ -12970,21 +13012,36 @@ } return "call\t%P2"; } + "reload_completed" + [(parallel + [(set (match_dup 0) + (call:SI + (mem:QI (match_dup 2)) + (match_dup 3))) + (unspec:SI [(match_dup 1)] + UNSPEC_TLS_LD_BASE) + (clobber (match_dup 4)) + (clobber (match_dup 5)) + (clobber (reg:CC FLAGS_REG))])] +{ + ix86_tls_descriptor_calls_expanded_in_cfun = true; +} [(set_attr "type" "multi") (set_attr "length" "11")]) (define_expand "tls_local_dynamic_base_32" [(parallel [(set (match_operand:SI 0 "register_operand") - (unspec:SI - [(match_operand:SI 1 "register_operand") - (match_operand 2 "constant_call_address_operand")] - UNSPEC_TLS_LD_BASE)) + (call:SI + (mem:QI (match_operand 2 "constant_call_address_operand")) + (const_int 0))) + (unspec:SI [(match_operand:SI 1 "register_operand")] + UNSPEC_TLS_LD_BASE) (clobber (match_scratch:SI 3)) (clobber (match_scratch:SI 4)) (clobber (reg:CC FLAGS_REG))])]) -(define_insn "*tls_local_dynamic_base_64_<mode>" +(define_insn_and_split "*tls_local_dynamic_base_64_<mode>" [(set (match_operand:P 0 "register_operand" "=a") (call:P (mem:QI (match_operand 1 "constant_call_address_operand" "z")) @@ -12998,10 +13055,20 @@ return "call\t%p1@plt"; return "call\t%P1"; } + "reload_completed" + [(parallel + [(set (match_dup 0) + (call:P + (mem:QI (match_dup 1)) + (match_dup 2))) + (unspec:P [(const_int 0)] UNSPEC_TLS_LD_BASE)])] +{ + ix86_tls_descriptor_calls_expanded_in_cfun = true; +} [(set_attr "type" "multi") (set_attr "length" "12")]) -(define_insn "*tls_local_dynamic_base_64_largepic" +(define_insn_and_split "*tls_local_dynamic_base_64_largepic" [(set (match_operand:DI 0 "register_operand" "=a") (call:DI (mem:QI (plus:DI (match_operand:DI 1 "register_operand" "b") @@ -13019,6 +13086,17 @@ output_asm_insn ("add{q}\t{%1, %%rax|rax, %1}", operands); return "call\t{*%%rax|rax}"; } + "reload_completed" + [(parallel + [(set (match_dup 0) + (call:DI + (mem:QI (plus:DI (match_dup 1) + (match_dup 
2))) + (match_dup 3))) + (unspec:DI [(const_int 0)] UNSPEC_TLS_LD_BASE)])] +{ + ix86_tls_descriptor_calls_expanded_in_cfun = true; +} [(set_attr "type" "multi") (set_attr "length" "22")]) @@ -13031,32 +13109,6 @@ (unspec:P [(const_int 0)] UNSPEC_TLS_LD_BASE)])] "TARGET_64BIT") -;; Local dynamic of a single variable is a lose. Show combine how -;; to convert that back to global dynamic. - -(define_insn_and_split "*tls_local_dynamic_32_once" - [(set (match_operand:SI 0 "register_operand" "=a") - (plus:SI - (unspec:SI [(match_operand:SI 1 "register_operand" "b") - (match_operand 2 "constant_call_address_operand" "z")] - UNSPEC_TLS_LD_BASE) - (const:SI (unspec:SI - [(match_operand 3 "tls_symbolic_operand")] - UNSPEC_DTPOFF)))) - (clobber (match_scratch:SI 4 "=d")) - (clobber (match_scratch:SI 5 "=c")) - (clobber (reg:CC FLAGS_REG))] - "" - "#" - "" - [(parallel - [(set (match_dup 0) - (unspec:SI [(match_dup 1) (match_dup 3) (match_dup 2)] - UNSPEC_TLS_GD)) - (clobber (match_dup 4)) - (clobber (match_dup 5)) - (clobber (reg:CC FLAGS_REG))])]) - ;; Segment register for the thread base ptr load (define_mode_attr tp_seg [(SI "gs") (DI "fs")]) @@ -13167,7 +13219,6 @@ "!TARGET_64BIT && TARGET_GNU2_TLS" { operands[3] = can_create_pseudo_p () ? 
gen_reg_rtx (Pmode) : operands[0]; - ix86_tls_descriptor_calls_expanded_in_cfun = true; }) (define_insn "*tls_dynamic_gnu2_lea_32" @@ -13183,7 +13234,7 @@ (set_attr "length" "6") (set_attr "length_address" "4")]) -(define_insn "*tls_dynamic_gnu2_call_32" +(define_insn_and_split "*tls_dynamic_gnu2_call_32" [(set (match_operand:SI 0 "register_operand" "=a") (unspec:SI [(match_operand 1 "tls_symbolic_operand") (match_operand:SI 2 "register_operand" "0") @@ -13194,6 +13245,17 @@ (clobber (reg:CC FLAGS_REG))] "!TARGET_64BIT && TARGET_GNU2_TLS" "call\t{*%a1@TLSCALL(%2)|[DWORD PTR [%2+%a1@TLSCALL]]}" + "reload_completed" + [(set (match_dup 0) + (unspec:SI [(match_dup 1) + (match_dup 2) + (match_dup 3) + (reg:SI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))] +{ + ix86_tls_descriptor_calls_expanded_in_cfun = true; +} [(set_attr "type" "call") (set_attr "length" "2") (set_attr "length_address" "0")]) @@ -13231,7 +13293,6 @@ "TARGET_64BIT && TARGET_GNU2_TLS" { operands[2] = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : operands[0]; - ix86_tls_descriptor_calls_expanded_in_cfun = true; }) (define_insn "*tls_dynamic_gnu2_lea_64" @@ -13245,7 +13306,7 @@ (set_attr "length" "7") (set_attr "length_address" "4")]) -(define_insn "*tls_dynamic_gnu2_call_64" +(define_insn_and_split "*tls_dynamic_gnu2_call_64" [(set (match_operand:DI 0 "register_operand" "=a") (unspec:DI [(match_operand 1 "tls_symbolic_operand") (match_operand:DI 2 "register_operand" "0") @@ -13254,6 +13315,16 @@ (clobber (reg:CC FLAGS_REG))] "TARGET_64BIT && TARGET_GNU2_TLS" "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}" + "reload_completed" + [(set (match_dup 0) + (unspec:SI [(match_dup 1) + (match_dup 2) + (reg:DI SP_REG)] + UNSPEC_TLSDESC)) + (clobber (reg:CC FLAGS_REG))] +{ + ix86_tls_descriptor_calls_expanded_in_cfun = true; +} [(set_attr "type" "call") (set_attr "length" "2") (set_attr "length_address" "0")])