Hello! Now that %ebx is no more fixed, we can remove all PIC related complications in atomic_compare_and_swap<dwi>_doubleword pattern. The immediate consequence is, that we avoid "hidden" xchgs that clobbered unwinding state.
Earlier fix by Ian [1] partly solved this issue using various .cfi directives to fixup the mess, but these were not available on the systems without .cfi directives (e.g. Centos 5 and Solaris). The patch fixes this problem for good by removing problematic alternative that tried to skip %ebx allocations. 2014-10-15 Uros Bizjak <ubiz...@gmail.com> PR go/59432 * config/i386/sync.md (atomic_compare_and_swap<dwi>_doubleword): Remove the second alternative. (regprefix): Remove mode attribute. (atomic_compare_and_swap<mode>): Do not fixup operand 2. * config/i386/predicates.md (cmpxchg8b_pic_memory_operand): Remove. Revert: 2013-11-05 Ian Lance Taylor <i...@google.com> * config/i386/sync.md (atomic_compare_and_swap<dwi>_doubleword): If possible, add .cfi directives to record change to bx. * config/i386/i386.c (ix86_emit_cfi): New function. * config/i386/i386-protos.h (ix86_emit_cfi): Declare. Patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32} on Fedora 20 and Centos 5.11, where fixes Go sync/atomic failure on 32bit targets. Patch was committed to mainline SVN. Uros
Index: predicates.md =================================================================== --- predicates.md (revision 216245) +++ predicates.md (working copy) @@ -1092,43 +1092,6 @@ return parts.disp != NULL_RTX; }) -;; Return true if OP is memory operand which will need zero or -;; one register at most, not counting stack pointer or frame pointer. -(define_predicate "cmpxchg8b_pic_memory_operand" - (match_operand 0 "memory_operand") -{ - struct ix86_address parts; - int ok; - - if (TARGET_64BIT || !flag_pic) - return true; - - ok = ix86_decompose_address (XEXP (op, 0), &parts); - gcc_assert (ok); - - if (parts.base && GET_CODE (parts.base) == SUBREG) - parts.base = SUBREG_REG (parts.base); - if (parts.index && GET_CODE (parts.index) == SUBREG) - parts.index = SUBREG_REG (parts.index); - - if (parts.base == NULL_RTX - || parts.base == arg_pointer_rtx - || parts.base == frame_pointer_rtx - || parts.base == hard_frame_pointer_rtx - || parts.base == stack_pointer_rtx) - return true; - - if (parts.index == NULL_RTX - || parts.index == arg_pointer_rtx - || parts.index == frame_pointer_rtx - || parts.index == hard_frame_pointer_rtx - || parts.index == stack_pointer_rtx) - return true; - - return false; -}) - - ;; Return true if OP is memory operand that cannot be represented ;; by the modRM array. (define_predicate "long_memory_operand" Index: i386-protos.h =================================================================== --- i386-protos.h (revision 216245) +++ i386-protos.h (working copy) @@ -142,7 +142,6 @@ extern void ix86_split_lshr (rtx *, rtx, enum mach extern rtx ix86_find_base_term (rtx); extern bool ix86_check_movabs (rtx, int); extern void ix86_split_idivmod (enum machine_mode, rtx[], bool); -extern bool ix86_emit_cfi (); extern rtx assign_386_stack_local (enum machine_mode, enum ix86_stack_slot); extern int ix86_attr_length_immediate_default (rtx_insn *, bool); Index: sync.md =================================================================== --- sync.md (revision 216245) +++ sync.md (working copy) @@ -351,10 +351,9 @@ else { enum machine_mode hmode = <CASHMODE>mode; - rtx lo_o, lo_e, lo_n, hi_o, hi_e, hi_n, mem; + rtx lo_o, lo_e, lo_n, hi_o, hi_e, hi_n; lo_o = operands[1]; - mem = operands[2]; lo_e = operands[3]; lo_n = operands[4]; hi_o = gen_highpart (hmode, lo_o); @@ -364,12 +363,9 @@ lo_e = gen_lowpart (hmode, lo_e); lo_n = gen_lowpart (hmode, lo_n); - if (!cmpxchg8b_pic_memory_operand (mem, <MODE>mode)) - mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0))); - emit_insn (gen_atomic_compare_and_swap<mode>_doubleword - (lo_o, hi_o, mem, lo_e, hi_e, lo_n, hi_n, operands[6])); + (lo_o, hi_o, operands[2], lo_e, hi_e, lo_n, hi_n, operands[6])); } ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), @@ -398,57 +394,27 @@ ;; That said, in order to take advantage of possible lower-subreg opts, ;; treat all of the integral operands in the same way. -;; Operands 5 and 6 really need to be different registers, which in -;; this case means op5 must not be ecx. If op5 and op6 are the same -;; (like when the input is -1LL) GCC might chose to allocate op5 to ecx, -;; like op6. This breaks, as the xchg will move the PIC register -;; contents to %ecx then --> boom. - (define_mode_attr doublemodesuffix [(SI "8") (DI "16")]) -(define_mode_attr regprefix [(SI "e") (DI "r")]) (define_insn "atomic_compare_and_swap<dwi>_doubleword" - [(set (match_operand:DWIH 0 "register_operand" "=a,a") + [(set (match_operand:DWIH 0 "register_operand" "=a") (unspec_volatile:DWIH - [(match_operand:<DWI> 2 "cmpxchg8b_pic_memory_operand" "+m,m") - (match_operand:DWIH 3 "register_operand" "0,0") - (match_operand:DWIH 4 "register_operand" "1,1") - (match_operand:DWIH 5 "register_operand" "b,!*r") - (match_operand:DWIH 6 "register_operand" "c,c") + [(match_operand:<DWI> 2 "memory_operand" "+m") + (match_operand:DWIH 3 "register_operand" "0") + (match_operand:DWIH 4 "register_operand" "1") + (match_operand:DWIH 5 "register_operand" "b") + (match_operand:DWIH 6 "register_operand" "c") (match_operand:SI 7 "const_int_operand")] UNSPECV_CMPXCHG)) - (set (match_operand:DWIH 1 "register_operand" "=d,d") + (set (match_operand:DWIH 1 "register_operand" "=d") (unspec_volatile:DWIH [(const_int 0)] UNSPECV_CMPXCHG)) (set (match_dup 2) (unspec_volatile:<DWI> [(const_int 0)] UNSPECV_CMPXCHG)) (set (reg:CCZ FLAGS_REG) - (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG)) - (clobber (match_scratch:DWIH 8 "=X,&5"))] + (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))] "TARGET_CMPXCHG<doublemodesuffix>B" -{ - bool swap = REGNO (operands[5]) != BX_REG; - const char *xchg = "xchg{<imodesuffix>}\t%%<regprefix>bx, %5"; + "lock{%;} %K7cmpxchg<doublemodesuffix>b\t%2") - if (swap) - { - output_asm_insn (xchg, operands); - if (ix86_emit_cfi ()) - { - output_asm_insn (".cfi_remember_state", operands); - output_asm_insn (".cfi_register\t%%<regprefix>bx, %5", operands); - } - } - output_asm_insn ("lock{%;} %K7cmpxchg<doublemodesuffix>b\t%2", operands); - if (swap) - { - output_asm_insn (xchg, operands); - if (ix86_emit_cfi ()) - output_asm_insn (".cfi_restore_state", operands); - } - - return ""; -}) - ;; For operand 2 nonmemory_operand predicate is used instead of ;; register_operand to allow combiner to better optimize atomic ;; additions of constants. Index: i386.c =================================================================== --- i386.c (revision 216245) +++ i386.c (working copy) @@ -17778,14 +17778,6 @@ ix86_split_idivmod (enum machine_mode mode, rtx op emit_label (end_label); } -/* Whether it is OK to emit CFI directives when emitting asm code. */ - -bool -ix86_emit_cfi () -{ - return dwarf2out_do_cfi_asm (); -} - #define LEA_MAX_STALL (3) #define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)