This pattern will only be used with the __sync functions, because we do not yet have a bare TImode atomic load.
* config/aarch64/aarch64.c (aarch64_gen_compare_reg): Add support for NE comparison of TImode values. (aarch64_print_operand): Extend %R to handle general registers. (aarch64_emit_load_exclusive): Add support for TImode. (aarch64_emit_store_exclusive): Likewise. (aarch64_atomic_ool_func): Likewise. (aarch64_ool_cas_names): Likewise. * config/aarch64/atomics.md (@atomic_compare_and_swap<ALLI_TI>): Change iterator from ALLI to ALLI_TI. (@atomic_compare_and_swap<JUST_TI>): New. (@atomic_compare_and_swap<JUST_TI>_lse): New. (aarch64_load_exclusive_pair): New. (aarch64_store_exclusive_pair): New. * config/aarch64/iterators.md (JUST_TI): New. * config/aarch64/lse.c (cas): Add support for SIZE == 16. * config/aarch64/t-lse (S0, O0): Split out cas. (LSE_OBJS): Include $(O0). --- gcc/config/aarch64/aarch64-protos.h | 2 +- gcc/config/aarch64/aarch64.c | 72 ++++++++++++++++++----- libgcc/config/aarch64/lse.c | 48 ++++++++++----- gcc/config/aarch64/atomics.md | 91 +++++++++++++++++++++++++++-- gcc/config/aarch64/iterators.md | 3 + libgcc/config/aarch64/t-lse | 10 +++- 6 files changed, 189 insertions(+), 37 deletions(-) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index c7b96b12bbe..f735c4e5ad8 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -626,7 +626,7 @@ bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT); struct atomic_ool_names { - const char *str[4][4]; + const char *str[5][4]; }; rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx, diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index ce4d7e51d00..ac2f055a09e 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -1610,10 +1610,33 @@ emit_set_insn (rtx x, rtx y) rtx aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y) { - machine_mode mode = SELECT_CC_MODE (code, x, y); - rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM); + machine_mode cmp_mode = GET_MODE (x); + machine_mode cc_mode; + rtx cc_reg; - emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); + if (cmp_mode == E_TImode) + { + gcc_assert (code == NE); + + cc_mode = E_CCmode; + cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); + + rtx x_lo = operand_subword (x, 0, 0, TImode); + rtx y_lo = operand_subword (y, 0, 0, TImode); + emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo)); + + rtx x_hi = operand_subword (x, 1, 0, TImode); + rtx y_hi = operand_subword (y, 1, 0, TImode); + emit_insn (gen_ccmpdi (cc_reg, cc_reg, x_hi, y_hi, + gen_rtx_EQ (cc_mode, cc_reg, const0_rtx), + GEN_INT (AARCH64_EQ))); + } + else + { + cc_mode = SELECT_CC_MODE (code, x, y); + cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); + emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y)); + } return cc_reg; } @@ -6693,7 +6716,7 @@ sizetochar (int size) 'S/T/U/V': Print a FP/SIMD register name for a register list. The register printed is the FP/SIMD register name of X + 0/1/2/3 for S/T/U/V. - 'R': Print a scalar FP/SIMD register name + 1. + 'R': Print a scalar Integer/FP/SIMD register name + 1. 'X': Print bottom 16 bits of integer constant in hex. 'w/x': Print a general register name or the zero register (32-bit or 64-bit). @@ -6885,12 +6908,13 @@ aarch64_print_operand (FILE *f, rtx x, int code) break; case 'R': - if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) - { - output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); - return; - } - asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1); + if (REG_P (x) && FP_REGNUM_P (REGNO (x))) + asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1); + else if (REG_P (x) && GP_REGNUM_P (REGNO (x))) + asm_fprintf (f, "x%d", REGNO (x) - R0_REGNUM + 1); + else + output_operand_lossage ("incompatible register operand for '%%%c'", + code); break; case 'X': @@ -14143,16 +14167,26 @@ static void aarch64_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, rtx model_rtx) { - emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx)); + if (mode == E_TImode) + emit_insn (gen_aarch64_load_exclusive_pair (gen_lowpart (DImode, rval), + gen_highpart (DImode, rval), + mem, model_rtx)); + else + emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx)); } /* Emit store exclusive. */ static void aarch64_emit_store_exclusive (machine_mode mode, rtx bval, - rtx rval, rtx mem, rtx model_rtx) + rtx mem, rtx val, rtx model_rtx) { - emit_insn (gen_aarch64_store_exclusive (mode, bval, rval, mem, model_rtx)); + if (mode == E_TImode) + emit_insn (gen_aarch64_store_exclusive_pair + (bval, mem, operand_subword (val, 0, 0, TImode), + operand_subword (val, 1, 0, TImode), model_rtx)); + else + emit_insn (gen_aarch64_store_exclusive (mode, bval, mem, val, model_rtx)); } /* Mark the previous jump instruction as unlikely. */ @@ -14164,7 +14198,7 @@ aarch64_emit_unlikely_jump (rtx insn) add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); } -/* We store the names of the various atomic helpers in a 4x4 array. +/* We store the names of the various atomic helpers in a 5x4 array. Return the libcall function given MODE, MODEL and NAMES. */ rtx @@ -14188,6 +14222,9 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx, case E_DImode: mode_idx = 3; break; + case E_TImode: + mode_idx = 4; + break; default: gcc_unreachable (); } @@ -14222,9 +14259,11 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx, "__aa64_" #B #N "_rel", \ "__aa64_" #B #N "_acq_rel" } -#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8) +#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \ + { NULL, NULL, NULL, NULL } +#define DEF5(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), DEF0(B, 16) -static const atomic_ool_names aarch64_ool_cas_names = { { DEF4(cas) } }; +static const atomic_ool_names aarch64_ool_cas_names = { { DEF5(cas) } }; const atomic_ool_names aarch64_ool_swp_names = { { DEF4(swp) } }; const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } }; const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } }; @@ -14247,6 +14286,7 @@ const atomic_ool_names aarch64_ool_steor_names = { { DEF4(eor) } }; #undef DEF0 #undef DEF4 +#undef DEF5 /* Expand a compare and swap pattern. */ diff --git a/libgcc/config/aarch64/lse.c b/libgcc/config/aarch64/lse.c index 68ca7df667b..f6114add71a 100644 --- a/libgcc/config/aarch64/lse.c +++ b/libgcc/config/aarch64/lse.c @@ -91,6 +91,7 @@ asm(".arch armv8-a+lse"); #elif SIZE == 4 || SIZE == 8 # define S "" # define MASK "" +#elif SIZE == 16 #else # error #endif @@ -98,9 +99,11 @@ asm(".arch armv8-a+lse"); #if SIZE < 8 # define T unsigned int # define W "w" -#else +#elif SIZE == 8 # define T unsigned long long # define W "" +#else +# define T unsigned __int128 #endif #if MODEL == 1 @@ -138,19 +141,38 @@ T NAME(cas) (T cmp, T new, T *ptr) unsigned tmp; if (have_atomics) - __asm__("cas" A L S " %"W"0, %"W"2, %1" - : "=r"(old), "+m"(*ptr) : "r"(new), "0"(cmp)); + { +#if SIZE == 16 + __asm__("casp" A L " %0, %R0, %2, %R2, %1" + : "=r"(old), "+m"(*ptr) : "r"(new), "0"(cmp)); +#else + __asm__("cas" A L S " %"W"0, %"W"2, %1" + : "=r"(old), "+m"(*ptr) : "r"(new), "0"(cmp)); +#endif + } else - __asm__( - "0: " - "ld" A "xr"S" %"W"0, %1\n\t" - "cmp %"W"0, %"W"4" MASK "\n\t" - "bne 1f\n\t" - "st" L "xr"S" %w2, %"W"3, %1\n\t" - "cbnz %w2, 0b\n" - "1:" - : "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new), "r"(cmp)); - + { +#if SIZE == 16 + __asm__("0: " + "ld" A "xp %0, %R0, %1\n\t" + "cmp %0, %4\n\t" + "ccmp %R0, %R4, #0, eq\n\t" + "bne 1f\n\t" + "st" L "xp %w2, %3, %R3, %1\n\t" + "cbnz %w2, 0b\n" + "1:" + : "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new), "r"(cmp)); +#else + __asm__("0: " + "ld" A "xr"S" %"W"0, %1\n\t" + "cmp %"W"0, %"W"4" MASK "\n\t" + "bne 1f\n\t" + "st" L "xr"S" %w2, %"W"3, %1\n\t" + "cbnz %w2, 0b\n" + "1:" + : "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new), "r"(cmp)); +#endif + } return old; } #endif diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md index 72f9962fe55..fe604606bdd 100644 --- a/gcc/config/aarch64/atomics.md +++ b/gcc/config/aarch64/atomics.md @@ -22,10 +22,10 @@ (define_expand "@atomic_compare_and_swap<mode>" [(match_operand:SI 0 "register_operand" "") ;; bool out - (match_operand:ALLI 1 "register_operand" "") ;; val out - (match_operand:ALLI 2 "aarch64_sync_memory_operand" "") ;; memory - (match_operand:ALLI 3 "nonmemory_operand" "") ;; expected - (match_operand:ALLI 4 "aarch64_reg_or_zero" "") ;; desired + (match_operand:ALLI_TI 1 "register_operand" "") ;; val out + (match_operand:ALLI_TI 2 "aarch64_sync_memory_operand" "") ;; memory + (match_operand:ALLI_TI 3 "nonmemory_operand" "") ;; expected + (match_operand:ALLI_TI 4 "aarch64_reg_or_zero" "") ;; desired (match_operand:SI 5 "const_int_operand") ;; is_weak (match_operand:SI 6 "const_int_operand") ;; mod_s (match_operand:SI 7 "const_int_operand")] ;; mod_f @@ -88,6 +88,30 @@ } ) +(define_insn_and_split "@aarch64_compare_and_swap<mode>" + [(set (reg:CC CC_REGNUM) ;; bool out + (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) + (set (match_operand:JUST_TI 0 "register_operand" "=&r") ;; val out + (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory + (set (match_dup 1) + (unspec_volatile:JUST_TI + [(match_operand:JUST_TI 2 "register_operand" "r") ;; expect + (match_operand:JUST_TI 3 "aarch64_reg_or_zero" "rZ") ;; desired + (match_operand:SI 4 "const_int_operand") ;; is_weak + (match_operand:SI 5 "const_int_operand") ;; mod_s + (match_operand:SI 6 "const_int_operand")] ;; mod_f + UNSPECV_ATOMIC_CMPSW)) + (clobber (match_scratch:SI 7 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + aarch64_split_compare_and_swap (operands); + DONE; + } +) + (define_insn "@aarch64_compare_and_swap<mode>_lse" [(set (match_operand:SI 0 "register_operand" "+r") ;; val out (zero_extend:SI @@ -133,6 +157,28 @@ return "casal<atomic_sfx>\t%<w>0, %<w>2, %1"; }) +(define_insn "@aarch64_compare_and_swap<mode>_lse" + [(set (match_operand:JUST_TI 0 "register_operand" "+r") ;; val out + (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory + (set (match_dup 1) + (unspec_volatile:JUST_TI + [(match_dup 0) ;; expect + (match_operand:JUST_TI 2 "register_operand" "r") ;; desired + (match_operand:SI 3 "const_int_operand")] ;; mod_s + UNSPECV_ATOMIC_CMPSW))] + "TARGET_LSE" +{ + enum memmodel model = memmodel_from_int (INTVAL (operands[3])); + if (is_mm_relaxed (model)) + return "casp\t%0, %R0, %2, %R2, %1"; + else if (is_mm_acquire (model) || is_mm_consume (model)) + return "caspa\t%0, %R0, %2, %R2, %1"; + else if (is_mm_release (model)) + return "caspl\t%0, %R0, %2, %R2, %1"; + else + return "caspal\t%0, %R0, %2, %R2, %1"; +}) + (define_expand "atomic_exchange<mode>" [(match_operand:ALLI 0 "register_operand" "") (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") @@ -650,6 +696,24 @@ } ) +(define_insn "aarch64_load_exclusive_pair" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI + [(match_operand:TI 2 "aarch64_sync_memory_operand" "Q") + (match_operand:SI 3 "const_int_operand")] + UNSPECV_LX)) + (set (match_operand:DI 1 "register_operand" "=r") + (unspec_volatile:DI [(match_dup 2) (match_dup 3)] UNSPECV_LX))] + "" + { + enum memmodel model = memmodel_from_int (INTVAL (operands[3])); + if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) + return "ldxp\t%0, %1, %2"; + else + return "ldaxp\t%0, %1, %2"; + } +) + (define_insn "@aarch64_store_exclusive<mode>" [(set (match_operand:SI 0 "register_operand" "=&r") (unspec_volatile:SI [(const_int 0)] UNSPECV_SX)) @@ -668,6 +732,25 @@ } ) +(define_insn "aarch64_store_exclusive_pair" + [(set (match_operand:SI 0 "register_operand" "=&r") + (unspec_volatile:SI [(const_int 0)] UNSPECV_SX)) + (set (match_operand:TI 1 "aarch64_sync_memory_operand" "=Q") + (unspec_volatile:TI + [(match_operand:DI 2 "aarch64_reg_or_zero" "rZ") + (match_operand:DI 3 "aarch64_reg_or_zero" "rZ") + (match_operand:SI 4 "const_int_operand")] + UNSPECV_SX))] + "" + { + enum memmodel model = memmodel_from_int (INTVAL (operands[3])); + if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) + return "stxp\t%w0, %x2, %x3, %1"; + else + return "stlxp\t%w0, %x2, %x3, %1"; + } +) + (define_expand "mem_thread_fence" [(match_operand:SI 0 "const_int_operand" "")] "" diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 524e4e6929b..dd26bdbbc6b 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -29,6 +29,9 @@ ;; Iterator for HI, SI, DI, some instructions can only work on these modes. (define_mode_iterator GPI_I16 [(HI "AARCH64_ISA_F16") SI DI]) +;; "Iterator" for just TI -- features like @pattern only work with iterators. +(define_mode_iterator JUST_TI [TI]) + ;; Iterator for QI and HI modes (define_mode_iterator SHORT [QI HI]) diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse index e862b0c2448..534ff6efea8 100644 --- a/libgcc/config/aarch64/t-lse +++ b/libgcc/config/aarch64/t-lse @@ -18,15 +18,19 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -# CAS, Swap, Load-and-operate have 4 sizes and 4 memory models -S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), cas swp ldadd ldclr ldeor ldset)) +# Compare-and-swap has 5 sizes and 4 memory models. +S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas)) +O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0))) + +# Swap, Load-and-operate have 4 sizes and 4 memory models +S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset)) O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1))) # Store-and-operate has 4 sizes but only 2 memory models (relaxed, release). S2 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), stadd stclr steor stset)) O2 := $(foreach m, 1 3, $(addsuffix _$(m)$(objext), $(S2))) -LSE_OBJS := $(O1) $(O2) +LSE_OBJS := $(O0) $(O1) $(O2) libgcc-objects += $(LSE_OBJS) have_atomic$(objext) -- 2.17.1