On Mon, Sep 21, 2015 at 12:31:21PM +0100, Matthew Wahab wrote: > I've also removed the CC clobber from the _lse patterns, it was overly > cautious. > > Update patch attached, Ok for trunk? > Matthew
OK. Thanks, James > > gcc/ > 2015-09-21 Matthew Wahab <matthew.wa...@arm.com> > > * config/aarch64/aarch64-protos.h > (aarch64_atomic_ldop_supported_p): Declare. > * config/aarch64/aarch64.c (aarch64_atomic_ldop_supported_p): New. > (enum aarch64_atomic_load_op_code): New. > (aarch64_emit_atomic_load_op): New. > (aarch64_gen_atomic_ldop): Update to support load-operate > patterns. > * config/aarch64/atomics.md (atomic_<atomic_optab><mode>): Change > to an expander. > (aarch64_atomic_<atomic_optab><mode>): New. > (aarch64_atomic_<atomic_optab><mode>_lse): New. > (atomic_fetch_<atomic_optab><mode>): Change to an expander. > (aarch64_atomic_fetch_<atomic_optab><mode>): New. > (aarch64_atomic_fetch_<atomic_optab><mode>_lse): New. > > gcc/testsuite/ > 2015-09-21 Matthew Wahab <matthew.wa...@arm.com> > > * gcc.target/aarch64/atomic-inst-ldadd.c: New. > * gcc.target/aarch64/atomic-inst-ldlogic.c: New. > > From 5d1bc64d7e9509374076e4c4ff5a285d4b073f24 Mon Sep 17 00:00:00 2001 > From: Matthew Wahab <matthew.wa...@arm.com> > Date: Fri, 7 Aug 2015 17:10:42 +0100 > Subject: [PATCH 4/5] Use atomic instructions for fetch-update patterns. 
> > Change-Id: I39759f02e61039067ccaabfd52039e4804eddf2f > --- > gcc/config/aarch64/aarch64-protos.h | 2 + > gcc/config/aarch64/aarch64.c | 175 > ++++++++++++++++++++- > gcc/config/aarch64/atomics.md | 101 ++++++++++-- > .../gcc.target/aarch64/atomic-inst-ldadd.c | 58 +++++++ > .../gcc.target/aarch64/atomic-inst-ldlogic.c | 109 +++++++++++++ > 5 files changed, 433 insertions(+), 12 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c > > diff --git a/gcc/config/aarch64/aarch64-protos.h > b/gcc/config/aarch64/aarch64-protos.h > index eba4c76..76ebd6f 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -378,6 +378,8 @@ rtx aarch64_load_tp (rtx); > void aarch64_expand_compare_and_swap (rtx op[]); > void aarch64_split_compare_and_swap (rtx op[]); > void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx); > + > +bool aarch64_atomic_ldop_supported_p (enum rtx_code); > void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx); > void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index dc05c6e..3a1b434 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -11064,6 +11064,32 @@ aarch64_expand_compare_and_swap (rtx operands[]) > emit_insn (gen_rtx_SET (bval, x)); > } > > +/* Test whether the target supports using an atomic load-operate instruction. > + CODE is the operation. Returns FALSE if the operation isn't supported by > + the architecture. 
*/ > + > +bool > +aarch64_atomic_ldop_supported_p (enum rtx_code code) > +{ > + if (!TARGET_LSE) > + return false; > + > + switch (code) > + { > + case SET: > + case AND: > + case IOR: > + case XOR: > + case MINUS: > + case PLUS: > + return true; > + default: > + return false; > + } > +} > + > /* Emit a barrier, that is appropriate for memory model MODEL, at the end of > a > sequence implementing an atomic operation. */ > > @@ -11206,26 +11232,169 @@ aarch64_emit_atomic_swap (machine_mode mode, rtx > dst, rtx value, > emit_insn (gen (dst, mem, value, model)); > } > > -/* Emit an atomic operation where the architecture supports it. */ > +/* Operations supported by aarch64_emit_atomic_load_op. */ > + > +enum aarch64_atomic_load_op_code > +{ > + AARCH64_LDOP_PLUS, /* A + B */ > + AARCH64_LDOP_XOR, /* A ^ B */ > + AARCH64_LDOP_OR, /* A | B */ > + AARCH64_LDOP_BIC /* A & ~B */ > +}; > + > +/* Emit an atomic load-operate. */ > + > +static void > +aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code, > + machine_mode mode, rtx dst, rtx src, > + rtx mem, rtx model) > +{ > + typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx); > + const aarch64_atomic_load_op_fn plus[] = > + { > + gen_aarch64_atomic_loadaddqi, > + gen_aarch64_atomic_loadaddhi, > + gen_aarch64_atomic_loadaddsi, > + gen_aarch64_atomic_loadadddi > + }; > + const aarch64_atomic_load_op_fn eor[] = > + { > + gen_aarch64_atomic_loadeorqi, > + gen_aarch64_atomic_loadeorhi, > + gen_aarch64_atomic_loadeorsi, > + gen_aarch64_atomic_loadeordi > + }; > + const aarch64_atomic_load_op_fn ior[] = > + { > + gen_aarch64_atomic_loadsetqi, > + gen_aarch64_atomic_loadsethi, > + gen_aarch64_atomic_loadsetsi, > + gen_aarch64_atomic_loadsetdi > + }; > + const aarch64_atomic_load_op_fn bic[] = > + { > + gen_aarch64_atomic_loadclrqi, > + gen_aarch64_atomic_loadclrhi, > + gen_aarch64_atomic_loadclrsi, > + gen_aarch64_atomic_loadclrdi > + }; > + aarch64_atomic_load_op_fn gen; > + int idx = 0; > + > + switch 
(mode) > + { > + case QImode: idx = 0; break; > + case HImode: idx = 1; break; > + case SImode: idx = 2; break; > + case DImode: idx = 3; break; > + default: > + gcc_unreachable (); > + } > + > + switch (code) > + { > + case AARCH64_LDOP_PLUS: gen = plus[idx]; break; > + case AARCH64_LDOP_XOR: gen = eor[idx]; break; > + case AARCH64_LDOP_OR: gen = ior[idx]; break; > + case AARCH64_LDOP_BIC: gen = bic[idx]; break; > + default: > + gcc_unreachable (); > + } > + > + emit_insn (gen (dst, mem, src, model)); > +} > + > +/* Emit an atomic load+operate. CODE is the operation. OUT_DATA is the > + location to store the data read from memory. MEM is the memory location > to > + read and modify. MODEL_RTX is the memory ordering to use. VALUE is the > + second operand for the operation. Either OUT_DATA or OUT_RESULT, but not > + both, can be NULL. */ > > void > aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, > rtx mem, rtx value, rtx model_rtx) > { > machine_mode mode = GET_MODE (mem); > + machine_mode wmode = (mode == DImode ? DImode : SImode); > + const bool short_mode = (mode < SImode); > + aarch64_atomic_load_op_code ldop_code; > + rtx src; > + rtx x; > + > + if (out_data) > + out_data = gen_lowpart (mode, out_data); > > - out_data = gen_lowpart (mode, out_data); > + /* Make sure the value is in a register, putting it into a destination > + register if it needs to be manipulated. */ > + if (!register_operand (value, mode) > + || code == AND || code == MINUS) > + { > + src = out_data; > + emit_move_insn (src, gen_lowpart (mode, value)); > + } > + else > + src = value; > + gcc_assert (register_operand (src, mode)); > > + /* Preprocess the data for the operation as necessary. If the operation is > + a SET then emit a swap instruction and finish. 
*/ > switch (code) > { > case SET: > - aarch64_emit_atomic_swap (mode, out_data, value, mem, model_rtx); > + aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx); > return; > > + case MINUS: > + /* Negate the value and treat it as a PLUS. */ > + { > + rtx neg_src; > + > + /* Resize the value if necessary. */ > + if (short_mode) > + src = gen_lowpart (wmode, src); > + > + neg_src = gen_rtx_NEG (wmode, src); > + emit_insn (gen_rtx_SET (src, neg_src)); > + > + if (short_mode) > + src = gen_lowpart (mode, src); > + } > + /* Fall-through. */ > + case PLUS: > + ldop_code = AARCH64_LDOP_PLUS; > + break; > + > + case IOR: > + ldop_code = AARCH64_LDOP_OR; > + break; > + > + case XOR: > + ldop_code = AARCH64_LDOP_XOR; > + break; > + > + case AND: > + { > + rtx not_src; > + > + /* Resize the value if necessary. */ > + if (short_mode) > + src = gen_lowpart (wmode, src); > + > + not_src = gen_rtx_NOT (wmode, src); > + emit_insn (gen_rtx_SET (src, not_src)); > + > + if (short_mode) > + src = gen_lowpart (mode, src); > + } > + ldop_code = AARCH64_LDOP_BIC; > + break; > + > default: > /* The operation can't be done with atomic instructions. */ > gcc_unreachable (); > } > + > + aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, > model_rtx); > } > > /* Split an atomic operation. */ > diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md > index 11a9d13..e0d8856 100644 > --- a/gcc/config/aarch64/atomics.md > +++ b/gcc/config/aarch64/atomics.md > @@ -225,23 +225,63 @@ > } > ) > > -(define_insn_and_split "atomic_<atomic_optab><mode>" > +(define_expand "atomic_<atomic_optab><mode>" > + [(match_operand:ALLI 0 "aarch64_sync_memory_operand" "") > + (atomic_op:ALLI > + (match_operand:ALLI 1 "<atomic_op_operand>" "") > + (match_operand:SI 2 "const_int_operand"))] > + "" > + { > + rtx (*gen) (rtx, rtx, rtx); > + > + /* Use an atomic load-operate instruction when possible. 
*/ > + if (aarch64_atomic_ldop_supported_p (<CODE>)) > + gen = gen_aarch64_atomic_<atomic_optab><mode>_lse; > + else > + gen = gen_aarch64_atomic_<atomic_optab><mode>; > + > + emit_insn (gen (operands[0], operands[1], operands[2])); > + > + DONE; > + } > +) > + > +(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>" > + [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q") > + (unspec_volatile:ALLI > + [(atomic_op:ALLI (match_dup 0) > + (match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>")) > + (match_operand:SI 2 "const_int_operand")] > + UNSPECV_ATOMIC_OP)) > + (clobber (reg:CC CC_REGNUM)) > + (clobber (match_scratch:ALLI 3 "=&r")) > + (clobber (match_scratch:SI 4 "=&r"))] > + "" > + "#" > + "&& reload_completed" > + [(const_int 0)] > + { > + aarch64_split_atomic_op (<CODE>, NULL, operands[3], operands[0], > + operands[1], operands[2], operands[4]); > + DONE; > + } > +) > + > +(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>_lse" > [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q") > (unspec_volatile:ALLI > [(atomic_op:ALLI (match_dup 0) > (match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>")) > - (match_operand:SI 2 "const_int_operand")] ;; model > + (match_operand:SI 2 "const_int_operand")] > UNSPECV_ATOMIC_OP)) > - (clobber (reg:CC CC_REGNUM)) > - (clobber (match_scratch:ALLI 3 "=&r")) > - (clobber (match_scratch:SI 4 "=&r"))] > - "" > + (clobber (match_scratch:ALLI 3 "=&r"))] > + "TARGET_LSE" > "#" > "&& reload_completed" > [(const_int 0)] > { > - aarch64_split_atomic_op (<CODE>, NULL, operands[3], operands[0], > - operands[1], operands[2], operands[4]); > + aarch64_gen_atomic_ldop (<CODE>, operands[3], operands[0], > + operands[1], operands[2]); > DONE; > } > ) > @@ -268,7 +308,30 @@ > } > ) > > -(define_insn_and_split "atomic_fetch_<atomic_optab><mode>" > +;; Load-operate-store, returning the updated memory data. 
> + > +(define_expand "atomic_fetch_<atomic_optab><mode>" > + [(match_operand:ALLI 0 "register_operand" "") > + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") > + (atomic_op:ALLI > + (match_operand:ALLI 2 "<atomic_op_operand>" "") > + (match_operand:SI 3 "const_int_operand"))] > + "" > +{ > + rtx (*gen) (rtx, rtx, rtx, rtx); > + > + /* Use an atomic load-operate instruction when possible. */ > + if (aarch64_atomic_ldop_supported_p (<CODE>)) > + gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>_lse; > + else > + gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>; > + > + emit_insn (gen (operands[0], operands[1], operands[2], operands[3])); > + > + DONE; > +}) > + > +(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>" > [(set (match_operand:ALLI 0 "register_operand" "=&r") > (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) > (set (match_dup 1) > @@ -291,6 +354,26 @@ > } > ) > > +(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>_lse" > + [(set (match_operand:ALLI 0 "register_operand" "=&r") > + (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) > + (set (match_dup 1) > + (unspec_volatile:ALLI > + [(atomic_op:ALLI (match_dup 1) > + (match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>")) > + (match_operand:SI 3 "const_int_operand")] > + UNSPECV_ATOMIC_LDOP))] > + "TARGET_LSE" > + "#" > + "&& reload_completed" > + [(const_int 0)] > + { > + aarch64_gen_atomic_ldop (<CODE>, operands[0], operands[1], > + operands[2], operands[3]); > + DONE; > + } > +) > + > (define_insn_and_split "atomic_fetch_nand<mode>" > [(set (match_operand:ALLI 0 "register_operand" "=&r") > (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) > diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c > b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c > new file mode 100644 > index 0000000..c21d2ed > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c > @@ -0,0 +1,58 @@ > +/* { dg-do compile 
} */ > +/* { dg-options "-O2 -march=armv8-a+lse" } */ > + > +/* Test ARMv8.1-A Load-ADD instruction. */ > + > +#include "atomic-inst-ops.inc" > + > +#define TEST TEST_ONE > + > +#define LOAD_ADD(FN, TY, MODEL) > \ > + TY FNNAME (FN, TY) (TY* val, TY* foo) > \ > + { \ > + return __atomic_fetch_add (val, foo, MODEL); \ > + } > + > +#define LOAD_ADD_NORETURN(FN, TY, MODEL) \ > + void FNNAME (FN, TY) (TY* val, TY* foo) \ > + { \ > + __atomic_fetch_add (val, foo, MODEL); \ > + } > + > +#define LOAD_SUB(FN, TY, MODEL) > \ > + TY FNNAME (FN, TY) (TY* val, TY* foo) > \ > + { \ > + return __atomic_fetch_sub (val, foo, MODEL); \ > + } > + > +#define LOAD_SUB_NORETURN(FN, TY, MODEL) \ > + void FNNAME (FN, TY) (TY* val, TY* foo) \ > + { \ > + __atomic_fetch_sub (val, foo, MODEL); \ > + } > + > + > +TEST (load_add, LOAD_ADD) > +TEST (load_add_notreturn, LOAD_ADD_NORETURN) > + > +TEST (load_sub, LOAD_SUB) > +TEST (load_sub_notreturn, LOAD_SUB_NORETURN) > + > +/* { dg-final { scan-assembler-times "ldaddb\t" 8} } */ > +/* { dg-final { scan-assembler-times "ldaddab\t" 16} } */ > +/* { dg-final { scan-assembler-times "ldaddlb\t" 8} } */ > +/* { dg-final { scan-assembler-times "ldaddalb\t" 16} } */ > + > +/* { dg-final { scan-assembler-times "ldaddh\t" 8} } */ > +/* { dg-final { scan-assembler-times "ldaddah\t" 16} } */ > +/* { dg-final { scan-assembler-times "ldaddlh\t" 8} } */ > +/* { dg-final { scan-assembler-times "ldaddalh\t" 16} } */ > + > +/* { dg-final { scan-assembler-times "ldadd\t" 16} } */ > +/* { dg-final { scan-assembler-times "ldadda\t" 32} } */ > +/* { dg-final { scan-assembler-times "ldaddl\t" 16} } */ > +/* { dg-final { scan-assembler-times "ldaddal\t" 32} } */ > + > +/* { dg-final { scan-assembler-not "ldaxr\t" } } */ > +/* { dg-final { scan-assembler-not "stlxr\t" } } */ > +/* { dg-final { scan-assembler-not "dmb" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c > b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c > new file mode 
100644 > index 0000000..fd0f484 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c > @@ -0,0 +1,109 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -march=armv8-a+lse" } */ > + > +/* Test ARMv8.1-A LD<logic-op> instruction. */ > + > +#include "atomic-inst-ops.inc" > + > +#define TEST TEST_ONE > + > +#define LOAD_OR(FN, TY, MODEL) > \ > + TY FNNAME (FN, TY) (TY* val, TY* foo) > \ > + { \ > + return __atomic_fetch_or (val, foo, MODEL); > \ > + } > + > +#define LOAD_OR_NORETURN(FN, TY, MODEL) > \ > + void FNNAME (FN, TY) (TY* val, TY* foo) \ > + { \ > + __atomic_fetch_or (val, foo, MODEL); \ > + } > + > +#define LOAD_AND(FN, TY, MODEL) > \ > + TY FNNAME (FN, TY) (TY* val, TY* foo) > \ > + { \ > + return __atomic_fetch_and (val, foo, MODEL); \ > + } > + > +#define LOAD_AND_NORETURN(FN, TY, MODEL) \ > + void FNNAME (FN, TY) (TY* val, TY* foo) \ > + { \ > + __atomic_fetch_and (val, foo, MODEL); \ > + } > + > +#define LOAD_XOR(FN, TY, MODEL) > \ > + TY FNNAME (FN, TY) (TY* val, TY* foo) > \ > + { \ > + return __atomic_fetch_xor (val, foo, MODEL); \ > + } > + > +#define LOAD_XOR_NORETURN(FN, TY, MODEL) \ > + void FNNAME (FN, TY) (TY* val, TY* foo) \ > + { \ > + __atomic_fetch_xor (val, foo, MODEL); \ > + } > + > + > +TEST (load_or, LOAD_OR) > +TEST (load_or_notreturn, LOAD_OR_NORETURN) > + > +TEST (load_and, LOAD_AND) > +TEST (load_and_notreturn, LOAD_AND_NORETURN) > + > +TEST (load_xor, LOAD_XOR) > +TEST (load_xor_notreturn, LOAD_XOR_NORETURN) > + > +/* Load-OR. 
*/ > + > +/* { dg-final { scan-assembler-times "ldsetb\t" 4} } */ > +/* { dg-final { scan-assembler-times "ldsetab\t" 8} } */ > +/* { dg-final { scan-assembler-times "ldsetlb\t" 4} } */ > +/* { dg-final { scan-assembler-times "ldsetalb\t" 8} } */ > + > +/* { dg-final { scan-assembler-times "ldseth\t" 4} } */ > +/* { dg-final { scan-assembler-times "ldsetah\t" 8} } */ > +/* { dg-final { scan-assembler-times "ldsetlh\t" 4} } */ > +/* { dg-final { scan-assembler-times "ldsetalh\t" 8} } */ > + > +/* { dg-final { scan-assembler-times "ldset\t" 8} } */ > +/* { dg-final { scan-assembler-times "ldseta\t" 16} } */ > +/* { dg-final { scan-assembler-times "ldsetl\t" 8} } */ > +/* { dg-final { scan-assembler-times "ldsetal\t" 16} } */ > + > +/* Load-AND. */ > + > +/* { dg-final { scan-assembler-times "ldclrb\t" 4} } */ > +/* { dg-final { scan-assembler-times "ldclrab\t" 8} } */ > +/* { dg-final { scan-assembler-times "ldclrlb\t" 4} } */ > +/* { dg-final { scan-assembler-times "ldclralb\t" 8} } */ > + > +/* { dg-final { scan-assembler-times "ldclrh\t" 4} } */ > +/* { dg-final { scan-assembler-times "ldclrah\t" 8} } */ > +/* { dg-final { scan-assembler-times "ldclrlh\t" 4} } */ > +/* { dg-final { scan-assembler-times "ldclralh\t" 8} } */ > + > +/* { dg-final { scan-assembler-times "ldclr\t" 8} } */ > +/* { dg-final { scan-assembler-times "ldclra\t" 16} } */ > +/* { dg-final { scan-assembler-times "ldclrl\t" 8} } */ > +/* { dg-final { scan-assembler-times "ldclral\t" 16} } */ > + > +/* Load-XOR. 
*/ > + > +/* { dg-final { scan-assembler-times "ldeorb\t" 4} } */ > +/* { dg-final { scan-assembler-times "ldeorab\t" 8} } */ > +/* { dg-final { scan-assembler-times "ldeorlb\t" 4} } */ > +/* { dg-final { scan-assembler-times "ldeoralb\t" 8} } */ > + > +/* { dg-final { scan-assembler-times "ldeorh\t" 4} } */ > +/* { dg-final { scan-assembler-times "ldeorah\t" 8} } */ > +/* { dg-final { scan-assembler-times "ldeorlh\t" 4} } */ > +/* { dg-final { scan-assembler-times "ldeoralh\t" 8} } */ > + > +/* { dg-final { scan-assembler-times "ldeor\t" 8} } */ > +/* { dg-final { scan-assembler-times "ldeora\t" 16} } */ > +/* { dg-final { scan-assembler-times "ldeorl\t" 8} } */ > +/* { dg-final { scan-assembler-times "ldeoral\t" 16} } */ > + > +/* { dg-final { scan-assembler-not "ldaxr\t" } } */ > +/* { dg-final { scan-assembler-not "stlxr\t" } } */ > +/* { dg-final { scan-assembler-not "dmb" } } */ > -- > 2.1.4 >