Hi Richard, I have updated the patch according to your comments. Thanks for pointing it out and sorry for the delay!
Sudi 2017-08-07 Sudakshina Das <sudi....@arm.com> * config/aarch64/aarch64-protos.h (enum simd_immediate_check): New check type for aarch64_simd_valid_immediate. (aarch64_output_simd_general_immediate): New declaration. (aarch64_simd_valid_immediate): Update prototype. * config/aarch64/aarch64-simd.md (ior<mode>3): Modified pattern to add support for ORR-immediate. (and<mode>3): Modified pattern to add support for BIC-immediate. * config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function now checks for valid immediate for BIC and ORR based on new enum argument. (aarch64_output_simd_general_immediate): New function to output new BIC/ORR. * config/aarch64/constraints.md (Do): New vector immediate constraint. (Db): Likewise. 2017-08-07 Sudakshina Das <sudi....@arm.com> * gcc.target/aarch64/bic_imm_1.c: New test. * gcc.target/aarch64/orr_imm_1.c: Likewise. From: Richard Earnshaw (lists) <richard.earns...@arm.com> Sent: Friday, May 5, 2017 2:30 PM To: Sudi Das; gcc-patches@gcc.gnu.org Cc: nd; Marcus Shawcroft; James Greenhalgh Subject: Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern On 18/04/17 17:39, Sudi Das wrote: > > Hello all > > This patch adds the support for BIC (vector, immediate) and ORR (vector, > immediate) SIMD patterns to the AArch64 backend. > One of the examples of this is : (with -O2 -ftree-vectorize) > > void > bic_s (short *a) > { > for (int i = 0; i < 1024; i++) > a[i] &= ~(0xff); > } > > which now produces : > bic_s: > add x1, x0, 2048 > .p2align 2 > .L2: > ldr q0, [x0] > bic v0.8h, #255 > str q0, [x0], 16 > cmp x1, x0 > bne .L2 > ret > > instead of > bic_s: > movi v1.8h, 0xff, lsl 8 > add x1, x0, 2048 > .p2align 2 > .L2: > ldr q0, [x0] > and v0.16b, v0.16b, v1.16b > str q0, [x0], 16 > cmp x1, x0 > bne .L2 > ret > > Added new tests and checked for regressions on bootstrapped > aarch64-none-linux-gnu > Ok for stage 1? 
> > Thanks > Sudi > > 2017-04-04 Sudakshina Das <sudi....@arm.com> > > * config/aarch64/aarch64-protos.h (enum simd_immediate_check): New >check type > for aarch64_simd_valid_immediate. > (aarch64_output_simd_general_immediate): New declaration. > (aarch64_simd_valid_immediate): Update prototype. > > * config/aarch64/aarch64-simd.md (*bic_imm_<mode>3): New pattern. > (*ior_imm_<mode>3): Likewise. > > * config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function >now checks > for valid immediate for BIC and ORR based on new enum argument. > (aarch64_output_simd_general_immediate): New function to output new >BIC/ORR. > > * config/aarch64/predicates.md (aarch64_simd_valid_bic_imm_p) : New. > (aarch64_simd_valid_orr_imm_p) : Likewise. > > 2017-04-04 Sudakshina Das <sudi....@arm.com> > > * gcc.target/aarch64/bic_imm_1.c: New test. > * gcc.target/aarch64/orr_imm_1.c: Likewise. > > > patch-7260-2.diff > > > diff --git a/gcc/config/aarch64/aarch64-protos.h > b/gcc/config/aarch64/aarch64-protos.h > index 9543f8c..89cc455 100644 > --- a/gcc/config/aarch64/aarch64-protos.h > +++ b/gcc/config/aarch64/aarch64-protos.h > @@ -297,6 +297,15 @@ enum aarch64_parse_opt_result > AARCH64_PARSE_INVALID_ARG /* Invalid arch, tune, cpu arg. */ > }; > > +/* Enum to distinguish which type of check is to be done in > + aarch64_simd_valid_immediate. This is used as a bitmask where CHECK_ALL > + has both bits set. Adding new types would require changes accordingly. > */ > +enum simd_immediate_check { > + CHECK_I = 1, /* Perform only non-inverted immediate checks (ORR). */ > + CHECK_NI = 2, /* Perform only inverted immediate checks (BIC). */ > + CHECK_ALL = 3 /* Perform all checks (MOVI/MNVI). 
*/ > +}; > + > extern struct tune_params aarch64_tune_params; > > HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); > @@ -334,6 +343,8 @@ rtx aarch64_reverse_mask (enum machine_mode); > bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT); > char *aarch64_output_scalar_simd_mov_immediate (rtx, machine_mode); > char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned); > +char *aarch64_output_simd_general_immediate (rtx, machine_mode, unsigned, > + const char*); > bool aarch64_pad_arg_upward (machine_mode, const_tree); > bool aarch64_pad_reg_upward (machine_mode, const_tree, bool); > bool aarch64_regno_ok_for_base_p (int, bool); > @@ -345,7 +356,8 @@ bool aarch64_simd_imm_zero_p (rtx, machine_mode); > bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode); > bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool); > bool aarch64_simd_valid_immediate (rtx, machine_mode, bool, > - struct simd_immediate_info *); > + struct simd_immediate_info *, > + enum simd_immediate_check w = CHECK_ALL); > bool aarch64_split_dimode_const_store (rtx, rtx); > bool aarch64_symbolic_address_p (rtx); > bool aarch64_uimm12_shift (HOST_WIDE_INT); > diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index c462164..92275dc 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -280,6 +280,26 @@ > [(set_attr "type" "neon_logic<q>")] > ) > > +(define_insn "*bic_imm_<mode>3" > + [(set (match_operand:VDQ_I 0 "register_operand" "=w") > + (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "0") > + (match_operand:VDQ_I 2 "aarch64_simd_valid_bic_imm_p" "")))] > + "TARGET_SIMD" > + { return aarch64_output_simd_general_immediate (operands[2], > + <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), "bic"); } > + [(set_attr "type" "neon_logic<q>")] > +) > + > +(define_insn "*ior_imm_<mode>3" > + [(set (match_operand:VDQ_I 0 "register_operand" "=w") > + (ior:VDQ_I 
(match_operand:VDQ_I 1 "register_operand" "0") > + (match_operand:VDQ_I 2 "aarch64_simd_valid_orr_imm_p" "")))] > + "TARGET_SIMD" > + { return aarch64_output_simd_general_immediate (operands[2], > + <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), "orr"); } > + [(set_attr "type" "neon_logic<q>")] > +) Both of these generate the same RTL constructs as the simd ior<mode>3 and and<mode>3 patterns, so should be merged as subcases of those patterns (and handled with suitable constraint alternatives). R. > + > (define_insn "add<mode>3" > [(set (match_operand:VDQ_I 0 "register_operand" "=w") > (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index 4f769a4..450c42d 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -11066,7 +11066,8 @@ aarch64_vect_float_const_representable_p (rtx x) > /* Return true for valid and false for invalid. */ > bool > aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse, > - struct simd_immediate_info *info) > + struct simd_immediate_info *info, > + enum simd_immediate_check which) > { > #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \ > matches = 1; \ > @@ -11130,54 +11131,65 @@ aarch64_simd_valid_immediate (rtx op, machine_mode > mode, bool inverse, > > do > { > - CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0 > - && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0); > + if (which & CHECK_I) > + { > + CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0 > + && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0); > > - CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1] > - && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0); > + CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1] > + && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0); > > - CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0 > - && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0); > + CHECK (4, 32, 2, bytes[i] == 0 && 
bytes[i + 1] == 0 > + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0); > > - CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0 > - && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0); > + CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0 > + && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0); > > - CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0); > + CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0); > > - CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0); > + CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0); > + } > > - CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff > - && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1); > + if (which & CHECK_NI) > + { > + CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff > + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1); > > - CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1] > - && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1); > + CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1] > + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1); > > - CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff > - && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1); > + CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff > + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1); > > - CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff > - && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1); > + CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff > + && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1); > > - CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1); > + CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1); > > - CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1); > + CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1); > + } > > - CHECK (4, 32, 12, 
bytes[i] == 0xff && bytes[i + 1] == bytes[1] > - && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0); > + /* Shifting ones / 8-bit / 64-bit variants only checked > + for 'ALL' (MOVI/MVNI). */ > + if (which == CHECK_ALL) > + { > + CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1] > + && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0); > > - CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1] > - && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1); > + CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1] > + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1); > > - CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff > - && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0); > + CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff > + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0); > > - CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0 > - && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1); > + CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0 > + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1); > > - CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0); > + CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0); > > - CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff) > - && bytes[i] == bytes[(i + 8) % idx], 0, 0); > + CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff) > + && bytes[i] == bytes[(i + 8) % idx], 0, 0); > + } > } > while (0); > > @@ -12598,6 +12610,47 @@ aarch64_output_simd_mov_immediate (rtx const_vector, > return templ; > } > > +/* This function is similar to aarch64_output_simd_mov_immediate, used for > + immediate versions of 'bic' or 'orr'. 
*/ > +char* > +aarch64_output_simd_general_immediate (rtx const_vector, > + machine_mode mode, > + unsigned width, > + const char *mnemonic) > +{ > + bool is_valid; > + static char templ[40]; > + unsigned int lane_count = 0; > + char element_char; > + > + struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false }; > + > + if (strcmp (mnemonic, "orr") == 0) > + is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, > + &info, CHECK_I); > + else > + is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, > + &info, CHECK_NI); > + > + gcc_assert (is_valid); > + gcc_assert (CONST_INT_P (info.value)); > + > + element_char = sizetochar (info.element_width); > + lane_count = width / info.element_width; > + > + if (lane_count == 1) > + sprintf (templ, "%s\t%%d0, #" HOST_WIDE_INT_PRINT_DEC, > + mnemonic, UINTVAL (info.value)); > + else if (info.shift) > + sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC > + ", %s #%d", mnemonic, lane_count, element_char, > + UINTVAL (info.value), "lsl", info.shift); > + else > + sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC, > + mnemonic, lane_count, element_char, UINTVAL (info.value)); > + return templ; > +} > + > char* > aarch64_output_scalar_simd_mov_immediate (rtx immediate, > machine_mode mode) > diff --git a/gcc/config/aarch64/predicates.md > b/gcc/config/aarch64/predicates.md > index e83d45b..fe65f2b 100644 > --- a/gcc/config/aarch64/predicates.md > +++ b/gcc/config/aarch64/predicates.md > @@ -307,6 +307,18 @@ > return aarch64_simd_shift_imm_p (op, mode, false); > }) > > +(define_special_predicate "aarch64_simd_valid_bic_imm_p" > + (match_code "const_vector") > +{ > + return aarch64_simd_valid_immediate (op, mode, false, NULL, CHECK_NI); > +}) > + > +(define_special_predicate "aarch64_simd_valid_orr_imm_p" > + (match_code "const_vector") > +{ > + return aarch64_simd_valid_immediate (op, mode, false, NULL, CHECK_I); > +}) > + > (define_predicate "aarch64_simd_reg_or_zero" > (and 
(match_code "reg,subreg,const_int,const_double,const_vector") > (ior (match_operand 0 "register_operand") > diff --git a/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c > b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c > new file mode 100644 > index 0000000..d94dd90 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c > @@ -0,0 +1,26 @@ > +/* { dg-options "-O2 -ftree-vectorize" } */ > + > +void > +bic_s (short *a) > +{ > + for (int i = 0; i < 1024; i++) > + a[i] &= ~(0xff); > +} > + > +void > +bic_ss (short *a) > +{ > + for (int i = 0; i < 1024; i++) > + a[i] &= ~(0xff00); > +} > + > +void > +bic_int (int *a) > +{ > + for (int i = 0; i < 1024; i++) > + a[i] &= ~(0xff); > +} > + > +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255" } } */ > +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255, lsl #8" } } */ > +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #255" } } */ > diff --git a/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c > b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c > new file mode 100644 > index 0000000..919a6ef > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c > @@ -0,0 +1,18 @@ > +/* { dg-options "-O2 -ftree-vectorize" } */ > + > +void > +orr_s (short *a) > +{ > + for (int i = 0; i < 1024; i++) > + a[i] |= 0xab; > +} > + > +void > +orr_int (int *a) > +{ > + for (int i = 0; i < 1024; i++) > + a[i] |= 0xab; > +} > + > +/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.8h, #171" } } */ > +/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #171" } } */ >
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index beff28e..5ae73c7 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -308,6 +308,15 @@ enum aarch64_parse_opt_result AARCH64_PARSE_INVALID_ARG /* Invalid arch, tune, cpu arg. */ }; +/* Enum to distinguish which type of check is to be done in + aarch64_simd_valid_immediate. This is used as a bitmask where CHECK_ALL + has both bits set. Adding new types would require changes accordingly. */ +enum simd_immediate_check { + CHECK_I = 1, /* Perform only non-inverted immediate checks (ORR). */ + CHECK_NI = 2, /* Perform only inverted immediate checks (BIC). */ + CHECK_ALL = 3 /* Perform all checks (MOVI/MVNI). */ +}; + extern struct tune_params aarch64_tune_params; HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); @@ -349,6 +358,8 @@ rtx aarch64_reverse_mask (machine_mode); bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT); char *aarch64_output_scalar_simd_mov_immediate (rtx, machine_mode); char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned); +char *aarch64_output_simd_general_immediate (rtx, machine_mode, unsigned, + const char*); bool aarch64_pad_arg_upward (machine_mode, const_tree); bool aarch64_pad_reg_upward (machine_mode, const_tree, bool); bool aarch64_regno_ok_for_base_p (int, bool); @@ -360,7 +371,8 @@ bool aarch64_simd_imm_zero_p (rtx, machine_mode); bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode); bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool); bool aarch64_simd_valid_immediate (rtx, machine_mode, bool, - struct simd_immediate_info *); + struct simd_immediate_info *, + enum simd_immediate_check w = CHECK_ALL); bool aarch64_split_dimode_const_store (rtx, rtx); bool aarch64_symbolic_address_p (rtx); bool aarch64_uimm12_shift (HOST_WIDE_INT); diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 
011fcec0..9e16cbd 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -516,21 +516,45 @@ [(set_attr "type" "neon_fp_abd_<stype><q>")] ) +;; For AND (vector, register) and BIC (vector, immediate) (define_insn "and<mode>3" - [(set (match_operand:VDQ_I 0 "register_operand" "=w") - (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") - (match_operand:VDQ_I 2 "register_operand" "w")))] + [(set (match_operand:VDQ_I 0 "register_operand" "=w,w") + (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0") + (match_operand:VDQ_I 2 "nonmemory_operand" "w,Db")))] "TARGET_SIMD" - "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>" + { + switch (which_alternative) + { + case 0: + return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"; + case 1: + return aarch64_output_simd_general_immediate (operands[2], + <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), "bic"); + default: + gcc_unreachable (); + } + } [(set_attr "type" "neon_logic<q>")] ) +;; For ORR (vector, register) and ORR (vector, immediate) (define_insn "ior<mode>3" - [(set (match_operand:VDQ_I 0 "register_operand" "=w") - (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") - (match_operand:VDQ_I 2 "register_operand" "w")))] + [(set (match_operand:VDQ_I 0 "register_operand" "=w,w") + (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0") + (match_operand:VDQ_I 2 "nonmemory_operand" "w,Do")))] "TARGET_SIMD" - "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>" + { + switch (which_alternative) + { + case 0: + return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"; + case 1: + return aarch64_output_simd_general_immediate (operands[2], + <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), "orr"); + default: + gcc_unreachable (); + } + } [(set_attr "type" "neon_logic<q>")] ) diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 055ebaf..2cce5af 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -11440,7 +11440,8 @@ 
aarch64_vect_float_const_representable_p (rtx x) /* Return true for valid and false for invalid. */ bool aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse, - struct simd_immediate_info *info) + struct simd_immediate_info *info, + enum simd_immediate_check which) { #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \ matches = 1; \ @@ -11504,54 +11505,65 @@ aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse, do { - CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0 - && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0); + if (which & CHECK_I) + { + CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0 + && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0); - CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1] - && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0); + CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0); - CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0 - && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0); + CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0); - CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0 - && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0); + CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0); - CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0); + CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0); - CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0); + CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0); + } - CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff - && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1); + if (which & CHECK_NI) + { + CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1); - CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] 
== bytes[1] - && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1); + CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1); - CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff - && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1); + CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1); - CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff - && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1); + CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1); - CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1); + CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1); - CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1); + CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1); + } - CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1] - && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0); + /* Shifting ones / 8-bit / 64-bit variants only checked + for 'ALL' (MOVI/MVNI). 
*/ + if (which == CHECK_ALL) + { + CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0); - CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1] - && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1); + CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1] + && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1); - CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff - && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0); + CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0); - CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0 - && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1); + CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0 + && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1); - CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0); + CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0); - CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff) - && bytes[i] == bytes[(i + 8) % idx], 0, 0); + CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff) + && bytes[i] == bytes[(i + 8) % idx], 0, 0); + } } while (0); @@ -13029,6 +13041,47 @@ aarch64_output_simd_mov_immediate (rtx const_vector, return templ; } +/* This function is similar to aarch64_output_simd_mov_immediate, used for + immediate versions of 'bic' or 'orr'. 
*/ +char* +aarch64_output_simd_general_immediate (rtx const_vector, + machine_mode mode, + unsigned width, + const char *mnemonic) +{ + bool is_valid; + static char templ[40]; + unsigned int lane_count = 0; + char element_char; + + struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false }; + + if (strcmp (mnemonic, "orr") == 0) + is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, + &info, CHECK_I); + else + is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, + &info, CHECK_NI); + + gcc_assert (is_valid); + gcc_assert (CONST_INT_P (info.value)); + + element_char = sizetochar (info.element_width); + lane_count = width / info.element_width; + + if (lane_count == 1) + sprintf (templ, "%s\t%%d0, #" HOST_WIDE_INT_PRINT_DEC, + mnemonic, UINTVAL (info.value)); + else if (info.shift) + sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC + ", %s #%d", mnemonic, lane_count, element_char, + UINTVAL (info.value), "lsl", info.shift); + else + sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC, + mnemonic, lane_count, element_char, UINTVAL (info.value)); + return templ; +} + char* aarch64_output_scalar_simd_mov_immediate (rtx immediate, machine_mode mode) { diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index 9ce3d4e..e277170 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -189,6 +189,20 @@ (match_test "aarch64_simd_valid_immediate (op, GET_MODE (op), false, NULL)"))) +(define_constraint "Do" + "@internal + A constraint that matches vector of immediates for orr." + (and (match_code "const_vector") + (match_test "aarch64_simd_valid_immediate (op, mode, false, + NULL, CHECK_I)"))) + +(define_constraint "Db" + "@internal + A constraint that matches vector of immediates for bic." 
+ (and (match_code "const_vector") + (match_test "aarch64_simd_valid_immediate (op, mode, false, + NULL, CHECK_NI)"))) + (define_constraint "Dh" "@internal A constraint that matches an immediate operand valid for\ diff --git a/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c new file mode 100644 index 0000000..d94dd90 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c @@ -0,0 +1,26 @@ +/* { dg-options "-O2 -ftree-vectorize" } */ + +void +bic_s (short *a) +{ + for (int i = 0; i < 1024; i++) + a[i] &= ~(0xff); +} + +void +bic_ss (short *a) +{ + for (int i = 0; i < 1024; i++) + a[i] &= ~(0xff00); +} + +void +bic_int (int *a) +{ + for (int i = 0; i < 1024; i++) + a[i] &= ~(0xff); +} + +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255" } } */ +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255, lsl #8" } } */ +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #255" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c new file mode 100644 index 0000000..919a6ef --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c @@ -0,0 +1,18 @@ +/* { dg-options "-O2 -ftree-vectorize" } */ + +void +orr_s (short *a) +{ + for (int i = 0; i < 1024; i++) + a[i] |= 0xab; +} + +void +orr_int (int *a) +{ + for (int i = 0; i < 1024; i++) + a[i] |= 0xab; +} + +/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.8h, #171" } } */ +/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #171" } } */