> > + > > +(define_constraint "D3" > > + "@internal > > + A constraint that matches vector of immediates that is with 0 to > > +(bits(mode)/2)-1." > > + (and (match_code "const,const_vector") > > + (match_test "aarch64_const_vec_all_same_in_range_p (op, 0, > > + (GET_MODE_UNIT_BITSIZE (mode) / 2) - 1)"))) > > Having this mapping for D2 and D3, with D2 corresponding to prec/2, kind of > makes D3 a false mnemonic. How about DL instead? (L for "left-shift long" or > "low-part", take your pick) > > Looks good otherwise. >
Wasn't sure if this was an ok with changes or not, so here's the final patch 😊 Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: PR target/106346 * config/aarch64/aarch64-simd.md (vec_widen_<sur>shiftl_lo_<mode>, vec_widen_<sur>shiftl_hi_<mode>): Remove. (aarch64_<sur>shll<mode>_internal): Renamed to... (aarch64_<su>shll<mode>): .. This. (aarch64_<sur>shll2<mode>_internal): Renamed to... (aarch64_<su>shll2<mode>): .. This. (aarch64_<sur>shll_n<mode>, aarch64_<sur>shll2_n<mode>): Re-use new optabs. * config/aarch64/constraints.md (D2, DL): New. * config/aarch64/predicates.md (aarch64_simd_shll_imm_vec): New. gcc/testsuite/ChangeLog: PR target/106346 * gcc.target/aarch64/pr98772.c: Adjust assembly. * gcc.target/aarch64/vect-widen-shift.c: New test. --- inline copy of patch --- diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index d95394101470446e55f25a2397dd112239b6a54d..f67eb70577d0c2d9911d8c867d38a4d0b390337c 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -6387,105 +6387,67 @@ (define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>" [(set_attr "type" "neon_sat_shift_reg<q>")] ) -(define_expand "vec_widen_<sur>shiftl_lo_<mode>" - [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") - (match_operand:SI 2 - "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] - VSHLL))] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); - emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1], - p, operands[2])); - DONE; - } -) - -(define_expand "vec_widen_<sur>shiftl_hi_<mode>" - [(set (match_operand:<VWIDE> 0 "register_operand") - (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") - (match_operand:SI 2 - "immediate_operand" "i")] - VSHLL))] - "TARGET_SIMD" - { - rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, 
<nunits>, true); - emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1], - p, operands[2])); - DONE; - } -) - ;; vshll_n -(define_insn "aarch64_<sur>shll<mode>_internal" - [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (unspec:<VWIDE> [(vec_select:<VHALF> - (match_operand:VQW 1 "register_operand" "w") - (match_operand:VQW 2 "vect_par_cnst_lo_half" "")) - (match_operand:SI 3 - "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] - VSHLL))] +(define_insn "aarch64_<su>shll<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand") + (ashift:<VWIDE> (ANY_EXTEND:<VWIDE> + (match_operand:VD_BHSI 1 "register_operand")) + (match_operand:<VWIDE> 2 + "aarch64_simd_shll_imm_vec")))] "TARGET_SIMD" - { - if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) - return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3"; - else - return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3"; + {@ [cons: =0, 1, 2] + [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2 + [w, w, DL] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2 } [(set_attr "type" "neon_shift_imm_long")] ) -(define_insn "aarch64_<sur>shll2<mode>_internal" - [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (unspec:<VWIDE> [(vec_select:<VHALF> - (match_operand:VQW 1 "register_operand" "w") - (match_operand:VQW 2 "vect_par_cnst_hi_half" "")) - (match_operand:SI 3 - "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] +(define_expand "aarch64_<sur>shll_n<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand") + (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand") + (match_operand:SI 2 + "aarch64_simd_shift_imm_bitsize_<ve_mode>")] VSHLL))] "TARGET_SIMD" { - if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) - return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3"; - else - return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3"; + rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]); + emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft)); + DONE; } - [(set_attr "type" 
"neon_shift_imm_long")] ) -(define_insn "aarch64_<sur>shll_n<mode>" - [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w") - (match_operand:SI 2 - "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] - VSHLL))] +;; vshll_high_n + +(define_insn "aarch64_<su>shll2<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand") + (ashift:<VWIDE> (ANY_EXTEND:<VWIDE> + (vec_select:<VHALF> + (match_operand:VQW 1 "register_operand") + (match_operand:VQW 2 "vect_par_cnst_hi_half"))) + (match_operand:<VWIDE> 3 + "aarch64_simd_shll_imm_vec")))] "TARGET_SIMD" - { - if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) - return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2"; - else - return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2"; + {@ [cons: =0, 1, 2, 3] + [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3 + [w, w, , DL] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3 } [(set_attr "type" "neon_shift_imm_long")] ) -;; vshll_high_n - -(define_insn "aarch64_<sur>shll2_n<mode>" - [(set (match_operand:<VWIDE> 0 "register_operand" "=w") - (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - VSHLL))] +(define_expand "aarch64_<sur>shll2_n<mode>" + [(set (match_operand:<VWIDE> 0 "register_operand") + (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand") + (match_operand:SI 2 + "aarch64_simd_shift_imm_bitsize_<ve_mode>")] + VSHLL))] "TARGET_SIMD" { - if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) - return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2"; - else - return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2"; + rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]); + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); + emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft)); + DONE; } - [(set_attr "type" "neon_shift_imm_long")] ) ;; vrshr_n diff --git a/gcc/config/aarch64/constraints.md 
b/gcc/config/aarch64/constraints.md index 6df1dbec2a8097abe9783ed1670c77a8fad4ca57..371a00827d84d8ea4a06ba2b00a761d3b179ae90 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -468,6 +468,20 @@ (define_constraint "D1" GET_MODE_UNIT_BITSIZE (mode) - 1, GET_MODE_UNIT_BITSIZE (mode) - 1)"))) +(define_constraint "D2" + "@internal + A constraint that matches vector of immediates that is bits(mode)/2." + (and (match_code "const,const_vector") + (match_test "aarch64_simd_shift_imm_vec_exact_top (op, mode)"))) + +(define_constraint "DL" + "@internal + A constraint that matches vector of immediates for left shift long. + That is immediates between 0 to (bits(mode)/2)-1." + (and (match_code "const,const_vector") + (match_test "aarch64_const_vec_all_same_in_range_p (op, 0, + (GET_MODE_UNIT_BITSIZE (mode) / 2) - 1)"))) + (define_constraint "Dr" "@internal A constraint that matches vector of immediates for right shifts." diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index d5a4a1cd9bf8cde8e779de6e0afa531f04892a7b..2d8d1fe25c1de35cb5a2386058cb2901ee46cd82 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -638,6 +638,11 @@ (define_predicate "aarch64_simd_raddsubhn_imm_vec" HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (mode) / 2 - 1))"))) +(define_predicate "aarch64_simd_shll_imm_vec" + (and (match_code "const_vector") + (match_test "aarch64_const_vec_all_same_in_range_p (op, 0, + GET_MODE_UNIT_BITSIZE (mode) / 2)"))) + (define_predicate "aarch64_simd_shift_imm_bitsize_qi" (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 0, 8)"))) diff --git a/gcc/testsuite/gcc.target/aarch64/pr98772.c b/gcc/testsuite/gcc.target/aarch64/pr98772.c index 8259251a7c0b64ae8362ea29ec3cf1d2a9d63547..52ad012dcfe72721b8c987bb826c0ffb8ba3f31e 100644 --- a/gcc/testsuite/gcc.target/aarch64/pr98772.c +++ b/gcc/testsuite/gcc.target/aarch64/pr98772.c @@ -155,4 +155,4 @@ int main () /* { 
dg-final { scan-assembler-times "uaddl\\tv" 2 } } */ /* { dg-final { scan-assembler-times "usubl\\tv" 2 } } */ /* { dg-final { scan-assembler-times "umull\\tv" 2 } } */ -/* { dg-final { scan-assembler-times "shl\\tv" 2 } } */ +/* { dg-final { scan-assembler-times "shll\\tv" 2 } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c new file mode 100644 index 0000000000000000000000000000000000000000..6ee41f63ef8a145c0eb7f213950e7501e058b2fa --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-shift.c @@ -0,0 +1,50 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -save-temps" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ +#include <stdint.h> +#include <string.h> + +#pragma GCC target "+nosve" + +#define ARR_SIZE 1024 + +/* Should produce an shll,shll2 pair*/ +/* +** sshll_opt1: +** ... +** shll v[0-9]+.4s, v[0-9]+.4h, 16 +** shll2 v[0-9]+.4s, v[0-9]+.8h, 16 +** ... +*/ +void sshll_opt1 (int32_t *foo, int16_t *a, int16_t *b) +{ + for( int i = 0; i < ARR_SIZE - 3;i=i+4) + { + foo[i] = a[i] << 16; + foo[i+1] = a[i+1] << 16; + foo[i+2] = a[i+2] << 16; + foo[i+3] = a[i+3] << 16; + } +} + +/* +** sshll_opt2: +** ... +** sxtl v[0-9]+.4s, v[0-9]+.4h +** sxtl2 v[0-9]+.4s, v[0-9]+.8h +** sshl v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** sshl v[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s +** ... +*/ +void sshll_opt2 (int32_t *foo, int16_t *a, int16_t *b) +{ + for( int i = 0; i < ARR_SIZE - 3;i=i+4) + { + foo[i] = a[i] << 16; + foo[i+1] = a[i+1] << 15; + foo[i+2] = a[i+2] << 14; + foo[i+3] = a[i+3] << 17; + } +} + +
rb17620.patch
Description: rb17620.patch