The SVE ASRD instruction performs an arithmetic shift right by an immediate, for signed division by a power of two (rounding towards zero).
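For context (an illustrative sketch, not part of this patch): C-style signed division by a power of two rounds towards zero, so negative inputs need a bias of (2^shift - 1) added before the arithmetic shift right. ASRD does that bias-and-shift in one instruction; a hypothetical scalar equivalent of a divide by 4 would be:

  #include <stdint.h>

  /* Hypothetical helper, for illustration only: round-towards-zero signed
     division by 4, written as the bias-and-shift that ASRD performs on
     each element.  */
  static inline int8_t
  sdiv4 (int8_t x)
  {
    int8_t bias = (x < 0) ? 3 : 0;   /* 2^2 - 1, applied to negative inputs only.  */
    return (int8_t) ((x + bias) >> 2);
  }

Without ASRD, the vectorizer has to open-code this bias, which is where the cmlt/and/add/sshr sequence in the "compiles to" output below comes from.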
This patch enables the use of ASRD with Neon modes.

For example:

int in[N], out[N];

void
foo (void)
{
  for (int i = 0; i < N; i++)
    out[i] = in[i] / 4;
}

compiles to:

	ldr	q31, [x1, x0]
	cmlt	v30.16b, v31.16b, #0
	and	z30.b, z30.b, 3
	add	v30.16b, v30.16b, v31.16b
	sshr	v30.16b, v30.16b, 2
	str	q30, [x0, x2]
	add	x0, x0, 16
	cmp	x0, 1024

but can just be:

	ldp	q30, q31, [x0], 32
	asrd	z31.b, p7/m, z31.b, #2
	asrd	z30.b, p7/m, z30.b, #2
	stp	q30, q31, [x1], 32
	cmp	x0, x2

The patch was bootstrapped and regtested on aarch64-linux-gnu with no regressions.
OK for mainline?

Signed-off-by: Soumya AR <soum...@nvidia.com>

gcc/ChangeLog:

	* config/aarch64/aarch64-sve.md: Extend sdiv_pow2<mode>3 and
	*sdiv_pow2<mode>3 to support Neon modes.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sve/sve-asrd.c: New test.
---
 gcc/config/aarch64/aarch64-sve.md             | 25 ++++-----
 .../gcc.target/aarch64/sve/sve-asrd.c         | 54 +++++++++++++++++++
 2 files changed, 67 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/sve-asrd.c

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index affdb24a93d..96effe4abed 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -4972,34 +4972,35 @@
 ;; Unpredicated ASRD.
 (define_expand "sdiv_pow2<mode>3"
-  [(set (match_operand:SVE_I 0 "register_operand")
-	(unspec:SVE_I
+  [(set (match_operand:SVE_VDQ_I 0 "register_operand")
+	(unspec:SVE_VDQ_I
 	  [(match_dup 3)
-	   (unspec:SVE_I
-	     [(match_operand:SVE_I 1 "register_operand")
+	   (unspec:SVE_VDQ_I
+	     [(match_operand:SVE_VDQ_I 1 "register_operand")
 	      (match_operand 2 "aarch64_simd_rshift_imm")]
 	     UNSPEC_ASRD)]
 	  UNSPEC_PRED_X))]
   "TARGET_SVE"
   {
-    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
+    operands[3] = aarch64_ptrue_reg (<VPRED>mode,
+				     GET_MODE_UNIT_SIZE (<MODE>mode));
   }
 )

 ;; Predicated ASRD.
 (define_insn "*sdiv_pow2<mode>3"
-  [(set (match_operand:SVE_I 0 "register_operand")
-	(unspec:SVE_I
+  [(set (match_operand:SVE_VDQ_I 0 "register_operand")
+	(unspec:SVE_VDQ_I
 	  [(match_operand:<VPRED> 1 "register_operand")
-	   (unspec:SVE_I
-	     [(match_operand:SVE_I 2 "register_operand")
-	      (match_operand:SVE_I 3 "aarch64_simd_rshift_imm")]
+	   (unspec:SVE_VDQ_I
+	     [(match_operand:SVE_VDQ_I 2 "register_operand")
+	      (match_operand:SVE_VDQ_I 3 "aarch64_simd_rshift_imm")]
 	     UNSPEC_ASRD)]
 	  UNSPEC_PRED_X))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
-     [ w        , Upl , 0 ; *              ] asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
-     [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
+     [ w        , Upl , 0 ; *              ] asrd\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, #%3
+     [ ?&w      , Upl , w ; yes            ] movprfx\t%Z0, %Z2\;asrd\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, #%3
   }
 )
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/sve-asrd.c b/gcc/testsuite/gcc.target/aarch64/sve/sve-asrd.c
new file mode 100644
index 00000000000..00aa8b2380d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/sve-asrd.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast --param aarch64-autovec-preference=asimd-only" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <stdint.h>
+#define N 1024
+
+#define FUNC(M)				\
+M in_##M[N];				\
+M out_##M[N];				\
+void asrd_##M() {			\
+  for (int i = 0; i < N; i++)		\
+    out_##M[i] = in_##M[i] / 4;		\
+}
+
+/*
+** asrd_int8_t:
+** ...
+**	ptrue	(p[0-7]).b, vl1
+** ...
+**	asrd	z[0-9]+\.b, \1/m, z[0-9]+\.b, #2
+** ...
+*/
+FUNC(int8_t)
+
+/*
+** asrd_int16_t:
+** ...
+**	ptrue	(p[0-7]).b, vl2
+** ...
+**	asrd	z[0-9]+\.h, \1/m, z[0-9]+\.h, #2
+** ...
+*/
+FUNC(int16_t)
+
+/*
+** asrd_int32_t:
+** ...
+**	ptrue	(p[0-7]).b, vl4
+** ...
+**	asrd	z[0-9]+\.s, \1/m, z[0-9]+\.s, #2
+** ...
+*/
+FUNC(int32_t)
+
+/*
+** asrd_int64_t:
+** ...
+**	ptrue	(p[0-7]).b, vl8
+** ...
+**	asrd	z[0-9]+\.d, \1/m, z[0-9]+\.d, #2
+** ...
+*/
+FUNC(int64_t)
--
2.43.2