Jennifer Schmitz <jschm...@nvidia.com> writes:
> Thanks for the comments. I updated the patch accordingly and bootstrapped and
> tested again.
> Best,
> Jennifer
>
> From 9ef423f23afaeaa650d511c51bbc1a167e40b349 Mon Sep 17 00:00:00 2001
> From: Jennifer Schmitz <jschm...@nvidia.com>
> Date: Wed, 7 Aug 2024 08:56:45 -0700
> Subject: [PATCH] PR tree-optimization/101390: Vectorize modulo operator
>
> This patch adds a new vectorization pattern that detects the modulo
> operation where the second operand is a variable.
> It replaces the statement by division, multiplication, and subtraction.
>
> The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
> Ok for mainline?
>
> Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
>
> gcc/
> 	PR tree-optimization/101390
> 	* tree-vect-patterns.cc (vect_recog_mod_var_pattern): Add new pattern.
>
> gcc/testsuite/
> 	PR tree-optimization/101390
> 	* gcc.dg/vect/vect-mod-var.c: New test.
> 	* gcc.target/aarch64/sve/mod_1.c: Likewise.
> 	* lib/target-supports.exp: New selector expression.
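In scalar terms, the new pattern performs the usual divide/multiply/subtract
expansion of %.  A minimal sketch of the transformed loop is below
(illustrative only, not part of the patch; function and variable names are
made up, and the identity relies on C's truncating division with a nonzero,
non-overflowing divisor):

    /* Sketch of what the pattern effectively turns the loop body into.  */
    void
    mod_expanded (int *restrict c, int *restrict a, int *restrict b, int n)
    {
      for (int i = 0; i < n; ++i)
        {
          int q = a[i] / b[i];   /* division        */
          int m = q * b[i];      /* multiplication  */
          c[i] = a[i] - m;       /* subtraction; equals a[i] % b[i]  */
        }
    }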
LGTM, thanks.  Please give others a couple of days to comment though.

Richard

> ---
>  gcc/testsuite/gcc.dg/vect/vect-mod-var.c     | 37 +++++++++++
>  gcc/testsuite/gcc.target/aarch64/sve/mod_1.c | 28 +++++++++
>  gcc/testsuite/lib/target-supports.exp        |  5 ++
>  gcc/tree-vect-patterns.cc                    | 66 ++++++++++++++++++++
>  4 files changed, 136 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.dg/vect/vect-mod-var.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mod_1.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-mod-var.c b/gcc/testsuite/gcc.dg/vect/vect-mod-var.c
> new file mode 100644
> index 00000000000..eeed318c62b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-mod-var.c
> @@ -0,0 +1,37 @@
> +#include "tree-vect.h"
> +
> +#define N 64
> +
> +__attribute__ ((noinline)) void
> +f (int *restrict a, int *restrict b, int *restrict c)
> +{
> +  for (int i = 0; i < N; ++i)
> +    c[i] = a[i] % b[i];
> +}
> +
> +#define BASE1 -126
> +#define BASE2 116
> +
> +int
> +main (void)
> +{
> +  check_vect ();
> +
> +  int a[N], b[N], c[N];
> +
> +  for (int i = 0; i < N; ++i)
> +    {
> +      a[i] = BASE1 + i * 5;
> +      b[i] = BASE2 - i * 4;
> +      __asm__ volatile ("");
> +    }
> +
> +  f (a, b, c);
> +
> +#pragma GCC novector
> +  for (int i = 0; i < N; ++i)
> +    if (c[i] != a[i] % b[i])
> +      __builtin_abort ();
> +}
> +
> +/* { dg-final { scan-tree-dump "vect_recog_mod_var_pattern: detected" "vect" { target vect_int_div } } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c
> new file mode 100644
> index 00000000000..eb37f1e3636
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c
> @@ -0,0 +1,28 @@
> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-options "-Ofast -ftree-vectorize -fno-vect-cost-model --save-temps" } */
> +
> +#include <stdint.h>
> +
> +#define DEF_LOOP(TYPE) \
> +void __attribute__ ((noipa)) \
> +mod_##TYPE (TYPE *restrict dst, TYPE *restrict src1, \
> +            TYPE *restrict src2, int count) \
> +{ \
> +  for (int i = 0; i < count; ++i) \
> +    dst[i] = src1[i] % src2[i]; \
> +}
> +
> +#define TEST_ALL(T) \
> +  T (int32_t) \
> +  T (uint32_t) \
> +  T (int64_t) \
> +  T (uint64_t)
> +
> +TEST_ALL (DEF_LOOP)
> +
> +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index 26820b146d4..b8072eaf781 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -4258,6 +4258,11 @@ proc check_effective_target_vect_int { } {
>      }}]
>  }
>
> +# Return 1 if the target supports vector integer division, 0 otherwise.
> +proc check_effective_target_vect_int_div { } {
> +    return [check_effective_target_aarch64_sve]
> +}
> +
>  # Return 1 if the target supports vectorization of early breaks,
>  # 0 otherwise.
>  #
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index f52de2b6972..18b322c63b8 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -5264,6 +5264,71 @@ vect_recog_divmod_pattern (vec_info *vinfo,
>    return pattern_stmt;
>  }
>
> +/* Detects pattern with a modulo operation (S1) where both arguments
> +   are variables of integral type.
> +   The statement is replaced by division, multiplication, and subtraction.
> +   The last statement (S4) is returned.
> +
> +   Example:
> +   S1 c_t = a_t % b_t;
> +
> +   is replaced by
> +   S2 x_t = a_t / b_t;
> +   S3 y_t = x_t * b_t;
> +   S4 z_t = a_t - y_t;  */
> +
> +static gimple *
> +vect_recog_mod_var_pattern (vec_info *vinfo,
> +                            stmt_vec_info stmt_vinfo, tree *type_out)
> +{
> +  gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
> +  tree oprnd0, oprnd1, vectype, itype;
> +  gimple *pattern_stmt, *def_stmt;
> +  enum tree_code rhs_code;
> +
> +  if (!is_gimple_assign (last_stmt))
> +    return NULL;
> +
> +  rhs_code = gimple_assign_rhs_code (last_stmt);
> +  if (rhs_code != TRUNC_MOD_EXPR)
> +    return NULL;
> +
> +  oprnd0 = gimple_assign_rhs1 (last_stmt);
> +  oprnd1 = gimple_assign_rhs2 (last_stmt);
> +  itype = TREE_TYPE (oprnd0);
> +  if (TREE_CODE (oprnd0) != SSA_NAME
> +      || TREE_CODE (oprnd1) != SSA_NAME
> +      || TREE_CODE (itype) != INTEGER_TYPE)
> +    return NULL;
> +
> +  vectype = get_vectype_for_scalar_type (vinfo, itype);
> +
> +  if (!vectype
> +      || target_has_vecop_for_code (TRUNC_MOD_EXPR, vectype)
> +      || !target_has_vecop_for_code (TRUNC_DIV_EXPR, vectype)
> +      || !target_has_vecop_for_code (MULT_EXPR, vectype)
> +      || !target_has_vecop_for_code (MINUS_EXPR, vectype))
> +    return NULL;
> +
> +  tree q, tmp, r;
> +  q = vect_recog_temp_ssa_var (itype, NULL);
> +  def_stmt = gimple_build_assign (q, TRUNC_DIV_EXPR, oprnd0, oprnd1);
> +  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
> +
> +  tmp = vect_recog_temp_ssa_var (itype, NULL);
> +  def_stmt = gimple_build_assign (tmp, MULT_EXPR, q, oprnd1);
> +  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
> +
> +  r = vect_recog_temp_ssa_var (itype, NULL);
> +  pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, tmp);
> +
> +  /* Pattern detected.  */
> +  *type_out = vectype;
> +  vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt);
> +
> +  return pattern_stmt;
> +}
> +
>  /* Function vect_recog_mixed_size_cond_pattern
>
>     Try to find the following pattern:
> @@ -7343,6 +7408,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
>    { vect_recog_rotate_pattern, "rotate" },
>    { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
>    { vect_recog_divmod_pattern, "divmod" },
> +  { vect_recog_mod_var_pattern, "modvar" },
>    { vect_recog_mult_pattern, "mult" },
>    { vect_recog_sat_add_pattern, "sat_add" },
>    { vect_recog_sat_sub_pattern, "sat_sub" },
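As a quick sanity check of the expansion's semantics (a standalone sketch,
not part of the patch): C's truncating division guarantees
a == (a / b) * b + a % b for any nonzero b, so the TRUNC_DIV/MULT/MINUS
sequence reproduces % exactly, including for negative operands.

    /* Sketch: verify a - (a / b) * b == a % b over a small range,
       including negative values; b == 0 is skipped.  */
    #include <stdlib.h>

    int
    main (void)
    {
      for (int a = -64; a <= 64; ++a)
        for (int b = -64; b <= 64; ++b)
          {
            if (b == 0)
              continue;
            if (a - (a / b) * b != a % b)
              abort ();
          }
      return 0;
    }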