Jennifer Schmitz <[email protected]> writes:
> Thanks for the comments. I updated the patch accordingly and bootstrapped and
> tested again.
> Best, Jennifer
>
> From 9ef423f23afaeaa650d511c51bbc1a167e40b349 Mon Sep 17 00:00:00 2001
> From: Jennifer Schmitz <[email protected]>
> Date: Wed, 7 Aug 2024 08:56:45 -0700
> Subject: [PATCH] PR tree-optimization/101390: Vectorize modulo operator
>
> This patch adds a new vectorization pattern that detects the modulo
> operation where the second operand is a variable.
> It replaces the statement by division, multiplication, and subtraction.
>
> The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
> Ok for mainline?
>
> Signed-off-by: Jennifer Schmitz <[email protected]>
>
> gcc/
>
> PR tree-optimization/101390
> * tree-vect-patterns.cc (vect_recog_mod_var_pattern): Add new pattern.
>
> gcc/testsuite/
> PR tree-optimization/101390
> * gcc.dg/vect/vect-mod-var.c: New test.
> * gcc.target/aarch64/sve/mod_1.c: Likewise.
> * lib/target-supports.exp: New selector expression.
LGTM, thanks. Please give others a couple of days to comment though.
Richard
> ---
> gcc/testsuite/gcc.dg/vect/vect-mod-var.c | 37 +++++++++++
> gcc/testsuite/gcc.target/aarch64/sve/mod_1.c | 28 +++++++++
> gcc/testsuite/lib/target-supports.exp | 5 ++
> gcc/tree-vect-patterns.cc | 66 ++++++++++++++++++++
> 4 files changed, 136 insertions(+)
> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-mod-var.c
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mod_1.c
>
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-mod-var.c b/gcc/testsuite/gcc.dg/vect/vect-mod-var.c
> new file mode 100644
> index 00000000000..eeed318c62b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-mod-var.c
> @@ -0,0 +1,37 @@
> +#include "tree-vect.h"
> +
> +#define N 64
> +
> +__attribute__ ((noinline)) int
> +f (int *restrict a, int *restrict b, int *restrict c)
> +{
> + for (int i = 0; i < N; ++i)
> + c[i] = a[i] % b[i];
> +}
> +
> +#define BASE1 -126
> +#define BASE2 116
> +
> +int
> +main (void)
> +{
> + check_vect ();
> +
> + int a[N], b[N], c[N];
> +
> + for (int i = 0; i < N; ++i)
> + {
> + a[i] = BASE1 + i * 5;
> + b[i] = BASE2 - i * 4;
> + __asm__ volatile ("");
> + }
> +
> + f (a, b, c);
> +
> +#pragma GCC novector
> + for (int i = 0; i < N; ++i)
> + if (c[i] != a[i] % b[i])
> + __builtin_abort ();
> +}
> +
> +/* { dg-final { scan-tree-dump "vect_recog_mod_var_pattern: detected" "vect" { target vect_int_div } } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c
> new file mode 100644
> index 00000000000..eb37f1e3636
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c
> @@ -0,0 +1,28 @@
> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */
> +/* { dg-options "-Ofast -ftree-vectorize -fno-vect-cost-model --save-temps" } */
> +
> +#include <stdint.h>
> +
> +#define DEF_LOOP(TYPE) \
> +void __attribute__ ((noipa)) \
> +mod_##TYPE (TYPE *restrict dst, TYPE *restrict src1, \
> + TYPE *restrict src2, int count) \
> +{ \
> + for (int i = 0; i < count; ++i) \
> + dst[i] = src1[i] % src2[i]; \
> +}
> +
> +#define TEST_ALL(T) \
> + T (int32_t) \
> + T (uint32_t) \
> + T (int64_t) \
> + T (uint64_t)
> +
> +TEST_ALL (DEF_LOOP)
> +
> +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
> +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
> +/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index 26820b146d4..b8072eaf781 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -4258,6 +4258,11 @@ proc check_effective_target_vect_int { } {
> }}]
> }
>
> +# Return 1 if the target supports vector integer division, 0 otherwise.
> +proc check_effective_target_vect_int_div { } {
> + return [check_effective_target_aarch64_sve]
> +}
> +
> # Return 1 if the target supports vectorization of early breaks,
> # 0 otherwise.
> #
> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
> index f52de2b6972..18b322c63b8 100644
> --- a/gcc/tree-vect-patterns.cc
> +++ b/gcc/tree-vect-patterns.cc
> @@ -5264,6 +5264,71 @@ vect_recog_divmod_pattern (vec_info *vinfo,
> return pattern_stmt;
> }
>
> +/* Detects pattern with a modulo operation (S1) where both arguments
> + are variables of integral type.
> + The statement is replaced by division, multiplication, and subtraction.
> + The last statement (S4) is returned.
> +
> + Example:
> + S1 c_t = a_t % b_t;
> +
> + is replaced by
> + S2 x_t = a_t / b_t;
> + S3 y_t = x_t * b_t;
> + S4 z_t = a_t - y_t; */
> +
> +static gimple *
> +vect_recog_mod_var_pattern (vec_info *vinfo,
> + stmt_vec_info stmt_vinfo, tree *type_out)
> +{
> + gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
> + tree oprnd0, oprnd1, vectype, itype;
> + gimple *pattern_stmt, *def_stmt;
> + enum tree_code rhs_code;
> +
> + if (!is_gimple_assign (last_stmt))
> + return NULL;
> +
> + rhs_code = gimple_assign_rhs_code (last_stmt);
> + if (rhs_code != TRUNC_MOD_EXPR)
> + return NULL;
> +
> + oprnd0 = gimple_assign_rhs1 (last_stmt);
> + oprnd1 = gimple_assign_rhs2 (last_stmt);
> + itype = TREE_TYPE (oprnd0);
> + if (TREE_CODE (oprnd0) != SSA_NAME
> + || TREE_CODE (oprnd1) != SSA_NAME
> + || TREE_CODE (itype) != INTEGER_TYPE)
> + return NULL;
> +
> + vectype = get_vectype_for_scalar_type (vinfo, itype);
> +
> + if (!vectype
> + || target_has_vecop_for_code (TRUNC_MOD_EXPR, vectype)
> + || !target_has_vecop_for_code (TRUNC_DIV_EXPR, vectype)
> + || !target_has_vecop_for_code (MULT_EXPR, vectype)
> + || !target_has_vecop_for_code (MINUS_EXPR, vectype))
> + return NULL;
> +
> + tree q, tmp, r;
> + q = vect_recog_temp_ssa_var (itype, NULL);
> + def_stmt = gimple_build_assign (q, TRUNC_DIV_EXPR, oprnd0, oprnd1);
> + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
> +
> + tmp = vect_recog_temp_ssa_var (itype, NULL);
> + def_stmt = gimple_build_assign (tmp, MULT_EXPR, q, oprnd1);
> + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
> +
> + r = vect_recog_temp_ssa_var (itype, NULL);
> + pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, tmp);
> +
> + /* Pattern detected. */
> + *type_out = vectype;
> + vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt);
> +
> + return pattern_stmt;
> +}
> +
> /* Function vect_recog_mixed_size_cond_pattern
>
> Try to find the following pattern:
> @@ -7343,6 +7408,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
> { vect_recog_rotate_pattern, "rotate" },
> { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
> { vect_recog_divmod_pattern, "divmod" },
> + { vect_recog_mod_var_pattern, "modvar" },
> { vect_recog_mult_pattern, "mult" },
> { vect_recog_sat_add_pattern, "sat_add" },
> { vect_recog_sat_sub_pattern, "sat_sub" },