> On 23 Aug 2024, at 06:21, Andrew Pinski <pins...@gmail.com> wrote: > > External email: Use caution opening links or attachments > > > On Thu, Aug 22, 2024 at 11:28 AM Andrew Pinski <pins...@gmail.com> wrote: >> >> On Thu, Aug 22, 2024 at 4:12 AM Richard Biener <rguent...@suse.de> wrote: >>> >>> On Thu, 22 Aug 2024, Jennifer Schmitz wrote: >>> >>>> On 19 Aug 2024, at 21:02, Richard Sandiford <richard.sandif...@arm.com> >>>> wrote: >>>>> >>>>> External email: Use caution opening links or attachments >>>>> >>>>> >>>>> Jennifer Schmitz <jschm...@nvidia.com> writes: >>>>>> Thanks for the comments. I updated the patch accordingly and >>>>>> bootstrapped and tested again. >>>>>> Best, Jennifer >>>>>> >>>>>> From 9ef423f23afaeaa650d511c51bbc1a167e40b349 Mon Sep 17 00:00:00 2001 >>>>>> From: Jennifer Schmitz <jschm...@nvidia.com> >>>>>> Date: Wed, 7 Aug 2024 08:56:45 -0700 >>>>>> Subject: [PATCH] PR tree-optimization/101390: Vectorize modulo operator >>>>>> >>>>>> This patch adds a new vectorization pattern that detects the modulo >>>>>> operation where the second operand is a variable. >>>>>> It replaces the statement by division, multiplication, and subtraction. >>>>>> >>>>>> The patch was bootstrapped and regtested on aarch64-linux-gnu, no >>>>>> regression. >>>>>> Ok for mainline? >>>>>> >>>>>> Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com> >>>>>> >>>>>> gcc/ >>>>>> >>>>>> PR tree-optimization/101390 >>>>>> * tree-vect-patterns.cc (vect_recog_mod_var_pattern): Add new pattern. >>>>>> >>>>>> gcc/testsuite/ >>>>>> PR tree-optimization/101390 >>>>>> * gcc.dg/vect/vect-mod-var.c: New test. >>>>>> * gcc.target/aarch64/sve/mod_1.c: Likewise. >>>>>> * lib/target-supports.exp: New selector expression. >>>>> >>>>> LGTM, thanks. Please give others a couple of days to comment though. >>>>> >>>> Pushed to trunk with 9bbad3685131ec95d970f81bf75f9556d4d92742. 
>>> >>> The gcc.dg/vect/vect-mod-var.c seems to FAIL execution for me on >>> x86_64-linux: >>> >>> FAIL: gcc.dg/vect/vect-mod-var.c -flto -ffat-lto-objects execution test >>> FAIL: gcc.dg/vect/vect-mod-var.c execution test >> >> And on powerpc64: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116461 . > > I pushed the fix for the testcase as > r15-3098-gf6b10fe45b9b704fd6a7124ab02c6e6cbd8efce4 . The issue was > just that division by 0 is undefined (well, mod by 0). Thank you, Andrew. > > > > Thanks, > Andrew Pinski > >> Thanks, >> Andrew >> >>> >>> Richard. >>> >>>> Best, Jennifer >>>>> Richard >>>>> >>>>>> --- >>>>>> gcc/testsuite/gcc.dg/vect/vect-mod-var.c | 37 +++++++++++ >>>>>> gcc/testsuite/gcc.target/aarch64/sve/mod_1.c | 28 +++++++++ >>>>>> gcc/testsuite/lib/target-supports.exp | 5 ++ >>>>>> gcc/tree-vect-patterns.cc | 66 ++++++++++++++++++++ >>>>>> 4 files changed, 136 insertions(+) >>>>>> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-mod-var.c >>>>>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mod_1.c >>>>>> >>>>>> diff --git a/gcc/testsuite/gcc.dg/vect/vect-mod-var.c >>>>>> b/gcc/testsuite/gcc.dg/vect/vect-mod-var.c >>>>>> new file mode 100644 >>>>>> index 00000000000..eeed318c62b >>>>>> --- /dev/null >>>>>> +++ b/gcc/testsuite/gcc.dg/vect/vect-mod-var.c >>>>>> @@ -0,0 +1,37 @@ >>>>>> +#include "tree-vect.h" >>>>>> + >>>>>> +#define N 64 >>>>>> + >>>>>> +__attribute__ ((noinline)) int >>>>>> +f (int *restrict a, int *restrict b, int *restrict c) >>>>>> +{ >>>>>> + for (int i = 0; i < N; ++i) >>>>>> + c[i] = a[i] % b[i]; >>>>>> +} >>>>>> + >>>>>> +#define BASE1 -126 >>>>>> +#define BASE2 116 >>>>>> + >>>>>> +int >>>>>> +main (void) >>>>>> +{ >>>>>> + check_vect (); >>>>>> + >>>>>> + int a[N], b[N], c[N]; >>>>>> + >>>>>> + for (int i = 0; i < N; ++i) >>>>>> + { >>>>>> + a[i] = BASE1 + i * 5; >>>>>> + b[i] = BASE2 - i * 4; >>>>>> + __asm__ volatile (""); >>>>>> + } >>>>>> + >>>>>> + f (a, b, c); >>>>>> + >>>>>> +#pragma GCC novector >>>>>> + for (int 
i = 0; i < N; ++i) >>>>>> + if (c[i] != a[i] % b[i]) >>>>>> + __builtin_abort (); >>>>>> +} >>>>>> + >>>>>> +/* { dg-final { scan-tree-dump "vect_recog_mod_var_pattern: detected" >>>>>> "vect" { target vect_int_div } } } */ >>>>>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c >>>>>> b/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c >>>>>> new file mode 100644 >>>>>> index 00000000000..eb37f1e3636 >>>>>> --- /dev/null >>>>>> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c >>>>>> @@ -0,0 +1,28 @@ >>>>>> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ >>>>>> +/* { dg-options "-Ofast -ftree-vectorize -fno-vect-cost-model >>>>>> --save-temps" } */ >>>>>> + >>>>>> +#include <stdint.h> >>>>>> + >>>>>> +#define DEF_LOOP(TYPE) \ >>>>>> +void __attribute__ ((noipa)) \ >>>>>> +mod_##TYPE (TYPE *restrict dst, TYPE *restrict src1, \ >>>>>> + TYPE *restrict src2, int count) \ >>>>>> +{ \ >>>>>> + for (int i = 0; i < count; ++i) \ >>>>>> + dst[i] = src1[i] % src2[i]; \ >>>>>> +} >>>>>> + >>>>>> +#define TEST_ALL(T) \ >>>>>> + T (int32_t) \ >>>>>> + T (uint32_t) \ >>>>>> + T (int64_t) \ >>>>>> + T (uint64_t) >>>>>> + >>>>>> +TEST_ALL (DEF_LOOP) >>>>>> + >>>>>> +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, >>>>>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ >>>>>> +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, >>>>>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */ >>>>>> +/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.s, p[0-7]/m, >>>>>> z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ >>>>>> +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m, >>>>>> z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ >>>>>> +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m, >>>>>> z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */ >>>>>> +/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.d, p[0-7]/m, >>>>>> z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ >>>>>> diff --git a/gcc/testsuite/lib/target-supports.exp >>>>>> b/gcc/testsuite/lib/target-supports.exp >>>>>> index 
26820b146d4..b8072eaf781 100644 >>>>>> --- a/gcc/testsuite/lib/target-supports.exp >>>>>> +++ b/gcc/testsuite/lib/target-supports.exp >>>>>> @@ -4258,6 +4258,11 @@ proc check_effective_target_vect_int { } { >>>>>> }}] >>>>>> } >>>>>> >>>>>> +# Return 1 if the target supports vector integer division, 0 otherwise. >>>>>> +proc check_effective_target_vect_int_div { } { >>>>>> + return [check_effective_target_aarch64_sve] >>>>>> +} >>>>>> + >>>>>> # Return 1 if the target supports vectorization of early breaks, >>>>>> # 0 otherwise. >>>>>> # >>>>>> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc >>>>>> index f52de2b6972..18b322c63b8 100644 >>>>>> --- a/gcc/tree-vect-patterns.cc >>>>>> +++ b/gcc/tree-vect-patterns.cc >>>>>> @@ -5264,6 +5264,71 @@ vect_recog_divmod_pattern (vec_info *vinfo, >>>>>> return pattern_stmt; >>>>>> } >>>>>> >>>>>> +/* Detects pattern with a modulo operation (S1) where both arguments >>>>>> + are variables of integral type. >>>>>> + The statement is replaced by division, multiplication, and >>>>>> subtraction. >>>>>> + The last statement (S4) is returned. 
>>>>>> + >>>>>> + Example: >>>>>> + S1 c_t = a_t % b_t; >>>>>> + >>>>>> + is replaced by >>>>>> + S2 x_t = a_t / b_t; >>>>>> + S3 y_t = x_t * b_t; >>>>>> + S4 z_t = a_t - y_t; */ >>>>>> + >>>>>> +static gimple * >>>>>> +vect_recog_mod_var_pattern (vec_info *vinfo, >>>>>> + stmt_vec_info stmt_vinfo, tree *type_out) >>>>>> +{ >>>>>> + gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo); >>>>>> + tree oprnd0, oprnd1, vectype, itype; >>>>>> + gimple *pattern_stmt, *def_stmt; >>>>>> + enum tree_code rhs_code; >>>>>> + >>>>>> + if (!is_gimple_assign (last_stmt)) >>>>>> + return NULL; >>>>>> + >>>>>> + rhs_code = gimple_assign_rhs_code (last_stmt); >>>>>> + if (rhs_code != TRUNC_MOD_EXPR) >>>>>> + return NULL; >>>>>> + >>>>>> + oprnd0 = gimple_assign_rhs1 (last_stmt); >>>>>> + oprnd1 = gimple_assign_rhs2 (last_stmt); >>>>>> + itype = TREE_TYPE (oprnd0); >>>>>> + if (TREE_CODE (oprnd0) != SSA_NAME >>>>>> + || TREE_CODE (oprnd1) != SSA_NAME >>>>>> + || TREE_CODE (itype) != INTEGER_TYPE) >>>>>> + return NULL; >>>>>> + >>>>>> + vectype = get_vectype_for_scalar_type (vinfo, itype); >>>>>> + >>>>>> + if (!vectype >>>>>> + || target_has_vecop_for_code (TRUNC_MOD_EXPR, vectype) >>>>>> + || !target_has_vecop_for_code (TRUNC_DIV_EXPR, vectype) >>>>>> + || !target_has_vecop_for_code (MULT_EXPR, vectype) >>>>>> + || !target_has_vecop_for_code (MINUS_EXPR, vectype)) >>>>>> + return NULL; >>>>>> + >>>>>> + tree q, tmp, r; >>>>>> + q = vect_recog_temp_ssa_var (itype, NULL); >>>>>> + def_stmt = gimple_build_assign (q, TRUNC_DIV_EXPR, oprnd0, oprnd1); >>>>>> + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype); >>>>>> + >>>>>> + tmp = vect_recog_temp_ssa_var (itype, NULL); >>>>>> + def_stmt = gimple_build_assign (tmp, MULT_EXPR, q, oprnd1); >>>>>> + append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype); >>>>>> + >>>>>> + r = vect_recog_temp_ssa_var (itype, NULL); >>>>>> + pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, tmp); >>>>>> + >>>>>> + /* Pattern 
detected. */ >>>>>> + *type_out = vectype; >>>>>> + vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt); >>>>>> + >>>>>> + return pattern_stmt; >>>>>> +} >>>>>> + >>>>>> /* Function vect_recog_mixed_size_cond_pattern >>>>>> >>>>>> Try to find the following pattern: >>>>>> @@ -7343,6 +7408,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] >>>>>> = { >>>>>> { vect_recog_rotate_pattern, "rotate" }, >>>>>> { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" }, >>>>>> { vect_recog_divmod_pattern, "divmod" }, >>>>>> + { vect_recog_mod_var_pattern, "modvar" }, >>>>>> { vect_recog_mult_pattern, "mult" }, >>>>>> { vect_recog_sat_add_pattern, "sat_add" }, >>>>>> { vect_recog_sat_sub_pattern, "sat_sub" }, >>>> >>>> >>> >>> -- >>> Richard Biener <rguent...@suse.de> >>> SUSE Software Solutions Germany GmbH, >>> Frankenstrasse 146, 90461 Nuernberg, Germany; >>> GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)
smime.p7s
Description: S/MIME cryptographic signature