> On 23 Aug 2024, at 06:21, Andrew Pinski <pins...@gmail.com> wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> On Thu, Aug 22, 2024 at 11:28 AM Andrew Pinski <pins...@gmail.com> wrote:
>> 
>> On Thu, Aug 22, 2024 at 4:12 AM Richard Biener <rguent...@suse.de> wrote:
>>> 
>>> On Thu, 22 Aug 2024, Jennifer Schmitz wrote:
>>> 
>>>> On 19 Aug 2024, at 21:02, Richard Sandiford <richard.sandif...@arm.com> 
>>>> wrote:
>>>>> 
>>>>> External email: Use caution opening links or attachments
>>>>> 
>>>>> 
>>>>> Jennifer Schmitz <jschm...@nvidia.com> writes:
>>>>>> Thanks for the comments. I updated the patch accordingly and 
>>>>>> bootstrapped and tested again.
>>>>>> Best, Jennifer
>>>>>> 
>>>>>> From 9ef423f23afaeaa650d511c51bbc1a167e40b349 Mon Sep 17 00:00:00 2001
>>>>>> From: Jennifer Schmitz <jschm...@nvidia.com>
>>>>>> Date: Wed, 7 Aug 2024 08:56:45 -0700
>>>>>> Subject: [PATCH] PR tree-optimization/101390: Vectorize modulo operator
>>>>>> 
>>>>>> This patch adds a new vectorization pattern that detects the modulo
>>>>>> operation where the second operand is a variable.
>>>>>> It replaces the statement by division, multiplication, and subtraction.
>>>>>> 
>>>>>> The patch was bootstrapped and regtested on aarch64-linux-gnu, no 
>>>>>> regression.
>>>>>> Ok for mainline?
>>>>>> 
>>>>>> Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
>>>>>> 
>>>>>> gcc/
>>>>>> 
>>>>>>     PR tree-optimization/101390
>>>>>>     * tree-vect-patterns.cc (vect_recog_mod_var_pattern): Add new pattern.
>>>>>> 
>>>>>> gcc/testsuite/
>>>>>>     PR tree-optimization/101390
>>>>>>     * gcc.dg/vect/vect-mod-var.c: New test.
>>>>>>     * gcc.target/aarch64/sve/mod_1.c: Likewise.
>>>>>>     * lib/target-supports.exp: New selector expression.
>>>>> 
>>>>> LGTM, thanks.  Please give others a couple of days to comment though.
>>>>> 
>>>> Pushed to trunk with 9bbad3685131ec95d970f81bf75f9556d4d92742.
>>> 
>>> The gcc.dg/vect/vect-mod-var.c seems to FAIL execution for me on
>>> x86_64-linux:
>>> 
>>> FAIL: gcc.dg/vect/vect-mod-var.c -flto -ffat-lto-objects execution test
>>> FAIL: gcc.dg/vect/vect-mod-var.c execution test
>> 
>> And on powerpc64: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116461 .
> 
> I pushed the fix for the testcase as
> r15-3098-gf6b10fe45b9b704fd6a7124ab02c6e6cbd8efce4 . The issue was
> just that division by 0 is undefined (well, mod by 0).
Thank you, Andrew.
> 
> 
> 
> Thanks,
> Andrew Pinski
> 
>> Thanks,
>> Andrew
>> 
>>> 
>>> Richard.
>>> 
>>>> Best, Jennifer
>>>>> Richard
>>>>> 
>>>>>> ---
>>>>>> gcc/testsuite/gcc.dg/vect/vect-mod-var.c     | 37 +++++++++++
>>>>>> gcc/testsuite/gcc.target/aarch64/sve/mod_1.c | 28 +++++++++
>>>>>> gcc/testsuite/lib/target-supports.exp        |  5 ++
>>>>>> gcc/tree-vect-patterns.cc                    | 66 ++++++++++++++++++++
>>>>>> 4 files changed, 136 insertions(+)
>>>>>> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-mod-var.c
>>>>>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mod_1.c
>>>>>> 
>>>>>> diff --git a/gcc/testsuite/gcc.dg/vect/vect-mod-var.c 
>>>>>> b/gcc/testsuite/gcc.dg/vect/vect-mod-var.c
>>>>>> new file mode 100644
>>>>>> index 00000000000..eeed318c62b
>>>>>> --- /dev/null
>>>>>> +++ b/gcc/testsuite/gcc.dg/vect/vect-mod-var.c
>>>>>> @@ -0,0 +1,37 @@
>>>>>> +#include "tree-vect.h"
>>>>>> +
>>>>>> +#define N 64
>>>>>> +
>>>>>> +__attribute__ ((noinline)) int
>>>>>> +f (int *restrict a, int *restrict b, int *restrict c)
>>>>>> +{
>>>>>> +  for (int i = 0; i < N; ++i)
>>>>>> +    c[i] = a[i] % b[i];
>>>>>> +}
>>>>>> +
>>>>>> +#define BASE1 -126
>>>>>> +#define BASE2 116
>>>>>> +
>>>>>> +int
>>>>>> +main (void)
>>>>>> +{
>>>>>> +  check_vect ();
>>>>>> +
>>>>>> +  int a[N], b[N], c[N];
>>>>>> +
>>>>>> +  for (int i = 0; i < N; ++i)
>>>>>> +    {
>>>>>> +      a[i] = BASE1 + i * 5;
>>>>>> +      b[i] = BASE2 - i * 4;
>>>>>> +      __asm__ volatile ("");
>>>>>> +    }
>>>>>> +
>>>>>> +  f (a, b, c);
>>>>>> +
>>>>>> +#pragma GCC novector
>>>>>> +  for (int i = 0; i < N; ++i)
>>>>>> +    if (c[i] != a[i] % b[i])
>>>>>> +      __builtin_abort ();
>>>>>> +}
>>>>>> +
>>>>>> +/* { dg-final { scan-tree-dump "vect_recog_mod_var_pattern: detected" 
>>>>>> "vect" { target vect_int_div } } } */
>>>>>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c 
>>>>>> b/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c
>>>>>> new file mode 100644
>>>>>> index 00000000000..eb37f1e3636
>>>>>> --- /dev/null
>>>>>> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c
>>>>>> @@ -0,0 +1,28 @@
>>>>>> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */
>>>>>> +/* { dg-options "-Ofast -ftree-vectorize -fno-vect-cost-model 
>>>>>> --save-temps" } */
>>>>>> +
>>>>>> +#include <stdint.h>
>>>>>> +
>>>>>> +#define DEF_LOOP(TYPE)                                       \
>>>>>> +void __attribute__ ((noipa))                         \
>>>>>> +mod_##TYPE (TYPE *restrict dst, TYPE *restrict src1, \
>>>>>> +         TYPE *restrict src2, int count)             \
>>>>>> +{                                                    \
>>>>>> +  for (int i = 0; i < count; ++i)                    \
>>>>>> +    dst[i] = src1[i] % src2[i];                              \
>>>>>> +}
>>>>>> +
>>>>>> +#define TEST_ALL(T) \
>>>>>> +  T (int32_t) \
>>>>>> +  T (uint32_t) \
>>>>>> +  T (int64_t) \
>>>>>> +  T (uint64_t)
>>>>>> +
>>>>>> +TEST_ALL (DEF_LOOP)
>>>>>> +
>>>>>> +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, 
>>>>>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>>>>>> +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, 
>>>>>> z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>>>>>> +/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.s, p[0-7]/m, 
>>>>>> z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
>>>>>> +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m, 
>>>>>> z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
>>>>>> +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m, 
>>>>>> z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
>>>>>> +/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.d, p[0-7]/m, 
>>>>>> z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
>>>>>> diff --git a/gcc/testsuite/lib/target-supports.exp 
>>>>>> b/gcc/testsuite/lib/target-supports.exp
>>>>>> index 26820b146d4..b8072eaf781 100644
>>>>>> --- a/gcc/testsuite/lib/target-supports.exp
>>>>>> +++ b/gcc/testsuite/lib/target-supports.exp
>>>>>> @@ -4258,6 +4258,11 @@ proc check_effective_target_vect_int { } {
>>>>>>     }}]
>>>>>> }
>>>>>> 
>>>>>> +# Return 1 if the target supports vector integer division, 0 otherwise.
>>>>>> +proc check_effective_target_vect_int_div { } {
>>>>>> +    return [check_effective_target_aarch64_sve]
>>>>>> +}
>>>>>> +
>>>>>> # Return 1 if the target supports vectorization of early breaks,
>>>>>> # 0 otherwise.
>>>>>> #
>>>>>> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
>>>>>> index f52de2b6972..18b322c63b8 100644
>>>>>> --- a/gcc/tree-vect-patterns.cc
>>>>>> +++ b/gcc/tree-vect-patterns.cc
>>>>>> @@ -5264,6 +5264,71 @@ vect_recog_divmod_pattern (vec_info *vinfo,
>>>>>>  return pattern_stmt;
>>>>>> }
>>>>>> 
>>>>>> +/* Detects pattern with a modulo operation (S1) where both arguments
>>>>>> +   are variables of integral type.
>>>>>> +   The statement is replaced by division, multiplication, and 
>>>>>> subtraction.
>>>>>> +   The last statement (S4) is returned.
>>>>>> +
>>>>>> +   Example:
>>>>>> +   S1 c_t = a_t % b_t;
>>>>>> +
>>>>>> +   is replaced by
>>>>>> +   S2 x_t = a_t / b_t;
>>>>>> +   S3 y_t = x_t * b_t;
>>>>>> +   S4 z_t = a_t - y_t;  */
>>>>>> +
>>>>>> +static gimple *
>>>>>> +vect_recog_mod_var_pattern (vec_info *vinfo,
>>>>>> +                         stmt_vec_info stmt_vinfo, tree *type_out)
>>>>>> +{
>>>>>> +  gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
>>>>>> +  tree oprnd0, oprnd1, vectype, itype;
>>>>>> +  gimple *pattern_stmt, *def_stmt;
>>>>>> +  enum tree_code rhs_code;
>>>>>> +
>>>>>> +  if (!is_gimple_assign (last_stmt))
>>>>>> +    return NULL;
>>>>>> +
>>>>>> +  rhs_code = gimple_assign_rhs_code (last_stmt);
>>>>>> +  if (rhs_code != TRUNC_MOD_EXPR)
>>>>>> +    return NULL;
>>>>>> +
>>>>>> +  oprnd0 = gimple_assign_rhs1 (last_stmt);
>>>>>> +  oprnd1 = gimple_assign_rhs2 (last_stmt);
>>>>>> +  itype = TREE_TYPE (oprnd0);
>>>>>> +  if (TREE_CODE (oprnd0) != SSA_NAME
>>>>>> +      || TREE_CODE (oprnd1) != SSA_NAME
>>>>>> +      || TREE_CODE (itype) != INTEGER_TYPE)
>>>>>> +    return NULL;
>>>>>> +
>>>>>> +  vectype = get_vectype_for_scalar_type (vinfo, itype);
>>>>>> +
>>>>>> +  if (!vectype
>>>>>> +      || target_has_vecop_for_code (TRUNC_MOD_EXPR, vectype)
>>>>>> +      || !target_has_vecop_for_code (TRUNC_DIV_EXPR, vectype)
>>>>>> +      || !target_has_vecop_for_code (MULT_EXPR, vectype)
>>>>>> +      || !target_has_vecop_for_code (MINUS_EXPR, vectype))
>>>>>> +    return NULL;
>>>>>> +
>>>>>> +  tree q, tmp, r;
>>>>>> +  q = vect_recog_temp_ssa_var (itype, NULL);
>>>>>> +  def_stmt = gimple_build_assign (q, TRUNC_DIV_EXPR, oprnd0, oprnd1);
>>>>>> +  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
>>>>>> +
>>>>>> +  tmp = vect_recog_temp_ssa_var (itype, NULL);
>>>>>> +  def_stmt = gimple_build_assign (tmp, MULT_EXPR, q, oprnd1);
>>>>>> +  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
>>>>>> +
>>>>>> +  r = vect_recog_temp_ssa_var (itype, NULL);
>>>>>> +  pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, tmp);
>>>>>> +
>>>>>> +  /* Pattern detected.  */
>>>>>> +  *type_out = vectype;
>>>>>> +  vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt);
>>>>>> +
>>>>>> +  return pattern_stmt;
>>>>>> +}
>>>>>> +
>>>>>> /* Function vect_recog_mixed_size_cond_pattern
>>>>>> 
>>>>>>   Try to find the following pattern:
>>>>>> @@ -7343,6 +7408,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] 
>>>>>> = {
>>>>>>  { vect_recog_rotate_pattern, "rotate" },
>>>>>>  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
>>>>>>  { vect_recog_divmod_pattern, "divmod" },
>>>>>> +  { vect_recog_mod_var_pattern, "modvar" },
>>>>>>  { vect_recog_mult_pattern, "mult" },
>>>>>>  { vect_recog_sat_add_pattern, "sat_add" },
>>>>>>  { vect_recog_sat_sub_pattern, "sat_sub" },
>>>> 
>>>> 
>>> 
>>> --
>>> Richard Biener <rguent...@suse.de>
>>> SUSE Software Solutions Germany GmbH,
>>> Frankenstrasse 146, 90461 Nuernberg, Germany;
>>> GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)


Attachment: smime.p7s
Description: S/MIME cryptographic signature

Reply via email to