On 19 Aug 2024, at 21:02, Richard Sandiford <richard.sandif...@arm.com> wrote:
> 
> External email: Use caution opening links or attachments
> 
> 
> Jennifer Schmitz <jschm...@nvidia.com> writes:
>> Thanks for the comments. I updated the patch accordingly and bootstrapped 
>> and tested again.
>> Best, Jennifer
>> 
>> From 9ef423f23afaeaa650d511c51bbc1a167e40b349 Mon Sep 17 00:00:00 2001
>> From: Jennifer Schmitz <jschm...@nvidia.com>
>> Date: Wed, 7 Aug 2024 08:56:45 -0700
>> Subject: [PATCH] PR tree-optimization/101390: Vectorize modulo operator
>> 
>> This patch adds a new vectorization pattern that detects the modulo
>> operation where the second operand is a variable.
>> It replaces the statement by division, multiplication, and subtraction.
>> 
>> The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression.
>> Ok for mainline?
>> 
>> Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
>> 
>> gcc/
>> 
>>      PR tree-optimization/101390
>>      * tree-vect-patterns.cc (vect_recog_mod_var_pattern): Add new pattern.
>> 
>> gcc/testsuite/
>>      PR tree-optimization/101390
>>      * gcc.dg/vect/vect-mod-var.c: New test.
>>      * gcc.target/aarch64/sve/mod_1.c: Likewise.
>>      * lib/target-supports.exp: New selector expression.
> 
> LGTM, thanks.  Please give others a couple of days to comment though.
> 
Pushed to trunk as commit 9bbad3685131ec95d970f81bf75f9556d4d92742.
Best, Jennifer
> Richard
> 
>> ---
>> gcc/testsuite/gcc.dg/vect/vect-mod-var.c     | 37 +++++++++++
>> gcc/testsuite/gcc.target/aarch64/sve/mod_1.c | 28 +++++++++
>> gcc/testsuite/lib/target-supports.exp        |  5 ++
>> gcc/tree-vect-patterns.cc                    | 66 ++++++++++++++++++++
>> 4 files changed, 136 insertions(+)
>> create mode 100644 gcc/testsuite/gcc.dg/vect/vect-mod-var.c
>> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/mod_1.c
>> 
>> diff --git a/gcc/testsuite/gcc.dg/vect/vect-mod-var.c b/gcc/testsuite/gcc.dg/vect/vect-mod-var.c
>> new file mode 100644
>> index 00000000000..eeed318c62b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/vect/vect-mod-var.c
>> @@ -0,0 +1,37 @@
>> +#include "tree-vect.h"
>> +
>> +#define N 64
>> +
>> +__attribute__ ((noinline)) int
>> +f (int *restrict a, int *restrict b, int *restrict c)
>> +{
>> +  for (int i = 0; i < N; ++i)
>> +    c[i] = a[i] % b[i];
>> +}
>> +
>> +#define BASE1 -126
>> +#define BASE2 116
>> +
>> +int
>> +main (void)
>> +{
>> +  check_vect ();
>> +
>> +  int a[N], b[N], c[N];
>> +
>> +  for (int i = 0; i < N; ++i)
>> +    {
>> +      a[i] = BASE1 + i * 5;
>> +      b[i] = BASE2 - i * 4;
>> +      __asm__ volatile ("");
>> +    }
>> +
>> +  f (a, b, c);
>> +
>> +#pragma GCC novector
>> +  for (int i = 0; i < N; ++i)
>> +    if (c[i] != a[i] % b[i])
>> +      __builtin_abort ();
>> +}
>> +
>> +/* { dg-final { scan-tree-dump "vect_recog_mod_var_pattern: detected" "vect" { target vect_int_div } } } */
>> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c
>> new file mode 100644
>> index 00000000000..eb37f1e3636
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/aarch64/sve/mod_1.c
>> @@ -0,0 +1,28 @@
>> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */
>> +/* { dg-options "-Ofast -ftree-vectorize -fno-vect-cost-model --save-temps" } */
>> +
>> +#include <stdint.h>
>> +
>> +#define DEF_LOOP(TYPE)                                       \
>> +void __attribute__ ((noipa))                         \
>> +mod_##TYPE (TYPE *restrict dst, TYPE *restrict src1, \
>> +         TYPE *restrict src2, int count)             \
>> +{                                                    \
>> +  for (int i = 0; i < count; ++i)                    \
>> +    dst[i] = src1[i] % src2[i];                              \
>> +}
>> +
>> +#define TEST_ALL(T) \
>> +  T (int32_t) \
>> +  T (uint32_t) \
>> +  T (int64_t) \
>> +  T (uint64_t)
>> +
>> +TEST_ALL (DEF_LOOP)
>> +
>> +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>> +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
>> +/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
>> +/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
>> +/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
>> +/* { dg-final { scan-assembler-times {\tmsb\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */
>> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
>> index 26820b146d4..b8072eaf781 100644
>> --- a/gcc/testsuite/lib/target-supports.exp
>> +++ b/gcc/testsuite/lib/target-supports.exp
>> @@ -4258,6 +4258,11 @@ proc check_effective_target_vect_int { } {
>>      }}]
>> }
>> 
>> +# Return 1 if the target supports vector integer division, 0 otherwise.
>> +proc check_effective_target_vect_int_div { } {
>> +    return [check_effective_target_aarch64_sve]
>> +}
>> +
>> # Return 1 if the target supports vectorization of early breaks,
>> # 0 otherwise.
>> #
>> diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
>> index f52de2b6972..18b322c63b8 100644
>> --- a/gcc/tree-vect-patterns.cc
>> +++ b/gcc/tree-vect-patterns.cc
>> @@ -5264,6 +5264,71 @@ vect_recog_divmod_pattern (vec_info *vinfo,
>>   return pattern_stmt;
>> }
>> 
>> +/* Detects pattern with a modulo operation (S1) where both arguments
>> +   are variables of integral type.
>> +   The statement is replaced by division, multiplication, and subtraction.
>> +   The last statement (S4) is returned.
>> +
>> +   Example:
>> +   S1 c_t = a_t % b_t;
>> +
>> +   is replaced by
>> +   S2 x_t = a_t / b_t;
>> +   S3 y_t = x_t * b_t;
>> +   S4 z_t = a_t - y_t;  */
>> +
>> +static gimple *
>> +vect_recog_mod_var_pattern (vec_info *vinfo,
>> +                         stmt_vec_info stmt_vinfo, tree *type_out)
>> +{
>> +  gimple *last_stmt = STMT_VINFO_STMT (stmt_vinfo);
>> +  tree oprnd0, oprnd1, vectype, itype;
>> +  gimple *pattern_stmt, *def_stmt;
>> +  enum tree_code rhs_code;
>> +
>> +  if (!is_gimple_assign (last_stmt))
>> +    return NULL;
>> +
>> +  rhs_code = gimple_assign_rhs_code (last_stmt);
>> +  if (rhs_code != TRUNC_MOD_EXPR)
>> +    return NULL;
>> +
>> +  oprnd0 = gimple_assign_rhs1 (last_stmt);
>> +  oprnd1 = gimple_assign_rhs2 (last_stmt);
>> +  itype = TREE_TYPE (oprnd0);
>> +  if (TREE_CODE (oprnd0) != SSA_NAME
>> +      || TREE_CODE (oprnd1) != SSA_NAME
>> +      || TREE_CODE (itype) != INTEGER_TYPE)
>> +    return NULL;
>> +
>> +  vectype = get_vectype_for_scalar_type (vinfo, itype);
>> +
>> +  if (!vectype
>> +      || target_has_vecop_for_code (TRUNC_MOD_EXPR, vectype)
>> +      || !target_has_vecop_for_code (TRUNC_DIV_EXPR, vectype)
>> +      || !target_has_vecop_for_code (MULT_EXPR, vectype)
>> +      || !target_has_vecop_for_code (MINUS_EXPR, vectype))
>> +    return NULL;
>> +
>> +  tree q, tmp, r;
>> +  q = vect_recog_temp_ssa_var (itype, NULL);
>> +  def_stmt = gimple_build_assign (q, TRUNC_DIV_EXPR, oprnd0, oprnd1);
>> +  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
>> +
>> +  tmp = vect_recog_temp_ssa_var (itype, NULL);
>> +  def_stmt = gimple_build_assign (tmp, MULT_EXPR, q, oprnd1);
>> +  append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vectype);
>> +
>> +  r = vect_recog_temp_ssa_var (itype, NULL);
>> +  pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, tmp);
>> +
>> +  /* Pattern detected.  */
>> +  *type_out = vectype;
>> +  vect_pattern_detected ("vect_recog_mod_var_pattern", last_stmt);
>> +
>> +  return pattern_stmt;
>> +}
>> +
>> /* Function vect_recog_mixed_size_cond_pattern
>> 
>>    Try to find the following pattern:
>> @@ -7343,6 +7408,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
>>   { vect_recog_rotate_pattern, "rotate" },
>>   { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
>>   { vect_recog_divmod_pattern, "divmod" },
>> +  { vect_recog_mod_var_pattern, "modvar" },
>>   { vect_recog_mult_pattern, "mult" },
>>   { vect_recog_sat_add_pattern, "sat_add" },
>>   { vect_recog_sat_sub_pattern, "sat_sub" },

Attachment: smime.p7s
Description: S/MIME cryptographic signature

Reply via email to