Jennifer Schmitz <jschm...@nvidia.com> writes: > This patch implements constant folding for svdiv. A new gimple_folder > method was added that uses const_binop to fold binary operations using a > given tree_code. For svdiv, this method is used to fold constant > operands. > Additionally, if at least one of the operands is a zero vector, svdiv is > folded to a zero vector (in case of ptrue, _x, or _z). > Tests were added to check the produced assembly for different > predicates and signed and unsigned integers. > Currently, constant folding is only implemented for integers and binary > operations, but extending it to float types and other operations is > planned for a future follow-up. > > The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression. > OK for mainline? > > Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com> > > gcc/ > > * config/aarch64/aarch64-sve-builtins-base.cc > (svdiv_impl::fold): Add constant folding. > * config/aarch64/aarch64-sve-builtins.cc > (gimple_folder::const_fold): New method. > * config/aarch64/aarch64-sve-builtins.h > (gimple_folder::const_fold): Add function declaration. > > gcc/testsuite/ > > * gcc.target/aarch64/sve/const_fold_div_1.c: New test. > * gcc.target/aarch64/sve/const_fold_div_zero.c: Likewise. > > From 79355d876503558f661b46ebbeaa11c74ce176cb Mon Sep 17 00:00:00 2001 > From: Jennifer Schmitz <jschm...@nvidia.com> > Date: Thu, 15 Aug 2024 05:42:06 -0700 > Subject: [PATCH 1/2] SVE intrinsics: Fold constant operands for svdiv > > This patch implements constant folding for svdiv. A new gimple_folder > method was added that uses const_binop to fold binary operations using a > given tree_code. For svdiv, this method is used to fold constant > operands. > Additionally, if at least one of the operands is a zero vector, svdiv is > folded to a zero vector (in case of ptrue, _x, or _z). > Tests were added to check the produced assembly for different > predicates and signed and unsigned integers. > Currently, constant folding is only implemented for integers and binary > operations, but extending it to float types and other operations is > planned for a future follow-up. > > The patch was bootstrapped and regtested on aarch64-linux-gnu, no regression. > OK for mainline? > > Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com> > > gcc/ > > * config/aarch64/aarch64-sve-builtins-base.cc > (svdiv_impl::fold): Add constant folding. > * config/aarch64/aarch64-sve-builtins.cc > (gimple_folder::const_fold): New method. > * config/aarch64/aarch64-sve-builtins.h > (gimple_folder::const_fold): Add function declaration. > > gcc/testsuite/ > > * gcc.target/aarch64/sve/const_fold_div_1.c: New test. > * gcc.target/aarch64/sve/const_fold_div_zero.c: Likewise. 
> ---
>  .../aarch64/aarch64-sve-builtins-base.cc | 30 ++-
>  gcc/config/aarch64/aarch64-sve-builtins.cc | 25 +++
>  gcc/config/aarch64/aarch64-sve-builtins.h | 1 +
>  .../gcc.target/aarch64/sve/const_fold_div_1.c | 128 ++++++++++++
>  .../aarch64/sve/const_fold_div_zero.c | 186 ++++++++++++++++++
>  .../aarch64/sve/const_fold_mul_zero.c | 95 +++++++++
>  6 files changed, 462 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_1.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_zero.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_zero.c
>
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> index d55bee0b72f..7f948ecc0c7 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> @@ -755,8 +755,32 @@ public:
>    gimple *
>    fold (gimple_folder &f) const override
>    {
> -    tree divisor = gimple_call_arg (f.call, 2);
> -    tree divisor_cst = uniform_integer_cst_p (divisor);
> +    tree pg = gimple_call_arg (f.call, 0);
> +    tree op1 = gimple_call_arg (f.call, 1);
> +    tree op2 = gimple_call_arg (f.call, 2);
> +
> +    /* For integer division, if the dividend or divisor are all zeros,
> +       fold to zero vector.  */
> +    int step = f.type_suffix (0).element_bytes;
> +    if (f.pred != PRED_m || is_ptrue (pg, step))
> +      {
> +        if (vector_cst_all_same (op1, step)
> +            && integer_zerop (VECTOR_CST_ENCODED_ELT (op1, 0)))
> +          return gimple_build_assign (f.lhs, op1);
> +        if (vector_cst_all_same (op2, step)
> +            && integer_zerop (VECTOR_CST_ENCODED_ELT (op2, 0)))
> +          return gimple_build_assign (f.lhs, op2);
> +      }
Rather than handle all-zeros as a special case here, I think we should
try to do it elementwise in the const_binop.  More below.

> +
> +    /* Try to fold constant operands.  */
> +    tree_code m_code = f.type_suffix (0).integer_p ? TRUNC_DIV_EXPR
> +                                                   : RDIV_EXPR;
> +    if (gimple *new_stmt = f.const_fold (m_code))
> +      return new_stmt;
> +
> +    /* If the divisor is a uniform power of 2, fold to a shift
> +       instruction.  */
> +    tree divisor_cst = uniform_integer_cst_p (op2);
>
>     if (!divisor_cst || !integer_pow2p (divisor_cst))
>       return NULL;
> @@ -770,7 +794,7 @@ public:
>                                shapes::binary_uint_opt_n, MODE_n,
>                                f.type_suffix_ids, GROUP_none, f.pred);
>         call = f.redirect_call (instance);
> -       tree d = INTEGRAL_TYPE_P (TREE_TYPE (divisor)) ? divisor : divisor_cst;
> +       tree d = INTEGRAL_TYPE_P (TREE_TYPE (op2)) ? op2 : divisor_cst;
>         new_divisor = wide_int_to_tree (TREE_TYPE (d), tree_log2 (d));
>       }
>     else
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc
> b/gcc/config/aarch64/aarch64-sve-builtins.cc
> index 0a560eaedca..0f69c586464 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins.cc
> +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
> @@ -3691,6 +3691,31 @@ gimple_folder::fold_to_vl_pred (unsigned int vl)
>    return gimple_build_assign (lhs, builder.build ());
>  }
>
> +/* If the predicate is svptrue or PRED_x, try to perform constant folding
> +   on the call using the given tree_code.
> +   Return the new statement on success, otherwise return null.  */
> +gimple *
> +gimple_folder::const_fold (tree_code code)
> +{
> +  tree pg = gimple_call_arg (call, 0);
> +  if (type_suffix (0).integer_p
> +      && (is_ptrue (pg, type_suffix (0).element_bytes)
> +          || pred == PRED_x))
> +    {
> +      if (TREE_CODE_CLASS (code) == tcc_binary)
> +        {
> +          gcc_assert (gimple_call_num_args (call) == 3);
> +          tree op1 = gimple_call_arg (call, 1);
> +          tree op2 = gimple_call_arg (call, 2);
> +          if (TREE_TYPE (op1) != TREE_TYPE (op2))
> +            return NULL;

I assume this is rejecting the svdiv_n_* case, is that right?  I think
we should instead try to handle that too, since the _n variants are
specifically provided as a convenience for uniform divisors.

It looks like const_binop should just work for that case too, thanks to
the shift handling.  (AFAICT, the handling is not explicitly restricted
to shifts.)  But if it doesn't, I think it would be a reasonable
extension.

> +          if (tree res = const_binop (code, TREE_TYPE (lhs), op1, op2))
> +            return gimple_build_assign (lhs, res);

Going back to the comment above about handling /0 elementwise: how about
splitting the vector part of const_binop out into a new public function
with the following interface:

  tree vector_const_binop (tree_code code, tree arg1, tree arg2,
                           tree (*elt_const_binop) (tree_code, tree, tree))

where "the vector part" is everything in the function after:

  if (TREE_CODE (arg1) == VECTOR_CST
      && TREE_CODE (arg2) == VECTOR_CST
      ...

Then const_binop itself can just use:

  return vector_const_binop (code, arg1, arg2, const_binop);

whereas the aarch64 code can pass its own wrapper that handles the extra
defined cases.

+Richi in case he has any thoughts on this.

I think the starting point for the aarch64 implementation should be
something like:

  if (poly_int_tree_p (arg1) && poly_int_tree_p (arg2))
    {
      poly_wide_int poly_res;
      tree type = TREE_TYPE (arg1);
      signop sign = TYPE_SIGN (type);
      wi::overflow_type overflow = wi::OVF_NONE;

      ...if chain of special cases...

      else if (!poly_int_binop (poly_res, code, arg1, arg2, sign, &overflow))
        return NULL_TREE;
      return force_fit_type (type, poly_res, false,
                             TREE_OVERFLOW (arg1) | TREE_OVERFLOW (arg2));
    }
  return NULL_TREE;

which is adapted from int_const_binop, and would need poly_int_binop to
become a public function.  The key thing here is that we completely
ignore overflow in the calculation, because the semantics of the
intrinsics are that language-level overflow does not happen.
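To make the "special cases" part more concrete, here is a rough,
untested sketch of what the aarch64 callback could look like.  The name
aarch64_const_binop is just a placeholder, and folding integer division
by zero to zero is an assumption on my part (it matches what SDIV and
UDIV return at runtime), so treat this as an illustration rather than a
finished implementation:

  /* Hypothetical element-wise folder for use with vector_const_binop.
     Unlike const_binop, it ignores overflow and folds x / 0 to 0,
     matching the architected behaviour of SDIV/UDIV.  */
  static tree
  aarch64_const_binop (enum tree_code code, tree arg1, tree arg2)
  {
    if (poly_int_tree_p (arg1) && poly_int_tree_p (arg2))
      {
        poly_wide_int poly_res;
        tree type = TREE_TYPE (arg1);
        signop sign = TYPE_SIGN (type);
        wi::overflow_type overflow = wi::OVF_NONE;

        /* The defined intrinsic behaviour for division by zero is to
           return zero, so fold it rather than punting.  */
        if (code == TRUNC_DIV_EXPR && integer_zerop (arg2))
          return arg2;
        if (!poly_int_binop (poly_res, code, arg1, arg2, sign, &overflow))
          return NULL_TREE;
        return force_fit_type (type, poly_res, false,
                               TREE_OVERFLOW (arg1) | TREE_OVERFLOW (arg2));
      }
    return NULL_TREE;
  }

With something along those lines, the all-zeros handling in
svdiv_impl::fold above should fall out of the element-wise folding,
rather than needing a separate VECTOR_CST_ENCODED_ELT check.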
Thanks,
Richard

> +        }
> +    }
> +  return NULL;
> +}
> +
>  /* Try to fold the call.  Return the new statement on success and null
>     on failure.  */
>  gimple *
> diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h
> b/gcc/config/aarch64/aarch64-sve-builtins.h
> index 9ab6f202c30..db30225a008 100644
> --- a/gcc/config/aarch64/aarch64-sve-builtins.h
> +++ b/gcc/config/aarch64/aarch64-sve-builtins.h
> @@ -636,6 +636,7 @@ public:
>    gimple *fold_to_pfalse ();
>    gimple *fold_to_ptrue ();
>    gimple *fold_to_vl_pred (unsigned int);
> +  gimple *const_fold (tree_code);
>
>    gimple *fold ();
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_1.c > b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_1.c > new file mode 100644 > index 00000000000..d8460a4d336 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_1.c > @@ -0,0 +1,128 @@ > +/* { dg-final { check-function-bodies "**" "" } } */ > +/* { dg-options "-O2" } */ > + > +#include "arm_sve.h" > + > +/* > +** s64_x_pg: > +** mov z[0-9]+\.d, #1 > +** ret > +*/ > +svint64_t s64_x_pg (svbool_t pg) > +{ > + return svdiv_x (pg, svdup_s64 (5), svdup_s64 (3)); > +} > + > +/* > +** s64_z_pg: > +** mov z[0-9]+\.d, p[0-7]/z, #1 > +** ret > +*/ > +svint64_t s64_z_pg (svbool_t pg) > +{ > + return svdiv_z (pg, svdup_s64 (5), svdup_s64 (3)); > +} > + > +/* > +** s64_m_pg: > +** mov (z[0-9]+\.d), #3 > +** mov (z[0-9]+\.d), #5 > +** sdiv \2, p[0-7]/m, \2, \1 > +** ret > +*/ > +svint64_t s64_m_pg (svbool_t pg) > +{ > + return svdiv_m (pg, svdup_s64 (5), svdup_s64 (3)); > +} > + > +/* > +** s64_x_ptrue: > +** mov z[0-9]+\.d, #1 > +** ret > +*/ > +svint64_t s64_x_ptrue () > +{ > + return svdiv_x (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3)); > +} > + > +/* > +** s64_z_ptrue: > +** mov z[0-9]+\.d, #1 > +** ret > +*/ > +svint64_t s64_z_ptrue () > +{ > + return svdiv_z (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3)); > +} > + > +/* > +** s64_m_ptrue: > +** mov z[0-9]+\.d, #1 > +** ret > +*/ > +svint64_t s64_m_ptrue () > +{ > + return svdiv_m (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3)); > +} > + > +/* > +** u64_x_pg: > +** mov z[0-9]+\.d, #1 > +** ret > +*/ > +svuint64_t u64_x_pg (svbool_t pg) > +{ > + return svdiv_x (pg, svdup_u64 (5), svdup_u64 (3)); > +} > + > +/* > +** u64_z_pg: > +** mov z[0-9]+\.d, p[0-7]/z, #1 > +** ret > +*/ > +svuint64_t u64_z_pg (svbool_t pg) > +{ > + return svdiv_z (pg, svdup_u64 (5), svdup_u64 (3)); > +} > + > +/* > +** u64_m_pg: > +** mov (z[0-9]+\.d), #3 > +** mov (z[0-9]+\.d), #5 > +** udiv \2, p[0-7]/m, \2, \1 > +** ret > +*/ > +svuint64_t u64_m_pg (svbool_t pg) > +{ > + return svdiv_m (pg, svdup_u64 (5), svdup_u64 (3)); > +} > + > +/* > +** u64_x_ptrue: > +** mov z[0-9]+\.d, #1 > +** ret > +*/ > +svuint64_t u64_x_ptrue () > +{ > + return svdiv_x (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3)); > +} > + > +/* > +** u64_z_ptrue: > +** mov z[0-9]+\.d, #1 > +** ret > +*/ > +svuint64_t u64_z_ptrue () > +{ > + return svdiv_z (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3)); > +} > + > +/* > +** u64_m_ptrue: > +** mov z[0-9]+\.d, #1 > +** ret > +*/ >
+svuint64_t u64_m_ptrue () > +{ > + return svdiv_m (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3)); > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_zero.c > b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_zero.c > new file mode 100644 > index 00000000000..00d14a46ced > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_zero.c > @@ -0,0 +1,186 @@ > +/* { dg-final { check-function-bodies "**" "" } } */ > +/* { dg-options "-O2" } */ > + > +#include "arm_sve.h" > + > +/* > +** s64_x_pg_op1: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svint64_t s64_x_pg_op1 (svbool_t pg, svint64_t op2) > +{ > + return svdiv_x (pg, svdup_s64 (0), op2); > +} > + > +/* > +** s64_z_pg_op1: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svint64_t s64_z_pg_op1 (svbool_t pg, svint64_t op2) > +{ > + return svdiv_z (pg, svdup_s64 (0), op2); > +} > + > +/* > +** s64_m_pg_op1: > +** mov z[0-9]+\.d, p[0-7]/z, #0 > +** ret > +*/ > +svint64_t s64_m_pg_op1 (svbool_t pg, svint64_t op2) > +{ > + return svdiv_m (pg, svdup_s64 (0), op2); > +} > + > +/* > +** s64_x_pg_op2: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svint64_t s64_x_pg_op2 (svbool_t pg, svint64_t op1) > +{ > + return svdiv_x (pg, op1, svdup_s64 (0)); > +} > + > +/* > +** s64_z_pg_op2: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svint64_t s64_z_pg_op2 (svbool_t pg, svint64_t op1) > +{ > + return svdiv_z (pg, op1, svdup_s64 (0)); > +} > + > +/* > +** s64_m_pg_op2: > +** mov (z[0-9]+)\.b, #0 > +** sdiv (z[0-9]+\.d), p[0-7]/m, \2, \1\.d > +** ret > +*/ > +svint64_t s64_m_pg_op2 (svbool_t pg, svint64_t op1) > +{ > + return svdiv_m (pg, op1, svdup_s64 (0)); > +} > + > +/* > +** s64_m_ptrue_op1: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svint64_t s64_m_ptrue_op1 (svint64_t op2) > +{ > + return svdiv_m (svptrue_b64 (), svdup_s64 (0), op2); > +} > + > +/* > +** s64_m_ptrue_op2: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svint64_t s64_m_ptrue_op2 (svint64_t op1) > +{ > + return svdiv_m (svptrue_b64 (), op1, svdup_s64 (0)); > +} > + > +/* > +** s64_m_ptrue_op1_op2: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svint64_t s64_m_ptrue_op1_op2 () > +{ > + return svdiv_m (svptrue_b64 (), svdup_s64 (0), svdup_s64 (0)); > +} > + > +/* > +** u64_x_pg_op1: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svuint64_t u64_x_pg_op1 (svbool_t pg, svuint64_t op2) > +{ > + return svdiv_x (pg, svdup_u64 (0), op2); > +} > + > +/* > +** u64_z_pg_op1: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svuint64_t u64_z_pg_op1 (svbool_t pg, svuint64_t op2) > +{ > + return svdiv_z (pg, svdup_u64 (0), op2); > +} > + > +/* > +** u64_m_pg_op1: > +** mov z[0-9]+\.d, p[0-7]/z, #0 > +** ret > +*/ > +svuint64_t u64_m_pg_op1 (svbool_t pg, svuint64_t op2) > +{ > + return svdiv_m (pg, svdup_u64 (0), op2); > +} > + > +/* > +** u64_x_pg_op2: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svuint64_t u64_x_pg_op2 (svbool_t pg, svuint64_t op1) > +{ > + return svdiv_x (pg, op1, svdup_u64 (0)); > +} > + > +/* > +** u64_z_pg_op2: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svuint64_t u64_z_pg_op2 (svbool_t pg, svuint64_t op1) > +{ > + return svdiv_z (pg, op1, svdup_u64 (0)); > +} > + > +/* > +** u64_m_pg_op2: > +** mov (z[0-9]+)\.b, #0 > +** udiv (z[0-9]+\.d), p[0-7]/m, \2, \1\.d > +** ret > +*/ > +svuint64_t u64_m_pg_op2 (svbool_t pg, svuint64_t op1) > +{ > + return svdiv_m (pg, op1, svdup_u64 (0)); > +} > + > +/* > +** u64_m_ptrue_op1: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svuint64_t u64_m_ptrue_op1 (svuint64_t op2) > +{ > + return svdiv_m (svptrue_b64 (), svdup_u64 (0), 
op2); > +} > + > +/* > +** u64_m_ptrue_op2: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svuint64_t u64_m_ptrue_op2 (svuint64_t op1) > +{ > + return svdiv_m (svptrue_b64 (), op1, svdup_u64 (0)); > +} > + > +/* > +** u64_m_ptrue_op1_op2: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svuint64_t u64_m_ptrue_op1_op2 () > +{ > + return svdiv_m (svptrue_b64 (), svdup_u64 (0), svdup_u64 (0)); > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_zero.c > b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_zero.c > new file mode 100644 > index 00000000000..793291449c1 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_zero.c > @@ -0,0 +1,95 @@ > +/* { dg-final { check-function-bodies "**" "" } } */ > +/* { dg-options "-O2" } */ > + > +#include "arm_sve.h" > + > +/* > +** s64_x_pg_op1: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svint64_t s64_x_pg_op1 (svbool_t pg, svint64_t op2) > +{ > + return svmul_x (pg, svdup_s64 (0), op2); > +} > + > +/* > +** s64_z_pg_op1: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svint64_t s64_z_pg_op1 (svbool_t pg, svint64_t op2) > +{ > + return svdiv_z (pg, svdup_s64 (0), op2); > +} > + > +/* > +** s64_m_pg_op1: > +** mov z[0-9]+\.d, p[0-7]/z, #0 > +** ret > +*/ > +svint64_t s64_m_pg_op1 (svbool_t pg, svint64_t op2) > +{ > + return svdiv_m (pg, svdup_s64 (0), op2); > +} > + > +/* > +** s64_x_pg_op2: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svint64_t s64_x_pg_op2 (svbool_t pg, svint64_t op1) > +{ > + return svdiv_x (pg, op1, svdup_s64 (0)); > +} > + > +/* > +** s64_z_pg_op2: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svint64_t s64_z_pg_op2 (svbool_t pg, svint64_t op1) > +{ > + return svdiv_z (pg, op1, svdup_s64 (0)); > +} > + > +/* > +** s64_m_pg_op2: > +** mov (z[0-9]+)\.b, #0 > +** mul (z[0-9]+\.d), p[0-7]+/m, \2, \1\.d > +** ret > +*/ > +svint64_t s64_m_pg_op2 (svbool_t pg, svint64_t op1) > +{ > + return svdiv_m (pg, op1, svdup_s64 (0)); > +} > + > +/* > +** s64_m_ptrue_op1: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svint64_t s64_m_ptrue_op1 (svint64_t op2) > +{ > + return svdiv_m (svptrue_b64 (), svdup_s64 (0), op2); > +} > + > +/* > +** s64_m_ptrue_op2: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svint64_t s64_m_ptrue_op2 (svint64_t op1) > +{ > + return svdiv_m (svptrue_b64 (), op1, svdup_s64 (0)); > +} > + > +/* > +** s64_m_ptrue_op1_op2: > +** mov z[0-9]+\.b, #0 > +** ret > +*/ > +svint64_t s64_m_ptrue_op1_op2 () > +{ > + return svdiv_m (svptrue_b64 (), svdup_s64 (0), svdup_s64 (0)); > +}