On Mon, 19 Aug 2024, Richard Sandiford wrote:

> Jennifer Schmitz <jschm...@nvidia.com> writes:
> > This patch implements constant folding for svdiv. A new gimple_folder
> > method was added that uses const_binop to fold binary operations using a
> > given tree_code. For svdiv, this method is used to fold constant
> > operands.
> > Additionally, if at least one of the operands is a zero vector, svdiv is
> > folded to a zero vector (in case of ptrue, _x, or _z).
> > Tests were added to check the produced assembly for different
> > predicates and signed and unsigned integers.
> > Currently, constant folding is only implemented for integers and binary
> > operations, but extending it to float types and other operations is
> > planned for a future follow-up.
> >
> > The patch was bootstrapped and regtested on aarch64-linux-gnu, no
> > regression.
> > OK for mainline?
> >
> > Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
> >
> > gcc/
> >
> > 	* config/aarch64/aarch64-sve-builtins-base.cc
> > 	(svdiv_impl::fold): Add constant folding.
> > 	* config/aarch64/aarch64-sve-builtins.cc
> > 	(gimple_folder::const_fold): New method.
> > 	* config/aarch64/aarch64-sve-builtins.h
> > 	(gimple_folder::const_fold): Add function declaration.
> >
> > gcc/testsuite/
> >
> > 	* gcc.target/aarch64/sve/const_fold_div_1.c: New test.
> > 	* gcc.target/aarch64/sve/const_fold_div_zero.c: Likewise.
> >
> > From 79355d876503558f661b46ebbeaa11c74ce176cb Mon Sep 17 00:00:00 2001
> > From: Jennifer Schmitz <jschm...@nvidia.com>
> > Date: Thu, 15 Aug 2024 05:42:06 -0700
> > Subject: [PATCH 1/2] SVE intrinsics: Fold constant operands for svdiv
> >
> > This patch implements constant folding for svdiv. A new gimple_folder
> > method was added that uses const_binop to fold binary operations using a
> > given tree_code. For svdiv, this method is used to fold constant
> > operands.
> > Additionally, if at least one of the operands is a zero vector, svdiv is
> > folded to a zero vector (in case of ptrue, _x, or _z).
> > Tests were added to check the produced assembly for different
> > predicates and signed and unsigned integers.
> > Currently, constant folding is only implemented for integers and binary
> > operations, but extending it to float types and other operations is
> > planned for a future follow-up.
> >
> > The patch was bootstrapped and regtested on aarch64-linux-gnu, no
> > regression.
> > OK for mainline?
> >
> > Signed-off-by: Jennifer Schmitz <jschm...@nvidia.com>
> >
> > gcc/
> >
> > 	* config/aarch64/aarch64-sve-builtins-base.cc
> > 	(svdiv_impl::fold): Add constant folding.
> > 	* config/aarch64/aarch64-sve-builtins.cc
> > 	(gimple_folder::const_fold): New method.
> > 	* config/aarch64/aarch64-sve-builtins.h
> > 	(gimple_folder::const_fold): Add function declaration.
> >
> > gcc/testsuite/
> >
> > 	* gcc.target/aarch64/sve/const_fold_div_1.c: New test.
> > 	* gcc.target/aarch64/sve/const_fold_div_zero.c: Likewise.
> > ---
> >  .../aarch64/aarch64-sve-builtins-base.cc      |  30 ++-
> >  gcc/config/aarch64/aarch64-sve-builtins.cc    |  25 +++
> >  gcc/config/aarch64/aarch64-sve-builtins.h     |   1 +
> >  .../gcc.target/aarch64/sve/const_fold_div_1.c | 128 ++++++++++++
> >  .../aarch64/sve/const_fold_div_zero.c         | 186 ++++++++++++++++++
> >  .../aarch64/sve/const_fold_mul_zero.c         |  95 +++++++++
> >  6 files changed, 462 insertions(+), 3 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_1.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_zero.c
> >  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_zero.c
> >
> > diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> > index d55bee0b72f..7f948ecc0c7 100644
> > --- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> > +++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
> > @@ -755,8 +755,32 @@ public:
> >    gimple *
> >    fold (gimple_folder &f) const override
> >    {
> > -    tree divisor = gimple_call_arg (f.call, 2);
> > -    tree divisor_cst = uniform_integer_cst_p (divisor);
> > +    tree pg = gimple_call_arg (f.call, 0);
> > +    tree op1 = gimple_call_arg (f.call, 1);
> > +    tree op2 = gimple_call_arg (f.call, 2);
> > +
> > +    /* For integer division, if the dividend or divisor are all zeros,
> > +       fold to zero vector. */
> > +    int step = f.type_suffix (0).element_bytes;
> > +    if (f.pred != PRED_m || is_ptrue (pg, step))
> > +      {
> > +        if (vector_cst_all_same (op1, step)
> > +            && integer_zerop (VECTOR_CST_ENCODED_ELT (op1, 0)))
> > +          return gimple_build_assign (f.lhs, op1);
> > +        if (vector_cst_all_same (op2, step)
> > +            && integer_zerop (VECTOR_CST_ENCODED_ELT (op2, 0)))
> > +          return gimple_build_assign (f.lhs, op2);
> > +      }
>
> Rather than handle all-zeros as a special case here, I think we should
> try to do it elementwise in the const_binop. More below.
>
> > +
> > +    /* Try to fold constant operands. */
> > +    tree_code m_code = f.type_suffix (0).integer_p ? TRUNC_DIV_EXPR
> > +                                                   : RDIV_EXPR;
> > +    if (gimple *new_stmt = f.const_fold (m_code))
> > +      return new_stmt;
> > +
> > +    /* If the divisor is a uniform power of 2, fold to a shift
> > +       instruction. */
> > +    tree divisor_cst = uniform_integer_cst_p (op2);
> >
> >      if (!divisor_cst || !integer_pow2p (divisor_cst))
> >        return NULL;
> > @@ -770,7 +794,7 @@ public:
> >                                      shapes::binary_uint_opt_n, MODE_n,
> >                                      f.type_suffix_ids, GROUP_none, f.pred);
> >          call = f.redirect_call (instance);
> > -        tree d = INTEGRAL_TYPE_P (TREE_TYPE (divisor)) ? divisor : divisor_cst;
> > +        tree d = INTEGRAL_TYPE_P (TREE_TYPE (op2)) ? op2 : divisor_cst;
> >          new_divisor = wide_int_to_tree (TREE_TYPE (d), tree_log2 (d));
> >        }
> >      else
> > diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
> > index 0a560eaedca..0f69c586464 100644
> > --- a/gcc/config/aarch64/aarch64-sve-builtins.cc
> > +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
> > @@ -3691,6 +3691,31 @@ gimple_folder::fold_to_vl_pred (unsigned int vl)
> >    return gimple_build_assign (lhs, builder.build ());
> >  }
> >
> > +/* If the predicate is svptrue or PRED_x, try to perform constant folding
> > +   on the call using the given tree_code.
> > +   Return the new statement on success, otherwise return null.  */
> > +gimple *
> > +gimple_folder::const_fold (tree_code code)
> > +{
> > +  tree pg = gimple_call_arg (call, 0);
> > +  if (type_suffix (0).integer_p
> > +      && (is_ptrue (pg, type_suffix (0).element_bytes)
> > +          || pred == PRED_x))
> > +    {
> > +      if (TREE_CODE_CLASS (code) == tcc_binary)
> > +        {
> > +          gcc_assert (gimple_call_num_args (call) == 3);
> > +          tree op1 = gimple_call_arg (call, 1);
> > +          tree op2 = gimple_call_arg (call, 2);
> > +          if (TREE_TYPE (op1) != TREE_TYPE (op2))
> > +            return NULL;
>
> I assume this is rejecting the svdiv_n_* case, is that right?
> I think we should instead try to handle that too, since the _n
> variants are specifically provided as a convenience for uniform divisors.
>
> It looks like const_binop should just work for that case too, thanks to
> the shift handling. (AFAICT, the handling is not explicitly restricted
> to shifts.) But if it doesn't, I think it would be a reasonable extension.
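
For example (not from the patch, just to make the _n case concrete), a call
like the one below ends up with a scalar INTEGER_CST as op2, which is
presumably what the TREE_TYPE check above rejects, even though it could
fold to a constant vector of 2s:

  #include <arm_sve.h>

  /* Would ideally fold to the equivalent of svdup_s64 (2) once the
     _n form is handled as well.  */
  svint64_t
  div_n_example ()
  {
    return svdiv_n_s64_x (svptrue_b64 (), svdup_s64 (6), 3);
  }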
>
> > +          if (tree res = const_binop (code, TREE_TYPE (lhs), op1, op2))
> > +            return gimple_build_assign (lhs, res);
>
> Going back to the comment above about handling /0 elementwise:
> how about splitting the vector part of const_binop out into a
> new public function with the following interface:
>
>   tree vector_const_binop (tree_code code, tree arg1, tree arg2,
>                            tree (*elt_const_binop) (code, tree, tree))
>
> where "the vector part" is everything in the function after:
>
>   if (TREE_CODE (arg1) == VECTOR_CST
>       && TREE_CODE (arg2) == VECTOR_CST
>   ...
>
> Then const_binop itself can just use:
>
>   return vector_const_binop (code, arg1, arg2, const_binop);
>
> whereas aarch64 code can pass its own wrapper that handles the extra
> defined cases.  +Richi in case he has any thoughts on this.

I think that's reasonable.
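
To make the plumbing concrete, gimple_folder::const_fold could then stay
much as posted and simply go through the new hook, along the lines of the
sketch below (untested; "aarch64_const_binop" is a placeholder name for
the wrapper discussed above):

  /* Instead of calling const_binop on the two vector operands directly:  */
  if (tree res = vector_const_binop (code, op1, op2, aarch64_const_binop))
    return gimple_build_assign (lhs, res);

so that division by zero and overflow get the intrinsics' defined behaviour
from the wrapper instead of making const_binop give up on the whole vector.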

> I think the starting point for the aarch64 implementation should be
> something like:
>
>   if (poly_int_tree_p (arg1) && poly_int_tree_p (arg2))
>     {
>       poly_wide_int poly_res;
>       tree type = TREE_TYPE (arg1);
>       signop sign = TYPE_SIGN (type);
>       wi::overflow_type overflow = wi::OVF_NONE;
>
>       ...if chain of special cases...
>       else if (!poly_int_binop (poly_res, code, arg1, arg2, sign, &overflow))
>         return NULL_TREE;
>       return force_fit_type (type, poly_res, false,
>                              TREE_OVERFLOW (arg1) | TREE_OVERFLOW (arg2));
>     }
>   return NULL_TREE;
>
> which is adapted from int_const_binop, and would need poly_int_binop
> to become a public function.  The key thing here is that we completely
> ignore overflow in the calculation, because the semantics of the intrinsics
> are that language-level overflow does not happen.
>
> Thanks,
> Richard
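
To spell out what ignoring overflow means for the folding, cases like the
two below would be expected to fold to whatever the SDIV instruction
produces, i.e. 0 for division by zero and a wrapped result for
INT64_MIN / -1 (illustrative extra test cases, not part of the posted
patch; the exact scan patterns would still need checking):

  #include <arm_sve.h>
  #include <stdint.h>

  /* Every active element is 5 / 0, which SDIV defines as 0.  */
  svint64_t div_by_zero ()
  {
    return svdiv_x (svptrue_b64 (), svdup_s64 (5), svdup_s64 (0));
  }

  /* INT64_MIN / -1 wraps back to INT64_MIN rather than trapping.  */
  svint64_t div_min_by_minus_one ()
  {
    return svdiv_x (svptrue_b64 (), svdup_s64 (INT64_MIN), svdup_s64 (-1));
  }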
>
> > +        }
> > +    }
> > +  return NULL;
> > +}
> > +
> >  /* Try to fold the call. Return the new statement on success and null
> >     on failure. */
> >  gimple *
> > diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
> > index 9ab6f202c30..db30225a008 100644
> > --- a/gcc/config/aarch64/aarch64-sve-builtins.h
> > +++ b/gcc/config/aarch64/aarch64-sve-builtins.h
> > @@ -636,6 +636,7 @@ public:
> >    gimple *fold_to_pfalse ();
> >    gimple *fold_to_ptrue ();
> >    gimple *fold_to_vl_pred (unsigned int);
> > +  gimple *const_fold (tree_code);
> >
> >    gimple *fold ();
> >
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_1.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_1.c
> > new file mode 100644
> > index 00000000000..d8460a4d336
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_1.c
> > @@ -0,0 +1,128 @@
> > +/* { dg-final { check-function-bodies "**" "" } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include "arm_sve.h"
> > +
> > +/*
> > +** s64_x_pg:
> > +**	mov	z[0-9]+\.d, #1
> > +**	ret
> > +*/
> > +svint64_t s64_x_pg (svbool_t pg)
> > +{
> > +  return svdiv_x (pg, svdup_s64 (5), svdup_s64 (3));
> > +}
> > +
> > +/*
> > +** s64_z_pg:
> > +**	mov	z[0-9]+\.d, p[0-7]/z, #1
> > +**	ret
> > +*/
> > +svint64_t s64_z_pg (svbool_t pg)
> > +{
> > +  return svdiv_z (pg, svdup_s64 (5), svdup_s64 (3));
> > +}
> > +
> > +/*
> > +** s64_m_pg:
> > +**	mov	(z[0-9]+\.d), #3
> > +**	mov	(z[0-9]+\.d), #5
> > +**	sdiv	\2, p[0-7]/m, \2, \1
> > +**	ret
> > +*/
> > +svint64_t s64_m_pg (svbool_t pg)
> > +{
> > +  return svdiv_m (pg, svdup_s64 (5), svdup_s64 (3));
> > +}
> > +
> > +/*
> > +** s64_x_ptrue:
> > +**	mov	z[0-9]+\.d, #1
> > +**	ret
> > +*/
> > +svint64_t s64_x_ptrue ()
> > +{
> > +  return svdiv_x (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
> > +}
> > +
> > +/*
> > +** s64_z_ptrue:
> > +**	mov	z[0-9]+\.d, #1
> > +**	ret
> > +*/
> > +svint64_t s64_z_ptrue ()
> > +{
> > +  return svdiv_z (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
> > +}
> > +
> > +/*
> > +** s64_m_ptrue:
> > +**	mov	z[0-9]+\.d, #1
> > +**	ret
> > +*/
> > +svint64_t s64_m_ptrue ()
> > +{
> > +  return svdiv_m (svptrue_b64 (), svdup_s64 (5), svdup_s64 (3));
> > +}
> > +
> > +/*
> > +** u64_x_pg:
> > +**	mov	z[0-9]+\.d, #1
> > +**	ret
> > +*/
> > +svuint64_t u64_x_pg (svbool_t pg)
> > +{
> > +  return svdiv_x (pg, svdup_u64 (5), svdup_u64 (3));
> > +}
> > +
> > +/*
> > +** u64_z_pg:
> > +**	mov	z[0-9]+\.d, p[0-7]/z, #1
> > +**	ret
> > +*/
> > +svuint64_t u64_z_pg (svbool_t pg)
> > +{
> > +  return svdiv_z (pg, svdup_u64 (5), svdup_u64 (3));
> > +}
> > +
> > +/*
> > +** u64_m_pg:
> > +**	mov	(z[0-9]+\.d), #3
> > +**	mov	(z[0-9]+\.d), #5
> > +**	udiv	\2, p[0-7]/m, \2, \1
> > +**	ret
> > +*/
> > +svuint64_t u64_m_pg (svbool_t pg)
> > +{
> > +  return svdiv_m (pg, svdup_u64 (5), svdup_u64 (3));
> > +}
> > +
> > +/*
> > +** u64_x_ptrue:
> > +**	mov	z[0-9]+\.d, #1
> > +**	ret
> > +*/
> > +svuint64_t u64_x_ptrue ()
> > +{
> > +  return svdiv_x (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
> > +}
> > +
> > +/*
> > +** u64_z_ptrue:
> > +**	mov	z[0-9]+\.d, #1
> > +**	ret
> > +*/
> > +svuint64_t u64_z_ptrue ()
> > +{
> > +  return svdiv_z (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
> > +}
> > +
> > +/*
> > +** u64_m_ptrue:
> > +**	mov	z[0-9]+\.d, #1
> > +**	ret
> > +*/
> > +svuint64_t u64_m_ptrue ()
> > +{
> > +  return svdiv_m (svptrue_b64 (), svdup_u64 (5), svdup_u64 (3));
> > +}
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_zero.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_zero.c
> > new file mode 100644
> > index 00000000000..00d14a46ced
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_div_zero.c
> > @@ -0,0 +1,186 @@
> > +/* { dg-final { check-function-bodies "**" "" } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include "arm_sve.h"
> > +
> > +/*
> > +** s64_x_pg_op1:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svint64_t s64_x_pg_op1 (svbool_t pg, svint64_t op2)
> > +{
> > +  return svdiv_x (pg, svdup_s64 (0), op2);
> > +}
> > +
> > +/*
> > +** s64_z_pg_op1:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svint64_t s64_z_pg_op1 (svbool_t pg, svint64_t op2)
> > +{
> > +  return svdiv_z (pg, svdup_s64 (0), op2);
> > +}
> > +
> > +/*
> > +** s64_m_pg_op1:
> > +**	mov	z[0-9]+\.d, p[0-7]/z, #0
> > +**	ret
> > +*/
> > +svint64_t s64_m_pg_op1 (svbool_t pg, svint64_t op2)
> > +{
> > +  return svdiv_m (pg, svdup_s64 (0), op2);
> > +}
> > +
> > +/*
> > +** s64_x_pg_op2:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svint64_t s64_x_pg_op2 (svbool_t pg, svint64_t op1)
> > +{
> > +  return svdiv_x (pg, op1, svdup_s64 (0));
> > +}
> > +
> > +/*
> > +** s64_z_pg_op2:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svint64_t s64_z_pg_op2 (svbool_t pg, svint64_t op1)
> > +{
> > +  return svdiv_z (pg, op1, svdup_s64 (0));
> > +}
> > +
> > +/*
> > +** s64_m_pg_op2:
> > +**	mov	(z[0-9]+)\.b, #0
> > +**	sdiv	(z[0-9]+\.d), p[0-7]/m, \2, \1\.d
> > +**	ret
> > +*/
> > +svint64_t s64_m_pg_op2 (svbool_t pg, svint64_t op1)
> > +{
> > +  return svdiv_m (pg, op1, svdup_s64 (0));
> > +}
> > +
> > +/*
> > +** s64_m_ptrue_op1:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svint64_t s64_m_ptrue_op1 (svint64_t op2)
> > +{
> > +  return svdiv_m (svptrue_b64 (), svdup_s64 (0), op2);
> > +}
> > +
> > +/*
> > +** s64_m_ptrue_op2:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svint64_t s64_m_ptrue_op2 (svint64_t op1)
> > +{
> > +  return svdiv_m (svptrue_b64 (), op1, svdup_s64 (0));
> > +}
> > +
> > +/*
> > +** s64_m_ptrue_op1_op2:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svint64_t s64_m_ptrue_op1_op2 ()
> > +{
> > +  return svdiv_m (svptrue_b64 (), svdup_s64 (0), svdup_s64 (0));
> > +}
> > +
> > +/*
> > +** u64_x_pg_op1:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svuint64_t u64_x_pg_op1 (svbool_t pg, svuint64_t op2)
> > +{
> > +  return svdiv_x (pg, svdup_u64 (0), op2);
> > +}
> > +
> > +/*
> > +** u64_z_pg_op1:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svuint64_t u64_z_pg_op1 (svbool_t pg, svuint64_t op2)
> > +{
> > +  return svdiv_z (pg, svdup_u64 (0), op2);
> > +}
> > +
> > +/*
> > +** u64_m_pg_op1:
> > +**	mov	z[0-9]+\.d, p[0-7]/z, #0
> > +**	ret
> > +*/
> > +svuint64_t u64_m_pg_op1 (svbool_t pg, svuint64_t op2)
> > +{
> > +  return svdiv_m (pg, svdup_u64 (0), op2);
> > +}
> > +
> > +/*
> > +** u64_x_pg_op2:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svuint64_t u64_x_pg_op2 (svbool_t pg, svuint64_t op1)
> > +{
> > +  return svdiv_x (pg, op1, svdup_u64 (0));
> > +}
> > +
> > +/*
> > +** u64_z_pg_op2:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svuint64_t u64_z_pg_op2 (svbool_t pg, svuint64_t op1)
> > +{
> > +  return svdiv_z (pg, op1, svdup_u64 (0));
> > +}
> > +
> > +/*
> > +** u64_m_pg_op2:
> > +**	mov	(z[0-9]+)\.b, #0
> > +**	udiv	(z[0-9]+\.d), p[0-7]/m, \2, \1\.d
> > +**	ret
> > +*/
> > +svuint64_t u64_m_pg_op2 (svbool_t pg, svuint64_t op1)
> > +{
> > +  return svdiv_m (pg, op1, svdup_u64 (0));
> > +}
> > +
> > +/*
> > +** u64_m_ptrue_op1:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svuint64_t u64_m_ptrue_op1 (svuint64_t op2)
> > +{
> > +  return svdiv_m (svptrue_b64 (), svdup_u64 (0), op2);
> > +}
> > +
> > +/*
> > +** u64_m_ptrue_op2:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svuint64_t u64_m_ptrue_op2 (svuint64_t op1)
> > +{
> > +  return svdiv_m (svptrue_b64 (), op1, svdup_u64 (0));
> > +}
> > +
> > +/*
> > +** u64_m_ptrue_op1_op2:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svuint64_t u64_m_ptrue_op1_op2 ()
> > +{
> > +  return svdiv_m (svptrue_b64 (), svdup_u64 (0), svdup_u64 (0));
> > +}
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_zero.c b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_zero.c
> > new file mode 100644
> > index 00000000000..793291449c1
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/const_fold_mul_zero.c
> > @@ -0,0 +1,95 @@
> > +/* { dg-final { check-function-bodies "**" "" } } */
> > +/* { dg-options "-O2" } */
> > +
> > +#include "arm_sve.h"
> > +
> > +/*
> > +** s64_x_pg_op1:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svint64_t s64_x_pg_op1 (svbool_t pg, svint64_t op2)
> > +{
> > +  return svmul_x (pg, svdup_s64 (0), op2);
> > +}
> > +
> > +/*
> > +** s64_z_pg_op1:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svint64_t s64_z_pg_op1 (svbool_t pg, svint64_t op2)
> > +{
> > +  return svdiv_z (pg, svdup_s64 (0), op2);
> > +}
> > +
> > +/*
> > +** s64_m_pg_op1:
> > +**	mov	z[0-9]+\.d, p[0-7]/z, #0
> > +**	ret
> > +*/
> > +svint64_t s64_m_pg_op1 (svbool_t pg, svint64_t op2)
> > +{
> > +  return svdiv_m (pg, svdup_s64 (0), op2);
> > +}
> > +
> > +/*
> > +** s64_x_pg_op2:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svint64_t s64_x_pg_op2 (svbool_t pg, svint64_t op1)
> > +{
> > +  return svdiv_x (pg, op1, svdup_s64 (0));
> > +}
> > +
> > +/*
> > +** s64_z_pg_op2:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svint64_t s64_z_pg_op2 (svbool_t pg, svint64_t op1)
> > +{
> > +  return svdiv_z (pg, op1, svdup_s64 (0));
> > +}
> > +
> > +/*
> > +** s64_m_pg_op2:
> > +**	mov	(z[0-9]+)\.b, #0
> > +**	mul	(z[0-9]+\.d), p[0-7]+/m, \2, \1\.d
> > +**	ret
> > +*/
> > +svint64_t s64_m_pg_op2 (svbool_t pg, svint64_t op1)
> > +{
> > +  return svdiv_m (pg, op1, svdup_s64 (0));
> > +}
> > +
> > +/*
> > +** s64_m_ptrue_op1:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svint64_t s64_m_ptrue_op1 (svint64_t op2)
> > +{
> > +  return svdiv_m (svptrue_b64 (), svdup_s64 (0), op2);
> > +}
> > +
> > +/*
> > +** s64_m_ptrue_op2:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svint64_t s64_m_ptrue_op2 (svint64_t op1)
> > +{
> > +  return svdiv_m (svptrue_b64 (), op1, svdup_s64 (0));
> > +}
> > +
> > +/*
> > +** s64_m_ptrue_op1_op2:
> > +**	mov	z[0-9]+\.b, #0
> > +**	ret
> > +*/
> > +svint64_t s64_m_ptrue_op1_op2 ()
> > +{
> > +  return svdiv_m (svptrue_b64 (), svdup_s64 (0), svdup_s64 (0));
> > +}
>

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)