On Mon, 23 Feb 2026, Tamar Christina wrote:

> The loop
> 
> void
> __attribute__((noipa))
> bug (int f, int *w, int l)
> {
>   int i;
>   for (i = 0; i < l; ++i)
>     while (f % w[i]) w[i]--;
> }
> 
> is an uncounted loop which performs a trapping operation during vectorization.
> Normally the vectorizer doesn't stop vectorization if the operation can't be
> masked (though ifcvt does).
> 
> For early breaks, however, this is unsafe as we would be introducing a trap
> where the original scalar code may not have had one.
> 
> Reductions, however, don't require masking when the scalar epilogue is used to
> restart the last iteration, as we use the previous value of the vector in that
> case and don't reduce "inactive" lanes.  This would need to be adjusted when we
> support staying inside the vector loop directly.
> 
> Some tests now fail to vectorize (including some tsvc ones) which I could have
> xfail'ed but instead decided to add -fno-trapping-math just to get the
> additional coverage they provide.
> 
> Bootstrapped and regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
> (-m32, -m64) with no issues.
> 
> Any objections?

I was expecting the check to be in vect_analyze_early_break_dependences,
since I think early break vectorization needs special handling when a
trapping operation comes before the early exit condition or feeds it,
as in the testcase at hand.  Those operations cannot be masked since
the mask itself is not yet computed.

There might be other latent issues with trapping operations but I'd
rather split those out.

So, am I missing something?

Thanks,
Richard.

> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       PR tree-optimization/124142
>       * tree-vect-stmts.cc (vectorizable_call, vectorizable_operation):
>       For early break require masking when operation can trap.
> 
> gcc/testsuite/ChangeLog:
> 
>       PR tree-optimization/124142
>       * gcc.dg/vect/tsvc/vect-tsvc-s481.c: Add -fno-trapping-math.
>       * gcc.dg/vect/tsvc/vect-tsvc-s482.c: Likewise.
>       * gcc.dg/vect/vect-early-break_61.c: Likewise.
>       * gcc.target/aarch64/vect-early-break-cbranch_3.c: Likewise.
>       * gcc.dg/vect/vect-early-break_143-pr124142.c: New test.
> 
> ---
> diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c 
> b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c
> index 
> e4433385d6686806a75fffe22f90e3bfb603564c..23c9961691d2f0c9528949c5c44dae3e264dc3ec
>  100644
> --- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c
> +++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s481.c
> @@ -1,7 +1,7 @@
>  /*  This file is distributed under the University of Illinois Open Source
>      License. See license.txt for details.  */
>  
> -/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
> +/* { dg-additional-options "--param vect-epilogues-nomask=0 -fno-trapping-math" } */
>  /* { dg-require-effective-target vect_float } */
>  /* { dg-require-effective-target vect_early_break_hw } */
>  /* { dg-add-options vect_early_break } */
> @@ -41,4 +41,4 @@ int main (int argc, char **argv)
>    return 0;
>  }
>  
> -/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { ! 
> vect_early_break} } } } */
> +/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail { ! 
> vect_early_break } } } } */
> diff --git a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c 
> b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c
> index 
> 146df409ecc64e9535583c0d0083469d7aa24031..38426845a3d115432bfc68d9e1e7539fe10e27e2
>  100644
> --- a/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c
> +++ b/gcc/testsuite/gcc.dg/vect/tsvc/vect-tsvc-s482.c
> @@ -1,7 +1,7 @@
>  /*  This file is distributed under the University of Illinois Open Source
>      License. See license.txt for details.  */
>  
> -/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
> +/* { dg-additional-options "--param vect-epilogues-nomask=0 -fno-trapping-math" } */
>  /* { dg-require-effective-target vect_float } */
>  /* { dg-require-effective-target vect_early_break_hw } */
>  /* { dg-add-options vect_early_break } */
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_143-pr124142.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_143-pr124142.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..b9141e3f15a9d152118c2d8e84fc45d5e83066cb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_143-pr124142.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +
> +/* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" } } */
> +
> +void
> +__attribute__((noipa))
> +bug (int f, int *w, int l)
> +{
> +  int i;
> +  for (i = 0; i < l; ++i)
> +    while (f % w[i]) w[i]--;
> +}
> +
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_61.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_61.c
> index 
> c789ec01f32c6b958c6a3663531a7b7517b94477..0cd20549cdc656baad2b5561e39ca999bca98bb0
>  100644
> --- a/gcc/testsuite/gcc.dg/vect/vect-early-break_61.c
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_61.c
> @@ -2,6 +2,7 @@
>  /* { dg-do compile } */
>  /* { dg-require-effective-target vect_early_break } */
>  /* { dg-require-effective-target vect_float } */
> +/* { dg-additional-options "-fno-trapping-math" } */
>  
>  typedef float real_t;
>  __attribute__((aligned(64))) real_t a[32000], b[32000], c[32000];
> diff --git a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c 
> b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c
> index 
> 8980b9f04f9a15ded9e90954e25b2c0578681761..b252b80ef291efd4f6fc98224a44ef7fadc68d6c
>  100644
> --- a/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c
> +++ b/gcc/testsuite/gcc.target/aarch64/vect-early-break-cbranch_3.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-options "-O3 -fno-schedule-insns -fno-reorder-blocks 
> -fno-schedule-insns2 --param aarch64-autovec-preference=asimd-only" } */
> +/* { dg-options "-O3 -fno-trapping-math -fno-schedule-insns 
> -fno-reorder-blocks -fno-schedule-insns2 --param 
> aarch64-autovec-preference=asimd-only" } */
>  /* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */
>  
>  #pragma GCC target "+sve"
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 
> 22285250aa8d4f600721c647a83d1b2bafe7ef2a..cb7217ad6f92ad4b2bd543733c802606f3fba03e
>  100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -3650,6 +3650,18 @@ vectorizable_call (vec_info *vinfo,
>             LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
>           }
>       }
> +
> +      /* If the operation traps, and we're an early break loop then don't allow
> +      vectorization if masking isn't supported.  */
> +      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> +     {
> +       if (dump_enabled_p ())
> +         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                          "can't use a fully-masked loop and loop has "
> +                          "multiple exits.  Cannot vectorize as operation "
> +                          "may trap.\n");
> +       return false;
> +     }
>      }
>  
>    /* If that fails, try asking for a target-specific built-in function.  */
> @@ -6792,7 +6804,8 @@ vectorizable_operation (vec_info *vinfo,
>       Similarly, if this operation is part of a reduction, a fully-masked
>       loop should only change the active lanes of the reduction chain,
>       keeping the inactive lanes as-is.  */
> -  bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
> +  bool could_trap_p = gimple_could_trap_p (stmt);
> +  bool mask_out_inactive = ((!is_invariant && could_trap_p)
>                           || reduc_idx >= 0);
>  
>    if (cost_vec) /* transformation not required.  */
> @@ -6821,6 +6834,23 @@ vectorizable_operation (vec_info *vinfo,
>           }
>       }
>  
> +      /* If the operation traps, and we're an early break loop then don't allow
> +      vectorization if masking isn't supported.  Reductions are OK because if
> +      we take an early exit, the epilog will get the value of the previous
> +      iterations and we recompute the remainder.  */
> +      if (loop_vinfo
> +       && could_trap_p
> +       && !LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
> +       && LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> +     {
> +       if (dump_enabled_p ())
> +         dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +                          "can't use a fully-masked loop and loop has "
> +                          "multiple exits.  Cannot vectorize as operation "
> +                          "may trap.\n");
> +       return false;
> +     }
> +
>        /* Put types on constant and invariant SLP children.  */
>        if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
>         || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
> 
> 
> 

-- 
Richard Biener <[email protected]>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Jochen Jaser, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to