https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122263
Bug ID: 122263
Summary: cannot autovectorize max reduction of double
Product: gcc
Version: 16.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: middle-end
Assignee: unassigned at gcc dot gnu.org
Reporter: manu at gcc dot gnu.org
Target Milestone: ---
```
// Element-count / index type used by all three functions below.  Being an
// unsigned char, it also bounds the loop counters to that type's range.
typedef unsigned char dimension_t;
// Comparison-based maximum.  Each argument may be evaluated twice.  When the
// comparison is false (including when either operand is NaN) the result is y.
#define MAX(x,y) ((x)>(y)?(x):(y))
// Wraps GCC's `assume` attribute; used below as a statement of its own
// (`ASSUME(cond);`) to pass a condition to the optimizer.
#define ASSUME(EXPR) __attribute__((__assume__(EXPR)))
// Alternative MAX based on fmax(), left disabled in the reproducer.
//#define MAX(x,y) fmax((x),(y))
// Running maximum of the ratios pa[d] / pb[d] for d in [0, dim), starting
// from 0.  Returns 0 when dim is 0.
//
// pa, pb: input arrays, read at indices 0 .. dim-1; declared restrict.
// dim:    number of elements to visit.
//
// This is the function the report says is NOT vectorized.
double
epsilon(const double * restrict pa, const double * restrict pb,
dimension_t dim)
{
double eps_max = 0;
for (dimension_t d=0; d < dim; d++) {
// Optimizer hints: both operands are finite ...
ASSUME(__builtin_isfinite(pa[d]) && __builtin_isfinite(pb[d]));
// ... and strictly positive, so the ratio below is a positive, non-NaN value.
ASSUME(pa[d] > 0 && pb[d] > 0);
double eps_tmp = pa[d] / pb[d];
// MAX expands to (eps_max > eps_tmp ? eps_max : eps_tmp): a compare-and-select
// on doubles rather than a call to fmax().
eps_max = MAX(eps_max, eps_tmp);
}
return eps_max;
}
// Integer variant of epsilon(): each ratio pa[d] / pb[d] is converted to int
// before being compared, and the running maximum (starting from 0) is an int.
// Returns 0 when dim is 0.  No ASSUME hints are used here.
//
// The report says this loop IS vectorized.
int
epsilon2(const double * restrict pa, const double * restrict pb,
dimension_t dim)
{
int eps_max = 0;
for (dimension_t d=0; d < dim; d++) {
// Implicit double -> int conversion of the quotient.
int eps_tmp = pa[d] / pb[d];
eps_max = MAX(eps_max, eps_tmp);
}
return eps_max;
}
// Variant of epsilon() without the ASSUME hints; instead the loop is annotated
// with an OpenMP `simd` directive declaring eps_max as a max-reduction
// variable.  Returns the running maximum of pa[d] / pb[d], starting from 0.
//
// The report says this loop IS vectorized (the compile command uses -fopenmp).
double
epsilon3(const double * restrict pa, const double * restrict pb,
dimension_t dim)
{
double eps_max = 0;
#pragma omp simd reduction(max:eps_max)
// NOTE(review): this loop starts at d=1, so pa[0] / pb[0] is never considered,
// whereas epsilon() and epsilon2() start at d=0 -- confirm whether intended.
for (dimension_t d=1; d < dim; d++) {
double eps_tmp = pa[d] / pb[d];
eps_max = MAX(eps_max, eps_tmp);
}
return eps_max;
}
```
Compiling with
```
gcc -O3 -march=x86-64-v3 -Wall -Wextra -fopt-info-vec-missed-optimized -fopenmp epsilon.c
```
produces:
```
<source>:12:29: missed: couldn't vectorize loop
<source>:8:1: missed: not vectorized: unsupported use in stmt.
<source>:26:29: optimized: loop vectorized using 32 byte vectors and unroll factor 8
<source>:26:29: optimized: epilogue loop vectorized using 16 byte vectors and unroll factor 4
<source>:38:13: optimized: loop vectorized using 32 byte vectors and unroll factor 4
<source>:40:9: optimized: loop vectorized using 32 byte vectors and unroll factor 4
<source>:38:13: optimized: loop vectorized using 32 byte vectors and unroll factor 4
```
That is, epsilon() is not vectorized but the other two functions are.
Adding -ffinite-math-only -fno-signed-zeros does allow autovectorization, but
the ASSUME() conditions should be sufficient to reach the same outcome.
https://godbolt.org/z/753sq4vMs