https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122263

            Bug ID: 122263
           Summary: cannot autovectorize max reduction of double
           Product: gcc
           Version: 16.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: manu at gcc dot gnu.org
  Target Milestone: ---

```
/* Index/count type used by the loops below; an unsigned char. */
typedef unsigned char dimension_t;

/* Larger of x and y via a plain comparison. Both arguments appear twice in
 * the expansion, so they must be free of side effects. */
#define MAX(x,y) ((x)>(y)?(x):(y))
/* Wraps EXPR in the GNU __assume__ attribute; used below as a statement that
 * passes a condition to the optimizer. */
#define ASSUME(EXPR) __attribute__((__assume__(EXPR)))
/* Alternative MAX based on fmax(), left disabled. */
//#define MAX(x,y) fmax((x),(y))

/*
 * Max-reduction over the element-wise ratios pa[d] / pb[d] for d in [0, dim).
 *
 * pa, pb: arrays with at least dim readable elements, declared restrict.
 * dim:    number of elements to process.
 *
 * The running maximum starts at 0, so 0 is returned when dim is 0.
 */
double
epsilon(const double * restrict pa, const double * restrict pb,
dimension_t dim)
{
    double eps_max = 0;
    for (dimension_t d=0; d < dim; d++) {
        /* Optimizer hints through the ASSUME macro: both operands are finite
         * and strictly positive. */
        ASSUME(__builtin_isfinite(pa[d]) && __builtin_isfinite(pb[d]));
        ASSUME(pa[d] > 0 && pb[d] > 0);
        double eps_tmp = pa[d] / pb[d];
        /* MAX keeps eps_max only when eps_max > eps_tmp compares true. */
        eps_max = MAX(eps_max, eps_tmp);
    }
    return eps_max;
}

/*
 * Integer flavour of the ratio max-reduction: every quotient pa[i] / pb[i]
 * for i in [0, dim) is converted to int before it is compared with the
 * running maximum.
 *
 * pa, pb: arrays with at least dim readable elements, declared restrict.
 * dim:    number of elements to process.
 *
 * The running maximum starts at 0, so the result is never negative and is 0
 * when dim is 0.
 */
int
epsilon2(const double * restrict pa, const double * restrict pb,
dimension_t dim)
{
    int best = 0;
    dimension_t i = 0;

    while (i < dim) {
        /* Same double-to-int conversion as an int initializer would do. */
        int ratio = (int)(pa[i] / pb[i]);

        if (ratio > best) {
            best = ratio;
        }
        i++;
    }
    return best;
}

/*
 * Max-reduction over the element-wise ratios pa[d] / pb[d] for d in [0, dim),
 * with the loop annotated as an OpenMP SIMD max-reduction on eps_max.
 *
 * pa, pb: arrays with at least dim readable elements, declared restrict.
 * dim:    number of elements to process.
 *
 * The running maximum starts at 0, so 0 is returned when dim is 0.
 */
double
epsilon3(const double * restrict pa, const double * restrict pb,
dimension_t dim)
{
    double eps_max = 0;
    /* Iterate from index 0, like epsilon() and epsilon2(); starting at 1
     * would silently leave pa[0] / pb[0] out of the maximum. */
    #pragma omp simd reduction(max:eps_max)
    for (dimension_t d=0; d < dim; d++) {
        double eps_tmp = pa[d] / pb[d];
        eps_max = MAX(eps_max, eps_tmp);
    }
    return eps_max;
}
```

gcc -O3 -march=x86-64-v3 -Wall -Wextra -fopt-info-vec-missed-optimized -fopenmp
epsilon.c

produces:

<source>:12:29: missed: couldn't vectorize loop
<source>:8:1: missed: not vectorized: unsupported use in stmt.
<source>:26:29: optimized: loop vectorized using 32 byte vectors and unroll
factor 8
<source>:26:29: optimized: epilogue loop vectorized using 16 byte vectors and
unroll factor 4
<source>:38:13: optimized: loop vectorized using 32 byte vectors and unroll
factor 4
<source>:40:9: optimized: loop vectorized using 32 byte vectors and unroll
factor 4
<source>:38:13: optimized: loop vectorized using 32 byte vectors and unroll
factor 4

That is, epsilon() is not vectorized but the other two functions are.

Adding -ffinite-math-only -fno-signed-zeros does allow autovectorization, but
the ASSUME() conditions should be sufficient to reach the same outcome.

https://godbolt.org/z/753sq4vMs

Reply via email to