https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111306

            Bug ID: 111306
           Summary: macro-fusion makes error on conjugate complex
                    multiplication
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: joony.wie at samsung dot com
  Target Milestone: ---

It seems that the operands src1 and src2 of "_mm512_fcmul_pch" are swapped by
macro-fusion when optimization is enabled.

If the operands are swapped, the imaginary part of the result has the wrong
sign.

So, the operands should not be swapped in these conjugate complex
multiplication intrinsics.

Let me show the example and the output.

output: 
3.000000 -4.000000 // w/o optimize.
3.000000 4.000000 // w/ optimize.

https://godbolt.org/z/df9Gz18hc // but it may not be executable there
```
#include <immintrin.h>
#include <cstdio>

// Reference variant of the reproducer: the optimize("O0") attribute asks GCC
// to compile this one function without optimization. In the report this is
// the variant that prints the expected result (3.000000 -4.000000).
//
// Loads one vector from `a` once, then walks `b` in steps of 32 _Float16
// elements, storing _mm512_fcmul_pch(<chunk of b>, <vector of a>) to the same
// offset in `c`. The deduced return type is void (there is no return
// statement). There is no tail handling: every iteration with i < n loads and
// stores a full vector at offset i.
__attribute__((optimize("O0")))
auto func0(_Float16 *a, _Float16 *b, int n, _Float16 *c) {
  // Loop-invariant operand, loaded a single time from the start of `a`.
  __m512h rA = _mm512_loadu_ph(a);
  for (int i = 0; i < n; i += 32) {
    __m512h rB = _mm512_loadu_ph(b + i);
    // Operand order is the point of the test: rB is passed first, rA second.
    // Per the report, swapping them flips the sign of the imaginary part.
    _mm512_storeu_ph(c + i, _mm512_fcmul_pch(rB, rA));
  }
}

// Optimized variant of the reproducer: the body is identical to func0, only
// the attribute differs (optimize("O") instead of optimize("O0")). In the
// report this is the variant that prints the wrong result (3.000000 4.000000),
// i.e. the imaginary part has the opposite sign.
//
// Loads one vector from `a` once, then walks `b` in steps of 32 _Float16
// elements, storing _mm512_fcmul_pch(<chunk of b>, <vector of a>) to the same
// offset in `c`. The deduced return type is void. There is no tail handling:
// every iteration with i < n loads and stores a full vector at offset i.
__attribute__((optimize("O")))
auto func1(_Float16 *a, _Float16 *b, int n, _Float16 *c) {
  // Loop-invariant operand, loaded a single time from the start of `a`.
  __m512h rA = _mm512_loadu_ph(a);
  for (int i = 0; i < n; i += 32) {
    __m512h rB = _mm512_loadu_ph(b + i);
    // Same source-level operand order as func0 (rB first, rA second); the
    // report's claim is that the optimized build behaves as if they were
    // swapped.
    _mm512_storeu_ph(c + i, _mm512_fcmul_pch(rB, rA));
  }
}

// Driver for the reproducer: fills the inputs, runs the unoptimized and the
// optimized variant on the same data, and prints the first two elements of
// each result on its own line so the two lines can be compared.
int main() {
  int n = 32;

  // NOTE: `n` is not a constant expression, so these are variable-length
  // arrays, which are not standard C++ (accepted by GCC as an extension).
  _Float16 a[n], b[n], c[n];
  // a = -1, 2, -3, 4, ... (odd values negated); b = 1, 2, 3, 4, ...
  for (int i = 1; i <= n; i++) {
    a[i - 1] = i & 1 ? -i : i;
    b[i - 1] = i;
  }

  // With n == 32 the bound n / 32 * 2 is 2, so only c[0] and c[1] are printed.
  // Assuming elements are interleaved (real, imag) pairs and the second
  // operand is the conjugated one, as the report's expected output implies:
  // (1 + 2i) * conj(-1 + 2i) = 3 - 4i.
  func0(a, b, n, c);
    for (int i = 0; i < n / 32 * 2; i++) {
      printf("%f ", (float)c[i]);
    }
    printf("\n");

  // Same inputs through the optimized variant; `c` is overwritten. The report
  // shows 3.000000 4.000000 here, which matches the operands being swapped:
  // (-1 + 2i) * conj(1 + 2i) = 3 + 4i.
  func1(a, b, n, c);
    for (int i = 0; i < n / 32 * 2; i++) {
      printf("%f ", (float)c[i]);
    }
    printf("\n");

  return 0;
}
```

Reply via email to