https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111306
Bug ID: 111306 Summary: macro-fusion makes error on conjugate complex multiplication Product: gcc Version: unknown Status: UNCONFIRMED Severity: normal Priority: P3 Component: c++ Assignee: unassigned at gcc dot gnu.org Reporter: joony.wie at samsung dot com Target Milestone: --- It seems that the operands src1 and src2 of "_mm512_fcmul_pch" are swapped by macro-fusion when optimization is enabled. If the operands are swapped, the imaginary part of the result has the wrong sign, because a * conj(b) and b * conj(a) are complex conjugates of each other. So, the operands must not be swapped in these conjugate complex multiplication intrinsics. Here is an example and its output. output: 3.000000 -4.000000 // without optimization. 3.000000 4.000000 // with optimization. https://godbolt.org/z/df9Gz18hc // but it may not be executable there ``` #include <immintrin.h> #include <cstdio> __attribute__((optimize("O0"))) auto func0(_Float16 *a, _Float16 *b, int n, _Float16 *c) { __m512h rA = _mm512_loadu_ph(a); for (int i = 0; i < n; i += 32) { __m512h rB = _mm512_loadu_ph(b + i); _mm512_storeu_ph(c + i, _mm512_fcmul_pch(rB, rA)); } } __attribute__((optimize("O"))) auto func1(_Float16 *a, _Float16 *b, int n, _Float16 *c) { __m512h rA = _mm512_loadu_ph(a); for (int i = 0; i < n; i += 32) { __m512h rB = _mm512_loadu_ph(b + i); _mm512_storeu_ph(c + i, _mm512_fcmul_pch(rB, rA)); } } int main() { int n = 32; _Float16 a[n], b[n], c[n]; for (int i = 1; i <= n; i++) { a[i - 1] = i & 1 ? -i : i; b[i - 1] = i; } func0(a, b, n, c); for (int i = 0; i < n / 32 * 2; i++) { printf("%f ", (float)c[i]); } printf("\n"); func1(a, b, n, c); for (int i = 0; i < n / 32 * 2; i++) { printf("%f ", (float)c[i]); } printf("\n"); return 0; } ```