https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95218
--- Comment #8 from Martin Liška <marxin at gcc dot gnu.org> ---
Here is a partially reduced test case:
$ cat fma.i
/* Reference values that fma_test() checks m4 against after its first loop,
   i.e. after computing ((a * b) - c) * a - b for each element. */
double res_test0101[32] = {
    -3,    1,     17,    51,    109,   197,   321,   487,
    701,   969,   1297,  1691,  2157,  2701,  3329,  4047,
    4861,  5777,  6801,  7939,  9197,  10581, 12097, 13751,
    15549, 17497, 19601, 21867, 24301, 26909, 29697, 32671,
};
/* Reference values that fma_test() checks m4 against after its second loop,
   i.e. after computing -((a * b) - c) * a + b for each element. */
double res_test0110[32] = {
    3,      -1,     -17,    -51,    -109,   -197,   -321,   -487,
    -701,   -969,   -1297,  -1691,  -2157,  -2701,  -3329,  -4047,
    -4861,  -5777,  -6801,  -7939,  -9197,  -10581, -12097, -13751,
    -15549, -17497, -19601, -21867, -24301, -26909, -29697, -32671,
};
/* Declaration of abort(), which fma_test() calls when test_fails is set.
   The two empty __attribute__(()) lists are kept exactly as written; the
   reproduction command passes -Wno-attributes. */
extern void abort() __attribute__(()) __attribute__(());
/*
 * Execute the x86 CPUID instruction for the given leaf.
 *
 * __leaf is loaded into EAX before the instruction (via the "0" matching
 * constraint); the resulting EAX, EBX, ECX and EDX register values are stored
 * through __eax, __ebx, __ecx and __edx respectively.
 *
 * Returns 1 once the instruction has executed and the outputs are written.
 */
static __inline int __get_cpuid(unsigned int __leaf, unsigned int *__eax,
                                unsigned int *__ebx, unsigned int *__ecx,
                                unsigned int *__edx) {
  __asm__("cpuid\n\t"
          : "=a"(*__eax), "=b"(*__ebx), "=c"(*__ecx), "=d"(*__edx)
          : "0"(__leaf));
  /* Previously control fell off the end of this non-void function while the
     caller tests the result, which is undefined behavior.  Report success
     explicitly instead. */
  return 1;
}
static void fma_test();
/*
 * Entry point: query CPUID leaf 1 and run fma_test() only when bit 12 of ECX
 * is set (the FMA feature flag according to the Intel SDM).  Always returns 0;
 * a failing test is reported by fma_test() calling abort().
 */
int main() {
  unsigned int eax, ebx, ecx, edx;
  /* If the CPUID query fails the output registers were never written, so
     bail out instead of inspecting an uninitialized ecx.  The previous
     statement here was a bare "0;", which discarded the check. */
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
    return 0;
  if (ecx & (1 << 12))
    fma_test();
  return 0;
}
/* Input operands read element-wise by fma_test() as a (m1), b (m2), c (m3). */
double m1[32] = {
    1,  2,  3,  4,  5,  6,  7,  8,
    9,  10, 11, 12, 13, 14, 15, 16,
    17, 18, 19, 20, 21, 22, 23, 24,
    25, 26, 27, 28, 29, 30, 31, 32,
};
double m2[32] = {
    2,  3,  4,  5,  6,  7,  8,  9,
    10, 11, 12, 13, 14, 15, 16, 17,
    18, 19, 20, 21, 22, 23, 24, 25,
    26, 27, 28, 29, 30, 31, 32, 33,
};
double m3[32] = {
    3,  4,  5,  6,  7,  8,  9,  10,
    11, 12, 13, 14, 15, 16, 17, 18,
    19, 20, 21, 22, 23, 24, 25, 26,
    27, 28, 29, 30, 31, 32, 33, 34,
};
/* Output buffer written by fma_test() and read by compare_result(). */
double m4[32];
/* Set to 1 by compare_result() on any mismatch; never reset afterwards. */
int test_fails = 0;
/*
 * Compare the 32 elements of the global m4 against res[0..31].
 * If any element differs, set the global test_fails to 1; otherwise leave
 * test_fails untouched.  All 32 elements are always examined.
 */
void compare_result(double *res) {
  int mismatch_seen = 0;

  for (int idx = 0; idx < 32; idx++) {
    if (m4[idx] != res[idx]) {
      mismatch_seen = 1;
    }
  }

  if (mismatch_seen) {
    test_fails = 1;
  }
}
/*
 * Fill m4 from m1/m2/m3 using three multiply/subtract/add expression variants
 * and check the first two passes against res_test0101 and res_test0110 via
 * compare_result().  Calls abort() if test_fails is set at the end.
 *
 * NOTE(review): the exact expression shapes and loop structure are presumably
 * what triggers the reported failure with -O3 -mfma; keep them as written.
 */
static void fma_test() {
double __trans_tmp_3;
double __trans_tmp_2;
double __trans_tmp_1;
int i;
/* Clear the output buffer before the first pass. */
for (i = 0; i < 32; i++) m4[i] = 0;
i = 0;
/* Pass 1: m4[i] = ((a * b) - c) * a - b, checked against res_test0101. */
for (i; i < 32; i++) {
double a = m1[i];
double b = m2[i];
double c = m3[i];
__trans_tmp_1 = ((a * b) - c) * a - b;
m4[i] = __trans_tmp_1;
}
compare_result(res_test0101);
i = 0;
/* Pass 2: m4[i] = -((a * b) - c) * a + b, checked against res_test0110. */
for (i; i < 32; i++) {
{
double a = m1[i];
double b = m2[i];
double c = m3[i];
__trans_tmp_3 = -((a * b) - c) * a + b;
}
m4[i] = __trans_tmp_3;
}
compare_result(res_test0110);
i = 0;
/* Pass 3: m4[i] = -((a * b) - c) * a - b.  No compare_result() call follows
   this loop, so these values are stored but never checked. */
for (i; i < 32; i++) {
double a = m1[i];
double b = m2[i];
double c = m3[i];
__trans_tmp_2 = -((a * b) - c) * a - b;
m4[i] = __trans_tmp_2;
}
/* test_fails is only ever set by the two compare_result() calls above. */
if (test_fails) abort();
}
$ gcc -O3 -Wno-attributes -mfpmath=sse -mfma fma.i && ./a.out
Aborted (core dumped)