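Hello world, the attached patch moves the zeroing of the result array c before the early return. This is done so that the result is still valid for zero-sized arguments: the early return is taken when m, n or k is zero, so with k == 0 and non-zero m and n the m x n result, which must be all zeros, was previously left unset.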
The bug was only in the library version, but I have also added a test case for the inline version to make sure the bug does not suddenly appear there. OK for trunk? Regards Thomas 2017-06-05 Thomas Koenig <tkoe...@gcc.gnu.org> PR fortran/80975 * m4/matmul_internal.m4: Move zeroing before early return. * generated/matmul_c10.c: Regenerated. * generated/matmul_c16.c: Regenerated. * generated/matmul_c4.c: Regenerated. * generated/matmul_c8.c: Regenerated. * generated/matmul_i1.c: Regenerated. * generated/matmul_i16.c: Regenerated. * generated/matmul_i2.c: Regenerated. * generated/matmul_i4.c: Regenerated. * generated/matmul_i8.c: Regenerated. * generated/matmul_r10.c: Regenerated. * generated/matmul_r16.c: Regenerated. * generated/matmul_r4.c: Regenerated. * generated/matmul_r8.c: Regenerated. * generated/matmulavx128_c10.c: Regenerated. * generated/matmulavx128_c16.c: Regenerated. * generated/matmulavx128_c4.c: Regenerated. * generated/matmulavx128_c8.c: Regenerated. * generated/matmulavx128_i1.c: Regenerated. * generated/matmulavx128_i16.c: Regenerated. * generated/matmulavx128_i2.c: Regenerated. * generated/matmulavx128_i4.c: Regenerated. * generated/matmulavx128_i8.c: Regenerated. * generated/matmulavx128_r10.c: Regenerated. * generated/matmulavx128_r16.c: Regenerated. * generated/matmulavx128_r4.c: Regenerated.
Index: generated/matmul_c10.c =================================================================== --- generated/matmul_c10.c (Revision 248472) +++ generated/matmul_c10.c (Arbeitskopie) @@ -307,6 +307,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10)); - /* Empty c first. 
*/ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmul_c16.c =================================================================== --- generated/matmul_c16.c (Revision 248472) +++ generated/matmul_c16.c (Arbeitskopie) @@ -307,6 +307,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. 
*/ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmul_c4.c =================================================================== --- generated/matmul_c4.c (Revision 248472) +++ generated/matmul_c4.c (Arbeitskopie) @@ -307,6 +307,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4)); - /* Empty c first. 
*/ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4)); - /* Empty c first. 
*/ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmul_c8.c =================================================================== --- generated/matmul_c8.c (Revision 248472) +++ generated/matmul_c8.c (Arbeitskopie) @@ -307,6 +307,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. 
*/ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmul_i1.c =================================================================== --- generated/matmul_i1.c (Revision 248472) +++ generated/matmul_i1.c (Arbeitskopie) @@ -307,6 +307,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_1)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_1)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); - /* Empty c first. 
*/ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_1)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_1)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_1)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmul_i16.c =================================================================== --- generated/matmul_i16.c (Revision 248472) +++ generated/matmul_i16.c (Arbeitskopie) @@ -307,6 +307,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. 
*/ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. 
*/ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmul_i2.c =================================================================== --- generated/matmul_i2.c (Revision 248472) +++ generated/matmul_i2.c (Arbeitskopie) @@ -307,6 +307,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_2)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_2)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_2)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); - /* Empty c first. 
*/ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_2)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_2)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmul_i4.c =================================================================== --- generated/matmul_i4.c (Revision 248472) +++ generated/matmul_i4.c (Arbeitskopie) @@ -307,6 +307,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. 
*/ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; - /* Start turning the crank. 
*/ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmul_i8.c =================================================================== --- generated/matmul_i8.c (Revision 248472) +++ generated/matmul_i8.c (Arbeitskopie) @@ -307,6 +307,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. 
*/ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmul_r10.c =================================================================== --- generated/matmul_r10.c (Revision 248472) +++ generated/matmul_r10.c (Arbeitskopie) @@ -307,6 +307,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_10)); - /* Empty c first. 
*/ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmul_r16.c =================================================================== --- generated/matmul_r16.c (Revision 248472) +++ generated/matmul_r16.c (Arbeitskopie) @@ -307,6 +307,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. 
*/ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. 
*/ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmul_r4.c =================================================================== --- generated/matmul_r4.c (Revision 248472) +++ generated/matmul_r4.c (Arbeitskopie) @@ -307,6 +307,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_4)0; - /* Start turning the crank. 
*/ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmul_r8.c =================================================================== --- generated/matmul_r8.c (Revision 248472) +++ generated/matmul_r8.c (Arbeitskopie) @@ -307,6 +307,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -319,11 +324,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -859,6 +859,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -871,11 +876,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_8)); - /* Empty c first. 
*/ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1411,6 +1411,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1423,11 +1428,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -1977,6 +1977,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -1989,11 +1994,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -2603,6 +2603,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -2615,11 +2620,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmulavx128_c10.c =================================================================== --- generated/matmulavx128_c10.c (Revision 248472) +++ generated/matmulavx128_c10.c (Arbeitskopie) @@ -272,6 +272,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. 
*/ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmulavx128_c16.c =================================================================== --- generated/matmulavx128_c16.c (Revision 248472) +++ generated/matmulavx128_c16.c (Arbeitskopie) @@ -272,6 +272,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16)); - /* Empty c first. 
*/ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmulavx128_c4.c =================================================================== --- generated/matmulavx128_c4.c (Revision 248472) +++ generated/matmulavx128_c4.c (Arbeitskopie) @@ -272,6 +272,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmulavx128_c8.c =================================================================== --- generated/matmulavx128_c8.c (Revision 248472) +++ generated/matmulavx128_c8.c (Arbeitskopie) @@ -272,6 +272,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_8)0; - /* Start turning the crank. 
*/ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_COMPLEX_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_COMPLEX_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmulavx128_i1.c =================================================================== --- generated/matmulavx128_i1.c (Revision 248472) +++ generated/matmulavx128_i1.c (Arbeitskopie) @@ -272,6 +272,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_1)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_1)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_1)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmulavx128_i16.c =================================================================== --- generated/matmulavx128_i16.c (Revision 248472) +++ generated/matmulavx128_i16.c (Arbeitskopie) @@ -272,6 +272,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. 
*/ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmulavx128_i2.c =================================================================== --- generated/matmulavx128_i2.c (Revision 248472) +++ generated/matmulavx128_i2.c (Arbeitskopie) @@ -272,6 +272,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_2)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_2)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2)); - /* Empty c first. 
*/ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_2)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmulavx128_i4.c =================================================================== --- generated/matmulavx128_i4.c (Revision 248472) +++ generated/matmulavx128_i4.c (Arbeitskopie) @@ -272,6 +272,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmulavx128_i8.c =================================================================== --- generated/matmulavx128_i8.c (Revision 248472) +++ generated/matmulavx128_i8.c (Arbeitskopie) @@ -272,6 +272,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; - /* Start turning the crank. 
*/ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_INTEGER_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_INTEGER_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmulavx128_r10.c =================================================================== --- generated/matmulavx128_r10.c (Revision 248472) +++ generated/matmulavx128_r10.c (Arbeitskopie) @@ -272,6 +272,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_10)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_10)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_10)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmulavx128_r16.c =================================================================== --- generated/matmulavx128_r16.c (Revision 248472) +++ generated/matmulavx128_r16.c (Arbeitskopie) @@ -272,6 +272,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. 
*/ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_16)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_16)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_16)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmulavx128_r4.c =================================================================== --- generated/matmulavx128_r4.c (Revision 248472) +++ generated/matmulavx128_r4.c (Arbeitskopie) @@ -272,6 +272,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_4)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_4)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_4)); - /* Empty c first. 
*/ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_4)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: generated/matmulavx128_r8.c =================================================================== --- generated/matmulavx128_r8.c (Revision 248472) +++ generated/matmulavx128_r8.c (Arbeitskopie) @@ -272,6 +272,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -284,11 +289,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) @@ -825,6 +825,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = (GFC_REAL_8)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -837,11 +842,6 @@ t1 = malloc (t1_dim * sizeof(GFC_REAL_8)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = (GFC_REAL_8)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512) Index: m4/matmul_internal.m4 =================================================================== --- m4/matmul_internal.m4 (Revision 248467) +++ m4/matmul_internal.m4 (Arbeitskopie) @@ -223,6 +223,11 @@ b_offset = 1 + b_dim1; b -= b_offset; + /* Empty c first. */ + for (j=1; j<=n; j++) + for (i=1; i<=m; i++) + c[i + j * c_dim1] = ('rtype_name`)0; + /* Early exit if possible */ if (m == 0 || n == 0 || k == 0) return; @@ -235,11 +240,6 @@ t1 = malloc (t1_dim * sizeof('rtype_name`)); - /* Empty c first. */ - for (j=1; j<=n; j++) - for (i=1; i<=m; i++) - c[i + j * c_dim1] = ('rtype_name`)0; - /* Start turning the crank. */ i1 = n; for (jj = 1; jj <= i1; jj += 512)
! { dg-do run }
! { dg-options "-finline-matmul-limit=0" }
! PR 80975 - this did not zero the result array
!
! Multiplies a 3x0 matrix by a zero-sized vector.  Every element of the
! size-3 result is an empty sum, so the whole result must be zero.
program bogus_matmul
  implicit none
  real :: M(3,0), v(0), w(3)

  ! Pre-fill the result with a nonzero value so that a matmul which
  ! returns early without storing anything is detected.
  w = 7
  w = matmul(M,v)

  ! Fail with a nonzero exit status if any stale value survived.
  if (any(w /= 0)) stop 1
end program bogus_matmul
! { dg-do run }
! { dg-options "-O -finline-matmul-limit=100 -fdump-tree-optimized" }
! PR 80975 - this did not zero the result array in the library version;
! make sure this also doesn't happen in the inline version.
!
! Multiplies a 3x0 matrix by a zero-sized vector.  Every element of the
! size-3 result is an empty sum, so the whole result must be zero.
program bogus_matmul
  implicit none
  real :: M(3,0), v(0), w(3)

  ! Pre-fill the result with a nonzero value so that a matmul which
  ! leaves the result untouched is detected.
  w = 7
  w = matmul(M,v)

  ! Fail with a nonzero exit status if any stale value survived.
  if (any(w /= 0)) stop 1
end program bogus_matmul
! The scan below checks that no call to the library routine remains,
! i.e. that the inline expansion was actually used.
! { dg-final { scan-tree-dump-times "matmul_r4" 0 "optimized" } }