Hello world,

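the attached patch moves the zeroing of the result array before the
early return. This is done so that the result is still valid when an
argument is zero-sized: previously, with k == 0 but non-zero m and n,
the early return was taken before c had been set to zero.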

The bug was only in the library version, but I have also added a test
case for the inline version to make sure the bug does not suddenly
appear there.

OK for trunk?

Regards

        Thomas

2017-06-05  Thomas Koenig  <tkoe...@gcc.gnu.org>

        PR fortran/80975
        * m4/matmul_internal.m4: Move zeroing before early return.
        * generated/matmul_c10.c: Regenerated.
        * generated/matmul_c16.c: Regenerated.
        * generated/matmul_c4.c: Regenerated.
        * generated/matmul_c8.c: Regenerated.
        * generated/matmul_i1.c: Regenerated.
        * generated/matmul_i16.c: Regenerated.
        * generated/matmul_i2.c: Regenerated.
        * generated/matmul_i4.c: Regenerated.
        * generated/matmul_i8.c: Regenerated.
        * generated/matmul_r10.c: Regenerated.
        * generated/matmul_r16.c: Regenerated.
        * generated/matmul_r4.c: Regenerated.
        * generated/matmul_r8.c: Regenerated.
        * generated/matmulavx128_c10.c: Regenerated.
        * generated/matmulavx128_c16.c: Regenerated.
        * generated/matmulavx128_c4.c: Regenerated.
        * generated/matmulavx128_c8.c: Regenerated.
        * generated/matmulavx128_i1.c: Regenerated.
        * generated/matmulavx128_i16.c: Regenerated.
        * generated/matmulavx128_i2.c: Regenerated.
        * generated/matmulavx128_i4.c: Regenerated.
        * generated/matmulavx128_i8.c: Regenerated.
        * generated/matmulavx128_r10.c: Regenerated.
        * generated/matmulavx128_r16.c: Regenerated.
        * generated/matmulavx128_r4.c: Regenerated.
Index: generated/matmul_c10.c
===================================================================
--- generated/matmul_c10.c	(Revision 248472)
+++ generated/matmul_c10.c	(Arbeitskopie)
@@ -307,6 +307,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -319,11 +324,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -859,6 +859,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -871,11 +876,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1411,6 +1411,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1423,11 +1428,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1977,6 +1977,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1989,11 +1994,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -2603,6 +2603,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -2615,11 +2620,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmul_c16.c
===================================================================
--- generated/matmul_c16.c	(Revision 248472)
+++ generated/matmul_c16.c	(Arbeitskopie)
@@ -307,6 +307,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -319,11 +324,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -859,6 +859,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -871,11 +876,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1411,6 +1411,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1423,11 +1428,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1977,6 +1977,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1989,11 +1994,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -2603,6 +2603,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -2615,11 +2620,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmul_c4.c
===================================================================
--- generated/matmul_c4.c	(Revision 248472)
+++ generated/matmul_c4.c	(Arbeitskopie)
@@ -307,6 +307,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -319,11 +324,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -859,6 +859,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -871,11 +876,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1411,6 +1411,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1423,11 +1428,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1977,6 +1977,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1989,11 +1994,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -2603,6 +2603,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -2615,11 +2620,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmul_c8.c
===================================================================
--- generated/matmul_c8.c	(Revision 248472)
+++ generated/matmul_c8.c	(Arbeitskopie)
@@ -307,6 +307,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -319,11 +324,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -859,6 +859,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -871,11 +876,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1411,6 +1411,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1423,11 +1428,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1977,6 +1977,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1989,11 +1994,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -2603,6 +2603,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -2615,11 +2620,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmul_i1.c
===================================================================
--- generated/matmul_i1.c	(Revision 248472)
+++ generated/matmul_i1.c	(Arbeitskopie)
@@ -307,6 +307,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -319,11 +324,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -859,6 +859,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -871,11 +876,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1411,6 +1411,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1423,11 +1428,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1977,6 +1977,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1989,11 +1994,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -2603,6 +2603,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -2615,11 +2620,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmul_i16.c
===================================================================
--- generated/matmul_i16.c	(Revision 248472)
+++ generated/matmul_i16.c	(Arbeitskopie)
@@ -307,6 +307,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -319,11 +324,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -859,6 +859,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -871,11 +876,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1411,6 +1411,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1423,11 +1428,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1977,6 +1977,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1989,11 +1994,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -2603,6 +2603,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -2615,11 +2620,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmul_i2.c
===================================================================
--- generated/matmul_i2.c	(Revision 248472)
+++ generated/matmul_i2.c	(Arbeitskopie)
@@ -307,6 +307,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -319,11 +324,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -859,6 +859,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -871,11 +876,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1411,6 +1411,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1423,11 +1428,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1977,6 +1977,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1989,11 +1994,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -2603,6 +2603,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -2615,11 +2620,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmul_i4.c
===================================================================
--- generated/matmul_i4.c	(Revision 248472)
+++ generated/matmul_i4.c	(Arbeitskopie)
@@ -307,6 +307,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -319,11 +324,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -859,6 +859,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -871,11 +876,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1411,6 +1411,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1423,11 +1428,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1977,6 +1977,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1989,11 +1994,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -2603,6 +2603,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -2615,11 +2620,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmul_i8.c
===================================================================
--- generated/matmul_i8.c	(Revision 248472)
+++ generated/matmul_i8.c	(Arbeitskopie)
@@ -307,6 +307,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -319,11 +324,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -859,6 +859,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -871,11 +876,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1411,6 +1411,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1423,11 +1428,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1977,6 +1977,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1989,11 +1994,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -2603,6 +2603,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -2615,11 +2620,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmul_r10.c
===================================================================
--- generated/matmul_r10.c	(Revision 248472)
+++ generated/matmul_r10.c	(Arbeitskopie)
@@ -307,6 +307,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_10)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -319,11 +324,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_10)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -859,6 +859,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_10)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -871,11 +876,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_10)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1411,6 +1411,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_10)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1423,11 +1428,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_10)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1977,6 +1977,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_10)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1989,11 +1994,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_10)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -2603,6 +2603,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_10)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -2615,11 +2620,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_10)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmul_r16.c
===================================================================
--- generated/matmul_r16.c	(Revision 248472)
+++ generated/matmul_r16.c	(Arbeitskopie)
@@ -307,6 +307,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -319,11 +324,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -859,6 +859,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -871,11 +876,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1411,6 +1411,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1423,11 +1428,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1977,6 +1977,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1989,11 +1994,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -2603,6 +2603,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -2615,11 +2620,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmul_r4.c
===================================================================
--- generated/matmul_r4.c	(Revision 248472)
+++ generated/matmul_r4.c	(Arbeitskopie)
@@ -307,6 +307,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -319,11 +324,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -859,6 +859,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -871,11 +876,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1411,6 +1411,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1423,11 +1428,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1977,6 +1977,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1989,11 +1994,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -2603,6 +2603,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -2615,11 +2620,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmul_r8.c
===================================================================
--- generated/matmul_r8.c	(Revision 248472)
+++ generated/matmul_r8.c	(Arbeitskopie)
@@ -307,6 +307,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -319,11 +324,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -859,6 +859,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -871,11 +876,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1411,6 +1411,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1423,11 +1428,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -1977,6 +1977,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -1989,11 +1994,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -2603,6 +2603,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -2615,11 +2620,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmulavx128_c10.c
===================================================================
--- generated/matmulavx128_c10.c	(Revision 248472)
+++ generated/matmulavx128_c10.c	(Arbeitskopie)
@@ -272,6 +272,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -284,11 +289,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -825,6 +825,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -837,11 +842,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_10));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmulavx128_c16.c
===================================================================
--- generated/matmulavx128_c16.c	(Revision 248472)
+++ generated/matmulavx128_c16.c	(Arbeitskopie)
@@ -272,6 +272,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -284,11 +289,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -825,6 +825,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -837,11 +842,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmulavx128_c4.c
===================================================================
--- generated/matmulavx128_c4.c	(Revision 248472)
+++ generated/matmulavx128_c4.c	(Arbeitskopie)
@@ -272,6 +272,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -284,11 +289,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -825,6 +825,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -837,11 +842,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmulavx128_c8.c
===================================================================
--- generated/matmulavx128_c8.c	(Revision 248472)
+++ generated/matmulavx128_c8.c	(Arbeitskopie)
@@ -272,6 +272,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -284,11 +289,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -825,6 +825,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -837,11 +842,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_COMPLEX_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmulavx128_i1.c
===================================================================
--- generated/matmulavx128_i1.c	(Revision 248472)
+++ generated/matmulavx128_i1.c	(Arbeitskopie)
@@ -272,6 +272,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -284,11 +289,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -825,6 +825,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -837,11 +842,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_1));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmulavx128_i16.c
===================================================================
--- generated/matmulavx128_i16.c	(Revision 248472)
+++ generated/matmulavx128_i16.c	(Arbeitskopie)
@@ -272,6 +272,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -284,11 +289,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -825,6 +825,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -837,11 +842,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmulavx128_i2.c
===================================================================
--- generated/matmulavx128_i2.c	(Revision 248472)
+++ generated/matmulavx128_i2.c	(Arbeitskopie)
@@ -272,6 +272,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -284,11 +289,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -825,6 +825,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -837,11 +842,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_2));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmulavx128_i4.c
===================================================================
--- generated/matmulavx128_i4.c	(Revision 248472)
+++ generated/matmulavx128_i4.c	(Arbeitskopie)
@@ -272,6 +272,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -284,11 +289,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -825,6 +825,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -837,11 +842,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmulavx128_i8.c
===================================================================
--- generated/matmulavx128_i8.c	(Revision 248472)
+++ generated/matmulavx128_i8.c	(Arbeitskopie)
@@ -272,6 +272,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -284,11 +289,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -825,6 +825,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -837,11 +842,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_INTEGER_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmulavx128_r10.c
===================================================================
--- generated/matmulavx128_r10.c	(Revision 248472)
+++ generated/matmulavx128_r10.c	(Arbeitskopie)
@@ -272,6 +272,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_10)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -284,11 +289,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_10)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -825,6 +825,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_10)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -837,11 +842,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_10));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_10)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmulavx128_r16.c
===================================================================
--- generated/matmulavx128_r16.c	(Revision 248472)
+++ generated/matmulavx128_r16.c	(Arbeitskopie)
@@ -272,6 +272,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -284,11 +289,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -825,6 +825,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_16)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -837,11 +842,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_16));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_16)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmulavx128_r4.c
===================================================================
--- generated/matmulavx128_r4.c	(Revision 248472)
+++ generated/matmulavx128_r4.c	(Arbeitskopie)
@@ -272,6 +272,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -284,11 +289,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -825,6 +825,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_4)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -837,11 +842,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_4));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_4)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: generated/matmulavx128_r8.c
===================================================================
--- generated/matmulavx128_r8.c	(Revision 248472)
+++ generated/matmulavx128_r8.c	(Arbeitskopie)
@@ -272,6 +272,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -284,11 +289,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
@@ -825,6 +825,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_8)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -837,11 +842,6 @@
 
       t1 = malloc (t1_dim * sizeof(GFC_REAL_8));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = (GFC_REAL_8)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
Index: m4/matmul_internal.m4
===================================================================
--- m4/matmul_internal.m4	(Revision 248467)
+++ m4/matmul_internal.m4	(Arbeitskopie)
@@ -223,6 +223,11 @@
       b_offset = 1 + b_dim1;
       b -= b_offset;
 
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = ('rtype_name`)0;
+
       /* Early exit if possible */
       if (m == 0 || n == 0 || k == 0)
 	return;
@@ -235,11 +240,6 @@
 
       t1 = malloc (t1_dim * sizeof('rtype_name`));
 
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = ('rtype_name`)0;
-
       /* Start turning the crank. */
       i1 = n;
       for (jj = 1; jj <= i1; jj += 512)
! { dg-do  run }
! { dg-options "-finline-matmul-limit=0" }
! PR 80975 - this did not zero the result array
program bogus_matmul
  implicit none
  ! M has a zero-sized second dimension and v is zero-sized, so every
  ! element of the product is an empty sum and must come out as zero.
  real :: M(3,0), v(0), w(3)

  ! Pre-fill the result with a nonzero value so that a matmul which
  ! leaves w untouched is detected by the check below.
  w = 7
  w = matmul(M,v)
  ! Fail with a nonzero exit status; the standard stop statement is used
  ! rather than the non-standard abort intrinsic.
  if (any(w /= 0)) stop 1
end program bogus_matmul
! { dg-do  run }
! { dg-options "-O -finline-matmul-limit=100 -fdump-tree-optimized" }
! PR 80975 - this did not zero the result array in the library version;
! make sure this also doesn't happen in the inline version.
program bogus_matmul
  implicit none
  ! M has a zero-sized second dimension and v is zero-sized, so every
  ! element of the product is an empty sum and must come out as zero.
  real :: M(3,0), v(0), w(3)

  ! Pre-fill the result with a nonzero value so that a matmul which
  ! leaves w untouched is detected by the check below.
  w = 7
  w = matmul(M,v)
  ! Fail with a nonzero exit status; the standard stop statement is used
  ! rather than the non-standard abort intrinsic.
  if (any(w /= 0)) stop 1
end program bogus_matmul
! { dg-final { scan-tree-dump-times "matmul_r4" 0 "optimized" } }

Reply via email to