This patch is the initial set of tests. The libgomp tests use an idiom of summing thread identifiers and then checking that the expected set of threads participated. They are all derived from the loop tests I recently added for the execution model itself.

The Fortran test was duplicated in both the gfortran testsuite and the libgomp testsuite. I deleted it from the former. It was slightly bogus as it asked for a vector-length of 40, and appeared to be working by accident by not actually partitioning the loop. I fixed that up and reworked it to avoid needing a reduction on a reference variable. Reference handling will be a later patch.

nathan
2015-11-02  Nathan Sidwell  <nat...@codesourcery.com>

	libgomp/
	* libgomp.oacc-c-c++-common/loop-red-g-1.c: New.
	* libgomp.oacc-c-c++-common/loop-red-gwv-1.c: New.
	* libgomp.oacc-c-c++-common/loop-red-v-1.c: New.
	* libgomp.oacc-c-c++-common/loop-red-v-2.c: New.
	* libgomp.oacc-c-c++-common/loop-red-w-1.c: New.
	* libgomp.oacc-c-c++-common/loop-red-w-2.c: New.
	* libgomp.oacc-c-c++-common/loop-red-wv-1.c: New.
	* libgomp.oacc-fortran/reduction-5.f90: Avoid reference var.

	gcc/testsuite/
	* gfortran.dg/goacc/reduction-2.f95: Delete.

Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-g-1.c	(working copy)
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+/* Check a '+' reduction on a gang-partitioned loop.  Every iteration adds
+   a value to T; the host then recomputes the expected total in H and the
+   two must match.  */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main ()
+{
+  int ix;
+  int ondev = 0;
+  /* Unsigned: the total of the packed ids can exceed INT_MAX, so wrap
+     instead of overflowing a signed int.  */
+  unsigned t = 0, h = 0;
+
+#pragma acc parallel num_gangs(32) vector_length(32) copy(t) copy(ondev)
+  {
+#pragma acc loop gang  reduction (+:t)
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	int val = ix;
+
+	/* NOTE(review): 5 is presumably the nvptx device type, given the PTX
+	   asm below -- confirm.  */
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    /* Read the PTX special registers %ctaid.x, %tid.y and %tid.x,
+	       then pack them into VAL.  */
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    val = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	t += val;
+      }
+  }
+
+  /* Recompute the expected total on the host.  */
+  for (ix = 0; ix < N; ix++)
+    {
+      int val = ix;
+      if (ondev)
+	{
+	  /* Expect g to advance every (N + 31) / 32 iterations, with w and
+	     v both zero.  */
+	  int g = ix / ((N + 31) / 32);
+	  int w = 0;
+	  int v = 0;
+
+	  val = (g << 16) | (w << 8) | v;
+	}
+      h += val;
+    }
+  if (t != h)
+    {
+      printf ("t=%x expected %x\n", t, h);
+      return 1;
+    }
+
+  return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c	(working copy)
@@ -0,0 +1,69 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+/* Check a '+' reduction on a loop partitioned over gangs, workers and
+   vectors.  Every iteration adds a value to T; the host then recomputes
+   the expected total in H and the two must match.  */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main ()
+{
+  int ix;
+  int ondev = 0;
+  /* Unsigned: the total of the packed ids can exceed INT_MAX, so wrap
+     instead of overflowing a signed int.  */
+  unsigned t = 0, h = 0;
+
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(t) copy(ondev)
+  {
+#pragma acc loop gang worker vector reduction(+:t)
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	int val = ix;
+
+	/* NOTE(review): 5 is presumably the nvptx device type, given the PTX
+	   asm below -- confirm.  */
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    /* Read the PTX special registers %ctaid.x, %tid.y and %tid.x,
+	       then pack them into VAL.  */
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    val = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	t += val;
+      }
+  }
+
+  /* Recompute the expected total on the host.  */
+  for (ix = 0; ix < N; ix++)
+    {
+      int val = ix;
+      if (ondev)
+	{
+	  /* Expect g to advance every chunk_size * 32 * 32 iterations, with
+	     w = ix / 32 % 32 and v = ix % 32.  */
+	  int chunk_size = (N + 32*32*32 - 1) / (32*32*32);
+
+	  int g = ix / (chunk_size * 32 * 32);
+	  int w = ix / 32 % 32;
+	  int v = ix % 32;
+
+	  val = (g << 16) | (w << 8) | v;
+	}
+      h += val;
+    }
+  if (t != h)
+    {
+      printf ("t=%x expected %x\n", t, h);
+      return 1;
+    }
+
+  return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c	(working copy)
@@ -0,0 +1,66 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+/* Check a '+' reduction on a vector-partitioned loop.  Every iteration
+   adds a value to T; the host then recomputes the expected total in H and
+   the two must match.  */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+
+int main ()
+{
+  int ix;
+  int ondev = 0;
+  int t = 0, h = 0;
+
+#pragma acc parallel vector_length(32) copy(t) copy(ondev)
+  {
+#pragma acc loop vector reduction (+:t)
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	int val = ix;
+
+	/* NOTE(review): 5 is presumably the nvptx device type, given the PTX
+	   asm below -- confirm.  */
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    /* Read the PTX special registers %ctaid.x, %tid.y and %tid.x,
+	       then pack them into VAL.  */
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    val = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	t += val;
+      }
+  }
+
+  /* Recompute the expected total on the host.  */
+  for (ix = 0; ix < N; ix++)
+    {
+      int val = ix;
+      if (ondev)
+	{
+	  /* Expect g = w = 0 and v = ix % 32.  */
+	  int g = 0;
+	  int w = 0;
+	  int v = ix % 32;
+
+	  val = (g << 16) | (w << 8) | v;
+	}
+      h += val;
+    }
+
+  if (t != h)
+    {
+      printf ("t=%x expected %x\n", t, h);
+      return 1;
+    }
+
+  return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c	(working copy)
@@ -0,0 +1,69 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+/* Check a '+' reduction on a vector-partitioned loop whose reduction
+   variable T is local to the parallel region, seeded from Q and stored
+   back to Q afterwards.  The host recomputes the expected total in H.  */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+
+int main ()
+{
+  int ix;
+  int ondev = 0;
+  int q = 0, h = 0;
+
+#pragma acc parallel vector_length(32) copy(q) copy(ondev)
+  {
+    int t = q;
+
+#pragma acc loop vector reduction (+:t)
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	int val = ix;
+
+	/* NOTE(review): 5 is presumably the nvptx device type, given the PTX
+	   asm below -- confirm.  */
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    /* Read the PTX special registers %ctaid.x, %tid.y and %tid.x,
+	       then pack them into VAL.  */
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    val = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	t += val;
+      }
+    q = t;
+  }
+
+  /* Recompute the expected total on the host.  */
+  for (ix = 0; ix < N; ix++)
+    {
+      int val = ix;
+      if (ondev)
+	{
+	  /* Expect g = w = 0 and v = ix % 32.  */
+	  int g = 0;
+	  int w = 0;
+	  int v = ix % 32;
+
+	  val = (g << 16) | (w << 8) | v;
+	}
+      h += val;
+    }
+
+  if (q != h)
+    {
+      printf ("t=%x expected %x\n", q, h);
+      return 1;
+    }
+
+  return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c	(working copy)
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+/* Check a '+' reduction on a worker-partitioned loop.  Every iteration
+   adds a value to T; the host then recomputes the expected total in H and
+   the two must match.  */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main ()
+{
+  int ix;
+  int ondev = 0;
+  int t = 0, h = 0;
+
+#pragma acc parallel num_workers(32) vector_length(32) copy(t) copy(ondev)
+  {
+#pragma acc loop worker reduction(+:t)
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	int val = ix;
+
+	/* NOTE(review): 5 is presumably the nvptx device type, given the PTX
+	   asm below -- confirm.  */
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    /* Read the PTX special registers %ctaid.x, %tid.y and %tid.x,
+	       then pack them into VAL.  */
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    val = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	t += val;
+      }
+  }
+
+  /* Recompute the expected total on the host.  */
+  for (ix = 0; ix < N; ix++)
+    {
+      int val = ix;
+      if (ondev)
+	{
+	  /* Expect g = v = 0 and w = ix % 32.  */
+	  int g = 0;
+	  int w = ix % 32;
+	  int v = 0;
+
+	  val = (g << 16) | (w << 8) | v;
+	}
+      h += val;
+    }
+  if (t != h)
+    {
+      printf ("t=%x expected %x\n", t, h);
+      return 1;
+    }
+
+  return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c	(working copy)
@@ -0,0 +1,67 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+/* Check a '+' reduction on a worker-partitioned loop whose reduction
+   variable T is local to the parallel region, seeded from Q and stored
+   back to Q afterwards.  The host recomputes the expected total in H.  */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main ()
+{
+  int ix;
+  int ondev = 0;
+  int q = 0, h = 0;
+
+#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev)
+  {
+    int t = q;
+
+#pragma acc loop worker reduction(+:t)
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	int val = ix;
+
+	/* NOTE(review): 5 is presumably the nvptx device type, given the PTX
+	   asm below -- confirm.  */
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    /* Read the PTX special registers %ctaid.x, %tid.y and %tid.x,
+	       then pack them into VAL.  */
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    val = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	t += val;
+      }
+    q = t;
+  }
+
+  /* Recompute the expected total on the host.  */
+  for (ix = 0; ix < N; ix++)
+    {
+      int val = ix;
+      if (ondev)
+	{
+	  /* Expect g = v = 0 and w = ix % 32.  */
+	  int g = 0;
+	  int w = ix % 32;
+	  int v = 0;
+
+	  val = (g << 16) | (w << 8) | v;
+	}
+      h += val;
+    }
+  if (q != h)
+    {
+      printf ("t=%x expected %x\n", q, h);
+      return 1;
+    }
+
+  return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c	(working copy)
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+/* Check a '+' reduction on a loop partitioned over workers and vectors.
+   Every iteration adds a value to T; the host then recomputes the
+   expected total in H and the two must match.  */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main ()
+{
+  int ix;
+  int ondev = 0;
+  int t = 0, h = 0;
+
+#pragma acc parallel num_workers(32) vector_length(32) copy(t) copy(ondev)
+  {
+#pragma acc loop worker vector reduction (+:t)
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	int val = ix;
+
+	/* NOTE(review): 5 is presumably the nvptx device type, given the PTX
+	   asm below -- confirm.  */
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    /* Read the PTX special registers %ctaid.x, %tid.y and %tid.x,
+	       then pack them into VAL.  */
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    val = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	t += val;
+      }
+  }
+
+  /* Recompute the expected total on the host.  */
+  for (ix = 0; ix < N; ix++)
+    {
+      int val = ix;
+      if (ondev)
+	{
+	  /* Expect g = 0, w = (ix / 32) % 32 and v = ix % 32.  */
+	  int g = 0;
+	  int w = (ix / 32) % 32;
+	  int v = ix % 32;
+
+	  val = (g << 16) | (w << 8) | v;
+	}
+      h += val;
+    }
+  if (t != h)
+    {
+      printf ("t=%x expected %x\n", t, h);
+      return 1;
+    }
+
+  return 0;
+}
Index: libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90
===================================================================
--- libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90	(revision 229667)
+++ libgomp/testsuite/libgomp.oacc-fortran/reduction-5.f90	(working copy)
@@ -21,12 +21,15 @@ end program reduction
 subroutine redsub(sum, n, c)
   integer :: sum, n, c
 
-  sum = 0
+  integer :: s
+  s = 0
 
-  !$acc parallel vector_length(n) copyin (n, c) num_gangs(1)
-  !$acc loop reduction(+:sum)
+  !$acc parallel vector_length(32) copyin (n, c) copy (s) num_gangs(1)
+  !$acc loop reduction(+:s)
   do i = 1, n
-     sum = sum + c
+     s = s + c
   end do
   !$acc end parallel
+
+  sum = s
 end subroutine redsub
Index: gcc/testsuite/gfortran.dg/goacc/reduction-2.f95
===================================================================
--- gcc/testsuite/gfortran.dg/goacc/reduction-2.f95	(revision 229667)
+++ gcc/testsuite/gfortran.dg/goacc/reduction-2.f95	(working copy)
@@ -1,21 +0,0 @@
-! { dg-do compile }
-
-program reduction
-  integer, parameter    :: n = 40, c = 10
-  integer               :: i, sum
-
-  call redsub (sum, n, c)
-end program reduction
-
-subroutine redsub(sum, n, c)
-  integer :: sum, n, c
-
-  sum = 0
-
-  !$acc parallel vector_length(n) copyin (n, c)
-  !$acc loop reduction(+:sum)
-  do i = 1, n
-     sum = sum + c
-  end do
-  !$acc end parallel
-end subroutine redsub

Reply via email to