This patch has some new execution tests, verifying loop partitioning is behaving as expected.

There are more execution tests on the gomp4 branch, but many of them use reductions. We'll merge those once reductions are merged.

nathan
2015-10-20  Nathan Sidwell  <nat...@codesourcery.com>

	* testsuite/libgomp.oacc-c-c++-common/loop-g-1.c: New.
	* testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: New.
	* testsuite/libgomp.oacc-c-c++-common/loop-g-1.s: New.
	* testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c: New.
	* testsuite/libgomp.oacc-c-c++-common/loop-g-2.c: New.
	* testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c: New.
	* testsuite/libgomp.oacc-c-c++-common/loop-v-1.c: New.

Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c	(working copy)
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main ()
+{
+  int ary[N];
+  int ix;
+  int exit = 0;
+  int ondev = 0;
+
+  for (ix = 0; ix < N;ix++)
+    ary[ix] = -1;
+  
+#pragma acc parallel num_gangs(32) vector_length(32) copy(ary) copy(ondev)
+  {
+#pragma acc loop gang
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    ary[ix] = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	else
+	  ary[ix] = ix;
+      }
+  }
+
+  for (ix = 0; ix < N; ix++)
+    {
+      int expected = ix;
+      if(ondev)
+	{
+	  int g = ix / ((N + 31) / 32);
+	  int w = 0;
+	  int v = 0;
+
+	  expected = (g << 16) | (w << 8) | v;
+	}
+      
+      if (ary[ix] != expected)
+	{
+	  exit = 1;
+	  printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected);
+	}
+    }
+  
+  return exit;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.s
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.s	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.s	(working copy)
@@ -0,0 +1,386 @@
+	.file	"loop-g-1.c"
+	.section	.gnu.offload_lto_.inline.f031cb8759bb7418,"e",@progbits
+	.string	"x\234ce\200"
+	.string	"i\006\004`d`P\220g``\262zp\205\231\201\205\201\t,\306\310\004\022\007"
+	.ascii	"-\377\002_"
+	.text
+	.section	.gnu.offload_lto_main._omp_fn.0.f031cb8759bb7418,"e",@progbits
+	.ascii	"x\234\215W\373STG\026\276\347\366\2357\f\304\020b\310h0\331\321"
+	.ascii	"$\204\031\311V\355\243\222\335\252\255\375i\377\201T~\263\310"
+	.ascii	"@\b\273\300X0\032\363\023\027\034^E6\006bH\234\302\205\215(\020"
+	.ascii	"\214H\214\213\016.\316\202\n\270\3403\260\242\361\301#\nb\214"
+	.ascii	"\004\\$\b\3549\335\227a\200Y\365V\335\236\333\347~\337\351\257"
+	.ascii	"O\237>}G'\211K\266J\222\003$)\233:f\td\to`\037\237\301_&+\222"
+	.ascii	"\244\312Fl\024E6\320\263\236\236u!v=\350\020i\340\255Q6\021O"
+	.ascii	"U\030\2756\001\272\225@\312\225\231\242\323\033\214&\310eL\033"
+	.ascii	"U\321~?d;\031\264\025\335<*\277R\253\342u\243\264~U\236Z\260"
+	.ascii	"\272\022\201\025\032\030\366\300+\210\362\265L\236\326\225\262"
+	.ascii	"J\006`\304n\363\335\206\346H\352\312\020\220\260\377`\376r\233"
+	.ascii	"\201\025\342\230\330\351\274t\266\n6\354U?\316W\253;U(F\030\203"
+	.ascii	"}\300\320%\354\005\033\324\200M\252d\212\004\265\260\201\360"
+	.ascii	"\347\275\007+\231\360\255`w\274\3077d\025\276\343\261\373\311"
+	.ascii	"x\211\314\312\205\347\233\245\027K\242\354{\204\326\256\350\362"
+	.ascii	"\374\202NU\251d:\370Rx\257G\357u\350\235.\204\227\r"
+	.string	"\365\324D\bx\365\324\250RV\027\360\306#Zz\f\364v\311\037\362\312Ux\002\344\334\023#\017<1\262e\234=Jh\343\235\243\323<\362z\036\221\212\202\266\263\206R\036a\352n\357\231\373S\025\354\316S\243+\231\201\342\013_\023\225\3028\263w4\322\231H\274CP\005\2372\315_\351\341\341V\206\024\222\006\337\200\023\2324\202\267cG\263\314>\023\201o\236\275x\fP`\200\002_i-S\003\336uD\370\307\322\270\303at\2559\016*3>F\331\350\376\313\n\t3\302Q.\f\004\277\255\243\267V'\204\231\300\217\302\216h\370\017?\272\327\250\210t9\241R2V6\365\316\311\"a\252\275<\035g\206\232\017\033\330n\241\275\245\355\314\016\235\320^\375\275Q-\367r\217\255B\371?Q\3711R\336\262\250|\260i\356\323H\275\002\307\301\016\315x\037\304\273\021\357\003x\177\205\367~\274\033\300\316wG"
+	.string	"p\035\244Xd\235\034\367W"
+	.ascii	"\373\273\030\263~\276\244\"b!Q\033V\225i\211j\206\177\255LTh"
+	.ascii	"\003;R\246\206\317\317\232\331W\202_\323<\277S\257%\304\211j"
+	.ascii	"\320\310\355a\310'8Y\235\2716\027\305\374\202\\\374\267\276\021"
+	.ascii	"\266\202|2\f\371\024'O\\\237h\265\2626A\036\031\236\270$\257"
+	.ascii	" w\204!wr\262\377\352\221[Q\254[\220\217\237\275\360\302\nnW"
+	.ascii	"\030\356i\316\275\032\270^``}\202\273\353\312\314\004W\235\257"
+	.ascii	"\006\212w\306\323\006\260\300\277\303p\2739\267qv\270O\317\006"
+	.ascii	"\004\267\244a\327\035\203\340\336(\335\037[\236\257\362\261-"
+	.ascii	"\320\023\206\177F[\322\006\021\366\376\257k,\202Z]{\001\312\353"
+	.ascii	"\002\237\023\325(\205V+\316;\313\361\355u\375\275z6,\306\355"
+	.ascii	"\n\324\345Gj\023\036(\001U\354p8\027f\324\363\234\335\177\356"
+	.ascii	"`"
+	.string	"\251\225\335\024\354\351\2077\2765k\354}\213\354\013a\330\0279\273pO\373t$\033\021l\357\241\003G\264ZX=\270\310\3766\f\273we\002/\233\371\324\300\243f\336'F\357\231\274\246h\370\231\351 ^\277\022\377\037\034C\333<K\027\247\372P\277L\265#\376\377\255\315%\2607\226C@\235\237\337\256\252\215\277(g\t\200G\205Y\246\306BM\0045\221\324X\251\211\242&\032\033\313S|aW\341\243\356i\036\355\030\036\265g\370\354c\271\345YnY\315-\317\0210\216\232\347\251\241\003I\267\006\250.\255\305Gc<6\372\027\311\370\022u\3554\314zj6\360a$\251\210\327\317b\206\0076\033\203\302\342\366O\336Z\254\034\274,,)%8/\t\372\227\031\311\021\\&#\\\241V\202\327l\220`\243#\360\341\356f\023U4\035\304\300w`\213\223\306\240\240\370\217\032\f\001\337\234\350(4\022\300\002"
+	.string	"W1\202\211\034\343\rb\320U\2344\n\371\036\311\307,\241\325\330\f\022\\\303p\217\301.\265\311Hh\037\213@\374\2534\356\341\242\356&\263@\001\\\0072\265\236\372\342ea)Se\270\201C\t\373|\375]\277^\274\310\205\0014\277j[\263\304i$:M\340\350\005V\020F\201c<p\276 \334\312\347ur~*\020%&.\303 \204\243\373\030\320@\213\314(\021\270pH\353\032i\030\274g\030\006\301\312\201\222t\037"
+	.ascii	"#\"\243e\275\221t\340\267\020\3038\375d@K\264\017\327s\024\266"
+	.ascii	"W\350\261\363T\350ak\301\250\r\001z\214\246(\327\035\0338\031"
+	.ascii	"\025\272\334W\226\256\254\004\303|U\277'y\253\350\360\373\371"
+	.ascii	"\370\2045wV\305/\253\210\n:\340\340&\344\362\002R\343\177\030"
+	.ascii	"!\346\253\203[`\253\304\260\241\342+\3551\032\031\247!T\3138"
+	.ascii	"\005v\355R9\3631\273\216\204\353\270p\241\325\216\302}\202\312"
+	.ascii	"'k\327&\2130\2756\277\242\306\2326Y8}\021'G\271\341\357\212\023"
+	.ascii	"\206\227PI\355\271\300>s\360\200\366\2615\301\363\271\242\370"
+	.ascii	"\257vq>\217\004\317g\030\005'\271\250\350z\231\\P5\3518]\034"
+	.ascii	"\241\035\327\3501xZ\267\364\336\337!'5\3622\365g\232\347mtB\002"
+	.ascii	"\310\313\030$\241\034\256\323 tN\216\034y3d\213\340\256\245="
+	.ascii	"K;\226\207\0239wH\361\323\024\027\202w&\nCL\320\360\2460<C\206"
+	.ascii	"\333\220\367\305X\277\201,\370)\214]\232\316G\025U\312\272<\271"
+	.ascii	"J\375\034\313\220\2254\316\016\026\f\231\024\372\366\376\001"
+	.ascii	"\326q\325\247\006\017\371\365h\201\273\240\370X,\276\371\021"
+	.ascii	"\024\356\356\307\256\240\273\002\371q\356\356-w7N\356\236\305"
+	.ascii	"7?i\356\306a\301\233\357\261\336&\226{\233$o\253\361\315}\364"
+	.ascii	"\206\271p\257\360\352-\263\230\376s\250\032\241\027\356u\357"
+	.ascii	"\266\210\374\212\206\377\202M\240\246\r\002\024\207ZB\277\355"
+	.ascii	"u`\204)\001zPX\257\201\236\347\316\342\204\261\337$\2146n_-\262"
+	.ascii	"\310&lk\2370\213\036,f\321\364B\026\275M.Vd\321\332\260Y\224"
+	.ascii	"MB\177\016\311\242\031\312\"\276\372\376\321\326\337\0131\361"
+	.ascii	"\217\024\323}\252\366K&>\206\037\006?\206aV\250\2314jb\232\275"
+	.ascii	"\307&\025\355\323\030=\212/c\230C\002\0266\236\264XA\330\024"
+	.ascii	"\315da#\256\307\352D\025(\270\023\327\363\352\312\301\246\220"
+	.ascii	"\r\273\201\252\t\375\365\351\333\327\244\210\002C\345u\236\027"
+	.ascii	"\346\202\342\366\317\372R\026\213}\360\377\221\016\024Pe:#V\034"
+	.ascii	"\b\301\302i\301\302\231'\2079\020\220\030'\375@\3770\351*\242"
+	.ascii	"\005\300\347|p\346\270\262\223=\256\367\234Y9\351)\357\247fd"
+	.ascii	"8\335\233S\263\222].\247'{K\326_\0223S\263\323R\2359\331.g\232"
+	.ascii	"\313\225\230\231\234\236\225\221\236\225\352\314H\177'\315\235"
+	.ascii	"\271\331\351I\315\361\344lI\367\004-\0167r\023]\t\tN\207c\231"
+	.ascii	"\215\254\211.wf\246;\313\231\341voNLKLr\270$\213\003!\233R\222"
+	.ascii	"=\311\233\322\225\364m\216\215:wVJ\352V\226\234\375\201\305\341"
+	.ascii	"z\017El\312r;~\023\241=g&os\374\332\340\310\361\244nv\374\312"
+	.ascii	"\344p\277\373nN\252\307\361[\223\343\035\367\226\254\024G\322"
+	.ascii	"F\363\202-)IN\337\006i\360"
+	.ascii	">l\215\315toul\371\345\353\361\366\215\257\331\355.Orz\212c\333"
+	.ascii	"\033\022\373]\266\024\263\344\225\007_|\360F\030#\242\377\007"
+	.ascii	"!Kr\335"
+	.text
+	.section	.gnu.offload_lto_.symbol_nodes.f031cb8759bb7418,"e",@progbits
+	.string	"x\234ce``\320\003b\006&\236z\006\206\t\347\030\030\200\324\212\205\013\0170300\362\3263\202\205\030\030\032\032\024\030\030\230\031\030\031\216\264\277\231\317\301"
+	.string	""
+	.ascii	"\004N\0139"
+	.text
+	.section	.gnu.offload_lto_.refs.f031cb8759bb7418,"e",@progbits
+	.string	"x\234ce```\004b\006"
+	.string	""
+	.string	";"
+	.ascii	"\007"
+	.text
+	.section	.gnu.offload_lto_.offload_table.f031cb8759bb7418,"e",@progbits
+	.string	"x\234ce```\006bF\006\006"
+	.string	""
+	.string	"Z"
+	.ascii	"\n"
+	.text
+	.section	.gnu.offload_lto_.decls.f031cb8759bb7418,"e",@progbits
+	.ascii	"x\234\215T\337O\333U\024\377\236\336oa\226\2262@C\f\017d!\031"
+	.ascii	"\311\322v\350\037\240\017>\360\270\355\3057I\375\322\261F\370"
+	.ascii	"\226\264_4{\362\322\221XA\035L\030Jp\351\346F\221!k\351X\367"
+	.ascii	"\013\2500\030l\300\306&\242\213 \272\200/\023\331d\262lq\365"
+	.ascii	"\334{\373\205\002\242\336\344\334~\317\271\237\3639\347\334{"
+	.ascii	"N\215\222X?\247I\322\f\376~\206\222\300e\300\337\003\322\372"
+	.ascii	"bz\030%\"m]\314nM\342\263QrQ\206Q\366\243\354C)A\031B\351G\351"
+	.ascii	"C\031E\331\2152\2012\216r/\311?\200\022\377\217XM\004\306\347"
+	.ascii	"\342\2473\346\340G@\025\330\302}#\b\222E\360\005\034\245\327"
+	.ascii	" \255A7;I lI\360\026\312T\302u\216d|\236\213\b\260\201\017\203"
+	.ascii	"nJ\006\326\230ak\246\234\b\364L6\227$ID\277\003\332\031x\f?\301"
+	.ascii	",p\255g52b&\030\t\277\037?j9m%;\371\367\330\350\261\363f\331"
+	.ascii	" A;\020\341\367p\242\365~f=\201}&(@\365\344\374\303f\006\220"
+	.ascii	"\240\003\3629\340\346\344\314\220\221Y$\370\ndfy\336\031\355"
+	.ascii	"5\350\030\350\024\306\225\313\253\263F\262\233GI4v\f\247\261"
+	.ascii	"(]@\340,\310\020\022\230\356\256ga\023w\334o\206\267\270\351"
+	.ascii	"\367\225\346\017w\344\035\247\270\226kh\026\300\201\227\340\325"
+	.ascii	"|\330\213\261\341\034\344\341\336\215\276<\221\341\253\023\027"
+	.ascii	"\255\273\2024\036\244\363\261O\215\237\237\210\323#\224\326\310"
+	.ascii	"\f\007gx=\3605\006\343\237\360%\314\341\265\205a\027z\266/\236"
+	.ascii	"\250I\253#$\232&A\204[Z\257\254\3340b\325\331&\330\201j"
+	.string	"l\351l\314\202j\256\t\342,T\355\365\2431\003\271\004\274\232?f\273\226\254d@(w\226\307\277\310\020\216Y\250^X\034\0331\tG\n\250?\273\037\353M'\023\002{t\241\277\305\"\260\r\324\200\206\245\310jM\232)\300\212\r\372\351\316\232bv\024\305;\354aw\r\227\301\204\273#\037.\241\212\360\217>Y>'\013\377k\224\245\331\026\231~n\020\321\202\265<\321\370J\364\003\310n`\214\270\025\310\354\031\257\"\341\025N\330\007\331:-\007O\266\005\300\\G\251_\017\217\200\224\370\375`\026\315$7O\306#2\024\026\346\024\235b\350\371\330\020\t\371i\340H:\224\230\341<>\3527\220\263\346\210\031\243s1n\022G\007\027\032\200\3762\017\214\017.\376#8\312\342I\215\224\006h\330O_\317\223 \206\347\027\330\371 \0241\007\004\r@!\304\241\2207x;\253\220\277:\215gq\302^ )\377"
+	.ascii	"\277u\207Z2r\222\204\301\377A\210>\017\236\034{\323R\237r\033"
+	.ascii	"C)yJp\r,I\340\355\216!j\325\221\376m\2210\f\026DO\325\206\333"
+	.ascii	"\210x59e\276r\371|\221\256xS\263\025L\371\200\336\300;\241\301"
+	.ascii	"O_(\221\340:\032F8\335(o\203\033\250\217\241\336\212%\321\345"
+	.ascii	"D\342\375D\"\235\037q\003m<\365F\240\361en`\333\206\341\302\036"
+	.ascii	"\330f\270\360D\037\256q>\\7\365\341\252\3778\324\277\2617\020"
+	.ascii	"\220R\343\204\350\r\366\313F1\261\032\267\212\032Y_\327>8^g\331"
+	.ascii	"\276\257o\351}=)\372z\360Q8d\026\336\204ME\355\302|\206\270\241"
+	.ascii	"<T'Cg\232LdJ\214\320\364\223\276\247fr\227+\362X\357\323\036"
+	.ascii	"\302\357\256H\277\274\345\223\224\340c\213\242\340;|\352oy\260"
+	.ascii	"i\f>\243\337\036E2fn\027c\322 KL\205\273\311\016\202;\354C\334"
+	.ascii	"\363k\034\310ns\365\327\2772\327\007\213\023\300\024\262\336"
+	.ascii	"\346%}\317'\353\226>Y-=\177\006eS2\243\300\277f\204\350\305p"
+	.ascii	"t\320\222\312\375\303\266\334p\017\225\314J\247[\265\227z*\253"
+	.ascii	"J\017\252\366\275/\342G\201\346\364\226\273\264\002\227\252y"
+	.ascii	"\017Wy\334\252f\3618\025\245\340`\265\252hn\217\352\007\207O"
+	.ascii	"\361:5\345\220C\365\271\313\336sUT8<U.\0251\016\315[\255\276"
+	.ascii	"c\253t!\201\303\347U\034\345\212bc\021*\334\252\313Q\341~\273"
+	.ascii	"\034\371\035\232\313\247\371\252\335\332\232\305\316\370m\312"
+	.ascii	"\236=\016\273}\223\215Ym\212\247\262\322\243:*<\236*[\271\255"
+	.ascii	"\330\256HF\217Z\346z\327bgy\22795g\251f\177\2058\275\207A\221"
+	.ascii	"\225CN\357\337(\266/\210"
+	.text
+	.section	.gnu.offload_lto_.symtab.f031cb8759bb7418,"e",@progbits
+	.text
+	.section	.gnu.offload_lto_.opts,"e",@progbits
+	.string	"'-fexceptions' '-fmath-errno' '-fsigned-zeros' '-ftrapping-math' '-fno-trapv' '-fno-strict-overflow' '-fno-openmp' '-foffload-abi=lp64' '-fopenacc'"
+	.text
+	.section	.gnu.offload_lto_.mode_table.f031cb8759bb7418,"e",@progbits
+	.string	"x\234ce\200"
+	.string	"e \026"
+	.string	"\342\376#\r\035\r{:\004&\2664-h8\322\0210\251\245\345@\303\211\216\t\314\223[:\032\032\317t\\`f`\016\364d`\016\006b\027 \016\361d"
+	.string	""
+	.ascii	"\225\020\024\253"
+	.text
+	.section	.rodata
+.LC0:
+	.string	"ary[%d]=%x expected %x\n"
+	.text
+	.globl	main
+	.type	main, @function
+main:
+.LFB11:
+	.cfi_startproc
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	.cfi_offset 6, -16
+	movq	%rsp, %rbp
+	.cfi_def_cfa_register 6
+	subq	$131216, %rsp
+	movl	$0, -8(%rbp)
+	movl	$0, -131204(%rbp)
+	movl	$0, -4(%rbp)
+.L3:
+	cmpl	$32784, -4(%rbp)
+	jg	.L2
+	movl	-4(%rbp), %eax
+	cltq
+	movl	$-1, -131200(%rbp,%rax,4)
+	addl	$1, -4(%rbp)
+	jmp	.L3
+.L2:
+	leaq	-131204(%rbp), %rax
+	movq	%rax, -48(%rbp)
+	leaq	-131200(%rbp), %rax
+	movq	%rax, -40(%rbp)
+	leaq	-48(%rbp), %rax
+	subq	$8, %rsp
+	pushq	$0
+	movl	$_ZZ4mainE17.omp_data_kinds.5, %r9d
+	movl	$_ZZ4mainE17.omp_data_sizes.4, %r8d
+	movq	%rax, %rcx
+	movl	$2, %edx
+	movl	$main._omp_fn.0, %esi
+	movl	$-1, %edi
+	movl	$0, %eax
+	call	GOACC_parallel_keyed
+	addq	$16, %rsp
+	movl	$0, -4(%rbp)
+.L7:
+	cmpl	$32784, -4(%rbp)
+	jg	.L4
+	movl	-4(%rbp), %eax
+	movl	%eax, -12(%rbp)
+	movl	-131204(%rbp), %eax
+	testl	%eax, %eax
+	je	.L5
+	movl	-4(%rbp), %eax
+	movslq	%eax, %rdx
+	imulq	$2145388543, %rdx, %rdx
+	shrq	$32, %rdx
+	sarl	$9, %edx
+	sarl	$31, %eax
+	subl	%eax, %edx
+	movl	%edx, %eax
+	movl	%eax, -16(%rbp)
+	movl	$0, -20(%rbp)
+	movl	$0, -24(%rbp)
+	movl	-16(%rbp), %eax
+	sall	$16, %eax
+	movl	%eax, %edx
+	movl	-20(%rbp), %eax
+	sall	$8, %eax
+	orl	%edx, %eax
+	orl	-24(%rbp), %eax
+	movl	%eax, -12(%rbp)
+.L5:
+	movl	-4(%rbp), %eax
+	cltq
+	movl	-131200(%rbp,%rax,4), %eax
+	cmpl	-12(%rbp), %eax
+	je	.L6
+	movl	$1, -8(%rbp)
+	movl	-4(%rbp), %eax
+	cltq
+	movl	-131200(%rbp,%rax,4), %edx
+	movl	-12(%rbp), %ecx
+	movl	-4(%rbp), %eax
+	movl	%eax, %esi
+	movl	$.LC0, %edi
+	movl	$0, %eax
+	call	printf
+.L6:
+	addl	$1, -4(%rbp)
+	jmp	.L7
+.L4:
+	movl	-8(%rbp), %eax
+	leave
+	.cfi_def_cfa 7, 8
+	ret
+	.cfi_endproc
+.LFE11:
+	.size	main, .-main
+	.type	main._omp_fn.0, @function
+main._omp_fn.0:
+.LFB12:
+	.cfi_startproc
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	.cfi_offset 6, -16
+	movq	%rsp, %rbp
+	.cfi_def_cfa_register 6
+	pushq	%r15
+	pushq	%r14
+	pushq	%r13
+	pushq	%r12
+	pushq	%rbx
+	subq	$40, %rsp
+	.cfi_offset 15, -24
+	.cfi_offset 14, -32
+	.cfi_offset 13, -40
+	.cfi_offset 12, -48
+	.cfi_offset 3, -56
+	movq	%rdi, -72(%rbp)
+	movl	$0, %r12d
+	movl	$1, %r14d
+	movl	$1, %eax
+	movl	%eax, %r15d
+.L15:
+	movl	$0, %eax
+	movl	%eax, %ebx
+	movl	$32785, %r13d
+	cmpl	%r13d, %ebx
+	jge	.L10
+.L13:
+	movl	%ebx, %eax
+	movl	%eax, -52(%rbp)
+	movl	$5, %edi
+	call	acc_on_device
+	testl	%eax, %eax
+	jne	.L11
+	jmp	.L16
+.L14:
+	addl	%r15d, %ebx
+	cmpl	%r13d, %ebx
+	jl	.L13
+	jmp	.L10
+.L16:
+	movl	-52(%rbp), %ecx
+	movq	-72(%rbp), %rax
+	movq	8(%rax), %rax
+	movl	-52(%rbp), %edx
+	movl	%ecx, (%rax,%rdx,4)
+	jmp	.L14
+.L11:
+	movl	$0, -56(%rbp)
+	movl	$0, -60(%rbp)
+	movl	$0, -64(%rbp)
+#APP
+# 26 "/scratch/nsidwell/openacc/trunk-merge/src/gcc-mainline/libgomp/testsuite/libgomp.oacc-c++/../libgomp.oacc-c-c++-common/loop-g-1.c" 1
+	mov.u32 %eax,%ctaid.x;
+# 0 "" 2
+#NO_APP
+	movl	%eax, -56(%rbp)
+#APP
+# 27 "/scratch/nsidwell/openacc/trunk-merge/src/gcc-mainline/libgomp/testsuite/libgomp.oacc-c++/../libgomp.oacc-c-c++-common/loop-g-1.c" 1
+	mov.u32 %eax,%tid.y;
+# 0 "" 2
+#NO_APP
+	movl	%eax, -60(%rbp)
+#APP
+# 28 "/scratch/nsidwell/openacc/trunk-merge/src/gcc-mainline/libgomp/testsuite/libgomp.oacc-c++/../libgomp.oacc-c-c++-common/loop-g-1.c" 1
+	mov.u32 %eax,%tid.x;
+# 0 "" 2
+#NO_APP
+	movl	%eax, -64(%rbp)
+	movl	-56(%rbp), %eax
+	sall	$16, %eax
+	movl	%eax, %edx
+	movl	-60(%rbp), %eax
+	sall	$8, %eax
+	orl	%edx, %eax
+	orl	-64(%rbp), %eax
+	movl	%eax, %ecx
+	movq	-72(%rbp), %rax
+	movq	8(%rax), %rax
+	movl	-52(%rbp), %edx
+	movl	%ecx, (%rax,%rdx,4)
+	movq	-72(%rbp), %rax
+	movq	(%rax), %rax
+	movl	$1, (%rax)
+	jmp	.L14
+.L10:
+	addl	$1, %r12d
+	cmpl	%r14d, %r12d
+	jl	.L15
+	movl	$32785, -52(%rbp)
+	addq	$40, %rsp
+	popq	%rbx
+	popq	%r12
+	popq	%r13
+	popq	%r14
+	popq	%r15
+	popq	%rbp
+	.cfi_def_cfa 7, 8
+	ret
+	.cfi_endproc
+.LFE12:
+	.size	main._omp_fn.0, .-main._omp_fn.0
+	.data
+	.align 16
+	.type	_ZZ4mainE17.omp_data_sizes.4, @object
+	.size	_ZZ4mainE17.omp_data_sizes.4, 16
+_ZZ4mainE17.omp_data_sizes.4:
+	.quad	4
+	.quad	131140
+	.align 2
+	.type	_ZZ4mainE17.omp_data_kinds.5, @object
+	.size	_ZZ4mainE17.omp_data_kinds.5, 4
+_ZZ4mainE17.omp_data_kinds.5:
+	.value	643
+	.value	643
+	.section	.gnu.offload_vars,"aw",@progbits
+	.align 8
+	.type	.offload_var_table, @object
+	.size	.offload_var_table, 0
+.offload_var_table:
+	.section	.gnu.offload_funcs,"aw",@progbits
+	.align 8
+	.type	.offload_func_table, @object
+	.size	.offload_func_table, 8
+.offload_func_table:
+	.quad	main._omp_fn.0
+	.comm	__gnu_lto_v1,1,1
+	.ident	"GCC: (Sourcery CodeBench (OpenACC/PTX) Lite 2016.05-999999) 6.0.0 20151019 (experimental)"
+	.section	.note.GNU-stack,"",@progbits
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c	(working copy)
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main ()
+{
+  int ary[N];
+  int ix;
+  int exit = 0;
+  int ondev = 0;
+
+  for (ix = 0; ix < N;ix++)
+    ary[ix] = -1;
+  
+#pragma acc parallel num_gangs(32) vector_length(32) copy(ary) copy(ondev)
+  {
+#pragma acc loop gang (static:1)
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    ary[ix] = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	else
+	  ary[ix] = ix;
+      }
+  }
+
+  for (ix = 0; ix < N; ix++)
+    {
+      int expected = ix;
+      if(ondev)
+	{
+	  int g = ix % 32;
+	  int w = 0;
+	  int v = 0;
+
+	  expected = (g << 16) | (w << 8) | v;
+	}
+      
+      if (ary[ix] != expected)
+	{
+	  exit = 1;
+	  printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected);
+	}
+    }
+  
+  return exit;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c	(working copy)
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main ()
+{
+  int ary[N];
+  int ix;
+  int exit = 0;
+  int ondev = 0;
+
+  for (ix = 0; ix < N;ix++)
+    ary[ix] = -1;
+  
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev)
+  {
+#pragma acc loop gang worker vector
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    ary[ix] = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	else
+	  ary[ix] = ix;
+      }
+  }
+
+  for (ix = 0; ix < N; ix++)
+    {
+      int expected = ix;
+      if(ondev)
+	{
+	  int chunk_size = (N + 32*32*32 - 1) / (32*32*32);
+	  
+	  int g = ix / (chunk_size * 32 * 32);
+	  int w = ix / 32 % 32;
+	  int v = ix % 32;
+
+	  expected = (g << 16) | (w << 8) | v;
+	}
+      
+      if (ary[ix] != expected)
+	{
+	  exit = 1;
+	  printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected);
+	}
+    }
+  
+  return exit;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c	(working copy)
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main ()
+{
+  int ary[N];
+  int ix;
+  int exit = 0;
+  int ondev = 0;
+
+  for (ix = 0; ix < N;ix++)
+    ary[ix] = -1;
+  
+#pragma acc parallel vector_length(32) copy(ary) copy(ondev)
+  {
+#pragma acc loop vector
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    ary[ix] = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	else
+	  ary[ix] = ix;
+      }
+  }
+
+  for (ix = 0; ix < N; ix++)
+    {
+      int expected = ix;
+      if(ondev)
+	{
+	  int g = 0;
+	  int w = 0;
+	  int v = ix % 32;
+
+	  expected = (g << 16) | (w << 8) | v;
+	}
+      
+      if (ary[ix] != expected)
+	{
+	  exit = 1;
+	  printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected);
+	}
+    }
+  
+  return exit;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c	(working copy)
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main ()
+{
+  int ary[N];
+  int ix;
+  int exit = 0;
+  int ondev = 0;
+
+  for (ix = 0; ix < N;ix++)
+    ary[ix] = -1;
+  
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+  {
+#pragma acc loop worker
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    ary[ix] = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	else
+	  ary[ix] = ix;
+      }
+  }
+
+  for (ix = 0; ix < N; ix++)
+    {
+      int expected = ix;
+      if(ondev)
+	{
+	  int g = 0;
+	  int w = ix % 32;
+	  int v = 0;
+
+	  expected = (g << 16) | (w << 8) | v;
+	}
+      
+      if (ary[ix] != expected)
+	{
+	  exit = 1;
+	  printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected);
+	}
+    }
+  
+  return exit;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c	(revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c	(working copy)
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main ()
+{
+  int ary[N];
+  int ix;
+  int exit = 0;
+  int ondev = 0;
+
+  for (ix = 0; ix < N;ix++)
+    ary[ix] = -1;
+  
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+  {
+#pragma acc loop worker vector
+    for (unsigned ix = 0; ix < N; ix++)
+      {
+	if (__builtin_acc_on_device (5))
+	  {
+	    int g = 0, w = 0, v = 0;
+
+	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+	    ary[ix] = (g << 16) | (w << 8) | v;
+	    ondev = 1;
+	  }
+	else
+	  ary[ix] = ix;
+      }
+  }
+
+  for (ix = 0; ix < N; ix++)
+    {
+      int expected = ix;
+      if(ondev)
+	{
+	  int g = 0;
+	  int w = (ix / 32) % 32;
+	  int v = ix % 32;
+
+	  expected = (g << 16) | (w << 8) | v;
+	}
+      
+      if (ary[ix] != expected)
+	{
+	  exit = 1;
+	  printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected);
+	}
+    }
+  
+  return exit;
+}

Reply via email to