This patch adds some new execution tests verifying that loop partitioning
behaves as expected.
There are more execution tests on the gomp4 branch, but many of them use
reductions. We'll merge those once reductions are merged.
nathan
2015-10-20 Nathan Sidwell <nat...@codesourcery.com>
* testsuite/libgomp.oacc-c-c++-common/loop-g-1.c: New.
* testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: New.
* testsuite/libgomp.oacc-c-c++-common/loop-g-1.s: New.
* testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c: New.
* testsuite/libgomp.oacc-c-c++-common/loop-g-2.c: New.
* testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c: New.
* testsuite/libgomp.oacc-c-c++-common/loop-v-1.c: New.
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c (revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.c (working copy)
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main () /* Runs a gang-partitioned loop that records per-iteration ids, then checks them; returns 0 if all match, 1 otherwise.  */
+{
+ int ary[N];
+ int ix;
+ int exit = 0; /* Exit status: set to 1 on any mismatch.  */
+ int ondev = 0; /* Set to 1 by the region when the on-device branch ran.  */
+ /* Poison every element with -1 so an element the region never writes is reported as a mismatch.  */
+ for (ix = 0; ix < N;ix++)
+ ary[ix] = -1;
+
+#pragma acc parallel num_gangs(32) vector_length(32) copy(ary) copy(ondev)
+ {
+#pragma acc loop gang
+ for (unsigned ix = 0; ix < N; ix++)
+ {
+ if (__builtin_acc_on_device (5)) /* NOTE(review): 5 is presumably acc_device_nvidia; confirm against openacc.h.  */
+ {
+ int g = 0, w = 0, v = 0;
+ /* Read the PTX special registers %ctaid.x, %tid.y and %tid.x into g, w and v.  */
+ __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+ __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+ __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ ary[ix] = (g << 16) | (w << 8) | v; /* Pack the ids: g from bit 16, w from bit 8, v in the low bits.  */
+ ondev = 1;
+ }
+ else
+ ary[ix] = ix; /* Fallback path: record the iteration number itself.  */
+ }
+ }
+ /* Verify every element against the value predicted for the path that ran.  */
+ for (ix = 0; ix < N; ix++)
+ {
+ int expected = ix; /* Value stored by the fallback path.  */
+ if(ondev)
+ {
+ int g = ix / ((N + 31) / 32); /* Expect contiguous chunks of ceil (N / 32) iterations per gang.  */
+ int w = 0; /* The test expects %tid.y to read 0 here.  */
+ int v = 0; /* The test expects %tid.x to read 0 here.  */
+
+ expected = (g << 16) | (w << 8) | v; /* Same packing as in the region.  */
+ }
+
+ if (ary[ix] != expected)
+ {
+ exit = 1;
+ printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected);
+ }
+ }
+
+ return exit;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.s
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.s (revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-1.s (working copy)
@@ -0,0 +1,386 @@
+ .file "loop-g-1.c"
+ .section .gnu.offload_lto_.inline.f031cb8759bb7418,"e",@progbits
+ .string "x\234ce\200"
+ .string "i\006\004`d`P\220g``\262zp\205\231\201\205\201\t,\306\310\004\022\007"
+ .ascii "-\377\002_"
+ .text
+ .section .gnu.offload_lto_main._omp_fn.0.f031cb8759bb7418,"e",@progbits
+ .ascii "x\234\215W\373STG\026\276\347\366\2357\f\304\020b\310h0\331\321"
+ .ascii "$\204\031\311V\355\243\222\335\252\255\375i\377\201T~\263\310"
+ .ascii "@\b\273\300X0\032\363\023\027\034^E6\006bH\234\302\205\215(\020"
+ .ascii "\214H\214\213\016.\316\202\n\270\3403\260\242\361\301#\nb\214"
+ .ascii "\004\\$\b\3549\335\227a\200Y\365V\335\236\333\347~\337\351\257"
+ .ascii "O\237>}G'\211K\266J\222\003$)\233:f\td\to`\037\237\301_&+\222"
+ .ascii "\244\312Fl\024E6\320\263\236\236u!v=\350\020i\340\255Q6\021O"
+ .ascii "U\030\2756\001\272\225@\312\225\231\242\323\033\214&\310eL\033"
+ .ascii "U\321~?d;\031\264\025\335<*\277R\253\342u\243\264~U\236Z\260"
+ .ascii "\272\022\201\025\032\030\366\300+\210\362\265L\236\326\225\262"
+ .ascii "J\006`\304n\363\335\206\346H\352\312\020\220\260\377`\376r\233"
+ .ascii "\201\025\342\230\330\351\274t\266\n6\354U?\316W\253;U(F\030\203"
+ .ascii "}\300\320%\354\005\033\324\200M\252d\212\004\265\260\201\360"
+ .ascii "\347\275\007+\231\360\255`w\274\3077d\025\276\343\261\373\311"
+ .ascii "x\211\314\312\205\347\233\245\027K\242\354{\204\326\256\350\362"
+ .ascii "\374\202NU\251d:\370Rx\257G\357u\350\235.\204\227\r"
+ .string "\365\324D\bx\365\324\250RV\027\360\306#Zz\f\364v\311\037\362\312Ux\002\344\334\023#\017<1\262e\234=Jh\343\235\243\323<\362z\036\221\212\202\266\263\206R\036a\352n\357\231\373S\025\354\316S\243+\231\201\342\013_\023\225\3028\263w4\322\231H\274CP\005\2372\315_\351\341\341V\206\024\222\006\337\200\023\2324\202\267cG\263\314>\023\201o\236\275x\fP`\200\002_i-S\003\336uD\370\307\322\270\303at\2559\016*3>F\331\350\376\313\n\t3\302Q.\f\004\277\255\243\267V'\204\231\300\217\302\216h\370\017?\272\327\250\210t9\241R2V6\365\316\311\"a\252\275<\035g\206\232\017\033\330n\241\275\245\355\314\016\235\320^\375\275Q-\367r\217\255B\371?Q\3711R\336\262\250|\260i\356\323H\275\002\307\301\016\315x\037\304\273\021\357\003x\177\205\367~\274\033\300\316wG"
+ .string "p\035\244Xd\235\034\367W"
+ .ascii "\373\273\030\263~\276\244\"b!Q\033V\225i\211j\206\177\255LTh"
+ .ascii "\003;R\246\206\317\317\232\331W\202_\323<\277S\257%\304\211j"
+ .ascii "\320\310\355a\310'8Y\235\2716\027\305\374\202\\\374\267\276\021"
+ .ascii "\266\202|2\f\371\024'O\\\237h\265\2626A\036\031\236\270$\257"
+ .ascii " w\204!wr\262\377\352\221[Q\254[\220\217\237\275\360\302\nnW"
+ .ascii "\030\356i\316\275\032\270^``}\202\273\353\312\314\004W\235\257"
+ .ascii "\006\212w\306\323\006\260\300\277\303p\2739\267qv\270O\317\006"
+ .ascii "\004\267\244a\327\035\203\340\336(\335\037[\236\257\362\261-"
+ .ascii "\320\023\206\177F[\322\006\021\366\376\257k,\202Z]{\001\312\353"
+ .ascii "\002\237\023\325(\205V+\316;\313\361\355u\375\275z6,\306\355"
+ .ascii "\n\324\345Gj\023\036(\001U\354p8\027f\324\363\234\335\177\356"
+ .ascii "`"
+ .string "\251\225\335\024\354\351\2077\2765k\354}\213\354\013a\330\0279\273pO\373t$\033\021l\357\241\003G\264ZX=\270\310\3766\f\273we\002/\233\371\324\300\243f\336'F\357\231\274\246h\370\231\351 ^\277\022\377\037\034C\333<K\027\247\372P\277L\265#\376\377\255\315%\2607\226C@\235\237\337\256\252\215\277(g\t\200G\205Y\246\306BM\0045\221\324X\251\211\242&\032\033\313S|aW\341\243\356i\036\355\030\036\265g\370\354c\271\345YnY\315-\317\0210\216\232\347\251\241\003I\267\006\250.\255\305Gc<6\372\027\311\370\022u\3554\314zj6\360a$\251\210\327\317b\206\0076\033\203\302\342\366O\336Z\254\034\274,,)%8/\t\372\227\031\311\021\\&#\\\241V\202\327l\220`\243#\360\341\356f\023U4\035\304\300w`\213\223\306\240\240\370\217\032\f\001\337\234\350(4\022\300\002"
+ .string "W1\202\211\034\343\rb\320U\2344\n\371\036\311\307,\241\325\330\f\022\\\303p\217\301.\265\311Hh\037\213@\374\2534\356\341\242\356&\263@\001\\\0072\265\236\372\342ea)Se\270\201C\t\373|\375]\277^\274\310\205\0014\277j[\263\304i$:M\340\350\005V\020F\201c<p\276 \334\312\347ur~*\020%&.\303 \204\243\373\030\320@\213\314(\021\270pH\353\032i\030\274g\030\006\301\312\201\222t\037"
+ .ascii "#\"\243e\275\221t\340\267\020\3038\375d@K\264\017\327s\024\266"
+ .ascii "W\350\261\363T\350ak\301\250\r\001z\214\246(\327\035\0338\031"
+ .ascii "\025\272\334W\226\256\254\004\303|U\277'y\253\350\360\373\371"
+ .ascii "\370\2045wV\305/\253\210\n:\340\340&\344\362\002R\343\177\030"
+ .ascii "!\346\253\203[`\253\304\260\241\342+\3551\032\031\247!T\3138"
+ .ascii "\005v\355R9\3631\273\216\204\353\270p\241\325\216\302}\202\312"
+ .ascii "'k\327&\2130\2756\277\242\306\2326Y8}\021'G\271\341\357\212\023"
+ .ascii "\206\227PI\355\271\300>s\360\200\366\2615\301\363\271\242\370"
+ .ascii "\257vq>\217\004\317g\030\005'\271\250\350z\231\\P5\3518]\034"
+ .ascii "\241\035\327\3501xZ\267\364\336\337!'5\3622\365g\232\347mtB\002"
+ .ascii "\310\313\030$\241\034\256\323 tN\216\034y3d\213\340\256\245="
+ .ascii "K;\226\207\0239wH\361\323\024\027\202w&\nCL\320\360\2460<C\206"
+ .ascii "\333\220\367\305X\277\201,\370)\214]\232\316G\025U\312\272<\271"
+ .ascii "J\375\034\313\220\2254\316\016\026\f\231\024\372\366\376\001"
+ .ascii "\326q\325\247\006\017\371\365h\201\273\240\370X,\276\371\021"
+ .ascii "\024\356\356\307\256\240\273\002\371q\356\356-w7N\356\236\305"
+ .ascii "7?i\356\306a\301\233\357\261\336&\226{\233$o\253\361\315}\364"
+ .ascii "\206\271p\257\360\352-\263\230\376s\250\032\241\027\356u\357"
+ .ascii "\266\210\374\212\206\377\202M\240\246\r\002\024\207ZB\277\355"
+ .ascii "u`\204)\001zPX\257\201\236\347\316\342\204\261\337$\2146n_-\262"
+ .ascii "\310&lk\2370\213\036,f\321\364B\026\275M.Vd\321\332\260Y\224"
+ .ascii "MB\177\016\311\242\031\312\"\276\372\376\321\326\337\0131\361"
+ .ascii "\217\024\323}\252\366K&>\206\037\006?\206aV\250\2314jb\232\275"
+ .ascii "\307&\025\355\323\030=\212/c\230C\002\0266\236\264XA\330\024"
+ .ascii "\315da#\256\307\352D\025(\270\023\327\363\352\312\301\246\220"
+ .ascii "\r\273\201\252\t\375\365\351\333\327\244\210\002C\345u\236\027"
+ .ascii "\346\202\342\366\317\372R\026\213}\360\377\221\016\024Pe:#V\034"
+ .ascii "\b\301\302i\301\302\231'\2079\020\220\030'\375@\3770\351*\242"
+ .ascii "\005\300\347|p\346\270\262\223=\256\367\234Y9\351)\357\247fd"
+ .ascii "8\335\233S\263\222].\247'{K\326_\0223S\263\323R\2359\331.g\232"
+ .ascii "\313\225\230\231\234\236\225\221\236\225\352\314H\177'\315\235"
+ .ascii "\271\331\351I\315\361\344lI\367\004-\0167r\023]\t\tN\207c\231"
+ .ascii "\215\254\211.wf\246;\313\231\341voNLKLr\270$\213\003!\233R\222"
+ .ascii "=\311\233\322\225\364m\216\215:wVJ\352V\226\234\375\201\305\341"
+ .ascii "z\017El\312r;~\023\241=g&os\374\332\340\310\361\244nv\374\312"
+ .ascii "\344p\277\373nN\252\307\361[\223\343\035\367\226\254\024G\322"
+ .ascii "F\363\202-)IN\337\006i\360"
+ .ascii ">l\215\315toul\371\345\353\361\366\215\257\331\355.Orz\212c\333"
+ .ascii "\033\022\373]\266\024\263\344\225\007_|\360F\030#\242\377\007"
+ .ascii "!Kr\335"
+ .text
+ .section .gnu.offload_lto_.symbol_nodes.f031cb8759bb7418,"e",@progbits
+ .string "x\234ce``\320\003b\006&\236z\006\206\t\347\030\030\200\324\212\205\013\0170300\362\3263\202\205\030\030\032\032\024\030\030\230\031\030\031\216\264\277\231\317\301"
+ .string ""
+ .ascii "\004N\0139"
+ .text
+ .section .gnu.offload_lto_.refs.f031cb8759bb7418,"e",@progbits
+ .string "x\234ce```\004b\006"
+ .string ""
+ .string ";"
+ .ascii "\007"
+ .text
+ .section .gnu.offload_lto_.offload_table.f031cb8759bb7418,"e",@progbits
+ .string "x\234ce```\006bF\006\006"
+ .string ""
+ .string "Z"
+ .ascii "\n"
+ .text
+ .section .gnu.offload_lto_.decls.f031cb8759bb7418,"e",@progbits
+ .ascii "x\234\215T\337O\333U\024\377\236\336oa\226\2262@C\f\017d!\031"
+ .ascii "\311\322v\350\037\240\017>\360\270\355\3057I\375\322\261F\370"
+ .ascii "\226\264_4{\362\322\221XA\035L\030Jp\351\346F\221!k\351X\367"
+ .ascii "\013\2500\030l\300\306&\242\213 \272\200/\023\331d\262lq\365"
+ .ascii "\334{\373\205\002\242\336\344\334~\317\271\237\3639\347\334{"
+ .ascii "N\215\222X?\247I\322\f\376~\206\222\300e\300\337\003\322\372"
+ .ascii "bz\030%\"m]\314nM\342\263QrQ\206Q\366\243\354C)A\031B\351G\351"
+ .ascii "C\031E\331\2152\2012\216r/\311?\200\022\377\217XM\004\306\347"
+ .ascii "\342\2473\346\340G@\025\330\302}#\b\222E\360\005\034\245\327"
+ .ascii " \255A7;I lI\360\026\312T\302u\216d|\236\213\b\260\201\017\203"
+ .ascii "nJ\006\326\230ak\246\234\b\364L6\227$ID\277\003\332\031x\f?\301"
+ .ascii ",p\255g52b&\030\t\277\037?j9m%;\371\367\330\350\261\363f\331"
+ .ascii " A;\020\341\367p\242\365~f=\201}&(@\365\344\374\303f\006\220"
+ .ascii "\240\003\3629\340\346\344\314\220\221Y$\370\ndfy\336\031\355"
+ .ascii "5\350\030\350\024\306\225\313\253\263F\262\233GI4v\f\247\261"
+ .ascii "(]@\340,\310\020\022\230\356\256ga\023w\334o\206\267\270\351"
+ .ascii "\367\225\346\017w\344\035\247\270\226kh\026\300\201\227\340\325"
+ .ascii "|\330\213\261\341\034\344\341\336\215\276<\221\341\253\023\027"
+ .ascii "\255\273\2024\036\244\363\261O\215\237\237\210\323#\224\326\310"
+ .ascii "\f\007gx=\3605\006\343\237\360%\314\341\265\205a\027z\266/\236"
+ .ascii "\250I\253#$\232&A\204[Z\257\254\3340b\325\331&\330\201j"
+ .string "l\351l\314\202j\256\t\342,T\355\365\2431\003\271\004\274\232?f\273\226\254d@(w\226\307\277\310\020\216Y\250^X\034\0331\tG\n\250?\273\037\353M'\023\002{t\241\277\305\"\260\r\324\200\206\245\310jM\232)\300\212\r\372\351\316\232bv\024\305;\354aw\r\227\301\204\273#\037.\241\212\360\217>Y>'\013\377k\224\245\331\026\231~n\020\321\202\265<\321\370J\364\003\310n`\214\270\025\310\354\031\257\"\341\025N\330\007\331:-\007O\266\005\300\\G\251_\017\217\200\224\370\375`\026\315$7O\306#2\024\026\346\024\235b\350\371\330\020\t\371i\340H:\224\230\341<>\3527\220\263\346\210\031\243s1n\022G\007\027\032\200\3762\017\214\017.\376#8\312\342I\215\224\006h\330O_\317\223 \206\347\027\330\371 \0241\007\004\r@!\304\241\2207x;\253\220\277:\215gq\302^ )\377"
+ .ascii "\277u\207Z2r\222\204\301\377A\210>\017\236\034{\323R\237r\033"
+ .ascii "C)yJp\r,I\340\355\216!j\325\221\376m\2210\f\026DO\325\206\333"
+ .ascii "\210x59e\276r\371|\221\256xS\263\025L\371\200\336\300;\241\301"
+ .ascii "O_(\221\340:\032F8\335(o\203\033\250\217\241\336\212%\321\345"
+ .ascii "D\342\375D\"\235\037q\003m<\365F\240\361en`\333\206\341\302\036"
+ .ascii "\330f\270\360D\037\256q>\\7\365\341\252\3778\324\277\2617\020"
+ .ascii "\220R\343\204\350\r\366\313F1\261\032\267\212\032Y_\327>8^g\331"
+ .ascii "\276\257o\351}=)\372z\360Q8d\026\336\204ME\355\302|\206\270\241"
+ .ascii "<T'Cg\232LdJ\214\320\364\223\276\247fr\227+\362X\357\323\036"
+ .ascii "\302\357\256H\277\274\345\223\224\340c\213\242\340;|\352oy\260"
+ .ascii "i\f>\243\337\036E2fn\027c\322 KL\205\273\311\016\202;\354C\334"
+ .ascii "\363k\034\310ns\365\327\2772\327\007\213\023\300\024\262\336"
+ .ascii "\346%}\317'\353\226>Y-=\177\006eS2\243\300\277f\204\350\305p"
+ .ascii "t\320\222\312\375\303\266\334p\017\225\314J\247[\265\227z*\253"
+ .ascii "J\017\252\366\275/\342G\201\346\364\226\273\264\002\227\252y"
+ .ascii "\017Wy\334\252f\3618\025\245\340`\265\252hn\217\352\007\207O"
+ .ascii "\361:5\345\220C\365\271\313\336sUT8<U.\0251\016\315[\255\276"
+ .ascii "c\253t!\201\303\347U\034\345\212bc\021*\334\252\313Q\341~\273"
+ .ascii "\034\371\035\232\313\247\371\252\335\332\232\305\316\370m\312"
+ .ascii "\236=\016\273}\223\215Ym\212\247\262\322\243:*<\236*[\271\255"
+ .ascii "\330\256HF\217Z\346z\327bgy\22795g\251f\177\2058\275\207A\221"
+ .ascii "\225CN\357\337(\266/\210"
+ .text
+ .section .gnu.offload_lto_.symtab.f031cb8759bb7418,"e",@progbits
+ .text
+ .section .gnu.offload_lto_.opts,"e",@progbits
+ .string "'-fexceptions' '-fmath-errno' '-fsigned-zeros' '-ftrapping-math' '-fno-trapv' '-fno-strict-overflow' '-fno-openmp' '-foffload-abi=lp64' '-fopenacc'"
+ .text
+ .section .gnu.offload_lto_.mode_table.f031cb8759bb7418,"e",@progbits
+ .string "x\234ce\200"
+ .string "e \026"
+ .string "\342\376#\r\035\r{:\004&\2664-h8\322\0210\251\245\345@\303\211\216\t\314\223[:\032\032\317t\\`f`\016\364d`\016\006b\027 \016\361d"
+ .string ""
+ .ascii "\225\020\024\253"
+ .text
+ .section .rodata
+.LC0:
+ .string "ary[%d]=%x expected %x\n"
+ .text
+ .globl main
+ .type main, @function
+main:
+.LFB11:
+ .cfi_startproc
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset 6, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register 6
+ subq $131216, %rsp
+ movl $0, -8(%rbp)
+ movl $0, -131204(%rbp)
+ movl $0, -4(%rbp)
+.L3:
+ cmpl $32784, -4(%rbp)
+ jg .L2
+ movl -4(%rbp), %eax
+ cltq
+ movl $-1, -131200(%rbp,%rax,4)
+ addl $1, -4(%rbp)
+ jmp .L3
+.L2:
+ leaq -131204(%rbp), %rax
+ movq %rax, -48(%rbp)
+ leaq -131200(%rbp), %rax
+ movq %rax, -40(%rbp)
+ leaq -48(%rbp), %rax
+ subq $8, %rsp
+ pushq $0
+ movl $_ZZ4mainE17.omp_data_kinds.5, %r9d
+ movl $_ZZ4mainE17.omp_data_sizes.4, %r8d
+ movq %rax, %rcx
+ movl $2, %edx
+ movl $main._omp_fn.0, %esi
+ movl $-1, %edi
+ movl $0, %eax
+ call GOACC_parallel_keyed
+ addq $16, %rsp
+ movl $0, -4(%rbp)
+.L7:
+ cmpl $32784, -4(%rbp)
+ jg .L4
+ movl -4(%rbp), %eax
+ movl %eax, -12(%rbp)
+ movl -131204(%rbp), %eax
+ testl %eax, %eax
+ je .L5
+ movl -4(%rbp), %eax
+ movslq %eax, %rdx
+ imulq $2145388543, %rdx, %rdx
+ shrq $32, %rdx
+ sarl $9, %edx
+ sarl $31, %eax
+ subl %eax, %edx
+ movl %edx, %eax
+ movl %eax, -16(%rbp)
+ movl $0, -20(%rbp)
+ movl $0, -24(%rbp)
+ movl -16(%rbp), %eax
+ sall $16, %eax
+ movl %eax, %edx
+ movl -20(%rbp), %eax
+ sall $8, %eax
+ orl %edx, %eax
+ orl -24(%rbp), %eax
+ movl %eax, -12(%rbp)
+.L5:
+ movl -4(%rbp), %eax
+ cltq
+ movl -131200(%rbp,%rax,4), %eax
+ cmpl -12(%rbp), %eax
+ je .L6
+ movl $1, -8(%rbp)
+ movl -4(%rbp), %eax
+ cltq
+ movl -131200(%rbp,%rax,4), %edx
+ movl -12(%rbp), %ecx
+ movl -4(%rbp), %eax
+ movl %eax, %esi
+ movl $.LC0, %edi
+ movl $0, %eax
+ call printf
+.L6:
+ addl $1, -4(%rbp)
+ jmp .L7
+.L4:
+ movl -8(%rbp), %eax
+ leave
+ .cfi_def_cfa 7, 8
+ ret
+ .cfi_endproc
+.LFE11:
+ .size main, .-main
+ .type main._omp_fn.0, @function
+main._omp_fn.0:
+.LFB12:
+ .cfi_startproc
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset 6, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register 6
+ pushq %r15
+ pushq %r14
+ pushq %r13
+ pushq %r12
+ pushq %rbx
+ subq $40, %rsp
+ .cfi_offset 15, -24
+ .cfi_offset 14, -32
+ .cfi_offset 13, -40
+ .cfi_offset 12, -48
+ .cfi_offset 3, -56
+ movq %rdi, -72(%rbp)
+ movl $0, %r12d
+ movl $1, %r14d
+ movl $1, %eax
+ movl %eax, %r15d
+.L15:
+ movl $0, %eax
+ movl %eax, %ebx
+ movl $32785, %r13d
+ cmpl %r13d, %ebx
+ jge .L10
+.L13:
+ movl %ebx, %eax
+ movl %eax, -52(%rbp)
+ movl $5, %edi
+ call acc_on_device
+ testl %eax, %eax
+ jne .L11
+ jmp .L16
+.L14:
+ addl %r15d, %ebx
+ cmpl %r13d, %ebx
+ jl .L13
+ jmp .L10
+.L16:
+ movl -52(%rbp), %ecx
+ movq -72(%rbp), %rax
+ movq 8(%rax), %rax
+ movl -52(%rbp), %edx
+ movl %ecx, (%rax,%rdx,4)
+ jmp .L14
+.L11:
+ movl $0, -56(%rbp)
+ movl $0, -60(%rbp)
+ movl $0, -64(%rbp)
+#APP
+# 26 "/scratch/nsidwell/openacc/trunk-merge/src/gcc-mainline/libgomp/testsuite/libgomp.oacc-c++/../libgomp.oacc-c-c++-common/loop-g-1.c" 1
+ mov.u32 %eax,%ctaid.x;
+# 0 "" 2
+#NO_APP
+ movl %eax, -56(%rbp)
+#APP
+# 27 "/scratch/nsidwell/openacc/trunk-merge/src/gcc-mainline/libgomp/testsuite/libgomp.oacc-c++/../libgomp.oacc-c-c++-common/loop-g-1.c" 1
+ mov.u32 %eax,%tid.y;
+# 0 "" 2
+#NO_APP
+ movl %eax, -60(%rbp)
+#APP
+# 28 "/scratch/nsidwell/openacc/trunk-merge/src/gcc-mainline/libgomp/testsuite/libgomp.oacc-c++/../libgomp.oacc-c-c++-common/loop-g-1.c" 1
+ mov.u32 %eax,%tid.x;
+# 0 "" 2
+#NO_APP
+ movl %eax, -64(%rbp)
+ movl -56(%rbp), %eax
+ sall $16, %eax
+ movl %eax, %edx
+ movl -60(%rbp), %eax
+ sall $8, %eax
+ orl %edx, %eax
+ orl -64(%rbp), %eax
+ movl %eax, %ecx
+ movq -72(%rbp), %rax
+ movq 8(%rax), %rax
+ movl -52(%rbp), %edx
+ movl %ecx, (%rax,%rdx,4)
+ movq -72(%rbp), %rax
+ movq (%rax), %rax
+ movl $1, (%rax)
+ jmp .L14
+.L10:
+ addl $1, %r12d
+ cmpl %r14d, %r12d
+ jl .L15
+ movl $32785, -52(%rbp)
+ addq $40, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ popq %r15
+ popq %rbp
+ .cfi_def_cfa 7, 8
+ ret
+ .cfi_endproc
+.LFE12:
+ .size main._omp_fn.0, .-main._omp_fn.0
+ .data
+ .align 16
+ .type _ZZ4mainE17.omp_data_sizes.4, @object
+ .size _ZZ4mainE17.omp_data_sizes.4, 16
+_ZZ4mainE17.omp_data_sizes.4:
+ .quad 4
+ .quad 131140
+ .align 2
+ .type _ZZ4mainE17.omp_data_kinds.5, @object
+ .size _ZZ4mainE17.omp_data_kinds.5, 4
+_ZZ4mainE17.omp_data_kinds.5:
+ .value 643
+ .value 643
+ .section .gnu.offload_vars,"aw",@progbits
+ .align 8
+ .type .offload_var_table, @object
+ .size .offload_var_table, 0
+.offload_var_table:
+ .section .gnu.offload_funcs,"aw",@progbits
+ .align 8
+ .type .offload_func_table, @object
+ .size .offload_func_table, 8
+.offload_func_table:
+ .quad main._omp_fn.0
+ .comm __gnu_lto_v1,1,1
+ .ident "GCC: (Sourcery CodeBench (OpenACC/PTX) Lite 2016.05-999999) 6.0.0 20151019 (experimental)"
+ .section .note.GNU-stack,"",@progbits
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c (revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-g-2.c (working copy)
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main () /* Runs a gang (static:1) loop that records per-iteration ids, then checks them; returns 0 if all match, 1 otherwise.  */
+{
+ int ary[N];
+ int ix;
+ int exit = 0; /* Exit status: set to 1 on any mismatch.  */
+ int ondev = 0; /* Set to 1 by the region when the on-device branch ran.  */
+ /* Poison every element with -1 so an element the region never writes is reported as a mismatch.  */
+ for (ix = 0; ix < N;ix++)
+ ary[ix] = -1;
+
+#pragma acc parallel num_gangs(32) vector_length(32) copy(ary) copy(ondev)
+ {
+#pragma acc loop gang (static:1)
+ for (unsigned ix = 0; ix < N; ix++)
+ {
+ if (__builtin_acc_on_device (5)) /* NOTE(review): 5 is presumably acc_device_nvidia; confirm against openacc.h.  */
+ {
+ int g = 0, w = 0, v = 0;
+ /* Read the PTX special registers %ctaid.x, %tid.y and %tid.x into g, w and v.  */
+ __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+ __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+ __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ ary[ix] = (g << 16) | (w << 8) | v; /* Pack the ids: g from bit 16, w from bit 8, v in the low bits.  */
+ ondev = 1;
+ }
+ else
+ ary[ix] = ix; /* Fallback path: record the iteration number itself.  */
+ }
+ }
+ /* Verify every element against the value predicted for the path that ran.  */
+ for (ix = 0; ix < N; ix++)
+ {
+ int expected = ix; /* Value stored by the fallback path.  */
+ if(ondev)
+ {
+ int g = ix % 32; /* Expect single iterations dealt to the 32 gangs in rotation.  */
+ int w = 0; /* The test expects %tid.y to read 0 here.  */
+ int v = 0; /* The test expects %tid.x to read 0 here.  */
+
+ expected = (g << 16) | (w << 8) | v; /* Same packing as in the region.  */
+ }
+
+ if (ary[ix] != expected)
+ {
+ exit = 1;
+ printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected);
+ }
+ }
+
+ return exit;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c (revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c (working copy)
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main () /* Runs a gang worker vector loop that records per-iteration ids, then checks them; returns 0 if all match, 1 otherwise.  */
+{
+ int ary[N];
+ int ix;
+ int exit = 0; /* Exit status: set to 1 on any mismatch.  */
+ int ondev = 0; /* Set to 1 by the region when the on-device branch ran.  */
+ /* Poison every element with -1 so an element the region never writes is reported as a mismatch.  */
+ for (ix = 0; ix < N;ix++)
+ ary[ix] = -1;
+
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev)
+ {
+#pragma acc loop gang worker vector
+ for (unsigned ix = 0; ix < N; ix++)
+ {
+ if (__builtin_acc_on_device (5)) /* NOTE(review): 5 is presumably acc_device_nvidia; confirm against openacc.h.  */
+ {
+ int g = 0, w = 0, v = 0;
+ /* Read the PTX special registers %ctaid.x, %tid.y and %tid.x into g, w and v.  */
+ __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+ __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+ __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ ary[ix] = (g << 16) | (w << 8) | v; /* Pack the ids: g from bit 16, w from bit 8, v in the low bits.  */
+ ondev = 1;
+ }
+ else
+ ary[ix] = ix; /* Fallback path: record the iteration number itself.  */
+ }
+ }
+ /* Verify every element against the value predicted for the path that ran.  */
+ for (ix = 0; ix < N; ix++)
+ {
+ int expected = ix; /* Value stored by the fallback path.  */
+ if(ondev)
+ {
+ int chunk_size = (N + 32*32*32 - 1) / (32*32*32); /* ceil (N / (32*32*32)).  */
+
+ int g = ix / (chunk_size * 32 * 32); /* Expect chunk_size * 32 * 32 consecutive iterations per gang.  */
+ int w = ix / 32 % 32; /* Expect the worker id to advance every 32 iterations, wrapping at 32.  */
+ int v = ix % 32; /* Expect the vector id to cycle through 0..31.  */
+
+ expected = (g << 16) | (w << 8) | v; /* Same packing as in the region.  */
+ }
+
+ if (ary[ix] != expected)
+ {
+ exit = 1;
+ printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected);
+ }
+ }
+
+ return exit;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c (revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c (working copy)
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main () /* Runs a vector-partitioned loop that records per-iteration ids, then checks them; returns 0 if all match, 1 otherwise.  */
+{
+ int ary[N];
+ int ix;
+ int exit = 0; /* Exit status: set to 1 on any mismatch.  */
+ int ondev = 0; /* Set to 1 by the region when the on-device branch ran.  */
+ /* Poison every element with -1 so an element the region never writes is reported as a mismatch.  */
+ for (ix = 0; ix < N;ix++)
+ ary[ix] = -1;
+
+#pragma acc parallel vector_length(32) copy(ary) copy(ondev)
+ {
+#pragma acc loop vector
+ for (unsigned ix = 0; ix < N; ix++)
+ {
+ if (__builtin_acc_on_device (5)) /* NOTE(review): 5 is presumably acc_device_nvidia; confirm against openacc.h.  */
+ {
+ int g = 0, w = 0, v = 0;
+ /* Read the PTX special registers %ctaid.x, %tid.y and %tid.x into g, w and v.  */
+ __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+ __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+ __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ ary[ix] = (g << 16) | (w << 8) | v; /* Pack the ids: g from bit 16, w from bit 8, v in the low bits.  */
+ ondev = 1;
+ }
+ else
+ ary[ix] = ix; /* Fallback path: record the iteration number itself.  */
+ }
+ }
+ /* Verify every element against the value predicted for the path that ran.  */
+ for (ix = 0; ix < N; ix++)
+ {
+ int expected = ix; /* Value stored by the fallback path.  */
+ if(ondev)
+ {
+ int g = 0; /* The test expects %ctaid.x to read 0 here.  */
+ int w = 0; /* The test expects %tid.y to read 0 here.  */
+ int v = ix % 32; /* Expect the vector id to cycle through 0..31.  */
+
+ expected = (g << 16) | (w << 8) | v; /* Same packing as in the region.  */
+ }
+
+ if (ary[ix] != expected)
+ {
+ exit = 1;
+ printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected);
+ }
+ }
+
+ return exit;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c (revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c (working copy)
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main () /* Runs a worker-partitioned loop that records per-iteration ids, then checks them; returns 0 if all match, 1 otherwise.  */
+{
+ int ary[N];
+ int ix;
+ int exit = 0; /* Exit status: set to 1 on any mismatch.  */
+ int ondev = 0; /* Set to 1 by the region when the on-device branch ran.  */
+ /* Poison every element with -1 so an element the region never writes is reported as a mismatch.  */
+ for (ix = 0; ix < N;ix++)
+ ary[ix] = -1;
+
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+ {
+#pragma acc loop worker
+ for (unsigned ix = 0; ix < N; ix++)
+ {
+ if (__builtin_acc_on_device (5)) /* NOTE(review): 5 is presumably acc_device_nvidia; confirm against openacc.h.  */
+ {
+ int g = 0, w = 0, v = 0;
+ /* Read the PTX special registers %ctaid.x, %tid.y and %tid.x into g, w and v.  */
+ __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+ __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+ __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ ary[ix] = (g << 16) | (w << 8) | v; /* Pack the ids: g from bit 16, w from bit 8, v in the low bits.  */
+ ondev = 1;
+ }
+ else
+ ary[ix] = ix; /* Fallback path: record the iteration number itself.  */
+ }
+ }
+ /* Verify every element against the value predicted for the path that ran.  */
+ for (ix = 0; ix < N; ix++)
+ {
+ int expected = ix; /* Value stored by the fallback path.  */
+ if(ondev)
+ {
+ int g = 0; /* The test expects %ctaid.x to read 0 here.  */
+ int w = ix % 32; /* Expect the worker id to cycle through 0..31.  */
+ int v = 0; /* The test expects %tid.x to read 0 here.  */
+
+ expected = (g << 16) | (w << 8) | v; /* Same packing as in the region.  */
+ }
+
+ if (ary[ix] != expected)
+ {
+ exit = 1;
+ printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected);
+ }
+ }
+
+ return exit;
+}
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c (revision 0)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c (working copy)
@@ -0,0 +1,57 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O2" } */
+
+#include <stdio.h>
+
+#define N (32*32*32+17)
+int main () /* Runs a worker vector loop that records per-iteration ids, then checks them; returns 0 if all match, 1 otherwise.  */
+{
+ int ary[N];
+ int ix;
+ int exit = 0; /* Exit status: set to 1 on any mismatch.  */
+ int ondev = 0; /* Set to 1 by the region when the on-device branch ran.  */
+ /* Poison every element with -1 so an element the region never writes is reported as a mismatch.  */
+ for (ix = 0; ix < N;ix++)
+ ary[ix] = -1;
+
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+ {
+#pragma acc loop worker vector
+ for (unsigned ix = 0; ix < N; ix++)
+ {
+ if (__builtin_acc_on_device (5)) /* NOTE(review): 5 is presumably acc_device_nvidia; confirm against openacc.h.  */
+ {
+ int g = 0, w = 0, v = 0;
+ /* Read the PTX special registers %ctaid.x, %tid.y and %tid.x into g, w and v.  */
+ __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
+ __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
+ __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
+ ary[ix] = (g << 16) | (w << 8) | v; /* Pack the ids: g from bit 16, w from bit 8, v in the low bits.  */
+ ondev = 1;
+ }
+ else
+ ary[ix] = ix; /* Fallback path: record the iteration number itself.  */
+ }
+ }
+ /* Verify every element against the value predicted for the path that ran.  */
+ for (ix = 0; ix < N; ix++)
+ {
+ int expected = ix; /* Value stored by the fallback path.  */
+ if(ondev)
+ {
+ int g = 0; /* The test expects %ctaid.x to read 0 here.  */
+ int w = (ix / 32) % 32; /* Expect the worker id to advance every 32 iterations, wrapping at 32.  */
+ int v = ix % 32; /* Expect the vector id to cycle through 0..31.  */
+
+ expected = (g << 16) | (w << 8) | v; /* Same packing as in the region.  */
+ }
+
+ if (ary[ix] != expected)
+ {
+ exit = 1;
+ printf ("ary[%d]=%x expected %x\n", ix, ary[ix], expected);
+ }
+ }
+
+ return exit;
+}