On Wed, Sep 18, 2013 at 11:54:35AM +0400, Kirill Yukhin wrote:
> Hello,
> It seems that currently GOMP_target perform call to host variant of the
> routine:
>
> void
> GOMP_target (int device, void (*fn) (void *), const char *fnname,
>              size_t mapnum, void **hostaddrs, size_t *sizes,
>              unsigned char *kinds)
> {
>   device = resolve_device (device);
>   if (device == -1)
>     {
>       /* Host fallback. */
>       fn (hostaddrs);
>       return;
>     }
>   ...
> }

The problem with that is that what GOMP_target does right now for host
fallback is not sufficient, but unfortunately I haven't been able to get
answers to all my questions about what exactly must be done yet.

The answers I got so far are that:
  test1.c should print 0 0 0,
  test2.c should print 1,
  test3.c should print 1,
  test4.c should pass,
and what test5.c should print I have no idea (does ICC already support this
and can you see what it prints?).

In any case, the target construct even in the host fallback should have its
own ICV set, so there is something that needs to be done in GOMP_target
before calling the fn (hostaddrs) function, and there is something that needs
to be done after fn (hostaddrs) returns.  So, the only way to do what you are
suggesting would be to have another call, GOMP_target_end or whatever, that
would do the actions to free/unroll the ICVs.  But then that would mean e.g.
that the ICVs have to be malloced; for the host fallback they could not
simply live e.g. in an automatic variable.

test1.c:
#include <omp.h>
#include <stdio.h>

int
main ()
{
  printf ("%d\n", omp_get_level ());
  #pragma omp target if (0)
  printf ("%d\n", omp_get_level ());
  #pragma omp target if (0)
  #pragma omp teams
  printf ("%d\n", omp_get_level ());
  return 0;
}

test2.c:
#include <omp.h>
#include <stdio.h>

int
main ()
{
  omp_set_dynamic (0);
  #pragma omp parallel num_threads (4)
  #pragma omp target if (0)
  #pragma omp single
  printf ("%d\n", omp_get_num_threads ());
  return 0;
}

test3.c:
#include <omp.h>
#include <stdio.h>

int
main ()
{
  #pragma omp target if (0)
  #pragma omp teams thread_limit (1)
  printf ("%d\n", omp_get_thread_limit ());
  return 0;
}

test4.c:
#include <omp.h>
#include <assert.h>

int
main ()
{
  omp_set_dynamic (1);
  int v = omp_get_dynamic ();
  #pragma omp target if (0)
  omp_set_dynamic (0);
  assert (v == omp_get_dynamic ());
  return 0;
}

test5.c:
#include <omp.h>
#include <stdio.h>

int
main ()
{
  omp_set_dynamic (0);
  omp_set_nested (1);
  #pragma omp parallel num_threads (3)
  if (omp_get_thread_num () == 2)
    {
      #pragma omp parallel num_threads (3)
      if (omp_get_thread_num () == 1)
        {
          #pragma omp target if (0)
          {
            printf ("inp %d\n", omp_in_parallel ());
            #pragma omp parallel num_threads (2)
            printf ("%d %d %d %d %d\n", omp_get_level (),
                    omp_get_ancestor_thread_num (0),
                    omp_get_ancestor_thread_num (1),
                    omp_get_ancestor_thread_num (2),
                    omp_get_ancestor_thread_num (3));
          }
        }
    }
  return 0;
}

	Jakub