On Fri, Sep 13, 2013 at 05:41:03PM +0200, Marek Polacek wrote: > On Fri, Sep 13, 2013 at 05:35:27PM +0200, Jakub Jelinek wrote: > > On Fri, Sep 13, 2013 at 03:15:56PM +0200, Jakub Jelinek wrote: > > > On Fri, Sep 13, 2013 at 05:11:09PM +0400, Michael V. Zolotukhin wrote: > > > > > FYI, I'm attaching a WIP patch with the splay tree stuff. > > > > Thanks, I'll take a look. By the way, isn't it better to move > > > > splay-tree > > > > implementation to a separate file? > > > > > > As it is just a few routines, heavily modified from include/splay-tree.h > > > (e.g. the data structures contain all the target.c specific stuff), and > > > will be > > > used just in target.c, I think it is fine to keep it in target.c. > > > > Anyway, here is an updated patch that moves the splay stuff into > > splay-tree.h and cleans up a bunch of other things. > > > > Will commit once the http://gcc.gnu.org/ml/gcc-patches/2013-09/msg01044.html > > issue is resolved. > > > > 2013-09-13 Jakub Jelinek <ja...@redhat.com> > > > > * ipa-prop.c (ipa_compute_jump_functions_for_edge): Return early > > for internal calls. > > Seems like a wrong patch is attached.
You're right, here is the right one. 2013-09-13 Jakub Jelinek <ja...@redhat.com> * splay-tree.h: New file. * target.c: Include stdbool.h. (splay_tree_node, splay_tree, splay_tree_key): New typedefs. (struct target_mem_desc, struct splay_tree_key_s): New structures. (splay_compare): New inline function. (gomp_get_num_devices): New function. (resolve_device): Use default_device_var ICV. Add temporarily magic testing device number 257. (dev_splay_tree, dev_env_lock): New variables. (gomp_map_vars_existing, gomp_map_vars, gomp_unmap_tgt, gomp_unmap_vars, gomp_update): New functions. (GOMP_target, GOMP_target_data, GOMP_target_end_data, GOMP_target_update): Add support for magic testing device number 257. * libgomp.h (struct target_mem_desc): Forward declare. (struct gomp_task_icv): Add default_device_var and target_data. (gomp_get_num_devices): New prototype. * env.c (gomp_global_icv): Add default_device_var initializer. (parse_int): New function. (handle_omp_display_env): Print OMP_DEFAULT_DEVICE. (initialize_env): Initialize default_device_var. (omp_set_default_device): Set default_device_var ICV. (omp_get_default_device): Query default_device_var ICV. (omp_get_num_devices): Call gomp_get_num_devices. (omp_get_num_teams, omp_get_team_num, omp_is_initial_device): Add comments. --- libgomp/splay-tree.h.jj 2013-09-13 16:32:48.381973559 +0200 +++ libgomp/splay-tree.h 2013-09-13 16:41:38.059701560 +0200 @@ -0,0 +1,232 @@ +/* A splay-tree datatype. + Copyright 1998-2013 + Free Software Foundation, Inc. + Contributed by Mark Mitchell (m...@markmitchell.com). + + This file is part of the GNU OpenMP Library (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* The splay tree code copied from include/splay-tree.h and adjusted, + so that all the data lives directly in splay_tree_node_s structure + and no extra allocations are needed. + + Files including this header should before including it add: +typedef struct splay_tree_node_s *splay_tree_node; +typedef struct splay_tree_s *splay_tree; +typedef struct splay_tree_key_s *splay_tree_key; + define splay_tree_key_s structure, and define + splay_compare inline function. */ + +/* For an easily readable description of splay-trees, see: + + Lewis, Harry R. and Denenberg, Larry. Data Structures and Their + Algorithms. Harper-Collins, Inc. 1991. + + The major feature of splay trees is that all basic tree operations + are amortized O(log n) time for a tree with n nodes. */ + +/* The nodes in the splay tree. */ +struct splay_tree_node_s { + struct splay_tree_key_s key; + /* The left and right children, respectively. */ + splay_tree_node left; + splay_tree_node right; +}; + +/* The splay tree. */ +struct splay_tree_s { + splay_tree_node root; +}; + +/* Rotate the edge joining the left child N with its parent P. PP is the + grandparents' pointer to P. */ + +static inline void +rotate_left (splay_tree_node *pp, splay_tree_node p, splay_tree_node n) +{ + splay_tree_node tmp; + tmp = n->right; + n->right = p; + p->left = tmp; + *pp = n; +} + +/* Rotate the edge joining the right child N with its parent P. PP is the + grandparents' pointer to P. */ + +static inline void +rotate_right (splay_tree_node *pp, splay_tree_node p, splay_tree_node n) +{ + splay_tree_node tmp; + tmp = n->left; + n->left = p; + p->right = tmp; + *pp = n; +} + +/* Bottom up splay of KEY. */ + +static void +splay_tree_splay (splay_tree sp, splay_tree_key key) +{ + if (sp->root == NULL) + return; + + do { + int cmp1, cmp2; + splay_tree_node n, c; + + n = sp->root; + cmp1 = splay_compare (key, &n->key); + + /* Found. */ + if (cmp1 == 0) + return; + + /* Left or right? If no child, then we're done. */ + if (cmp1 < 0) + c = n->left; + else + c = n->right; + if (!c) + return; + + /* Next one left or right? If found or no child, we're done + after one rotation. */ + cmp2 = splay_compare (key, &c->key); + if (cmp2 == 0 + || (cmp2 < 0 && !c->left) + || (cmp2 > 0 && !c->right)) + { + if (cmp1 < 0) + rotate_left (&sp->root, n, c); + else + rotate_right (&sp->root, n, c); + return; + } + + /* Now we have the four cases of double-rotation. */ + if (cmp1 < 0 && cmp2 < 0) + { + rotate_left (&n->left, c, c->left); + rotate_left (&sp->root, n, n->left); + } + else if (cmp1 > 0 && cmp2 > 0) + { + rotate_right (&n->right, c, c->right); + rotate_right (&sp->root, n, n->right); + } + else if (cmp1 < 0 && cmp2 > 0) + { + rotate_right (&n->left, c, c->right); + rotate_left (&sp->root, n, n->left); + } + else if (cmp1 > 0 && cmp2 < 0) + { + rotate_left (&n->right, c, c->left); + rotate_right (&sp->root, n, n->right); + } + } while (1); +} + +/* Insert a new NODE into SP. The NODE shouldn't exist in the tree. */ + +static void +splay_tree_insert (splay_tree sp, splay_tree_node node) +{ + int comparison = 0; + + splay_tree_splay (sp, &node->key); + + if (sp->root) + comparison = splay_compare (&sp->root->key, &node->key); + + if (sp->root && comparison == 0) + abort (); + else + { + /* Insert it at the root. */ + if (sp->root == NULL) + node->left = node->right = NULL; + else if (comparison < 0) + { + node->left = sp->root; + node->right = node->left->right; + node->left->right = NULL; + } + else + { + node->right = sp->root; + node->left = node->right->left; + node->right->left = NULL; + } + + sp->root = node; + } +} + +/* Remove node with KEY from SP. It is not an error if it did not exist. */ + +static void +splay_tree_remove (splay_tree sp, splay_tree_key key) +{ + splay_tree_splay (sp, key); + + if (sp->root && splay_compare (&sp->root->key, key) == 0) + { + splay_tree_node left, right; + + left = sp->root->left; + right = sp->root->right; + + /* One of the children is now the root. Doesn't matter much + which, so long as we preserve the properties of the tree. */ + if (left) + { + sp->root = left; + + /* If there was a right child as well, hang it off the + right-most leaf of the left child. */ + if (right) + { + while (left->right) + left = left->right; + left->right = right; + } + } + else + sp->root = right; + } +} + +/* Lookup KEY in SP, returning NODE if present, and NULL + otherwise. */ + +static splay_tree_key +splay_tree_lookup (splay_tree sp, splay_tree_key key) +{ + splay_tree_splay (sp, key); + + if (sp->root && splay_compare (&sp->root->key, key) == 0) + return &sp->root->key; + else + return NULL; +} --- libgomp/target.c.jj 2013-09-09 17:41:02.290429613 +0200 +++ libgomp/target.c 2013-09-13 16:41:24.514770386 +0200 @@ -26,15 +26,383 @@ creation and termination. */ #include "libgomp.h" +#include <stdbool.h> #include <stdlib.h> #include <string.h> +/* Forward declaration for a node in the tree. */ +typedef struct splay_tree_node_s *splay_tree_node; +typedef struct splay_tree_s *splay_tree; +typedef struct splay_tree_key_s *splay_tree_key; + +struct target_mem_desc { + /* Reference count. */ + uintptr_t refcount; + /* All the splay nodes allocated together. */ + splay_tree_node array; + /* Start of the target region. */ + uintptr_t tgt_start; + /* End of the targer region. */ + uintptr_t tgt_end; + /* Handle to free. */ + void *to_free; + /* Previous target_mem_desc. */ + struct target_mem_desc *prev; + /* Number of items in following list. */ + size_t list_count; + /* List of splay keys to remove (or decrease refcount) + at the end of region. */ + splay_tree_key list[]; +}; + +struct splay_tree_key_s { + /* Address of the host object. */ + uintptr_t host_start; + /* Address immediately after the host object. */ + uintptr_t host_end; + /* Descriptor of the target memory. */ + struct target_mem_desc *tgt; + /* Offset from tgt->tgt_start to the start of the target object. */ + uintptr_t tgt_offset; + /* Reference count. */ + uintptr_t refcount; + /* True if data should be copied from device to host at the end. */ + bool copy_from; +}; + +/* The comparison function. */ + +static int +splay_compare (splay_tree_key x, splay_tree_key y) +{ + if (x->host_start == x->host_end + && y->host_start == y->host_end) + return 0; + if (x->host_end <= y->host_start) + return -1; + if (x->host_start >= y->host_end) + return 1; + return 0; +} + +#include "splay-tree.h" + +attribute_hidden int +gomp_get_num_devices (void) +{ + /* FIXME: Scan supported accelerators when called the first time. */ + return 0; +} + static int resolve_device (int device) { + if (device == -1) + { + struct gomp_task_icv *icv = gomp_icv (false); + device = icv->default_device_var; + } + /* FIXME: Temporary hack for testing non-shared address spaces on host. */ + if (device == 257) + return 257; + if (device >= gomp_get_num_devices ()) + return -1; return -1; } +/* These variables would be per-accelerator (which doesn't have shared address + space. */ +static struct splay_tree_s dev_splay_tree; +static gomp_mutex_t dev_env_lock; + +/* Handle the case where splay_tree_lookup found oldn for newn. + Helper function of gomp_map_vars. */ + +static inline void +gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn, + unsigned char kind) +{ + if (oldn->host_start > newn->host_start + || oldn->host_end < newn->host_end) + gomp_fatal ("Trying to map into device [%p..%p) object when" + "[%p..%p) is already mapped", + (void *) newn->host_start, (void *) newn->host_end, + (void *) oldn->host_start, (void *) oldn->host_end); + if (((kind & 7) == 2 || (kind & 7) == 3) + && !oldn->copy_from + && oldn->host_start == newn->host_start + && oldn->host_end == newn->host_end) + oldn->copy_from = true; + oldn->refcount++; +} + +static struct target_mem_desc * +gomp_map_vars (size_t mapnum, void **hostaddrs, size_t *sizes, + unsigned char *kinds, bool is_target) +{ + size_t i, tgt_align, tgt_size, not_found_cnt = 0; + struct splay_tree_key_s cur_node; + struct target_mem_desc *tgt + = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * mapnum); + tgt->list_count = mapnum; + tgt->refcount = 1; + + if (mapnum == 0) + return tgt; + + tgt_align = sizeof (void *); + tgt_size = 0; + if (is_target) + { + size_t align = 4 * sizeof (void *); + tgt_align = align; + tgt_size = mapnum * sizeof (void *); + } + + gomp_mutex_lock (&dev_env_lock); + for (i = 0; i < mapnum; i++) + { + cur_node.host_start = (uintptr_t) hostaddrs[i]; + if ((kinds[i] & 7) != 4) + cur_node.host_end = cur_node.host_start + sizes[i]; + else + cur_node.host_end = cur_node.host_start + sizeof (void *); + splay_tree_key n = splay_tree_lookup (&dev_splay_tree, &cur_node); + if (n) + { + tgt->list[i] = n; + gomp_map_vars_existing (n, &cur_node, kinds[i]); + } + else + { + size_t align = (size_t) 1 << (kinds[i] >> 3); + tgt->list[i] = NULL; + not_found_cnt++; + if (tgt_align < align) + tgt_align = align; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + tgt_size += cur_node.host_end - cur_node.host_start; + } + } + + if (not_found_cnt || is_target) + { + /* FIXME: This would be accelerator memory allocation, not + host, and should allocate tgt_align aligned tgt_size block + of memory. */ + tgt->to_free = gomp_malloc (tgt_size + tgt_align - 1); + tgt->tgt_start = (uintptr_t) tgt->to_free; + tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1); + tgt->tgt_end = tgt->tgt_start + tgt_size; + } + + tgt_size = 0; + if (is_target) + tgt_size = mapnum * sizeof (void *); + + if (not_found_cnt) + { + tgt->array = gomp_malloc (not_found_cnt * sizeof (*tgt->array)); + splay_tree_node array = tgt->array; + + for (i = 0; i < mapnum; i++) + if (tgt->list[i] == NULL) + { + splay_tree_key k = &array->key; + k->host_start = (uintptr_t) hostaddrs[i]; + if ((kinds[i] & 7) != 4) + k->host_end = k->host_start + sizes[i]; + else + k->host_end = k->host_start + sizeof (void *); + splay_tree_key n + = splay_tree_lookup (&dev_splay_tree, k); + if (n) + { + tgt->list[i] = n; + gomp_map_vars_existing (n, k, kinds[i]); + } + else + { + size_t align = (size_t) 1 << (kinds[i] >> 3); + tgt->list[i] = k; + tgt_size = (tgt_size + align - 1) & ~(align - 1); + k->tgt = tgt; + k->tgt_offset = tgt_size; + tgt_size += k->host_end - k->host_start; + if ((kinds[i] & 7) == 2 || (kinds[i] & 7) == 3) + k->copy_from = true; + k->refcount = 1; + tgt->refcount++; + array->left = NULL; + array->right = NULL; + splay_tree_insert (&dev_splay_tree, array); + switch (kinds[i] & 7) + { + case 0: /* ALLOC */ + case 2: /* FROM */ + break; + case 1: /* TO */ + case 3: /* TOFROM */ + /* FIXME: This is supposed to be copy from host to device + memory. Perhaps add some smarts, like if copying + several adjacent fields from host to target, use some + host buffer to avoid sending each var individually. */ + memcpy ((void *) (tgt->tgt_start + k->tgt_offset), + (void *) k->host_start, + k->host_end - k->host_start); + break; + case 4: /* POINTER */ + cur_node.host_start + = (uintptr_t) *(void **) k->host_start; + /* Add bias to the pointer value. */ + cur_node.host_start += sizes[i]; + cur_node.host_end = cur_node.host_start + 1; + n = splay_tree_lookup (&dev_splay_tree, &cur_node); + if (n == NULL) + { + /* Could be possibly zero size array section. */ + cur_node.host_end--; + n = splay_tree_lookup (&dev_splay_tree, &cur_node); + if (n == NULL) + { + cur_node.host_start--; + n = splay_tree_lookup (&dev_splay_tree, &cur_node); + cur_node.host_start++; + } + } + if (n == NULL) + gomp_fatal ("Pointer target of array section " + "wasn't mapped"); + cur_node.host_start -= n->host_start; + cur_node.tgt_offset = n->tgt->tgt_start + n->tgt_offset + + cur_node.host_start; + /* At this point tgt_offset is target address of the + array section. Now subtract bias to get what we want + to initialize the pointer with. */ + cur_node.tgt_offset -= sizes[i]; + /* FIXME: host to device copy, see above FIXME comment. */ + memcpy ((void *) (tgt->tgt_start + k->tgt_offset), + (void *) &cur_node.tgt_offset, + sizeof (void *)); + break; + } + array++; + } + } + } + if (is_target) + { + for (i = 0; i < mapnum; i++) + { + cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start + + tgt->list[i]->tgt_offset; + /* FIXME: host to device copy, see above FIXME comment. */ + memcpy ((void *) (tgt->tgt_start + i * sizeof (void *)), + (void *) &cur_node.tgt_offset, + sizeof (void *)); + } + } + + gomp_mutex_unlock (&dev_env_lock); + return tgt; +} + +static void +gomp_unmap_tgt (struct target_mem_desc *tgt) +{ + /* FIXME: Deallocate on target the tgt->tgt_start .. tgt->tgt_end + region. */ + if (tgt->tgt_end) + free (tgt->to_free); + + free (tgt->array); + free (tgt); +} + +static void +gomp_unmap_vars (struct target_mem_desc *tgt) +{ + if (tgt->list_count == 0) + { + free (tgt); + return; + } + + size_t i; + gomp_mutex_lock (&dev_env_lock); + for (i = 0; i < tgt->list_count; i++) + if (tgt->list[i]->refcount > 1) + tgt->list[i]->refcount--; + else + { + splay_tree_key k = tgt->list[i]; + if (k->copy_from) + /* FIXME: device to host copy. */ + memcpy ((void *) k->host_start, + (void *) (k->tgt->tgt_start + k->tgt_offset), + k->host_end - k->host_start); + splay_tree_remove (&dev_splay_tree, k); + if (k->tgt->refcount > 1) + k->tgt->refcount--; + else + gomp_unmap_tgt (k->tgt); + } + + if (tgt->refcount > 1) + tgt->refcount--; + else + gomp_unmap_tgt (tgt); + gomp_mutex_unlock (&dev_env_lock); +} + +static void +gomp_update (size_t mapnum, void **hostaddrs, size_t *sizes, + unsigned char *kinds) +{ + size_t i; + struct splay_tree_key_s cur_node; + + if (mapnum == 0) + return; + + gomp_mutex_lock (&dev_env_lock); + for (i = 0; i < mapnum; i++) + if (sizes[i]) + { + cur_node.host_start = (uintptr_t) hostaddrs[i]; + cur_node.host_end = cur_node.host_start + sizes[i]; + splay_tree_key n = splay_tree_lookup (&dev_splay_tree, &cur_node); + if (n) + { + if (n->host_start > cur_node.host_start + || n->host_end < cur_node.host_end) + gomp_fatal ("Trying to update [%p..%p) object when" + "only [%p..%p) is mapped", + (void *) cur_node.host_start, + (void *) cur_node.host_end, + (void *) n->host_start, + (void *) n->host_end); + if ((kinds[i] & 7) == 1) + /* FIXME: host to device copy. */ + memcpy ((void *) (n->tgt->tgt_start + n->tgt_offset + + cur_node.host_start - n->host_start), + (void *) cur_node.host_start, + cur_node.host_end - cur_node.host_start); + else if ((kinds[i] & 7) == 2) + /* FIXME: device to host copy. */ + memcpy ((void *) cur_node.host_start, + (void *) (n->tgt->tgt_start + n->tgt_offset + + cur_node.host_start - n->host_start), + cur_node.host_end - cur_node.host_start); + } + else + gomp_fatal ("Trying to update [%p..%p) object that is not mapped", + (void *) cur_node.host_start, + (void *) cur_node.host_end); + } + gomp_mutex_unlock (&dev_env_lock); +} + /* Called when encountering a target directive. If DEVICE is -1, it means use device-var ICV. If it is -2 (or any other value larger than last available hw device, use host fallback. @@ -49,32 +417,77 @@ GOMP_target (int device, void (*fn) (voi size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds) { - if (resolve_device (device) == -1) + device = resolve_device (device); + if (device == -1) { + /* Host fallback. */ fn (hostaddrs); return; } + if (device == 257) + { + struct target_mem_desc *tgt + = gomp_map_vars (mapnum, hostaddrs, sizes, kinds, true); + fn ((void *) tgt->tgt_start); + gomp_unmap_vars (tgt); + } } void GOMP_target_data (int device, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds) { - if (resolve_device (device) == -1) - return; + device = resolve_device (device); + if (device == -1) + { + /* Host fallback. */ + struct gomp_task_icv *icv = gomp_icv (false); + if (icv->target_data) + { + /* Even when doing a host fallback, if there are any active + #pragma omp target data constructs, need to remember the + new #pragma omp target data, otherwise GOMP_target_end_data + would get out of sync. */ + struct target_mem_desc *tgt + = gomp_map_vars (0, NULL, NULL, NULL, false); + tgt->prev = icv->target_data; + icv->target_data = tgt; + } + return; + } + + if (device == 257) + { + struct target_mem_desc *tgt + = gomp_map_vars (mapnum, hostaddrs, sizes, kinds, false); + struct gomp_task_icv *icv = gomp_icv (true); + tgt->prev = icv->target_data; + icv->target_data = tgt; + } } void GOMP_target_end_data (void) { + struct gomp_task_icv *icv = gomp_icv (false); + if (icv->target_data) + { + struct target_mem_desc *tgt = icv->target_data; + icv->target_data = tgt->prev; + gomp_unmap_vars (tgt); + } } void GOMP_target_update (int device, size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds) { - if (resolve_device (device) == -1) + device = resolve_device (device); + if (device == -1) return; + + if (device == 257) + gomp_update (mapnum, hostaddrs, sizes, kinds); } void --- libgomp/libgomp.h.jj 2013-09-09 17:41:02.388429108 +0200 +++ libgomp/libgomp.h 2013-09-13 12:19:13.489052710 +0200 @@ -214,18 +214,23 @@ struct gomp_team_state unsigned long static_trip; }; -/* These are the OpenMP 3.0 Internal Control Variables described in +struct target_mem_desc; + +/* These are the OpenMP 4.0 Internal Control Variables described in section 2.3.1. Those described as having one copy per task are stored within the structure; those described as having one copy for the whole program are (naturally) global variables. */ - + struct gomp_task_icv { unsigned long nthreads_var; enum gomp_schedule_type run_sched_var; int run_sched_modifier; + int default_device_var; bool dyn_var; bool nest_var; + /* Internal ICV. */ + struct target_mem_desc *target_data; }; extern struct gomp_task_icv gomp_global_icv; @@ -496,6 +501,10 @@ extern void gomp_team_start (void (*) (v struct gomp_team *); extern void gomp_team_end (void); +/* target.c */ + +extern int gomp_get_num_devices (void); + /* work.c */ extern void gomp_init_work_share (struct gomp_work_share *, bool, unsigned); --- libgomp/env.c.jj 2013-09-09 17:41:02.335429381 +0200 +++ libgomp/env.c 2013-09-12 17:39:42.435446713 +0200 @@ -56,6 +56,7 @@ struct gomp_task_icv gomp_global_icv = { .nthreads_var = 1, .run_sched_var = GFS_DYNAMIC, .run_sched_modifier = 1, + .default_device_var = 0, .dyn_var = false, .nest_var = false }; @@ -188,6 +189,24 @@ parse_unsigned_long (const char *name, u return false; } +/* Parse a positive int environment variable. Return true if one was + present and it was successfully parsed. */ + +static bool +parse_int (const char *name, int *pvalue, bool allow_zero) +{ + unsigned long value; + if (!parse_unsigned_long (name, &value, allow_zero)) + return false; + if (value > INT_MAX) + { + gomp_error ("Invalid value for environment variable %s", name); + return false; + } + *pvalue = (int) value; + return true; +} + /* Parse an unsigned long list environment variable. Return true if one was present and it was successfully parsed. */ @@ -658,8 +677,9 @@ handle_omp_display_env (bool proc_bind, /* FIXME: Unimplemented OpenMP 4.0 environment variables. fprintf (stderr, " OMP_PLACES = ''\n"); - fprintf (stderr, " OMP_CANCELLATION = ''\n"); - fprintf (stderr, " OMP_DEFAULT_DEVICE = ''\n"); */ + fprintf (stderr, " OMP_CANCELLATION = ''\n"); */ + fprintf (stderr, " OMP_DEFAULT_DEVICE = '%d'\n", + gomp_global_icv.default_device_var); if (verbose) { @@ -699,6 +719,7 @@ initialize_env (void) parse_boolean ("OMP_DYNAMIC", &gomp_global_icv.dyn_var); parse_boolean ("OMP_NESTED", &gomp_global_icv.nest_var); parse_boolean ("OMP_PROC_BIND", &bind_var); + parse_int ("OMP_DEFAULT_DEVICE", &gomp_global_icv.default_device_var, true); parse_unsigned_long ("OMP_MAX_ACTIVE_LEVELS", &gomp_max_active_levels_var, true); parse_unsigned_long ("OMP_THREAD_LIMIT", &gomp_thread_limit_var, false); @@ -881,36 +902,41 @@ omp_get_proc_bind (void) void omp_set_default_device (int device_num) { - (void) device_num; + struct gomp_task_icv *icv = gomp_icv (true); + icv->default_device_var = device_num >= 0 ? device_num : 0; } int omp_get_default_device (void) { - return 0; + struct gomp_task_icv *icv = gomp_icv (false); + return icv->default_device_var; } int omp_get_num_devices (void) { - return 0; + return gomp_get_num_devices (); } int omp_get_num_teams (void) { + /* Hardcoded to 1 on host, MIC, HSAIL? Maybe variable on PTX. */ return 1; } int omp_get_team_num (void) { + /* Hardcoded to 0 on host, MIC, HSAIL? Maybe variable on PTX. */ return 0; } int omp_is_initial_device (void) { + /* Hardcoded to 1 on host, should be 0 on MIC, HSAIL, PTX. */ return 1; } Jakub