Hi Tobias! On 2024-09-19T19:11:32+0200, Tobias Burnus <tbur...@baylibre.com> wrote: > OpenMP: Add get_device_from_uid/omp_get_uid_from_device routines
'[omp_]get_device_from_uid'. > Those TR13/OpenMP 6.0 routines permit a reproducible offloading to > a specific device by mapping an OpenMP device number to a > unique ID (UID). The GPU device UIDs should be universally unique, > the one for the host is not. > --- a/gcc/omp-general.cc > +++ b/gcc/omp-general.cc > @@ -3260,6 +3260,7 @@ omp_runtime_api_procname (const char *name) > "alloc", > "calloc", > "free", > + "get_device_from_uid", > "get_interop_int", > "get_interop_ptr", > "get_mapped_ptr", > @@ -3338,12 +3339,13 @@ omp_runtime_api_procname (const char *name) > as DECL_NAME only omp_* and omp_*_8 appear. */ > "display_env", > "get_ancestor_thread_num", > - "init_allocator", > + "omp_get_uid_from_device", > "get_partition_place_nums", > "get_place_num_procs", > "get_place_proc_ids", > "get_schedule", > "get_team_size", > + "init_allocator", > "set_default_device", > "set_dynamic", > "set_max_active_levels", ..., but here without 'omp_' prefix: 'get_uid_from_device' (and properly sorted). Do we apparently not have test suite coverage for these things? > --- a/libgomp/libgomp.h > +++ b/libgomp/libgomp.h > @@ -1387,6 +1387,7 @@ struct gomp_device_descr > > /* The name of the device. */ > const char *name; > + const char *uid; Caching this here, instead of acquiring via 'GOMP_OFFLOAD_get_uid' for each call, is a minor performance optimization? (Similar to other items cached here, I guess.) > @@ -1399,6 +1400,7 @@ struct gomp_device_descr > > /* Function handlers. */ > __typeof (GOMP_OFFLOAD_get_name) *get_name_func; > + __typeof (GOMP_OFFLOAD_get_uid) *get_uid_func; > __typeof (GOMP_OFFLOAD_get_caps) *get_caps_func; > __typeof (GOMP_OFFLOAD_get_type) *get_type_func; > __typeof (GOMP_OFFLOAD_get_num_devices) *get_num_devices_func; Please also update 'libgomp/oacc-host.c:host_dispatch'. > --- a/libgomp/omp_lib.f90.in > +++ b/libgomp/omp_lib.f90.in > + interface > + ! Note: In gfortran, strings are \0 termined > + integer(c_int) function omp_get_device_from_uid(uid) bind(C) > + use iso_c_binding > + character(c_char), intent(in) :: uid(*) > + end function omp_get_device_from_uid > + end interface For my understanding: in general, Fortran strings are *not* NUL-terminated, right? So this is a specific properly of 'gfortran' and/or this GCC/OpenMP interface, that it requires passing in a NUL-terminated string? (..., so that you're permitted to simply 'bind(C)' to the C 'omp_get_device_from_uid'?) > + interface omp_get_uid_from_device > + ! Deviation from OpenMP 6.0: VALUE added. (..., which I suppose you've reported to OpenMP...) > + character(:) function omp_get_uid_from_device (device_num) > + use iso_c_binding > + pointer :: omp_get_uid_from_device > + integer(c_int32_t), intent(in), value :: device_num > + end function omp_get_uid_from_device > + > + character(:) function omp_get_uid_from_device_8 (device_num) > + use iso_c_binding > + pointer :: omp_get_uid_from_device_8 > + integer(c_int64_t), intent(in), value :: device_num > + end function omp_get_uid_from_device_8 > + end interface omp_get_uid_from_device > --- a/libgomp/omp_lib.h.in > +++ b/libgomp/omp_lib.h.in Likewise. > --- a/libgomp/plugin/plugin-gcn.c > +++ b/libgomp/plugin/plugin-gcn.c > +const char * > +GOMP_OFFLOAD_get_uid (int ord) > +{ > + char *str; > + hsa_status_t status; > + struct agent_info *agent = get_agent_info (ord); > + > + /* HSA documentation states: maximally 21 characters including NUL. */ > + str = GOMP_PLUGIN_malloc (21 * sizeof (char)); > + status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AMD_AGENT_INFO_UUID, > + str); > + if (status != HSA_STATUS_SUCCESS) > + hsa_fatal ("Could not obtain device UUID", status); > + return str; > +} I guess I'd have just put this code into 'init_hsa_context', filling a new statically-sized 'uuid' field in 'hsa_context_info' (like 'driver_version_s'; and assuming that 'hsa_context_info' is the right abstraction for this), and then just return that 'uuid' from 'GOMP_OFFLOAD_get_uid'. That way, you'd avoid the unclear semantics of who gets to 'free' the buffer returned from 'GOMP_OFFLOAD_get_uid' upon 'GOMP_OFFLOAD_fini_device' -- currently the memory is lost? > --- a/libgomp/plugin/plugin-nvptx.c > +++ b/libgomp/plugin/plugin-nvptx.c > +const char * > +GOMP_OFFLOAD_get_uid (int ord) > +{ Likewise. ('nvptx_init', new statically-sized 'uuid' field in 'ptx_device', or similar.) > + CUresult r; > + CUuuid s; > + struct ptx_device *dev = ptx_devices[ord]; > + > + if (CUDA_CALL_EXISTS (cuDeviceGetUuid_v2)) > + r = CUDA_CALL_NOCHECK (cuDeviceGetUuid_v2, &s, dev->dev); > + else if (CUDA_CALL_EXISTS (cuDeviceGetUuid)) > + r = CUDA_CALL_NOCHECK (cuDeviceGetUuid, &s, dev->dev); > + else > + r = CUDA_ERROR_NOT_FOUND; That might qualify as creative use of 'CUDA_ERROR_NOT_FOUND' (..., which normally means that a CUDA Driver API routine has not found something, not that a CUDA Driver API routine has not been found), but OK... But actually, should we just 'return NULL;' in that case, and let libgomp proper handle this gracefully, similar to as if 'GOMP_OFFLOAD_get_uid' isn't implemented at all by a plugin? (What are the expected OpenMP semantics?) > + if (r != CUDA_SUCCESS) > + GOMP_PLUGIN_fatal ("cuDeviceGetUuid error: %s", cuda_error (r)); > + > + size_t len = strlen ("GPU-12345678-9abc-defg-hijk-lmniopqrstuv"); > + char *str = (char *) GOMP_PLUGIN_malloc (len + 1); > + sprintf (str, > + "GPU-%02x" "%02x" "%02x" "%02x" > + "-%02x" "%02x" > + "-%02x" "%02x" > + "-%02x" "%02x" "%02x" "%02x" "%02x" "%02x" "%02x" "%02x", > + (unsigned char) s.bytes[0], (unsigned char) s.bytes[1], > + (unsigned char) s.bytes[2], (unsigned char) s.bytes[3], > + (unsigned char) s.bytes[4], (unsigned char) s.bytes[5], > + (unsigned char) s.bytes[6], (unsigned char) s.bytes[7], > + (unsigned char) s.bytes[8], (unsigned char) s.bytes[9], > + (unsigned char) s.bytes[10], (unsigned char) s.bytes[11], > + (unsigned char) s.bytes[12], (unsigned char) s.bytes[13], > + (unsigned char) s.bytes[14], (unsigned char) s.bytes[15]); > + return str; > +} Grüße Thomas