Hi Tobias!

On 2024-09-19T19:11:32+0200, Tobias Burnus <tbur...@baylibre.com> wrote:
> OpenMP: Add get_device_from_uid/omp_get_uid_from_device routines

'[omp_]get_device_from_uid'.

> Those TR13/OpenMP 6.0 routines permit a reproducible offloading to
> a specific device by mapping an OpenMP device number to a
> unique ID (UID). The GPU device UIDs should be universally unique,
> the one for the host is not.

> --- a/gcc/omp-general.cc
> +++ b/gcc/omp-general.cc
> @@ -3260,6 +3260,7 @@ omp_runtime_api_procname (const char *name)
>        "alloc",
>        "calloc",
>        "free",
> +      "get_device_from_uid",
>        "get_interop_int",
>        "get_interop_ptr",
>        "get_mapped_ptr",
> @@ -3338,12 +3339,13 @@ omp_runtime_api_procname (const char *name)
>        as DECL_NAME only omp_* and omp_*_8 appear.  */
>        "display_env",
>        "get_ancestor_thread_num",
> -      "init_allocator",
> +      "omp_get_uid_from_device",
>        "get_partition_place_nums",
>        "get_place_num_procs",
>        "get_place_proc_ids",
>        "get_schedule",
>        "get_team_size",
> +      "init_allocator",
>        "set_default_device",
>        "set_dynamic",
>        "set_max_active_levels",

..., but here without 'omp_' prefix: 'get_uid_from_device' (and properly
sorted).

Do we apparently not have test suite coverage for these things?

> --- a/libgomp/libgomp.h
> +++ b/libgomp/libgomp.h
> @@ -1387,6 +1387,7 @@ struct gomp_device_descr
>  
>    /* The name of the device.  */
>    const char *name;
> +  const char *uid;

Caching this here, instead of acquiring via 'GOMP_OFFLOAD_get_uid' for
each call, is a minor performance optimization?  (Similar to other items
cached here, I guess.)

> @@ -1399,6 +1400,7 @@ struct gomp_device_descr
>  
>    /* Function handlers.  */
>    __typeof (GOMP_OFFLOAD_get_name) *get_name_func;
> +  __typeof (GOMP_OFFLOAD_get_uid) *get_uid_func;
>    __typeof (GOMP_OFFLOAD_get_caps) *get_caps_func;
>    __typeof (GOMP_OFFLOAD_get_type) *get_type_func;
>    __typeof (GOMP_OFFLOAD_get_num_devices) *get_num_devices_func;

Please also update 'libgomp/oacc-host.c:host_dispatch'.

> --- a/libgomp/omp_lib.f90.in
> +++ b/libgomp/omp_lib.f90.in

> +        interface
> +          ! Note: In gfortran, strings are \0 termined
> +          integer(c_int) function omp_get_device_from_uid(uid) bind(C)
> +            use iso_c_binding
> +            character(c_char), intent(in) :: uid(*)
> +          end function omp_get_device_from_uid
> +        end interface

For my understanding: in general, Fortran strings are *not*
NUL-terminated, right?  So this is a specific properly of 'gfortran'
and/or this GCC/OpenMP interface, that it requires passing in a
NUL-terminated string?  (..., so that you're permitted to simply
'bind(C)' to the C 'omp_get_device_from_uid'?)

> +        interface omp_get_uid_from_device
> +          ! Deviation from OpenMP 6.0: VALUE added.

(..., which I suppose you've reported to OpenMP...)

> +          character(:) function omp_get_uid_from_device (device_num)
> +            use iso_c_binding
> +            pointer :: omp_get_uid_from_device
> +            integer(c_int32_t), intent(in), value :: device_num
> +          end function omp_get_uid_from_device
> +
> +          character(:) function omp_get_uid_from_device_8 (device_num)
> +            use iso_c_binding
> +            pointer :: omp_get_uid_from_device_8
> +            integer(c_int64_t), intent(in), value :: device_num
> +          end function omp_get_uid_from_device_8
> +        end interface omp_get_uid_from_device

> --- a/libgomp/omp_lib.h.in
> +++ b/libgomp/omp_lib.h.in

Likewise.

> --- a/libgomp/plugin/plugin-gcn.c
> +++ b/libgomp/plugin/plugin-gcn.c

> +const char *
> +GOMP_OFFLOAD_get_uid (int ord)
> +{
> +  char *str;
> +  hsa_status_t status;
> +  struct agent_info *agent = get_agent_info (ord);
> +
> +  /* HSA documentation states: maximally 21 characters including NUL.  */
> +  str = GOMP_PLUGIN_malloc (21 * sizeof (char));
> +  status = hsa_fns.hsa_agent_get_info_fn (agent->id, HSA_AMD_AGENT_INFO_UUID,
> +                                       str);
> +  if (status != HSA_STATUS_SUCCESS)
> +    hsa_fatal ("Could not obtain device UUID", status);
> +  return str;
> +}

I guess I'd have just put this code into 'init_hsa_context', filling a
new statically-sized 'uuid' field in 'hsa_context_info' (like
'driver_version_s'; and assuming that 'hsa_context_info' is the right
abstraction for this), and then just return that 'uuid' from
'GOMP_OFFLOAD_get_uid'.  That way, you'd avoid the unclear semantics of
who gets to 'free' the buffer returned from 'GOMP_OFFLOAD_get_uid' upon
'GOMP_OFFLOAD_fini_device' -- currently the memory is lost?

> --- a/libgomp/plugin/plugin-nvptx.c
> +++ b/libgomp/plugin/plugin-nvptx.c

> +const char *
> +GOMP_OFFLOAD_get_uid (int ord)
> +{

Likewise.  ('nvptx_init', new statically-sized 'uuid' field in
'ptx_device', or similar.)

> +  CUresult r;
> +  CUuuid s;
> +  struct ptx_device *dev = ptx_devices[ord];
> +
> +  if (CUDA_CALL_EXISTS (cuDeviceGetUuid_v2))
> +    r = CUDA_CALL_NOCHECK (cuDeviceGetUuid_v2, &s, dev->dev);
> +  else if (CUDA_CALL_EXISTS (cuDeviceGetUuid))
> +    r = CUDA_CALL_NOCHECK (cuDeviceGetUuid, &s, dev->dev);
> +  else
> +    r = CUDA_ERROR_NOT_FOUND;

That might qualify as creative use of 'CUDA_ERROR_NOT_FOUND' (..., which
normally means that a CUDA Driver API routine has not found something,
not that a CUDA Driver API routine has not been found), but OK...

But actually, should we just 'return NULL;' in that case, and let libgomp
proper handle this gracefully, similar to as if 'GOMP_OFFLOAD_get_uid'
isn't implemented at all by a plugin?  (What are the expected OpenMP
semantics?)

> +  if (r != CUDA_SUCCESS)
> +    GOMP_PLUGIN_fatal ("cuDeviceGetUuid error: %s", cuda_error (r));
> +
> +  size_t len = strlen ("GPU-12345678-9abc-defg-hijk-lmniopqrstuv");
> +  char *str = (char *) GOMP_PLUGIN_malloc (len + 1);
> +  sprintf (str,
> +        "GPU-%02x" "%02x" "%02x" "%02x"
> +        "-%02x" "%02x"
> +        "-%02x" "%02x"
> +        "-%02x" "%02x" "%02x" "%02x" "%02x" "%02x" "%02x" "%02x",
> +        (unsigned char) s.bytes[0], (unsigned char) s.bytes[1],
> +        (unsigned char) s.bytes[2], (unsigned char) s.bytes[3],
> +        (unsigned char) s.bytes[4], (unsigned char) s.bytes[5],
> +        (unsigned char) s.bytes[6], (unsigned char) s.bytes[7],
> +        (unsigned char) s.bytes[8], (unsigned char) s.bytes[9],
> +        (unsigned char) s.bytes[10], (unsigned char) s.bytes[11],
> +         (unsigned char) s.bytes[12], (unsigned char) s.bytes[13],
> +        (unsigned char) s.bytes[14], (unsigned char) s.bytes[15]);
> +  return str;
> +}


Grüße
 Thomas

Reply via email to