Hi! On 2017-01-13T19:11:23+0100, Jakub Jelinek <ja...@redhat.com> wrote: > [...] If the nvptx libgomp plugin is installed, but libcuda.so.1 > can't be found, then the plugin behaves as if there are no PTX devices > available. [...]
ACK. > --- libgomp/plugin/plugin-nvptx.c.jj 2017-01-13 12:07:56.000000000 +0100 > +++ libgomp/plugin/plugin-nvptx.c 2017-01-13 18:00:39.693284346 +0100 > +/* -1 if init_cuda_lib has not been called yet, false > + if it has been and failed, true if it has been and succeeded. */ > +static char cuda_lib_inited = -1; > > - return desc; > +/* Dynamically load the CUDA runtime library and initialize function > + pointers, return false if unsuccessful, true if successful. */ > +static bool > +init_cuda_lib (void) > +{ > + if (cuda_lib_inited != -1) > + return cuda_lib_inited; > + const char *cuda_runtime_lib = "libcuda.so.1"; > + void *h = dlopen (cuda_runtime_lib, RTLD_LAZY); > + cuda_lib_inited = false; > + if (h == NULL) > + return false; ..., so this has to stay. > +# undef CUDA_ONE_CALL > +# define CUDA_ONE_CALL(call) CUDA_ONE_CALL_1 (call) > +# define CUDA_ONE_CALL_1(call) \ > + cuda_lib.call = dlsym (h, #call); \ > + if (cuda_lib.call == NULL) \ > + return false; However, I'd like to make this case (a missing symbol) a fatal error, instead of silently disabling the plugin/device. OK to push the attached "GCN, nvptx: Fatal error for missing symbols in 'libhsa-runtime64.so.1', 'libcuda.so.1'"? > + [...] > + cuda_lib_inited = true; > + return true; > } Grüße Thomas
From 6a6520e01f7e7118b556683c2934f2c64c6dbc81 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge <tschwi...@baylibre.com> Date: Thu, 7 Mar 2024 12:31:52 +0100 Subject: [PATCH] GCN, nvptx: Fatal error for missing symbols in 'libhsa-runtime64.so.1', 'libcuda.so.1' If 'libhsa-runtime64.so.1', 'libcuda.so.1' are not available, the corresponding libgomp plugin/device gets disabled, as before. But if they are available, report any inconsistencies such as missing symbols, similar to how we fail in presence of other issues during device initialization. libgomp/ * plugin/plugin-gcn.c (init_hsa_runtime_functions): Fatal error for missing symbols. * plugin/plugin-nvptx.c (init_cuda_lib): Likewise. --- libgomp/plugin/plugin-gcn.c | 3 ++- libgomp/plugin/plugin-nvptx.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index 464164afb03..338225db6f4 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -1382,9 +1382,10 @@ init_hsa_runtime_functions (void) #define DLSYM_FN(function) \ hsa_fns.function##_fn = dlsym (handle, #function); \ if (hsa_fns.function##_fn == NULL) \ - return false; + GOMP_PLUGIN_fatal ("'%s' is missing '%s'", hsa_runtime_lib, #function); #define DLSYM_OPT_FN(function) \ hsa_fns.function##_fn = dlsym (handle, #function); + void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY); if (handle == NULL) return false; diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 3fd6cd42fa6..ffb1db67d20 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -127,7 +127,7 @@ init_cuda_lib (void) # define CUDA_ONE_CALL_1(call, allow_null) \ cuda_lib.call = dlsym (h, #call); \ if (!allow_null && cuda_lib.call == NULL) \ - return false; + GOMP_PLUGIN_fatal ("'%s' is missing '%s'", cuda_runtime_lib, #call); #include "cuda-lib.def" # undef CUDA_ONE_CALL # undef CUDA_ONE_CALL_1 -- 2.34.1