Hi Tobias!

Earlier today, I happened to be testing the current OG14 branch.  On a
multi-Nvidia GPU system, I saw:

    +PASS: libgomp.c/interop-fr-1.c (test for excess errors)
    +FAIL: libgomp.c/interop-fr-1.c execution test

    spawn [open ...]
    Running on the nvptx device (-99)
    Running on the host device (-1)
    Running on the nvptx device (0)
    interop-fr-1.exe: [...]/libgomp.c/interop-fr-1.c:287: check_nvptx: Assertion `dev_num == dev' failed.
    FAIL: libgomp.c/interop-fr-1.c execution test

    271        ret_code = omp_irc_no_value;
    272        int vendor = (int) omp_get_interop_int (obj, omp_ipr_vendor, &ret_code);
    273        assert (ret_code == omp_irc_success);
    274        assert (vendor == 11);  /* Nvidia */
    275  
    276        ret_code = omp_irc_no_value;
    277        const char *vendor_name = omp_get_interop_str (obj, omp_ipr_vendor_name, &ret_code);
    278        assert (ret_code == omp_irc_success);
    279        assert (strcmp (vendor_name, "nvidia") == 0);
    280  
    281        ret_code = omp_irc_no_value;
    282        int dev_num = (int) omp_get_interop_int (obj, omp_ipr_device_num, &ret_code);
    283        assert (ret_code == omp_irc_success);
    284        if (dev == DEFAULT_DEVICE)
    285          assert (dev_num == omp_get_default_device ());
    286        else
    287          assert (dev_num == dev);

However -- and assuming that this is the scenario that the following
patch is intended to address:

On 2025-03-24T12:33:35+0100, Tobias Burnus <tbur...@baylibre.com> wrote:
> When re-reading the current plugin code, I noticed that 'interop' 
> created the cuStream in the current CUDA context (current CUDA 
> device) and not on the specified device.
>
> That's obviously the same if there is only a single nvptx device.
>
> The patch mimics what other code in the plugin uses and has been lightly 
> tested so far.
>
> Comments before I push it?

I still see 'libgomp.c/interop-fr-1.c' FAIL in the same way with this
patch applied (to OG14).


Grüße
 Thomas


> libgomp/plugin/plugin-nvptx.c: Fix device used for stream creation
>
> libgomp/ChangeLog:
>
>       * plugin/plugin-nvptx.c (GOMP_OFFLOAD_interop): Set context for
>       stream creation to use the specified device.
>
>  libgomp/plugin/plugin-nvptx.c | 18 ++++++++++++++++--
>  1 file changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
> index 822c6a410e2..a5cf859db19 100644
> --- a/libgomp/plugin/plugin-nvptx.c
> +++ b/libgomp/plugin/plugin-nvptx.c
> @@ -2483,12 +2483,26 @@ GOMP_OFFLOAD_interop (struct interop_obj_t *obj, int ord,
>         break;
>        }
>  
> -  obj->device_data = ptx_devices[ord];
> +  struct ptx_device *ptx_dev = obj->device_data = ptx_devices[ord];
>  
>    if (targetsync)
>      {
>        CUstream stream = NULL;
> -      CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT);
> +      CUdevice cur_ctx_dev;
> +      CUresult res = CUDA_CALL_NOCHECK (cuCtxGetDevice, &cur_ctx_dev);
> +      if (res != CUDA_SUCCESS && res != CUDA_ERROR_INVALID_CONTEXT)
> +     GOMP_PLUGIN_fatal ("cuCtxGetDevice error: %s", cuda_error (res));
> +      if (res != CUDA_ERROR_INVALID_CONTEXT && ptx_dev->dev == cur_ctx_dev)
> +     CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT);
> +      else
> +     {
> +       CUcontext old_ctx;
> +       assert (ptx_dev->ctx);
> +       CUDA_CALL_ASSERT (cuCtxPushCurrent, ptx_dev->ctx);
> +       CUDA_CALL_ASSERT (cuStreamCreate, &stream, CU_STREAM_DEFAULT);
> +       if (res != CUDA_ERROR_INVALID_CONTEXT)
> +         CUDA_CALL_ASSERT (cuCtxPopCurrent, &old_ctx);
> +     }
>        obj->stream = stream;
>      }
>  }

Reply via email to