Hi!

Ping.

On Tue, 23 May 2017 17:31:11 +0200, I wrote:
> On Thu, 29 Oct 2015 17:22:46 -0700, Nathan Sidwell <nat...@acm.org> wrote:
> > acc_on_device and it's builtin had a conflict.  The function formally takes 
> > an 
> > enum argument, but the builtin takes an int -- primarily to avoid the 
> > compiler 
> > having to generate the enum  type internally.
> > 
> > This works fine for C,  where the external declaration of the function (in 
> > openacc.h) matches up with the builtin, and we optimize the builtin as 
> > expected.
> > 
> > It fails for C++ where the builtin doesn't match the declaration in the 
> > header. 
> >   We end up with emitting a call to acc_on_device,  which is resolved by 
> > libgomp.  Unfortunately that means we fail to optimize.  [...]
> 
> > [Nathan's trunk r229562] leaves things unchanged for C --  declare a 
> > function with an enum arg. 
> >   But for C++ we the extern "C" declaration takes an int -- and therefore 
> > matches the builtin.  We insert an inline wrapper that takes an enum 
> > argument. 
> > Because of C++'s overload resolution both the wrapper and the int-taking 
> > declaration can have the same source name.
> 
> > --- libgomp/openacc.h       (revision 229535)
> > +++ libgomp/openacc.h       (working copy)
> 
> > -int acc_on_device (acc_device_t) __GOACC_NOTHROW;
> > +#ifdef __cplusplus
> > +int acc_on_device (int __arg) __GOACC_NOTHROW;
> > +#else
> > +int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW;
> > +#endif
> 
> >  #ifdef __cplusplus
> >  }
> > +
> > +/* Forwarding function with correctly typed arg.  */
> > +
> > +inline int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW
> > +{
> > +  return acc_on_device ((int) __arg);
> > +}
> >  #endif
> 
> > --- libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c     
> > (revision 0)
> > +++ libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c     
> > (working copy)
> > @@ -0,0 +1,12 @@
> > +/* { dg-do compile } */
> > +/* { dg-additional-options "-O2" } */
> > +
> > +#include <openacc.h>
> > +
> > +int Foo (acc_device_t x)
> > +{
> > +  return acc_on_device (x);
> > +}
> > +
> > +/* { dg-final { scan-assembler-not "acc_on_device" } } */
> 
> As a user, I'd expect that when compiling such code with "-O0" instead of
> "-O2", but adding "__attribute__ ((optimize ("O2")))" to "Foo", that I'd
> then get "acc_on_device" expanded as a builtin, and no calls to the
> "acc_on_device library function.  In C++ that is currently not working,
> because the "Forwarding function with correctly typed arg" (cited above)
> doesn't "inherit" that "optimize" attribute.  Making that one "always
> inline" resolves the problem.  Also I cleaned up and extended testing
> some more.  OK for trunk?
> 
> commit 9cc3a384c17e9f692f7864c604d2e2f9fbf0bac9
> Author: Thomas Schwinge <tho...@codesourcery.com>
> Date:   Tue May 23 13:21:14 2017 +0200
> 
>     Make the OpenACC C++ acc_on_device wrapper "always inline"
>     
>             libgomp/
>             * openacc.h [__cplusplus] (acc_on_device): Mark as "always
>             inline".
>             * testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c: Remove
>             file; test cases already present...
>             * testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c: ... in
>             this file.  Update.
>             * testsuite/libgomp.oacc-c-c++-common/acc-on-device.c: Remove
>             file; test cases now present...
>             * testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c: ... in
>             this new file.
>             * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Update.
> ---
>  libgomp/openacc.h                                  |  3 +-
>  .../libgomp.oacc-c-c++-common/acc-on-device-2.c    | 22 -------------
>  .../libgomp.oacc-c-c++-common/acc-on-device.c      | 12 -------
>  .../libgomp.oacc-c-c++-common/acc_on_device-1.c    | 38 
> +++++++++++++---------
>  .../libgomp.oacc-c-c++-common/acc_on_device-2.c    | 21 ++++++++++++
>  .../libgomp.oacc-c-c++-common/parallel-dims.c      | 14 ++++----
>  6 files changed, 52 insertions(+), 58 deletions(-)
> 
> diff --git libgomp/openacc.h libgomp/openacc.h
> index 137e2c1..266f559 100644
> --- libgomp/openacc.h
> +++ libgomp/openacc.h
> @@ -121,7 +121,8 @@ int acc_set_cuda_stream (int, void *) __GOACC_NOTHROW;
>  /* Forwarding function with correctly typed arg.  */
>  
>  #pragma acc routine seq
> -inline int acc_on_device (acc_device_t __arg) __GOACC_NOTHROW
> +inline __attribute__ ((__always_inline__)) int
> +acc_on_device (acc_device_t __arg) __GOACC_NOTHROW
>  {
>    return acc_on_device ((int) __arg);
>  }
> diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c 
> libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c
> deleted file mode 100644
> index bfcb67d..0000000
> --- libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device-2.c
> +++ /dev/null
> @@ -1,22 +0,0 @@
> -/* Test the acc_on_device library function. */
> -/* { dg-additional-options "-fno-builtin-acc_on_device" } */
> -
> -#include <openacc.h>
> -
> -int main ()
> -{
> -  int dev;
> -  
> -#pragma acc parallel copyout (dev)
> -  {
> -    dev = acc_on_device (acc_device_not_host);
> -  }
> -
> -  int expect = 1;
> -  
> -#if  ACC_DEVICE_TYPE_host
> -  expect = 0;
> -#endif
> -  
> -  return dev != expect;
> -}
> diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c 
> libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c
> deleted file mode 100644
> index e0d8710..0000000
> --- libgomp/testsuite/libgomp.oacc-c-c++-common/acc-on-device.c
> +++ /dev/null
> @@ -1,12 +0,0 @@
> -/* { dg-do compile } */
> -/* We don't expect this to work with optimizations disabled.
> -   { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */
> -
> -#include <openacc.h>
> -
> -int Foo (acc_device_t x)
> -{
> -  return acc_on_device (x);
> -}
> -
> -/* { dg-final { scan-assembler-not "acc_on_device" } } */
> diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c 
> libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c
> index 8112745..eb962e4 100644
> --- libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-1.c
> @@ -1,6 +1,9 @@
>  /* Disable the acc_on_device builtin; we want to test the libgomp library
>     function.  */
> +/* { dg-additional-options "-DACC_ON_DEVICE=acc_on_device" } */
>  /* { dg-additional-options "-fno-builtin-acc_on_device" } */
> +/* { dg-additional-options "-fdump-rtl-expand" }
> +   { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]* acc_on_device" 12 
> "expand" } } */
>  
>  #include <stdlib.h>
>  #include <openacc.h>
> @@ -11,13 +14,13 @@ main (int argc, char *argv[])
>    /* Host.  */
>  
>    {
> -    if (!acc_on_device (acc_device_none))
> +    if (!ACC_ON_DEVICE (acc_device_none))
>        abort ();
> -    if (!acc_on_device (acc_device_host))
> +    if (!ACC_ON_DEVICE (acc_device_host))
>        abort ();
> -    if (acc_on_device (acc_device_not_host))
> +    if (ACC_ON_DEVICE (acc_device_not_host))
>        abort ();
> -    if (acc_on_device (acc_device_nvidia))
> +    if (ACC_ON_DEVICE (acc_device_nvidia))
>        abort ();
>    }
>  
> @@ -26,39 +29,44 @@ main (int argc, char *argv[])
>  
>  #pragma acc parallel if(0)
>    {
> -    if (!acc_on_device (acc_device_none))
> +    if (!ACC_ON_DEVICE (acc_device_none))
>        abort ();
> -    if (!acc_on_device (acc_device_host))
> +    if (!ACC_ON_DEVICE (acc_device_host))
>        abort ();
> -    if (acc_on_device (acc_device_not_host))
> +    if (ACC_ON_DEVICE (acc_device_not_host))
>        abort ();
> -    if (acc_on_device (acc_device_nvidia))
> +    if (ACC_ON_DEVICE (acc_device_nvidia))
>        abort ();
>    }
>  
>  
> -#if !ACC_DEVICE_TYPE_host
> +  int on_host_p;
> +#if ACC_DEVICE_TYPE_host
> +  on_host_p = 1;
> +#else
> +  on_host_p = 0;
> +#endif
>  
>    /* Offloaded.  */
>  
>  #pragma acc parallel
>    {
> -    if (acc_on_device (acc_device_none))
> +    if (on_host_p != ACC_ON_DEVICE (acc_device_none))
>        abort ();
> -    if (acc_on_device (acc_device_host))
> +    if (on_host_p != ACC_ON_DEVICE (acc_device_host))
>        abort ();
> -    if (!acc_on_device (acc_device_not_host))
> +    if (on_host_p == ACC_ON_DEVICE (acc_device_not_host))
>        abort ();
> +
>  #if ACC_DEVICE_TYPE_nvidia
> -    if (!acc_on_device (acc_device_nvidia))
> +    if (!ACC_ON_DEVICE (acc_device_nvidia))
>        abort ();
>  #else
> -    if (acc_on_device (acc_device_nvidia))
> +    if (ACC_ON_DEVICE (acc_device_nvidia))
>        abort ();
>  #endif
>    }
>  
> -#endif
>  
>    return 0;
>  }
> diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c 
> libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c
> new file mode 100644
> index 0000000..c3b3378
> --- /dev/null
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/acc_on_device-2.c
> @@ -0,0 +1,21 @@
> +/* With the acc_on_device builtin enabled, we don't expect any calls to the
> +   libgomp library function.  */
> +/* { dg-additional-options "-fdump-rtl-expand" }
> +   { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]* acc_on_device" 0 
> "expand" } } */
> +
> +#include <openacc.h>
> +
> +#ifdef __OPTIMIZE__
> +# define ACC_ON_DEVICE acc_on_device
> +#else
> +/* Without optimizations enabled, we're not expecting the acc_on_device 
> builtin
> +   to be used, so use here a "-O2" wrapper.  */
> +#pragma acc routine seq
> +static int __attribute__ ((optimize ("O2")))
> +ACC_ON_DEVICE (acc_device_t arg)
> +{
> +  return acc_on_device (arg);
> +}
> +#endif
> +
> +#include "acc_on_device-1.c"
> diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c 
> libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
> index 8308f7c..1c48ab3 100644
> --- libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
> +++ libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
> @@ -4,14 +4,12 @@
>  #include <limits.h>
>  #include <openacc.h>
>  
> -/* TODO: "(int) acc_device_*" casts because of the C++ acc_on_device wrapper
> -   not behaving as expected for -O0.  */
>  #pragma acc routine seq
>  static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
>  {
> -  if (acc_on_device ((int) acc_device_host))
> +  if (acc_on_device (acc_device_host))
>      return 0;
> -  else if (acc_on_device ((int) acc_device_nvidia))
> +  else if (acc_on_device (acc_device_nvidia))
>      {
>        unsigned int r;
>        asm volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (r));
> @@ -24,9 +22,9 @@ static unsigned int __attribute__ ((optimize ("O2"))) 
> acc_gang ()
>  #pragma acc routine seq
>  static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
>  {
> -  if (acc_on_device ((int) acc_device_host))
> +  if (acc_on_device (acc_device_host))
>      return 0;
> -  else if (acc_on_device ((int) acc_device_nvidia))
> +  else if (acc_on_device (acc_device_nvidia))
>      {
>        unsigned int r;
>        asm volatile ("mov.u32 %0,%%tid.y;" : "=r" (r));
> @@ -39,9 +37,9 @@ static unsigned int __attribute__ ((optimize ("O2"))) 
> acc_worker ()
>  #pragma acc routine seq
>  static unsigned int __attribute__ ((optimize ("O2"))) acc_vector ()
>  {
> -  if (acc_on_device ((int) acc_device_host))
> +  if (acc_on_device (acc_device_host))
>      return 0;
> -  else if (acc_on_device ((int) acc_device_nvidia))
> +  else if (acc_on_device (acc_device_nvidia))
>      {
>        unsigned int r;
>        asm volatile ("mov.u32 %0,%%tid.x;" : "=r" (r));


Grüße
 Thomas

Reply via email to