Hi!

On Thu, 12 Dec 2013 10:53:02 +0100, I wrote:
> On Thu, 5 Sep 2013 18:11:05 +0200, Jakub Jelinek <ja...@redhat.com> wrote:
> > 3) I figured out we need to tell the runtime library not just
> > address, size and kind, but also alignment (we won't need that for
> > the #pragma omp declare target global vars though), so that the
> > runtime library can properly align it.  As TYPE_ALIGN/DECL_ALIGN
> > is in bits and is 32 bit wide, when that is in bytes and we only care
> > about power of twos, I've decided to encode it in the upper 5 bits
> > of the kind (lower 3 bits are used for OMP_CLAUSE_MAP_* kind).
> 
> Unfortunately, this scheme breaks down with OpenACC: we need an
> additional bit to codify a flag for present_or_* map clauses (meaning:
> only map the data (allocate/to/from/tofrom, as for OpenMP) if not already
> present on the device).
> 
> With five bits available for the OpenMP case, we can describe alignments
> up to 2 GiB, and I've empirically found on my development system that the
> largest possible alignment is MAX_OFILE_ALIGNMENT, 256 MiB for ELF
> systems, so that's fine.  But with only four bits available, we get to
> describe alignments up to 1 << ((1 << 4) - 1) = 32 KiB, which is too
> small -- even though it'd be fine for "normal" usage of __attribute__
> ((aligned (x))).
> 
> So it seems our options are to use a bigger datatype for the kinds array,
> to split off from the kinds array a new alignments array, or to generally
> switch to using an array of a struct containing hostaddr, size,
> alignment, kind.  The latter would require additional changes in the
> child_fn.
> 
> As it's an ABI change no matter what, would you like to see this limited
> to OpenACC?  Changing it also for OpenMP's GOMP_target would have the
> advantage to have them not diverge (especially at the generating side in
> omp-low.c's lowering functions), but I'm not sure whether such an ABI
> change would easily be possible now, with the OpenMP 4 support merged
> into trunk -- though, it is not yet part of a regular GCC release?

Here is the patch I propose for gomp-4_0-branch; OK?

commit ea56cdbd257b08421fefc8e30fd4a28d37d6e481
Author: Thomas Schwinge <tho...@codesourcery.com>
Date:   Sun Dec 15 11:03:47 2013 +0100

    OpenACC memory mapping interface: Move alignments into its own array.
    
        gcc/
        * builtin-types.def
        (BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR_PTR): New type.
        gcc/fortran/
        * types.def (BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR_PTR): New
        type.
        gcc/
        * oacc-builtins.def (BUILT_IN_GOACC_PARALLEL): Use it.
        * omp-low.c (expand_oacc_parallel, lower_oacc_parallel): Move
        alignments into its own array.
        libgomp/
        * libgomp_g.h (GOACC_parallel): Add alignments array.
        * oacc-parallel.c (GOACC_parallel): Likewise.
        * testsuite/libgomp.oacc-c/goacc_parallel.c (main): Likewise.

diff --git gcc/builtin-types.def gcc/builtin-types.def
index e7bfaf9..59660cd 100644
--- gcc/builtin-types.def
+++ gcc/builtin-types.def
@@ -529,6 +529,9 @@ DEF_FUNCTION_TYPE_8 
(BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR,
                     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
                     BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG,
                     BT_BOOL, BT_UINT, BT_PTR)
+DEF_FUNCTION_TYPE_8 (BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR_PTR,
+                    BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_PTR, BT_SIZE,
+                    BT_PTR, BT_PTR, BT_PTR, BT_PTR)
 
 DEF_FUNCTION_TYPE_VAR_0 (BT_FN_VOID_VAR, BT_VOID)
 DEF_FUNCTION_TYPE_VAR_0 (BT_FN_INT_VAR, BT_INT)
diff --git gcc/fortran/types.def gcc/fortran/types.def
index 9bbee35..9ec752a 100644
--- gcc/fortran/types.def
+++ gcc/fortran/types.def
@@ -213,5 +213,8 @@ DEF_FUNCTION_TYPE_8 
(BT_FN_VOID_OMPFN_PTR_OMPCPYFN_LONG_LONG_BOOL_UINT_PTR,
                     BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR,
                     BT_PTR_FN_VOID_PTR_PTR, BT_LONG, BT_LONG,
                     BT_BOOL, BT_UINT, BT_PTR)
+DEF_FUNCTION_TYPE_8 (BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR_PTR,
+                    BT_VOID, BT_INT, BT_PTR_FN_VOID_PTR, BT_PTR, BT_SIZE,
+                    BT_PTR, BT_PTR, BT_PTR, BT_PTR)
 
 DEF_FUNCTION_TYPE_VAR_0 (BT_FN_VOID_VAR, BT_VOID)
diff --git gcc/oacc-builtins.def gcc/oacc-builtins.def
index a75e42d..5057e13 100644
--- gcc/oacc-builtins.def
+++ gcc/oacc-builtins.def
@@ -28,4 +28,5 @@ along with GCC; see the file COPYING3.  If not see
    See builtins.def for details.  */
 
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_PARALLEL, "GOACC_parallel",
-                  BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST)
+                  BT_FN_VOID_INT_OMPFN_PTR_SIZE_PTR_PTR_PTR_PTR,
+                  ATTR_NOTHROW_LIST)
diff --git gcc/omp-low.c gcc/omp-low.c
index e0f7d1d..ce99835 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -4886,7 +4886,7 @@ expand_oacc_parallel (struct omp_region *region)
     }
 
   /* Emit a library call to launch CHILD_FN.  */
-  tree t1, t2, t3, t4, device, c, clauses;
+  tree t1, t2, t3, t4, t5, device, c, clauses;
   enum built_in_function start_ix;
   location_t clause_loc;
 
@@ -4918,6 +4918,7 @@ expand_oacc_parallel (struct omp_region *region)
       t2 = build_zero_cst (ptr_type_node);
       t3 = t2;
       t4 = t2;
+      t5 = t2;
     }
   else
     {
@@ -4926,6 +4927,7 @@ expand_oacc_parallel (struct omp_region *region)
       t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
       t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
       t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
+      t5 = build_fold_addr_expr (TREE_VEC_ELT (t, 3));
     }
 
   gimple g;
@@ -4935,7 +4937,7 @@ expand_oacc_parallel (struct omp_region *region)
   tree openmp_target = build_zero_cst (ptr_type_node);
   tree fnaddr = build_fold_addr_expr (child_fn);
   g = gimple_build_call (builtin_decl_explicit (start_ix),
-                        7, device, fnaddr, openmp_target, t1, t2, t3, t4);
+                        8, device, fnaddr, openmp_target, t1, t2, t3, t4, t5);
   gimple_set_location (g, gimple_location (entry_stmt));
   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
 }
@@ -8766,7 +8768,7 @@ lower_oacc_parallel (gimple_stmt_iterator *gsi_p, 
omp_context *ctx)
        = create_tmp_var (ctx->record_type, ".omp_data_arr");
       DECL_NAMELESS (ctx->sender_decl) = 1;
       TREE_ADDRESSABLE (ctx->sender_decl) = 1;
-      t = make_tree_vec (3);
+      t = make_tree_vec (4);
       TREE_VEC_ELT (t, 0) = ctx->sender_decl;
       TREE_VEC_ELT (t, 1)
        = create_tmp_var (build_array_type_nelts (size_type_node, map_cnt),
@@ -8777,15 +8779,24 @@ lower_oacc_parallel (gimple_stmt_iterator *gsi_p, 
omp_context *ctx)
       TREE_VEC_ELT (t, 2)
        = create_tmp_var (build_array_type_nelts (unsigned_char_type_node,
                                                  map_cnt),
-                         ".omp_data_kinds");
+                         ".omp_data_alignments");
       DECL_NAMELESS (TREE_VEC_ELT (t, 2)) = 1;
       TREE_ADDRESSABLE (TREE_VEC_ELT (t, 2)) = 1;
       TREE_STATIC (TREE_VEC_ELT (t, 2)) = 1;
+      TREE_VEC_ELT (t, 3)
+       = create_tmp_var (build_array_type_nelts (unsigned_char_type_node,
+                                                 map_cnt),
+                         ".omp_data_kinds");
+      DECL_NAMELESS (TREE_VEC_ELT (t, 3)) = 1;
+      TREE_ADDRESSABLE (TREE_VEC_ELT (t, 3)) = 1;
+      TREE_STATIC (TREE_VEC_ELT (t, 3)) = 1;
       gimple_oacc_parallel_set_data_arg (stmt, t);
 
       vec<constructor_elt, va_gc> *vsize;
+      vec<constructor_elt, va_gc> *valign;
       vec<constructor_elt, va_gc> *vkind;
       vec_alloc (vsize, map_cnt);
+      vec_alloc (valign, map_cnt);
       vec_alloc (vkind, map_cnt);
       unsigned int map_idx = 0;
 
@@ -8884,6 +8895,14 @@ lower_oacc_parallel (gimple_stmt_iterator *gsi_p, 
omp_context *ctx)
            if (TREE_CODE (s) != INTEGER_CST)
              TREE_STATIC (TREE_VEC_ELT (t, 1)) = 0;
 
+           unsigned int talign = TYPE_ALIGN_UNIT (TREE_TYPE (ovar));
+           if (DECL_P (ovar) && DECL_ALIGN_UNIT (ovar) > talign)
+             talign = DECL_ALIGN_UNIT (ovar);
+           talign = ceil_log2 (talign);
+           CONSTRUCTOR_APPEND_ELT (valign, purpose,
+                                   build_int_cst (unsigned_char_type_node,
+                                                  talign));
+
            unsigned char tkind = 0;
            switch (OMP_CLAUSE_CODE (c))
              {
@@ -8899,14 +8918,10 @@ lower_oacc_parallel (gimple_stmt_iterator *gsi_p, 
omp_context *ctx)
              default:
                gcc_unreachable ();
              }
-           unsigned int talign = TYPE_ALIGN_UNIT (TREE_TYPE (ovar));
-           if (DECL_P (ovar) && DECL_ALIGN_UNIT (ovar) > talign)
-             talign = DECL_ALIGN_UNIT (ovar);
-           talign = ceil_log2 (talign);
-           tkind |= talign << 3;
            CONSTRUCTOR_APPEND_ELT (vkind, purpose,
                                    build_int_cst (unsigned_char_type_node,
                                                   tkind));
+
            if (nc && nc != c)
              c = nc;
          }
@@ -8916,7 +8931,9 @@ lower_oacc_parallel (gimple_stmt_iterator *gsi_p, 
omp_context *ctx)
       DECL_INITIAL (TREE_VEC_ELT (t, 1))
        = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, 1)), vsize);
       DECL_INITIAL (TREE_VEC_ELT (t, 2))
-       = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, 2)), vkind);
+       = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, 2)), valign);
+      DECL_INITIAL (TREE_VEC_ELT (t, 3))
+       = build_constructor (TREE_TYPE (TREE_VEC_ELT (t, 3)), vkind);
       if (!TREE_STATIC (TREE_VEC_ELT (t, 1)))
        {
          gimple_seq initlist = NULL;
diff --git libgomp/libgomp_g.h libgomp/libgomp_g.h
index 394f3a8..06d7750 100644
--- libgomp/libgomp_g.h
+++ libgomp/libgomp_g.h
@@ -217,6 +217,7 @@ extern void GOMP_teams (unsigned int, unsigned int);
 /* oacc-parallel.c */
 
 extern void GOACC_parallel (int, void (*) (void *), const void *,
-                           size_t, void **, size_t *, unsigned char *);
+                           size_t, void **, size_t *, unsigned char *,
+                           unsigned char *);
 
 #endif /* LIBGOMP_G_H */
diff --git libgomp/oacc-parallel.c libgomp/oacc-parallel.c
index 730b83b..6cc04e1 100644
--- libgomp/oacc-parallel.c
+++ libgomp/oacc-parallel.c
@@ -25,12 +25,24 @@
 
 /* This file handles the OpenACC parallel construct.  */
 
+#include "libgomp.h"
 #include "libgomp_g.h"
 
 void
 GOACC_parallel (int device, void (*fn) (void *), const void *openmp_target,
                size_t mapnum, void **hostaddrs, size_t *sizes,
-               unsigned char *kinds)
+               unsigned char *alignments, unsigned char *kinds)
 {
+  size_t i;
+
+  for (i = 0; i < mapnum; ++i)
+    {
+      if (kinds[i] > 4)
+       gomp_fatal ("memory mapping kind %x for %zd is not yet supported",
+                   kinds[i], i);
+
+      kinds[i] |= alignments[i] << 3;
+    }
+
   GOMP_target (device, fn, openmp_target, mapnum, hostaddrs, sizes, kinds);
 }
diff --git libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c 
libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c
index b9bdffa..142c394 100644
--- libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c
+++ libgomp/testsuite/libgomp.oacc-c/goacc_parallel.c
@@ -17,7 +17,8 @@ f (void *data)
 int main(void)
 {
   i = -1;
-  GOACC_parallel (0, f, (const void *) 0, 0, (void *) 0, (void *) 0, (void *) 
0);
+  GOACC_parallel (0, f, (const void *) 0,
+                 0, (void *) 0, (void *) 0, (void *) 0, (void *) 0);
   if (i != 42)
     abort ();
 


> > --- gcc/omp-low.c.jj        2013-09-05 09:19:03.000000000 +0200
> > +++ gcc/omp-low.c   2013-09-05 17:11:14.693638660 +0200
> > @@ -9342,6 +9349,11 @@ lower_omp_target (gimple_stmt_iterator *
> |         unsigned char tkind = 0;
> |         switch (OMP_CLAUSE_CODE (c))
> |           {
> |           case OMP_CLAUSE_MAP:
> |             tkind = OMP_CLAUSE_MAP_KIND (c);
> |             break;
> |           case OMP_CLAUSE_TO:
> |             tkind = OMP_CLAUSE_MAP_TO;
> |             break;
> |           case OMP_CLAUSE_FROM:
> |             tkind = OMP_CLAUSE_MAP_FROM;
> |             break;
> >           default:
> >             gcc_unreachable ();
> >           }
> > +       unsigned int talign = TYPE_ALIGN_UNIT (TREE_TYPE (ovar));
> > +       if (DECL_P (ovar) && DECL_ALIGN_UNIT (ovar) > talign)
> > +         talign = DECL_ALIGN_UNIT (ovar);
> > +       talign = ceil_log2 (talign);
> > +       tkind |= talign << 3;
> >         CONSTRUCTOR_APPEND_ELT (vkind, purpose,
> >                                 build_int_cst (unsigned_char_type_node,
> >                                                tkind));
> 
> The use of OMP_CLAUSE_MAP_* on the generating and integer numerals on the
> receiving (libgomp) side is a bit unesthetic, likewise for the hard-coded
> 3 in the bit shift.  What would be the standard GCC way of sharing a
> description of the tkind layout between gcc/omp-low.c and
> libgomp/target.c?  Are we allowed to #include (a new header file)
> libgomp/target.h from gcc/omp-low.c?


> To avoid silent breakage should alignments bigger than 2 GiB be allowed
> in a distant future, would a check like the following be appropriate?
> 
> --- gcc/omp-low.c
> +++ gcc/omp-low.c
> @@ -10378,6 +10383,11 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, 
> omp_context *ctx)
>           unsigned int talign = TYPE_ALIGN_UNIT (TREE_TYPE (ovar));
>           if (DECL_P (ovar) && DECL_ALIGN_UNIT (ovar) > talign)
>             talign = DECL_ALIGN_UNIT (ovar);
> +         const unsigned int talign_max
> +           = 1 << ((1 << (BITS_PER_UNIT - 3)) - 1);
> +         if (talign > talign_max)
> +           sorry ("can't encode alignment of %u bytes, which is bigger than "
> +                  "%u bytes", talign, talign_max);
>           talign = ceil_log2 (talign);
>           tkind |= talign << 3;
>           CONSTRUCTOR_APPEND_ELT (vkind, purpose,


Grüße,
 Thomas

Attachment: pgpIDmVHSGrIt.pgp
Description: PGP signature

Reply via email to