Hi!

On Fri, 10 Apr 2015 09:20:30 -0700, Cesar Philippidis 
<cesar_philippi...@mentor.com> wrote:
> This patch implements the various atomic __sync_* functions inside
> libgcc. The original motivation was the calls to
> __sync_val_compare_and_swap_1 made by OpenACC kernels involving boolean
> expressions. These calls fail because nvptx doesn't support byte-sized
> atomic compare-and-swap, and boolean reductions update a bool, which is
> an unsigned char.
> 
> Bernd suggested porting ARM's linux-atomic.c to nvptx. I tried that, but
> that particular port assumed that pointers were 4 bytes, so that caused
> some problems. I ended up using linux-atomic.c from nios2, because it
> didn't have that problem with pointers.
> 
> There are some caveats with this patch:
> 
>  * It only supports little endian nvptx targets.
> 
>  * My __kernel_cmpxchg wrapper is just an inline assembly function. It
>    could, and probably should, be a nvptx specific built-in.
> 
>  * I'm not sure if I should be using a membar.cta or global, so I
>    chose cta.

Committed to gomp-4_0-branch in r223177:

commit bf2d0c71d1ad06eb4e17dbb4fcc0951fd8920713
Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Wed May 13 20:17:52 2015 +0000

    nvptx libgcc atomic routines
    
        libgcc/
        * config/nvptx/atomic.c: New file.
        * config/nvptx/t-nvptx (LIB2ADD): Include it.
    
    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@223177 138bc75d-0d04-0410-961f-82ee72b054a4
---
 libgcc/ChangeLog.gomp        |   5 +
 libgcc/config/nvptx/atomic.c | 279 +++++++++++++++++++++++++++++++++++++++++++
 libgcc/config/nvptx/t-nvptx  |   3 +-
 3 files changed, 286 insertions(+), 1 deletion(-)

diff --git libgcc/ChangeLog.gomp libgcc/ChangeLog.gomp
index d872575..71ea38e 100644
--- libgcc/ChangeLog.gomp
+++ libgcc/ChangeLog.gomp
@@ -1,3 +1,8 @@
+2015-05-13  Cesar Philippidis  <ce...@codesourcery.com>
+
+       * config/nvptx/atomic.c: New file.
+       * config/nvptx/t-nvptx (LIB2ADD): Include it.
+
 2015-05-13  Bernd Schmidt  <ber...@codesourcery.com>
            Cesar Philippidis  <ce...@codesourcery.com>
 
diff --git libgcc/config/nvptx/atomic.c libgcc/config/nvptx/atomic.c
new file mode 100644
index 0000000..deb1750
--- /dev/null
+++ libgcc/config/nvptx/atomic.c
@@ -0,0 +1,279 @@
+/* Atomic operations for PTX.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   Contributed by Mentor Graphics.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* Kernel helper for compare-and-exchange.  atom.cas hands back the word
+   that was in *PTR, so report 0 when it matched OLDVAL (swap done) and
+   nonzero otherwise; the callers below retry while this is nonzero.  */
+static int
+nvidia_cas (int oldval, int newval, int *ptr)
+{
+  int ret;
+  asm volatile ("atom.cas.b32 %0, [%1], %2, %3;" : "=r"(ret) : "r"(ptr),
+               "r"(oldval), "r"(newval) : "memory");
+  return ret != oldval;
+}
+
+#define __kernel_cmpxchg (nvidia_cas)
+
+/* Kernel helper for memory barrier; "memory" also fences the compiler.  */
+static void
+__threadfence_block (void)
+{
+  asm volatile ("membar.cta;" : : : "memory");
+}
+
+#define __kernel_dmb (__threadfence_block)
+
+#define HIDDEN
+
+/* Warning: this assumes that all nvptx targets are little endian.  */
+
+#define INVERT_MASK_1 0
+#define INVERT_MASK_2 0
+
+#define MASK_1 0xffu
+#define MASK_2 0xffffu
+
+#define FETCH_AND_OP_WORD(OP, PFX_OP, INF_OP)                          \
+  int HIDDEN                                                           \
+  __sync_fetch_and_##OP##_4 (int *ptr, int val)                        \
+  {                                                                    \
+    int failure, tmp;                                                  \
+    /* Retry until __kernel_cmpxchg reports 0.  */                     \
+    do {                                                               \
+      tmp = *ptr;                                                      \
+      failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr);  \
+    } while (failure != 0);                                            \
+                                                                       \
+    return tmp;                                                        \
+  }
+
+FETCH_AND_OP_WORD (add,   , +)
+FETCH_AND_OP_WORD (sub,   , -)
+FETCH_AND_OP_WORD (or,    , |)
+FETCH_AND_OP_WORD (and,   , &)
+FETCH_AND_OP_WORD (xor,   , ^)
+FETCH_AND_OP_WORD (nand, ~, &)
+
+#define NAME_oldval(OP, WIDTH) __sync_fetch_and_##OP##_##WIDTH
+#define NAME_newval(OP, WIDTH) __sync_##OP##_and_fetch_##WIDTH
+
+/* Implement both __sync_<op>_and_fetch and __sync_fetch_and_<op> for
+   subword-sized quantities.  */
+
+#define SUBWORD_SYNC_OP(OP, PFX_OP, INF_OP, TYPE, WIDTH, RETURN)       \
+  TYPE HIDDEN                                                          \
+  NAME##_##RETURN (OP, WIDTH) (TYPE *ptr, TYPE val)                    \
+  {                                                                    \
+    int *wordptr = (int *) ((unsigned long) ptr & ~3);                 \
+    unsigned int mask, shift, oldval, newval;                          \
+    int failure;                                                       \
+    /* Bit offset and mask of the subword within its aligned word.  */ \
+    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;    \
+    mask = MASK_##WIDTH << shift;                                      \
+    /* Retry until __kernel_cmpxchg reports 0.  */                     \
+    do {                                                               \
+      oldval = *wordptr;                                               \
+      newval = ((PFX_OP (((oldval & mask) >> shift)                    \
+                        INF_OP (unsigned int) val)) << shift) & mask;  \
+      newval |= oldval & ~mask;                                        \
+      failure = __kernel_cmpxchg (oldval, newval, wordptr);            \
+    } while (failure != 0);                                            \
+                                                                       \
+    return (RETURN & mask) >> shift;                                   \
+  }
+
+SUBWORD_SYNC_OP (add,   , +, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (sub,   , -, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (or,    , |, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (and,   , &, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (xor,   , ^, unsigned short, 2, oldval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, oldval)
+
+SUBWORD_SYNC_OP (add,   , +, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (sub,   , -, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (or,    , |, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (and,   , &, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (xor,   , ^, unsigned char, 1, oldval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, oldval)
+
+#define OP_AND_FETCH_WORD(OP, PFX_OP, INF_OP)                          \
+  int HIDDEN                                                           \
+  __sync_##OP##_and_fetch_4 (int *ptr, int val)                        \
+  {                                                                    \
+    int tmp, failure;                                                  \
+    /* Retry until __kernel_cmpxchg reports 0.  */                     \
+    do {                                                               \
+      tmp = *ptr;                                                      \
+      failure = __kernel_cmpxchg (tmp, PFX_OP (tmp INF_OP val), ptr);  \
+    } while (failure != 0);                                            \
+                                                                       \
+    return PFX_OP (tmp INF_OP val);                                    \
+  }
+
+OP_AND_FETCH_WORD (add,   , +)
+OP_AND_FETCH_WORD (sub,   , -)
+OP_AND_FETCH_WORD (or,    , |)
+OP_AND_FETCH_WORD (and,   , &)
+OP_AND_FETCH_WORD (xor,   , ^)
+OP_AND_FETCH_WORD (nand, ~, &)
+
+SUBWORD_SYNC_OP (add,   , +, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (sub,   , -, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (or,    , |, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (and,   , &, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (xor,   , ^, unsigned short, 2, newval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned short, 2, newval)
+
+SUBWORD_SYNC_OP (add,   , +, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (sub,   , -, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (or,    , |, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (and,   , &, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (xor,   , ^, unsigned char, 1, newval)
+SUBWORD_SYNC_OP (nand, ~, &, unsigned char, 1, newval)
+
+int HIDDEN
+__sync_val_compare_and_swap_4 (int *ptr, int oldval, int newval)
+{
+  int actual_oldval, fail;
+  /* Loop until *PTR mismatches OLDVAL or __kernel_cmpxchg returns 0.  */
+  while (1)
+    {
+      actual_oldval = *ptr;
+      /* Mismatch: report what is actually stored, without writing.  */
+      if (oldval != actual_oldval)
+       return actual_oldval;
+
+      fail = __kernel_cmpxchg (actual_oldval, newval, ptr);
+
+      if (!fail)
+       return oldval;
+    }
+}
+
+#define SUBWORD_VAL_CAS(TYPE, WIDTH)                                   \
+  TYPE HIDDEN                                                          \
+  __sync_val_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval,         \
+                                      TYPE newval)                     \
+  {                                                                    \
+    int *wordptr = (int *)((unsigned long) ptr & ~3), fail;            \
+    unsigned int mask, shift, actual_oldval, actual_newval;            \
+    /* Bit offset and mask of the subword within its aligned word.  */ \
+    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;    \
+    mask = MASK_##WIDTH << shift;                                      \
+                                                                       \
+    while (1)                                                          \
+      {                                                                \
+       actual_oldval = *wordptr;                                       \
+                                                                       \
+       if (((actual_oldval & mask) >> shift) != (unsigned int) oldval) \
+          return (actual_oldval & mask) >> shift;                      \
+                                                                       \
+       actual_newval = (actual_oldval & ~mask)                         \
+                       | (((unsigned int) newval << shift) & mask);    \
+                                                                       \
+       fail = __kernel_cmpxchg (actual_oldval, actual_newval,          \
+                                wordptr);                              \
+                                                                       \
+       if (!fail)                                                      \
+         return oldval;                                                \
+      }                                                                \
+  }
+
+SUBWORD_VAL_CAS (unsigned short, 2)
+SUBWORD_VAL_CAS (unsigned char,  1)
+
+typedef unsigned char bool;
+
+bool HIDDEN
+__sync_bool_compare_and_swap_4 (int *ptr, int oldval, int newval)
+{
+  int failure = __kernel_cmpxchg (oldval, newval, ptr);
+  return (failure == 0);  /* Nonzero when __kernel_cmpxchg returned 0.  */
+}
+
+#define SUBWORD_BOOL_CAS(TYPE, WIDTH)                                  \
+  bool HIDDEN                                                          \
+  __sync_bool_compare_and_swap_##WIDTH (TYPE *ptr, TYPE oldval,        \
+                                       TYPE newval)                    \
+  {                                                                    \
+    TYPE actual_oldval                                                 \
+      = __sync_val_compare_and_swap_##WIDTH (ptr, oldval, newval);     \
+    return (oldval == actual_oldval);  /* Nonzero iff swapped.  */     \
+  }
+
+SUBWORD_BOOL_CAS (unsigned short, 2)
+SUBWORD_BOOL_CAS (unsigned char,  1)
+
+int HIDDEN
+__sync_lock_test_and_set_4 (int *ptr, int val)
+{
+  int failure, oldval;
+  /* Try to swap in VAL until __kernel_cmpxchg returns 0.  */
+  do {
+    oldval = *ptr;
+    failure = __kernel_cmpxchg (oldval, val, ptr);
+  } while (failure != 0);
+
+  return oldval;
+}
+
+#define SUBWORD_TEST_AND_SET(TYPE, WIDTH)                              \
+  TYPE HIDDEN                                                          \
+  __sync_lock_test_and_set_##WIDTH (TYPE *ptr, TYPE val)               \
+  {                                                                    \
+    int failure;                                                       \
+    unsigned int oldval, newval, shift, mask;                          \
+    int *wordptr = (int *) ((unsigned long) ptr & ~3);                 \
+    /* Bit offset and mask of the subword within its aligned word.  */ \
+    shift = (((unsigned long) ptr & 3) << 3) ^ INVERT_MASK_##WIDTH;    \
+    mask = MASK_##WIDTH << shift;                                      \
+    /* Retry until __kernel_cmpxchg reports 0.  */                     \
+    do {                                                               \
+      oldval = *wordptr;                                               \
+      newval = (oldval & ~mask)                                        \
+              | (((unsigned int) val << shift) & mask);                \
+      failure = __kernel_cmpxchg (oldval, newval, wordptr);            \
+    } while (failure != 0);                                            \
+                                                                       \
+    return (oldval & mask) >> shift;                                   \
+  }
+
+SUBWORD_TEST_AND_SET (unsigned short, 2)
+SUBWORD_TEST_AND_SET (unsigned char,  1)
+
+#define SYNC_LOCK_RELEASE(TYPE, WIDTH)                                 \
+  void HIDDEN                                                          \
+  __sync_lock_release_##WIDTH (TYPE *ptr)                              \
+  {                                                                    \
+    /* All writes before this point must be seen before we release     \
+       the lock itself.  */                                            \
+    __kernel_dmb ();                                                   \
+    *ptr = 0;  /* Plain store of 0 releases the lock.  */              \
+  }
+
+SYNC_LOCK_RELEASE (int,   4)
+SYNC_LOCK_RELEASE (short, 2)
+SYNC_LOCK_RELEASE (char,  1)
diff --git libgcc/config/nvptx/t-nvptx libgcc/config/nvptx/t-nvptx
index a9e56a9..8fae397 100644
--- libgcc/config/nvptx/t-nvptx
+++ libgcc/config/nvptx/t-nvptx
@@ -1,6 +1,7 @@
 LIB2ADD=$(srcdir)/config/nvptx/malloc.asm \
        $(srcdir)/config/nvptx/free.asm \
-       $(srcdir)/config/nvptx/realloc.c
+       $(srcdir)/config/nvptx/realloc.c \
+       $(srcdir)/config/nvptx/atomic.c
 
 LIB2ADDEH=
 LIB2FUNCS_EXCLUDE=__main


Grüße,
 Thomas

Attachment: pgpRYi8uzkr1o.pgp
Description: PGP signature

Reply via email to