On 01/07/2020 3:28 pm, Tom de Vries wrote:
So, I think gcc needs a copy of (some of) the
gcc/testsuite/gcc.dg/ia64-sync-*.c tests for effective target
sync_char_short.
However, since this patch only adds partial support, we cannot enable
sync_char_short for nvptx yet. So, if you stick to partial support, you
should add a char/short copy of ia64-sync-3.c to gcc.target/nvptx (which
ideally could be an include of a generic test-case that is active for
sync_char_short only, with mention that it can be removed once
sync_char_short is enabled for nvptx).
I have added gcc.target/nvptx/sync.c, which is a version of ia64-sync-3.c
extended to test chars and shorts too. I kept the original int and long tests
because sync_int_long isn't indicated as being supported on nvptx either.
I looked at the implementation, and it looks ok to me, though I think we
need to make explicit in a comment what the assumptions are:
- that we have read and write access to the entire word, and
- that the word is not volatile.
I've added some extra comments in the implementation. Like I said previously,
the loop accounts for the larger word being volatile.
As for the oacc test-case, you could add the __int128 bit, perhaps along
the lines of how things are done in
libgomp/testsuite/libgomp.c++/target-8.C ?
I've added an extra test for __int128 types in my libgomp testcase that runs if
128-bit types are supported.
I've tested that there are no regressions with the patch on standalone nvptx,
and that the new reduction-16.c testcase passes with both nvptx and AMD GCN
offloading.
Is this version okay for master and og10?
Thanks
Kwok
commit 4661232905d55a4bc1354cb717b2e5d950d215af
Author: Kwok Cheung Yeung <k...@codesourcery.com>
Date: Thu Jul 16 12:00:24 2020 -0700
nvptx: Add support for subword compare-and-swap
This adds support for __sync_val_compare_and_swap and
__sync_bool_compare_and_swap for 1-byte and 2-byte long
values, which are not natively supported on nvptx.
2020-07-16 Kwok Cheung Yeung <k...@codesourcery.com>
libgcc/
* config/nvptx/atomic.c: New.
* config/nvptx/t-nvptx (LIB2ADD): Add atomic.c.
gcc/testsuite/
* gcc.target/nvptx/sync.c: New.
libgomp/
* testsuite/libgomp.c-c++-common/reduction-16.c: New.
diff --git a/gcc/testsuite/gcc.target/nvptx/sync.c b/gcc/testsuite/gcc.target/nvptx/sync.c
new file mode 100644
index 0000000..a573824
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/sync.c
@@ -0,0 +1,143 @@
+/* { dg-do run } */
+
+/* Test basic functionality of the intrinsics. */
+
+/* This is a copy of gcc.dg/ia64-sync-3.c, extended to test 8-bit and 16-bit
+ values as well. */
+
+/* Ideally this test should require sync_char_short and sync_int_long, but we
+ only support a subset at the moment. */
+
+__extension__ typedef __SIZE_TYPE__ size_t;
+
+extern void abort (void);
+extern void *memcpy (void *, const void *, size_t);
+extern int memcmp (const void *, const void *, size_t);
+
+static char AC[4];
+static char init_qi[4] = { -30,-30,-50,-50 };
+static char test_qi[4] = { -115,-115,25,25 };
+/* Check 1-byte compare-and-swap on AC (filled from init_qi by main); abort on any unexpected result. */
+static void
+do_qi (void)
+{
+ if (__sync_val_compare_and_swap(AC+0, -30, -115) != -30) /* AC[0] == -30: swapped, old value returned */
+ abort ();
+ if (__sync_val_compare_and_swap(AC+0, -30, -115) != -115) /* AC[0] is now -115: no swap, current value returned */
+ abort ();
+ if (__sync_bool_compare_and_swap(AC+1, -30, -115) != 1) /* AC[1] == -30: swap succeeds */
+ abort ();
+ if (__sync_bool_compare_and_swap(AC+1, -30, -115) != 0) /* AC[1] is now -115: swap fails */
+ abort ();
+ /* From here the expected value is read from the target element itself, so each swap should succeed. */
+ if (__sync_val_compare_and_swap(AC+2, AC[2], 25) != -50) /* returns the initial -50 */
+ abort ();
+ if (__sync_val_compare_and_swap(AC+2, AC[2], 25) != 25) /* element already holds 25 */
+ abort ();
+ if (__sync_bool_compare_and_swap(AC+3, AC[3], 25) != 1)
+ abort ();
+ if (__sync_bool_compare_and_swap(AC+3, AC[3], 25) != 1)
+ abort ();
+}
+
+static short AS[4];
+static short init_hi[4] = { -30,-30,-50,-50 };
+static short test_hi[4] = { -115,-115,25,25 };
+/* Check 2-byte compare-and-swap on AS (filled from init_hi by main); abort on any unexpected result. */
+static void
+do_hi (void)
+{
+ if (__sync_val_compare_and_swap(AS+0, -30, -115) != -30) /* AS[0] == -30: swapped, old value returned */
+ abort ();
+ if (__sync_val_compare_and_swap(AS+0, -30, -115) != -115) /* AS[0] is now -115: no swap, current value returned */
+ abort ();
+ if (__sync_bool_compare_and_swap(AS+1, -30, -115) != 1) /* AS[1] == -30: swap succeeds */
+ abort ();
+ if (__sync_bool_compare_and_swap(AS+1, -30, -115) != 0) /* AS[1] is now -115: swap fails */
+ abort ();
+ /* From here the expected value is read from the target element itself, so each swap should succeed. */
+ if (__sync_val_compare_and_swap(AS+2, AS[2], 25) != -50) /* returns the initial -50 */
+ abort ();
+ if (__sync_val_compare_and_swap(AS+2, AS[2], 25) != 25) /* element already holds 25 */
+ abort ();
+ if (__sync_bool_compare_and_swap(AS+3, AS[3], 25) != 1)
+ abort ();
+ if (__sync_bool_compare_and_swap(AS+3, AS[3], 25) != 1)
+ abort ();
+}
+
+static int AI[4];
+static int init_si[4] = { -30,-30,-50,-50 };
+static int test_si[4] = { -115,-115,25,25 };
+/* Check int compare-and-swap on AI (filled from init_si by main); abort on any unexpected result. */
+static void
+do_si (void)
+{
+ if (__sync_val_compare_and_swap(AI+0, -30, -115) != -30) /* AI[0] == -30: swapped, old value returned */
+ abort ();
+ if (__sync_val_compare_and_swap(AI+0, -30, -115) != -115) /* AI[0] is now -115: no swap, current value returned */
+ abort ();
+ if (__sync_bool_compare_and_swap(AI+1, -30, -115) != 1) /* AI[1] == -30: swap succeeds */
+ abort ();
+ if (__sync_bool_compare_and_swap(AI+1, -30, -115) != 0) /* AI[1] is now -115: swap fails */
+ abort ();
+ /* From here the expected value is read from the target element itself, so each swap should succeed. */
+ if (__sync_val_compare_and_swap(AI+2, AI[2], 25) != -50) /* returns the initial -50 */
+ abort ();
+ if (__sync_val_compare_and_swap(AI+2, AI[2], 25) != 25) /* element already holds 25 */
+ abort ();
+ if (__sync_bool_compare_and_swap(AI+3, AI[3], 25) != 1)
+ abort ();
+ if (__sync_bool_compare_and_swap(AI+3, AI[3], 25) != 1)
+ abort ();
+}
+
+static long AL[4];
+static long init_di[4] = { -30,-30,-50,-50 };
+static long test_di[4] = { -115,-115,25,25 };
+/* Check long compare-and-swap on AL (filled from init_di by main); abort on any unexpected result. */
+static void
+do_di (void)
+{
+ if (__sync_val_compare_and_swap(AL+0, -30, -115) != -30) /* AL[0] == -30: swapped, old value returned */
+ abort ();
+ if (__sync_val_compare_and_swap(AL+0, -30, -115) != -115) /* AL[0] is now -115: no swap, current value returned */
+ abort ();
+ if (__sync_bool_compare_and_swap(AL+1, -30, -115) != 1) /* AL[1] == -30: swap succeeds */
+ abort ();
+ if (__sync_bool_compare_and_swap(AL+1, -30, -115) != 0) /* AL[1] is now -115: swap fails */
+ abort ();
+ /* From here the expected value is read from the target element itself, so each swap should succeed. */
+ if (__sync_val_compare_and_swap(AL+2, AL[2], 25) != -50) /* returns the initial -50 */
+ abort ();
+ if (__sync_val_compare_and_swap(AL+2, AL[2], 25) != 25) /* element already holds 25 */
+ abort ();
+ if (__sync_bool_compare_and_swap(AL+3, AL[3], 25) != 1)
+ abort ();
+ if (__sync_bool_compare_and_swap(AL+3, AL[3], 25) != 1)
+ abort ();
+}
+
+int main() /* Test driver: returns 0 if every check passes, aborts otherwise. */
+{
+ memcpy(AC, init_qi, sizeof(init_qi)); /* load each working array with its initial image */
+ memcpy(AS, init_hi, sizeof(init_hi));
+ memcpy(AI, init_si, sizeof(init_si));
+ memcpy(AL, init_di, sizeof(init_di));
+ /* Run the compare-and-swap checks for each element width. */
+ do_qi ();
+ do_hi ();
+ do_si ();
+ do_di ();
+ /* Every working array must now match its expected test_* image. */
+ if (memcmp (AC, test_qi, sizeof(test_qi)))
+ abort ();
+ if (memcmp (AS, test_hi, sizeof(test_hi)))
+ abort ();
+ if (memcmp (AI, test_si, sizeof(test_si)))
+ abort ();
+ if (memcmp (AL, test_di, sizeof(test_di)))
+ abort ();
+
+ return 0;
+}
diff --git a/libgcc/config/nvptx/atomic.c b/libgcc/config/nvptx/atomic.c
new file mode 100644
index 0000000..25a34fb
--- /dev/null
+++ b/libgcc/config/nvptx/atomic.c
@@ -0,0 +1,70 @@
+/* NVPTX atomic operations
+ Copyright (C) 2020 Free Software Foundation, Inc.
+ Contributed by Mentor Graphics.
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdbool.h>
+
+/* Implement __sync_val_compare_and_swap and __sync_bool_compare_and_swap
+ for 1 and 2-byte values (which are not natively supported) in terms of
+ __sync_val_compare_and_swap for 4-byte values (which is supported).
+ This assumes that the contents of the word surrounding the subword
+ value that we are interested in are accessible as well (which should
+ normally be the case). */
+
+#define __SYNC_SUBWORD_COMPARE_AND_SWAP(TYPE, SIZE) \
+/* Value form: returns the subword's contents from before the operation.  */ \
+TYPE \
+__sync_val_compare_and_swap_##SIZE (TYPE *ptr, TYPE oldval, TYPE newval) \
+{ /* WORDPTR is PTR rounded down to the 4-byte word that contains it.  */ \
+  unsigned int *wordptr = (unsigned int *)((__UINTPTR_TYPE__ ) ptr & ~3UL); \
+  int shift = ((__UINTPTR_TYPE__ ) ptr & 3UL) * 8; /* bit offset in word */ \
+  unsigned int valmask = (1 << (SIZE * 8)) - 1; /* SIZE bytes of ones */ \
+  unsigned int wordmask = ~(valmask << shift); /* keeps the other bytes */ \
+  unsigned int oldword = *wordptr; /* initial snapshot of the word */ \
+  for (;;) \
+    { \
+      TYPE prevval = (oldword >> shift) & valmask; \
+      /* Give up without writing if the subword in our snapshot differs */ \
+      /* from the expected value OLDVAL; report the value actually seen.  */ \
+      if (__builtin_expect (prevval != oldval, 0)) \
+        return prevval; \
+      unsigned int newword = oldword & wordmask; \
+      newword |= ((unsigned int) newval) << shift; \
+      unsigned int prevword \
+        = __sync_val_compare_and_swap_4 (wordptr, oldword, newword); \
+      /* The word-sized swap succeeds only if *WORDPTR still equals our */ \
+      /* snapshot.  Otherwise part of the word changed since it was read, */ \
+      /* so retry using the contents the failed swap reported.  */ \
+      if (__builtin_expect (prevword == oldword, 1)) \
+        return oldval; \
+      oldword = prevword; \
+    } \
+} \
+/* Boolean form: true iff the subword equalled OLDVAL and was replaced.  */ \
+bool \
+__sync_bool_compare_and_swap_##SIZE (TYPE *ptr, TYPE oldval, TYPE newval) \
+{ \
+  return __sync_val_compare_and_swap_##SIZE (ptr, oldval, newval) == oldval; \
+}
+/* Instantiate the 1-byte and 2-byte variants.  */
+__SYNC_SUBWORD_COMPARE_AND_SWAP (unsigned char, 1)
+__SYNC_SUBWORD_COMPARE_AND_SWAP (unsigned short, 2)
diff --git a/libgcc/config/nvptx/t-nvptx b/libgcc/config/nvptx/t-nvptx
index c4d20c9..ede0bf0 100644
--- a/libgcc/config/nvptx/t-nvptx
+++ b/libgcc/config/nvptx/t-nvptx
@@ -1,5 +1,6 @@
LIB2ADD=$(srcdir)/config/nvptx/reduction.c \
- $(srcdir)/config/nvptx/mgomp.c
+ $(srcdir)/config/nvptx/mgomp.c \
+ $(srcdir)/config/nvptx/atomic.c
LIB2ADDEH=
LIB2FUNCS_EXCLUDE=__main
diff --git a/libgomp/testsuite/libgomp.c-c++-common/reduction-16.c b/libgomp/testsuite/libgomp.c-c++-common/reduction-16.c
new file mode 100644
index 0000000..d0e82b0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/reduction-16.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define N 512
+
+#define GENERATE_TEST(T) /* Defines int test_T (void): 0 means success.  */ \
+int test_##T (void) \
+{ \
+  T a[N], res = 0; \
+  /* a[] alternates 0, 1, 0, 1, ... so it holds both truth values.  */ \
+  for (int i = 0; i < N; ++i) \
+    a[i] = i & 1; \
+  /* Logical-OR reduction over a[], starting from res == 0.  */ \
+_Pragma("omp target teams distribute reduction(||:res) defaultmap(tofrom:scalar)") \
+  for (int i = 0; i < N; ++i) \
+    res = res || a[i]; \
+ \
+  /* res should be non-zero.  */ \
+  if (!res) \
+    return 1; \
+  /* Logical-AND reduction, starting from the non-zero res left above.  */ \
+_Pragma("omp target teams distribute reduction(&&:res) defaultmap(tofrom:scalar)") \
+  for (int i = 0; i < N; ++i) \
+    res = res && a[i]; \
+ \
+  /* res should be zero, which makes the return value 0 on success.  */ \
+  return res; \
+}
+
+GENERATE_TEST(char) /* each use defines test_<type> () for that element type */
+GENERATE_TEST(short)
+GENERATE_TEST(int)
+GENERATE_TEST(long)
+#ifdef __SIZEOF_INT128__ /* the 128-bit variant is built only when this macro is defined */
+GENERATE_TEST(__int128)
+#endif
+/* Run every generated test; abort as soon as one returns non-zero (failure). */
+int main(void)
+{
+ if (test_char ())
+ abort ();
+ if (test_short ())
+ abort ();
+ if (test_int ())
+ abort ();
+ if (test_long ())
+ abort ();
+#ifdef __SIZEOF_INT128__
+ if (test___int128 ())
+ abort ();
+#endif
+}