Re: [PATCH], Add PowerPC ISA 3.0 Atomic Memory Operation functions

Michael Meissner Fri, 06 Oct 2017 11:31:01 -0700

On Fri, Oct 06, 2017 at 01:25:33PM -0500, Segher Boessenkool wrote:
> Hi!
> 
> On Fri, Oct 06, 2017 at 02:03:43PM -0400, Michael Meissner wrote:
> > > Do you really need the mr insns?  Can't you express that in the
> > > arguments?  Perhaps using a union of __int128 with something that
> > > is two long ints?
> > 
> > My first attempt resulted in the compiler doing move directs to form the
> > __int128 in the vector unit and then move directs back.  So, I figured 
> > having
> > two mr's was a simple price to pay.
> > 
> > I do have thoughts to replace the two with a built-in (but keep amo.h and 
> > the
> > names), and we can probably eliminate some of the moves.
> 
> It's so ugly, even if it doesn't cost much ;-)
> 
> But don't worry about it, certainly not if the plan is to expand it
> as a builtin later.


Here is the revised amo.h.  I tested the two test files amo1.c and amo2.c, and
they both compile.  It is interesting: using %P0 results in fewer addi's than
the older one (there were redundant addi's in passing the address).  Can I
check it in?

[gcc]
2017-10-06  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        * config/rs6000/amo.h: New include file to provide ISA 3.0 atomic
        memory operation instruction support.
        * config.gcc (powerpc*-*-*): Include amo.h as an extra header.
        (rs6000-ibm-aix[789]*): Likewise.
        * doc/extend.texi (PowerPC Atomic Memory Operation Functions):
        Document new functions.

[gcc/testsuite]
2017-10-06  Michael Meissner  <meiss...@linux.vnet.ibm.com>

        * gcc.target/powerpc/amo1.c: New test.
        * gcc.target/powerpc/amo2.c: Likewise.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797

Index: gcc/config/rs6000/amo.h
===================================================================
--- gcc/config/rs6000/amo.h     (revision 0)
+++ gcc/config/rs6000/amo.h     (revision 0)
@@ -0,0 +1,152 @@
+ /* Power ISA 3.0 atomic memory operation include file.
+    Copyright (C) 2017 Free Software Foundation, Inc.
+    Contributed by Michael Meissner <meiss...@linux.vnet.ibm.com>.
+
+    This file is part of GCC.
+
+    GCC is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published
+    by the Free Software Foundation; either version 3, or (at your
+    option) any later version.
+
+    GCC is distributed in the hope that it will be useful, but WITHOUT
+    ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+    or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+    License for more details.
+
+    Under Section 7 of GPL version 3, you are granted additional
+    permissions described in the GCC Runtime Library Exception, version
+    3.1, as published by the Free Software Foundation.
+
+    You should have received a copy of the GNU General Public License and
+    a copy of the GCC Runtime Library Exception along with this program;
+    see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+    <http://www.gnu.org/licenses/>.  */
+
+ #ifndef _AMO_H
+ #define _AMO_H
+
+ #if !defined(_ARCH_PWR9) || !defined(_ARCH_PPC64)
+ #error "The atomic memory operations require Power 64-bit ISA 3.0"
+
+ #else
+ #include <stdint.h>
+
+ /* Enumeration of the LWAT/LDAT sub-opcodes.  */
+ enum _AMO_LD {
+   _AMO_LD_ADD         = 0x00,         /* Fetch and Add.  */
+   _AMO_LD_XOR         = 0x01,         /* Fetch and Xor.  */
+   _AMO_LD_IOR         = 0x02,         /* Fetch and Ior.  */
+   _AMO_LD_AND         = 0x03,         /* Fetch and And.  */
+   _AMO_LD_UMAX                = 0x04,         /* Fetch and Unsigned Maximum.  */
+   _AMO_LD_SMAX                = 0x05,         /* Fetch and Signed Maximum.  */
+   _AMO_LD_UMIN                = 0x06,         /* Fetch and Unsigned Minimum.  */
+   _AMO_LD_SMIN                = 0x07,         /* Fetch and Signed Minimum.  */
+   _AMO_LD_SWAP                = 0x08,         /* Swap.  */
+   _AMO_LD_CS_NE               = 0x10,         /* Compare and Swap Not Equal.  */
+   _AMO_LD_INC_BOUNDED = 0x18,         /* Fetch and Increment Bounded.  */
+   _AMO_LD_INC_EQUAL   = 0x19,         /* Fetch and Increment Equal.  */
+   _AMO_LD_DEC_BOUNDED = 0x1A          /* Fetch and Decrement Bounded.  */
+ };
+
+ /* Implementation of the simple LWAT/LDAT operations that take one register and
+    modify one word or double-word of memory and return the value that was
+    previously in the memory location.
+
+    The LWAT/LDAT opcode requires the address to be a single register, and that
+    points to a suitably aligned memory location.  Asm volatile is used to
+    prevent the optimizer from moving the operation.  */
+
+ #define _AMO_LD_SIMPLE(NAME, TYPE, OPCODE, FC)                        \
+ static __inline__ TYPE                                                \
+ NAME (TYPE *_PTR, TYPE _VALUE)                                        \
+ {                                                                     \
+   unsigned __int128 _TMP;                                             \
+   TYPE _RET;                                                          \
+   __asm__ volatile ("mr %L1,%3\n"                                     \
+                   "\t" OPCODE " %1,%P0,%4\n"                          \
+                   "\tmr %2,%1\n"                                      \
+                   : "+Q" (_PTR[0]), "=&r" (_TMP), "=r" (_RET)         \
+                   : "r" (_VALUE), "n" (FC));                          \
+   return _RET;                                                        \
+ }
+
+ _AMO_LD_SIMPLE (amo_lwat_add,   uint32_t, "lwat", _AMO_LD_ADD)
+ _AMO_LD_SIMPLE (amo_lwat_xor,   uint32_t, "lwat", _AMO_LD_XOR)
+ _AMO_LD_SIMPLE (amo_lwat_ior,   uint32_t, "lwat", _AMO_LD_IOR)
+ _AMO_LD_SIMPLE (amo_lwat_and,   uint32_t, "lwat", _AMO_LD_AND)
+ _AMO_LD_SIMPLE (amo_lwat_umax,  uint32_t, "lwat", _AMO_LD_UMAX)
+ _AMO_LD_SIMPLE (amo_lwat_umin,  uint32_t, "lwat", _AMO_LD_UMIN)
+ _AMO_LD_SIMPLE (amo_lwat_swap,  uint32_t, "lwat", _AMO_LD_SWAP)
+
+ _AMO_LD_SIMPLE (amo_lwat_sadd,  int32_t,  "lwat", _AMO_LD_ADD)
+ _AMO_LD_SIMPLE (amo_lwat_smax,  int32_t,  "lwat", _AMO_LD_SMAX)
+ _AMO_LD_SIMPLE (amo_lwat_smin,  int32_t,  "lwat", _AMO_LD_SMIN)
+ _AMO_LD_SIMPLE (amo_lwat_sswap, int32_t,  "lwat", _AMO_LD_SWAP)
+
+ _AMO_LD_SIMPLE (amo_ldat_add,   uint64_t, "ldat", _AMO_LD_ADD)
+ _AMO_LD_SIMPLE (amo_ldat_xor,   uint64_t, "ldat", _AMO_LD_XOR)
+ _AMO_LD_SIMPLE (amo_ldat_ior,   uint64_t, "ldat", _AMO_LD_IOR)
+ _AMO_LD_SIMPLE (amo_ldat_and,   uint64_t, "ldat", _AMO_LD_AND)
+ _AMO_LD_SIMPLE (amo_ldat_umax,  uint64_t, "ldat", _AMO_LD_UMAX)
+ _AMO_LD_SIMPLE (amo_ldat_umin,  uint64_t, "ldat", _AMO_LD_UMIN)
+ _AMO_LD_SIMPLE (amo_ldat_swap,  uint64_t, "ldat", _AMO_LD_SWAP)
+
+ _AMO_LD_SIMPLE (amo_ldat_sadd,  int64_t,  "ldat", _AMO_LD_ADD)
+ _AMO_LD_SIMPLE (amo_ldat_smax,  int64_t,  "ldat", _AMO_LD_SMAX)
+ _AMO_LD_SIMPLE (amo_ldat_smin,  int64_t,  "ldat", _AMO_LD_SMIN)
+ _AMO_LD_SIMPLE (amo_ldat_sswap, int64_t,  "ldat", _AMO_LD_SWAP)
+
+ /* Enumeration of the STWAT/STDAT sub-opcodes.  */
+ enum _AMO_ST {
+   _AMO_ST_ADD         = 0x00,         /* Store Add.  */
+   _AMO_ST_XOR         = 0x01,         /* Store Xor.  */
+   _AMO_ST_IOR         = 0x02,         /* Store Ior.  */
+   _AMO_ST_AND         = 0x03,         /* Store And.  */
+   _AMO_ST_UMAX                = 0x04,         /* Store Unsigned Maximum.  */
+   _AMO_ST_SMAX                = 0x05,         /* Store Signed Maximum.  */
+   _AMO_ST_UMIN                = 0x06,         /* Store Unsigned Minimum.  */
+   _AMO_ST_SMIN                = 0x07,         /* Store Signed Minimum.  */
+   _AMO_ST_TWIN                = 0x18          /* Store Twin.  */
+ };
+
+ /* Implementation of the simple STWAT/STDAT operations that take one register
+    and modify one word or double-word of memory.  No value is returned.
+
+    The STWAT/STDAT opcode requires the address to be a single register, and
+    that points to a suitably aligned memory location.  Asm volatile is used to
+    prevent the optimizer from moving the operation.  */
+
+ #define _AMO_ST_SIMPLE(NAME, TYPE, OPCODE, FC)                        \
+ static __inline__ void                                                \
+ NAME (TYPE *_PTR, TYPE _VALUE)                                        \
+ {                                                                     \
+   __asm__ volatile (OPCODE " %1,%P0,%2"                               \
+                   : "+Q" (_PTR[0])                                    \
+                   : "r" (_VALUE), "n" (FC));                          \
+   return;                                                             \
+ }
+
+ _AMO_ST_SIMPLE (amo_stwat_add,  uint32_t, "stwat", _AMO_ST_ADD)
+ _AMO_ST_SIMPLE (amo_stwat_xor,  uint32_t, "stwat", _AMO_ST_XOR)
+ _AMO_ST_SIMPLE (amo_stwat_ior,  uint32_t, "stwat", _AMO_ST_IOR)
+ _AMO_ST_SIMPLE (amo_stwat_and,  uint32_t, "stwat", _AMO_ST_AND)
+ _AMO_ST_SIMPLE (amo_stwat_umax, uint32_t, "stwat", _AMO_ST_UMAX)
+ _AMO_ST_SIMPLE (amo_stwat_umin, uint32_t, "stwat", _AMO_ST_UMIN)
+
+ _AMO_ST_SIMPLE (amo_stwat_sadd, int32_t,  "stwat", _AMO_ST_ADD)
+ _AMO_ST_SIMPLE (amo_stwat_smax, int32_t,  "stwat", _AMO_ST_SMAX)
+ _AMO_ST_SIMPLE (amo_stwat_smin, int32_t,  "stwat", _AMO_ST_SMIN)
+
+ _AMO_ST_SIMPLE (amo_stdat_add,  uint64_t, "stdat", _AMO_ST_ADD)
+ _AMO_ST_SIMPLE (amo_stdat_xor,  uint64_t, "stdat", _AMO_ST_XOR)
+ _AMO_ST_SIMPLE (amo_stdat_ior,  uint64_t, "stdat", _AMO_ST_IOR)
+ _AMO_ST_SIMPLE (amo_stdat_and,  uint64_t, "stdat", _AMO_ST_AND)
+ _AMO_ST_SIMPLE (amo_stdat_umax, uint64_t, "stdat", _AMO_ST_UMAX)
+ _AMO_ST_SIMPLE (amo_stdat_umin, uint64_t, "stdat", _AMO_ST_UMIN)
+
+ _AMO_ST_SIMPLE (amo_stdat_sadd, int64_t,  "stdat", _AMO_ST_ADD)
+ _AMO_ST_SIMPLE (amo_stdat_smax, int64_t,  "stdat", _AMO_ST_SMAX)
+ _AMO_ST_SIMPLE (amo_stdat_smin, int64_t,  "stdat", _AMO_ST_SMIN)
+ #endif        /* _ARCH_PWR9 && _ARCH_PPC64.  */
+ #endif        /* _AMO_H.  */
Index: gcc/config.gcc
===================================================================
--- gcc/config.gcc      (revision 253429)
+++ gcc/config.gcc      (working copy)
@@ -461,6 +461,7 @@ powerpc*-*-*)
        extra_headers="${extra_headers} mmintrin.h x86intrin.h"
        extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h si2vmx.h"
        extra_headers="${extra_headers} paired.h"
+       extra_headers="${extra_headers} amo.h"
        case x$with_cpu in
            xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500)
                cpu_is_64bit=yes
@@ -2627,7 +2628,7 @@ rs6000-ibm-aix[789].* | powerpc-ibm-aix[
        use_collect2=yes
        thread_file='aix'
        use_gcc_stdint=wrap
-       extra_headers=altivec.h
+       extra_headers="altivec.h amo.h"
        default_use_cxa_atexit=yes
        ;;
 rl78-*-elf*)
Index: gcc/doc/extend.texi
===================================================================
--- gcc/doc/extend.texi (revision 253429)
+++ gcc/doc/extend.texi (working copy)
@@ -12041,6 +12041,7 @@ instructions, but allow the compiler to 
 * PowerPC Built-in Functions::
 * PowerPC AltiVec/VSX Built-in Functions::
 * PowerPC Hardware Transactional Memory Built-in Functions::
+* PowerPC Atomic Memory Operation Functions::
 * RX Built-in Functions::
 * S/390 System z Built-in Functions::
 * SH Built-in Functions::
@@ -19126,6 +19127,67 @@ while (1)
   @}
 @end smallexample
 
+@node PowerPC Atomic Memory Operation Functions
+@subsection PowerPC Atomic Memory Operation Functions
+ISA 3.0 of the PowerPC added new atomic memory operation (amo)
+instructions.  GCC provides support for these instructions in 64-bit
+environments.  All of the functions are declared in the include file
+@code{amo.h}.
+
+The functions supported are:
+
+@smallexample
+#include <amo.h>
+
+uint32_t amo_lwat_add (uint32_t *, uint32_t);
+uint32_t amo_lwat_xor (uint32_t *, uint32_t);
+uint32_t amo_lwat_ior (uint32_t *, uint32_t);
+uint32_t amo_lwat_and (uint32_t *, uint32_t);
+uint32_t amo_lwat_umax (uint32_t *, uint32_t);
+uint32_t amo_lwat_umin (uint32_t *, uint32_t);
+uint32_t amo_lwat_swap (uint32_t *, uint32_t);
+
+int32_t amo_lwat_sadd (int32_t *, int32_t);
+int32_t amo_lwat_smax (int32_t *, int32_t);
+int32_t amo_lwat_smin (int32_t *, int32_t);
+int32_t amo_lwat_sswap (int32_t *, int32_t);
+
+uint64_t amo_ldat_add (uint64_t *, uint64_t);
+uint64_t amo_ldat_xor (uint64_t *, uint64_t);
+uint64_t amo_ldat_ior (uint64_t *, uint64_t);
+uint64_t amo_ldat_and (uint64_t *, uint64_t);
+uint64_t amo_ldat_umax (uint64_t *, uint64_t);
+uint64_t amo_ldat_umin (uint64_t *, uint64_t);
+uint64_t amo_ldat_swap (uint64_t *, uint64_t);
+
+int64_t amo_ldat_sadd (int64_t *, int64_t);
+int64_t amo_ldat_smax (int64_t *, int64_t);
+int64_t amo_ldat_smin (int64_t *, int64_t);
+int64_t amo_ldat_sswap (int64_t *, int64_t);
+
+void amo_stwat_add (uint32_t *, uint32_t);
+void amo_stwat_xor (uint32_t *, uint32_t);
+void amo_stwat_ior (uint32_t *, uint32_t);
+void amo_stwat_and (uint32_t *, uint32_t);
+void amo_stwat_umax (uint32_t *, uint32_t);
+void amo_stwat_umin (uint32_t *, uint32_t);
+
+void amo_stwat_sadd (int32_t *, int32_t);
+void amo_stwat_smax (int32_t *, int32_t);
+void amo_stwat_smin (int32_t *, int32_t);
+
+void amo_stdat_add (uint64_t *, uint64_t);
+void amo_stdat_xor (uint64_t *, uint64_t);
+void amo_stdat_ior (uint64_t *, uint64_t);
+void amo_stdat_and (uint64_t *, uint64_t);
+void amo_stdat_umax (uint64_t *, uint64_t);
+void amo_stdat_umin (uint64_t *, uint64_t);
+
+void amo_stdat_sadd (int64_t *, int64_t);
+void amo_stdat_smax (int64_t *, int64_t);
+void amo_stdat_smin (int64_t *, int64_t);
+@end smallexample
+
 @node RX Built-in Functions
 @subsection RX Built-in Functions
 GCC supports some of the RX instructions which cannot be expressed in
Index: gcc/testsuite/gcc.target/powerpc/amo1.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/amo1.c     (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/amo1.c     (revision 0)
@@ -0,0 +1,253 @@
+/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-vector -mpower9-misc -O2" } */
+
+/* Verify P9 atomic memory operations.  */
+
+#include <amo.h>
+#include <stdint.h>
+
+uint32_t
+do_lw_add (uint32_t *mem, uint32_t value)
+{
+  return amo_lwat_add (mem, value);
+}
+
+int32_t
+do_lw_sadd (int32_t *mem, int32_t value)
+{
+  return amo_lwat_sadd (mem, value);
+}
+
+uint32_t
+do_lw_xor (uint32_t *mem, uint32_t value)
+{
+  return amo_lwat_xor (mem, value);
+}
+
+uint32_t
+do_lw_ior (uint32_t *mem, uint32_t value)
+{
+  return amo_lwat_ior (mem, value);
+}
+
+uint32_t
+do_lw_and (uint32_t *mem, uint32_t value)
+{
+  return amo_lwat_and (mem, value);
+}
+
+uint32_t
+do_lw_umax (uint32_t *mem, uint32_t value)
+{
+  return amo_lwat_umax (mem, value);
+}
+
+int32_t
+do_lw_smax (int32_t *mem, int32_t value)
+{
+  return amo_lwat_smax (mem, value);
+}
+
+uint32_t
+do_lw_umin (uint32_t *mem, uint32_t value)
+{
+  return amo_lwat_umin (mem, value);
+}
+
+int32_t
+do_lw_smin (int32_t *mem, int32_t value)
+{
+  return amo_lwat_smin (mem, value);
+}
+
+uint32_t
+do_lw_swap (uint32_t *mem, uint32_t value)
+{
+  return amo_lwat_swap (mem, value);
+}
+
+int32_t
+do_lw_sswap (int32_t *mem, int32_t value)
+{
+  return amo_lwat_sswap (mem, value);
+}
+
+uint64_t
+do_ld_add (uint64_t *mem, uint64_t value)
+{
+  return amo_ldat_add (mem, value);
+}
+
+int64_t
+do_ld_sadd (int64_t *mem, int64_t value)
+{
+  return amo_ldat_sadd (mem, value);
+}
+
+uint64_t
+do_ld_xor (uint64_t *mem, uint64_t value)
+{
+  return amo_ldat_xor (mem, value);
+}
+
+uint64_t
+do_ld_ior (uint64_t *mem, uint64_t value)
+{
+  return amo_ldat_ior (mem, value);
+}
+
+uint64_t
+do_ld_and (uint64_t *mem, uint64_t value)
+{
+  return amo_ldat_and (mem, value);
+}
+
+uint64_t
+do_ld_umax (uint64_t *mem, uint64_t value)
+{
+  return amo_ldat_umax (mem, value);
+}
+
+int64_t
+do_ld_smax (int64_t *mem, int64_t value)
+{
+  return amo_ldat_smax (mem, value);
+}
+
+uint64_t
+do_ld_umin (uint64_t *mem, uint64_t value)
+{
+  return amo_ldat_umin (mem, value);
+}
+
+int64_t
+do_ld_smin (int64_t *mem, int64_t value)
+{
+  return amo_ldat_smin (mem, value);
+}
+
+uint64_t
+do_ld_swap (uint64_t *mem, uint64_t value)
+{
+  return amo_ldat_swap (mem, value);
+}
+
+int64_t
+do_ld_sswap (int64_t *mem, int64_t value)
+{
+  return amo_ldat_sswap (mem, value);
+}
+
+void
+do_sw_add (uint32_t *mem, uint32_t value)
+{
+  amo_stwat_add (mem, value);
+}
+
+void
+do_sw_sadd (int32_t *mem, int32_t value)
+{
+  amo_stwat_sadd (mem, value);
+}
+
+void
+do_sw_xor (uint32_t *mem, uint32_t value)
+{
+  amo_stwat_xor (mem, value);
+}
+
+void
+do_sw_ior (uint32_t *mem, uint32_t value)
+{
+  amo_stwat_ior (mem, value);
+}
+
+void
+do_sw_and (uint32_t *mem, uint32_t value)
+{
+  amo_stwat_and (mem, value);
+}
+
+void
+do_sw_umax (int32_t *mem, int32_t value)
+{
+  amo_stwat_umax ((uint32_t *) mem, (uint32_t) value); /* AMO is unsigned.  */
+}
+
+void
+do_sw_smax (int32_t *mem, int32_t value)
+{
+  amo_stwat_smax (mem, value);
+}
+
+void
+do_sw_umin (int32_t *mem, int32_t value)
+{
+  amo_stwat_umin ((uint32_t *) mem, (uint32_t) value); /* AMO is unsigned.  */
+}
+
+void
+do_sw_smin (int32_t *mem, int32_t value)
+{
+  amo_stwat_smin (mem, value);
+}
+
+void
+do_sd_add (uint64_t *mem, uint64_t value)
+{
+  amo_stdat_add (mem, value);
+}
+
+void
+do_sd_sadd (int64_t *mem, int64_t value)
+{
+  amo_stdat_sadd (mem, value);
+}
+
+void
+do_sd_xor (uint64_t *mem, uint64_t value)
+{
+  amo_stdat_xor (mem, value);
+}
+
+void
+do_sd_ior (uint64_t *mem, uint64_t value)
+{
+  amo_stdat_ior (mem, value);
+}
+
+void
+do_sd_and (uint64_t *mem, uint64_t value)
+{
+  amo_stdat_and (mem, value);
+}
+
+void
+do_sd_umax (int64_t *mem, int64_t value)
+{
+  amo_stdat_umax ((uint64_t *) mem, (uint64_t) value); /* AMO is unsigned.  */
+}
+
+void
+do_sd_smax (int64_t *mem, int64_t value)
+{
+  amo_stdat_smax (mem, value);
+}
+
+void
+do_sd_umin (int64_t *mem, int64_t value)
+{
+  amo_stdat_umin ((uint64_t *) mem, (uint64_t) value); /* AMO is unsigned.  */
+}
+
+void
+do_sd_smin (int64_t *mem, int64_t value)
+{
+  amo_stdat_smin (mem, value);
+}
+
+/* { dg-final { scan-assembler-times {\mldat\M}  11 } } */
+/* { dg-final { scan-assembler-times {\mlwat\M}  11 } } */
+/* { dg-final { scan-assembler-times {\mstdat\M}  9 } } */
+/* { dg-final { scan-assembler-times {\mstwat\M}  9 } } */
Index: gcc/testsuite/gcc.target/powerpc/amo2.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/amo2.c     (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/amo2.c     (revision 0)
@@ -0,0 +1,121 @@
+/* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O2 -mpower9-vector -mpower9-misc" } */
+
+#include <amo.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+/* Test whether the ISA 3.0 amo (atomic memory operations) functions perform as
+   expected.  */
+
+/* 32-bit tests.  */
+static uint32_t u32_ld[4] = {
+  9,                           /* add */
+  7,                           /* xor */
+  6,                           /* ior */
+  7,                           /* and */
+};
+
+static uint32_t u32_st[4] = {
+  9,                           /* add */
+  7,                           /* xor */
+  6,                           /* ior */
+  7,                           /* and */
+};
+
+static uint32_t u32_result[4];
+
+static uint32_t u32_update[4] = {
+  9 + 1,                       /* add */
+  7 ^ 1,                       /* xor */
+  6 | 1,                       /* ior */
+  7 & 1,                       /* and */
+};
+
+static uint32_t u32_prev[4] = {
+  9,                           /* add */
+  7,                           /* xor */
+  6,                           /* ior */
+  7,                           /* and */
+};
+
+/* 64-bit tests.  */
+static uint64_t u64_ld[4] = {
+  9,                           /* add */
+  7,                           /* xor */
+  6,                           /* ior */
+  7,                           /* and */
+};
+
+static uint64_t u64_st[4] = {
+  9,                           /* add */
+  7,                           /* xor */
+  6,                           /* ior */
+  7,                           /* and */
+};
+
+static uint64_t u64_result[4];
+
+static uint64_t u64_update[4] = {
+  9 + 1,                       /* add */
+  7 ^ 1,                       /* xor */
+  6 | 1,                       /* ior */
+  7 & 1,                       /* and */
+};
+
+static uint64_t u64_prev[4] = {
+  9,                           /* add */
+  7,                           /* xor */
+  6,                           /* ior */
+  7,                           /* and */
+};
+
+int
+main (void)
+{
+  size_t i;
+
+  u32_result[0] = amo_lwat_add (&u32_ld[0], 1);
+  u32_result[1] = amo_lwat_xor (&u32_ld[1], 1);
+  u32_result[2] = amo_lwat_ior (&u32_ld[2], 1);
+  u32_result[3] = amo_lwat_and (&u32_ld[3], 1);
+
+  u64_result[0] = amo_ldat_add (&u64_ld[0], 1);
+  u64_result[1] = amo_ldat_xor (&u64_ld[1], 1);
+  u64_result[2] = amo_ldat_ior (&u64_ld[2], 1);
+  u64_result[3] = amo_ldat_and (&u64_ld[3], 1);
+
+  amo_stwat_add (&u32_st[0], 1);
+  amo_stwat_xor (&u32_st[1], 1);
+  amo_stwat_ior (&u32_st[2], 1);
+  amo_stwat_and (&u32_st[3], 1);
+
+  amo_stdat_add (&u64_st[0], 1);
+  amo_stdat_xor (&u64_st[1], 1);
+  amo_stdat_ior (&u64_st[2], 1);
+  amo_stdat_and (&u64_st[3], 1);
+
+  for (i = 0; i < 4; i++)
+    {
+      if (u32_result[i] != u32_prev[i])
+       abort ();
+
+      if (u32_ld[i] != u32_update[i])
+       abort ();
+
+      if (u32_st[i] != u32_update[i])
+       abort ();
+
+      if (u64_result[i] != u64_prev[i])
+       abort ();
+
+      if (u64_ld[i] != u64_update[i])
+       abort ();
+
+      if (u64_st[i] != u64_update[i])
+       abort ();
+    }
+
+  return 0;
+}

Re: [PATCH], Add PowerPC ISA 3.0 Atomic Memory Operation functions

Reply via email to