Introduce the %I and %J flags for setting the .aqrl bits on LR/SC pairs
as needed.

Atomic compare and exchange ops provide success and failure memory
models. C++17 and later place no restrictions on the relative strength
of each model, so ensure we cover both by using a model that enforces
the ordering of both given models.

This change brings LR/SC ops in line with table A.6 of the ISA manual.

2023-04-27 Patrick O'Neill <patr...@rivosinc.com>

gcc/ChangeLog:

        * config/riscv/riscv-protos.h (riscv_union_memmodels): Expose
        riscv_union_memmodels function to sync.md.
        * config/riscv/riscv.cc (riscv_union_memmodels): Add function to
        get the union of two memmodels in sync.md.
        (riscv_print_operand): Add %I and %J flags that output the
        optimal LR/SC flag bits for a given memory model.
        * config/riscv/sync.md: Remove static .aqrl bits on LR op/.rl
        bits on SC op and replace with optimized %I, %J flags.

Signed-off-by: Patrick O'Neill <patr...@rivosinc.com>
---
v3 Changelog:
* Consolidate tests in [PATCH v3 10/10]
---
v5 Changelog:
* Also optimize subword LR/SC ops based on given memory model.
---
 gcc/config/riscv/riscv-protos.h |   3 +
 gcc/config/riscv/riscv.cc       |  44 ++++++++++++
 gcc/config/riscv/sync.md        | 114 +++++++++++++++++++-------------
 3 files changed, 114 insertions(+), 47 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index f87661bde2c..5fa9e1122ab 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -22,6 +22,8 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef GCC_RISCV_PROTOS_H
 #define GCC_RISCV_PROTOS_H
 
+#include "memmodel.h"
+
 /* Symbol types we understand.  The order of this list must match that of
    the unspec enum in riscv.md, subsequent to UNSPEC_ADDRESS_FIRST.  */
 enum riscv_symbol_type {
@@ -81,6 +83,7 @@ extern bool riscv_v_ext_vector_mode_p (machine_mode);
 extern bool riscv_shamt_matches_mask_p (int, HOST_WIDE_INT);
 extern void riscv_subword_address (rtx, rtx *, rtx *, rtx *, rtx *);
 extern void riscv_lshift_subword (machine_mode, rtx, rtx, rtx *);
+extern enum memmodel riscv_union_memmodels (enum memmodel, enum memmodel);
 
 /* Routines implemented in riscv-c.cc.  */
 void riscv_cpu_cpp_builtins (cpp_reader *);
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 9eba03ac189..69e9b2aa548 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -4289,6 +4289,36 @@ riscv_print_operand_reloc (FILE *file, rtx op, bool 
hi_reloc)
   fputc (')', file);
 }
 
+/* Return the memory model that encapsulates both given models.  */
+
+enum memmodel
+riscv_union_memmodels (enum memmodel model1, enum memmodel model2)
+{
+  model1 = memmodel_base (model1);
+  model2 = memmodel_base (model2);
+
+  enum memmodel weaker = model1 <= model2 ? model1: model2;
+  enum memmodel stronger = model1 > model2 ? model1: model2;
+
+  switch (stronger)
+    {
+      case MEMMODEL_SEQ_CST:
+      case MEMMODEL_ACQ_REL:
+       return stronger;
+      case MEMMODEL_RELEASE:
+       if (weaker == MEMMODEL_ACQUIRE || weaker == MEMMODEL_CONSUME)
+         return MEMMODEL_ACQ_REL;
+       else
+         return stronger;
+      case MEMMODEL_ACQUIRE:
+      case MEMMODEL_CONSUME:
+      case MEMMODEL_RELAXED:
+       return stronger;
+      default:
+       gcc_unreachable ();
+    }
+}
+
 /* Return true if the .AQ suffix should be added to an AMO to implement the
    acquire portion of memory model MODEL.  */
 
@@ -4342,6 +4372,8 @@ riscv_memmodel_needs_amo_release (enum memmodel model)
    'R' Print the low-part relocation associated with OP.
    'C' Print the integer branch condition for comparison OP.
    'A' Print the atomic operation suffix for memory model OP.
+   'I' Print the LR suffix for memory model OP.
+   'J' Print the SC suffix for memory model OP.
    'z' Print x0 if OP is zero, otherwise print OP normally.
    'i' Print i if the operand is not a register.
    'S' Print shift-index of single-bit mask OP.
@@ -4511,6 +4543,18 @@ riscv_print_operand (FILE *file, rtx op, int letter)
        fputs (".rl", file);
       break;
 
+    case 'I':
+      if (model == MEMMODEL_SEQ_CST)
+       fputs (".aqrl", file);
+      else if (riscv_memmodel_needs_amo_acquire (model))
+       fputs (".aq", file);
+      break;
+
+    case 'J':
+      if (riscv_memmodel_needs_amo_release (model))
+       fputs (".rl", file);
+      break;
+
     case 'i':
       if (code != REG)
         fputs ("i", file);
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 9a3b57bd09f..3e6345e83a3 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -116,21 +116,22 @@
        (unspec_volatile:SI
          [(any_atomic:SI (match_dup 1)
                     (match_operand:SI 2 "register_operand" "rI")) ;; value for 
op
-          (match_operand:SI 3 "register_operand" "rI")]           ;; mask
+          (match_operand:SI 3 "const_int_operand")]               ;; model
         UNSPEC_SYNC_OLD_OP_SUBWORD))
-    (match_operand:SI 4 "register_operand" "rI")                  ;; not_mask
-    (clobber (match_scratch:SI 5 "=&r"))                          ;; tmp_1
-    (clobber (match_scratch:SI 6 "=&r"))]                         ;; tmp_2
+    (match_operand:SI 4 "register_operand" "rI")                  ;; mask
+    (match_operand:SI 5 "register_operand" "rI")                  ;; not_mask
+    (clobber (match_scratch:SI 6 "=&r"))                          ;; tmp_1
+    (clobber (match_scratch:SI 7 "=&r"))]                         ;; tmp_2
   "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
   {
     return "1:\;"
-          "lr.w.aqrl\t%0, %1\;"
-          "<insn>\t%5, %0, %2\;"
-          "and\t%5, %5, %3\;"
-          "and\t%6, %0, %4\;"
-          "or\t%6, %6, %5\;"
-          "sc.w.rl\t%5, %6, %1\;"
-          "bnez\t%5, 1b";
+          "lr.w%I3\t%0, %1\;"
+          "<insn>\t%6, %0, %2\;"
+          "and\t%6, %6, %4\;"
+          "and\t%7, %0, %5\;"
+          "or\t%7, %7, %6\;"
+          "sc.w%J3\t%6, %7, %1\;"
+          "bnez\t%6, 1b";
   }
   [(set (attr "length") (const_int 28))])
 
@@ -151,6 +152,7 @@
   rtx old = gen_reg_rtx (SImode);
   rtx mem = operands[1];
   rtx value = operands[2];
+  rtx model = operands[3];
   rtx aligned_mem = gen_reg_rtx (SImode);
   rtx shift = gen_reg_rtx (SImode);
   rtx mask = gen_reg_rtx (SImode);
@@ -162,7 +164,7 @@
   riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value);
 
   emit_insn (gen_subword_atomic_fetch_strong_nand (old, aligned_mem,
-                                                  shifted_value,
+                                                  shifted_value, model,
                                                   mask, not_mask));
 
   emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
@@ -180,22 +182,23 @@
        (unspec_volatile:SI
          [(not:SI (and:SI (match_dup 1)
                           (match_operand:SI 2 "register_operand" "rI"))) ;; 
value for op
-          (match_operand:SI 3 "register_operand" "rI")]                  ;; 
mask
+          (match_operand:SI 3 "const_int_operand")]                      ;; 
model
         UNSPEC_SYNC_OLD_OP_SUBWORD))
-    (match_operand:SI 4 "register_operand" "rI")                         ;; 
not_mask
-    (clobber (match_scratch:SI 5 "=&r"))                                 ;; 
tmp_1
-    (clobber (match_scratch:SI 6 "=&r"))]                                ;; 
tmp_2
+    (match_operand:SI 4 "register_operand" "rI")                         ;; 
mask
+    (match_operand:SI 5 "register_operand" "rI")                         ;; 
not_mask
+    (clobber (match_scratch:SI 6 "=&r"))                                 ;; 
tmp_1
+    (clobber (match_scratch:SI 7 "=&r"))]                                ;; 
tmp_2
   "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
   {
     return "1:\;"
-          "lr.w.aqrl\t%0, %1\;"
-          "and\t%5, %0, %2\;"
-          "not\t%5, %5\;"
-          "and\t%5, %5, %3\;"
-          "and\t%6, %0, %4\;"
-          "or\t%6, %6, %5\;"
-          "sc.w.rl\t%5, %6, %1\;"
-          "bnez\t%5, 1b";
+          "lr.w%I3\t%0, %1\;"
+          "and\t%6, %0, %2\;"
+          "not\t%6, %6\;"
+          "and\t%6, %6, %4\;"
+          "and\t%7, %0, %5\;"
+          "or\t%7, %7, %6\;"
+          "sc.w%J3\t%6, %7, %1\;"
+          "bnez\t%6, 1b";
   }
   [(set (attr "length") (const_int 32))])
 
@@ -216,6 +219,7 @@
   rtx old = gen_reg_rtx (SImode);
   rtx mem = operands[1];
   rtx value = operands[2];
+  rtx model = operands[3];
   rtx aligned_mem = gen_reg_rtx (SImode);
   rtx shift = gen_reg_rtx (SImode);
   rtx mask = gen_reg_rtx (SImode);
@@ -228,7 +232,8 @@
 
   emit_insn (gen_subword_atomic_fetch_strong_<atomic_optab> (old, aligned_mem,
                                                             shifted_value,
-                                                            mask, not_mask));
+                                                            model, mask,
+                                                            not_mask));
 
   emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
                                         gen_lowpart (QImode, shift)));
@@ -261,6 +266,7 @@
   rtx old = gen_reg_rtx (SImode);
   rtx mem = operands[1];
   rtx value = operands[2];
+  rtx model = operands[3];
   rtx aligned_mem = gen_reg_rtx (SImode);
   rtx shift = gen_reg_rtx (SImode);
   rtx mask = gen_reg_rtx (SImode);
@@ -272,7 +278,8 @@
   riscv_lshift_subword (<MODE>mode, value, shift, &shifted_value);
 
   emit_insn (gen_subword_atomic_exchange_strong (old, aligned_mem,
-                                                shifted_value, not_mask));
+                                                shifted_value, model,
+                                                not_mask));
 
   emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
                                         gen_lowpart (QImode, shift)));
@@ -286,18 +293,19 @@
        (match_operand:SI 1 "memory_operand" "+A"))      ;; mem location
    (set (match_dup 1)
        (unspec_volatile:SI
-         [(match_operand:SI 2 "reg_or_0_operand" "rI")  ;; value
-          (match_operand:SI 3 "reg_or_0_operand" "rI")] ;; not_mask
+         [(match_operand:SI 2 "reg_or_0_operand" "rI")  ;; value
+          (match_operand:SI 3 "const_int_operand")]     ;; model
       UNSPEC_SYNC_EXCHANGE_SUBWORD))
-    (clobber (match_scratch:SI 4 "=&r"))]               ;; tmp_1
+    (match_operand:SI 4 "reg_or_0_operand" "rI")        ;; not_mask
+    (clobber (match_scratch:SI 5 "=&r"))]               ;; tmp_1
   "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
   {
     return "1:\;"
-          "lr.w.aqrl\t%0, %1\;"
-          "and\t%4, %0, %3\;"
-          "or\t%4, %4, %2\;"
-          "sc.w.rl\t%4, %4, %1\;"
-          "bnez\t%4, 1b";
+          "lr.w%I3\t%0, %1\;"
+          "and\t%5, %0, %4\;"
+          "or\t%5, %5, %2\;"
+          "sc.w%J3\t%5, %5, %1\;"
+          "bnez\t%5, 1b";
   }
   [(set (attr "length") (const_int 20))])
 
@@ -313,10 +321,15 @@
    (clobber (match_scratch:GPR 6 "=&r"))]
   "TARGET_ATOMIC"
   {
+    enum memmodel model_success = (enum memmodel) INTVAL (operands[4]);
+    enum memmodel model_failure = (enum memmodel) INTVAL (operands[5]);
+    /* Find the union of the two memory models so we can satisfy both success
+       and failure memory models.  */
+    operands[5] = GEN_INT (riscv_union_memmodels (model_success, 
model_failure));
     return "1:\;"
-          "lr.<amo>.aqrl\t%0,%1\;"
+          "lr.<amo>%I5\t%0,%1\;"
           "bne\t%0,%z2,1f\;"
-          "sc.<amo>.rl\t%6,%z3,%1\;"
+          "sc.<amo>%J5\t%6,%z3,%1\;"
           "bnez\t%6,1b\;"
           "1:";
   }
@@ -440,9 +453,15 @@
   emit_move_insn (shifted_o, gen_rtx_AND (SImode, shifted_o, mask));
   emit_move_insn (shifted_n, gen_rtx_AND (SImode, shifted_n, mask));
 
+  enum memmodel model_success = (enum memmodel) INTVAL (operands[4]);
+  enum memmodel model_failure = (enum memmodel) INTVAL (operands[5]);
+  /* Find the union of the two memory models so we can satisfy both success
+     and failure memory models.  */
+  rtx model = GEN_INT (riscv_union_memmodels (model_success, model_failure));
+
   emit_insn (gen_subword_atomic_cas_strong (old, aligned_mem,
                                            shifted_o, shifted_n,
-                                           mask, not_mask));
+                                           model, mask, not_mask));
 
   emit_move_insn (old, gen_rtx_ASHIFTRT (SImode, old,
                                         gen_lowpart (QImode, shift)));
@@ -459,19 +478,20 @@
        (unspec_volatile:SI [(match_operand:SI 2 "reg_or_0_operand" "rJ")  ;; 
expected value
                             (match_operand:SI 3 "reg_or_0_operand" "rJ")] ;; 
desired value
         UNSPEC_COMPARE_AND_SWAP_SUBWORD))
-       (match_operand:SI 4 "register_operand" "rI")                       ;; 
mask
-       (match_operand:SI 5 "register_operand" "rI")                       ;; 
not_mask
-       (clobber (match_scratch:SI 6 "=&r"))]                              ;; 
tmp_1
+       (match_operand:SI 4 "const_int_operand")                           ;; 
model
+       (match_operand:SI 5 "register_operand" "rI")                       ;; 
mask
+       (match_operand:SI 6 "register_operand" "rI")                       ;; 
not_mask
+       (clobber (match_scratch:SI 7 "=&r"))]                              ;; 
tmp_1
   "TARGET_ATOMIC && TARGET_INLINE_SUBWORD_ATOMIC"
   {
     return "1:\;"
-          "lr.w.aqrl\t%0, %1\;"
-          "and\t%6, %0, %4\;"
-          "bne\t%6, %z2, 1f\;"
-          "and\t%6, %0, %5\;"
-          "or\t%6, %6, %3\;"
-          "sc.w.rl\t%6, %6, %1\;"
-          "bnez\t%6, 1b\;"
+          "lr.w%I4\t%0, %1\;"
+          "and\t%7, %0, %5\;"
+          "bne\t%7, %z2, 1f\;"
+          "and\t%7, %0, %6\;"
+          "or\t%7, %7, %3\;"
+          "sc.w%J4\t%7, %7, %1\;"
+          "bnez\t%7, 1b\;"
           "1:";
   }
   [(set (attr "length") (const_int 28))])
-- 
2.34.1

Reply via email to