A couple of small tweaks to PowerPC atomic operations.  The first
omits the "cmp; bc; isync" barrier on atomic_load with memory model
__ATOMIC_CONSUME.  PowerPC pointer loads don't need a barrier there:
the hardware already orders a load of *p after the load that produced
p, so the data dependency does the job of the barrier.  Ref
http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
As best I can see, mem_thread_fence should not be changed similarly,
since __ATOMIC_CONSUME doesn't really make sense on a fence; a fence
has no load for a dependency to hang off, so a fence with
__ATOMIC_CONSUME ought to behave as __ATOMIC_ACQUIRE.
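
For the curious, here's a minimal sketch of the pattern in question
(hypothetical code, not from the patch; the names are invented):

  struct node { int payload; };
  static struct node *head;

  /* Producer: publish a fully initialised node.  */
  void
  publish (struct node *n)
  {
    n->payload = 42;
    __atomic_store_n (&head, n, __ATOMIC_RELEASE);
  }

  /* Consumer: the later load of n->payload carries a data dependency
     on the value returned by the __ATOMIC_CONSUME load, and PowerPC
     hardware already orders dependent loads, so no "cmp; bc; isync"
     sequence is needed after the ld/lwz.  */
  int
  peek (void)
  {
    struct node *n = __atomic_load_n (&head, __ATOMIC_CONSUME);
    return n ? n->payload : 0;
  }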

The second tweak forces the address used by load_locked and
store_conditional into a register when the address is not legitimate
for those instructions.  lwarx and stwcx. accept only indirect or
indexed addressing, so a reg+offset address has to be fixed up
somewhere; doing it at expand time saves reload some work, reduces
register pressure, and sometimes shrinks code size.  Not a big deal,
just something I noticed a while ago when looking at libgomp.
E.g. (-original, +patched; a C reduction follows the disassembly):

@@ -1533,13 +1533,13 @@
     4844:      3f de 00 02     addis   r30,r30,2
     4848:      3b de 2e 74     addi    r30,r30,11892
     484c:      80 7e 80 00     lwz     r3,-32768(r30)
-    4850:      7c 69 1b 78     mr      r9,r3
-    4854:      39 09 00 04     addi    r8,r9,4
-    4858:      7c 80 40 28     lwarx   r4,0,r8
+    4850:      38 63 00 04     addi    r3,r3,4
+    4854:      7c 69 1b 78     mr      r9,r3
+    4858:      7c 80 48 28     lwarx   r4,0,r9
     485c:      2c 04 00 00     cmpwi   r4,0
     4860:      40 82 00 10     bne-    4870 <GOMP_atomic_start+0x50>
-    4864:      7d 40 41 2d     stwcx.  r10,0,r8
-    4868:      40 a2 ff ec     bne-    4854 <GOMP_atomic_start+0x34>
+    4864:      7d 40 49 2d     stwcx.  r10,0,r9
+    4868:      40 a2 ff f0     bne-    4858 <GOMP_atomic_start+0x38>
     486c:      4c 00 01 2c     isync
     4870:      90 81 00 08     stw     r4,8(r1)
     4874:      40 82 00 18     bne-    488c <GOMP_atomic_start+0x6c>
@@ -1548,9 +1548,9 @@
     4880:      38 21 00 20     addi    r1,r1,32
     4884:      7c 08 03 a6     mtlr    r0
     4888:      4e 80 00 20     blr
-    488c:      38 63 00 04     addi    r3,r3,4
-    4890:      48 00 79 c1     bl      c250 <gomp_mutex_lock_slow>
-    4894:      4b ff ff e4     b       4878 <GOMP_atomic_start+0x58>
+    488c:      48 00 79 c5     bl      c250 <gomp_mutex_lock_slow>
+    4890:      4b ff ff e8     b       4878 <GOMP_atomic_start+0x58>
+    4894:      60 00 00 00     nop
     4898:      60 00 00 00     nop
     489c:      60 00 00 00     nop
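
The C that provokes the above is roughly this (a hypothetical
reduction of GOMP_atomic_start; the names and struct layout are
invented for illustration):

  struct mutex { int count; int lock; };
  extern struct mutex *m;
  extern void lock_slow (int *);

  void
  atomic_start (void)
  {
    int expected = 0;
    /* &m->lock is base+4, an offset form that lwarx/stwcx. cannot
       encode, so the 4 must be added into a register.  Doing that at
       expand time also hoists the add out of the retry loop, as seen
       in the patched disassembly.  */
    if (!__atomic_compare_exchange_n (&m->lock, &expected, 1,
                                      0 /* strong */,
                                      __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
      lock_slow (&m->lock);
  }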

Bootstrapped and regression tested on powerpc64-linux.  OK for mainline?

        * config/rs6000/sync.md (atomic_load<mode>): Don't emit synchronisation
        barrier for MEMMODEL_CONSUME.
        * config/rs6000/rs6000.c (rs6000_pre_atomic_barrier): Pass in and
        return mem.  Convert to indirect addressing if not indirect or
        indexed.  Adjust all callers.

Index: gcc/config/rs6000/sync.md
===================================================================
--- gcc/config/rs6000/sync.md   (revision 188723)
+++ gcc/config/rs6000/sync.md   (working copy)
@@ -126,8 +126,8 @@
   switch (model)
     {
     case MEMMODEL_RELAXED:
+    case MEMMODEL_CONSUME:
       break;
-    case MEMMODEL_CONSUME:
     case MEMMODEL_ACQUIRE:
     case MEMMODEL_SEQ_CST:
       emit_insn (gen_loadsync (operands[0]));
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c  (revision 188723)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -16527,9 +16572,19 @@ emit_store_conditional (enum machine_mode mode, rt
 
 /* Expand barriers before and after a load_locked/store_cond sequence.  */
 
-static void
-rs6000_pre_atomic_barrier (enum memmodel model)
+static rtx
+rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
 {
+  rtx addr = XEXP (mem, 0);
+  int strict_p = (reload_in_progress || reload_completed);
+
+  if (!legitimate_indirect_address_p (addr, strict_p)
+      && !legitimate_indexed_address_p (addr, strict_p))
+    {
+      addr = force_reg (Pmode, addr);
+      mem = replace_equiv_address_nv (mem, addr);
+    }
+
   switch (model)
     {
     case MEMMODEL_RELAXED:
@@ -16546,6 +16601,7 @@ emit_store_conditional (enum machine_mode mode, rt
     default:
       gcc_unreachable ();
     }
+  return mem;
 }
 
 static void
@@ -16684,7 +16740,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operand
   else if (reg_overlap_mentioned_p (retval, oldval))
     oldval = copy_to_reg (oldval);
 
-  rs6000_pre_atomic_barrier (mod_s);
+  mem = rs6000_pre_atomic_barrier (mem, mod_s);
 
   label1 = NULL_RTX;
   if (!is_weak)
@@ -16769,7 +16825,7 @@ rs6000_expand_atomic_exchange (rtx operands[])
       mode = SImode;
     }
 
-  rs6000_pre_atomic_barrier (model);
+  mem = rs6000_pre_atomic_barrier (mem, model);
 
   label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
   emit_label (XEXP (label, 0));
@@ -16853,7 +16909,7 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx m
       mode = SImode;
     }
 
-  rs6000_pre_atomic_barrier (model);
+  mem = rs6000_pre_atomic_barrier (mem, model);
 
   label = gen_label_rtx ();
   emit_label (label);

-- 
Alan Modra
Australia Development Lab, IBM
