A couple of small tweaks to PowerPC atomic operations. The first omits the "cmp; bc; isync" barrier on atomic_load with memory model __ATOMIC_CONSUME. PowerPC pointer loads don't need a barrier. Ref: http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html . As best I can see, mem_thread_fence should not be changed similarly, since __ATOMIC_CONSUME doesn't really make sense on a fence. So a fence with __ATOMIC_CONSUME ought to behave as __ATOMIC_ACQUIRE.
The second tweak forces the address used by load_locked and store_conditional to a reg when the address is not legitimate for those instructions, saving reload some work, reducing register pressure and sometimes code size. Not a big deal, just something I noticed a while ago when looking at libgomp. eg. (-original, +patched) @@ -1533,13 +1533,13 @@ 4844: 3f de 00 02 addis r30,r30,2 4848: 3b de 2e 74 addi r30,r30,11892 484c: 80 7e 80 00 lwz r3,-32768(r30) - 4850: 7c 69 1b 78 mr r9,r3 - 4854: 39 09 00 04 addi r8,r9,4 - 4858: 7c 80 40 28 lwarx r4,0,r8 + 4850: 38 63 00 04 addi r3,r3,4 + 4854: 7c 69 1b 78 mr r9,r3 + 4858: 7c 80 48 28 lwarx r4,0,r9 485c: 2c 04 00 00 cmpwi r4,0 4860: 40 82 00 10 bne- 4870 <GOMP_atomic_start+0x50> - 4864: 7d 40 41 2d stwcx. r10,0,r8 - 4868: 40 a2 ff ec bne- 4854 <GOMP_atomic_start+0x34> + 4864: 7d 40 49 2d stwcx. r10,0,r9 + 4868: 40 a2 ff f0 bne- 4858 <GOMP_atomic_start+0x38> 486c: 4c 00 01 2c isync 4870: 90 81 00 08 stw r4,8(r1) 4874: 40 82 00 18 bne- 488c <GOMP_atomic_start+0x6c> @@ -1548,9 +1548,9 @@ 4880: 38 21 00 20 addi r1,r1,32 4884: 7c 08 03 a6 mtlr r0 4888: 4e 80 00 20 blr - 488c: 38 63 00 04 addi r3,r3,4 - 4890: 48 00 79 c1 bl c250 <gomp_mutex_lock_slow> - 4894: 4b ff ff e4 b 4878 <GOMP_atomic_start+0x58> + 488c: 48 00 79 c5 bl c250 <gomp_mutex_lock_slow> + 4890: 4b ff ff e8 b 4878 <GOMP_atomic_start+0x58> + 4894: 60 00 00 00 nop 4898: 60 00 00 00 nop 489c: 60 00 00 00 nop Bootstrapped and regression tested powerpc64-linux. OK for mainline? * config/rs6000/sync.md (atomic_load): Don't emit synchronisation barrier for MEMMODEL_CONSUME. * config/rs6000/rs6000.c (rs6000_pre_atomic_barrier): Pass in and return mem. Convert to indirect addressing if not indirect or indexed. Adjust all callers. 
Index: gcc/config/rs6000/sync.md =================================================================== --- gcc/config/rs6000/sync.md (revision 188723) +++ gcc/config/rs6000/sync.md (working copy) @@ -126,8 +126,8 @@ switch (model) { case MEMMODEL_RELAXED: + case MEMMODEL_CONSUME: break; - case MEMMODEL_CONSUME: case MEMMODEL_ACQUIRE: case MEMMODEL_SEQ_CST: emit_insn (gen_loadsync (operands[0])); Index: gcc/config/rs6000/rs6000.c =================================================================== --- gcc/config/rs6000/rs6000.c (revision 188723) +++ gcc/config/rs6000/rs6000.c (working copy) @@ -16527,9 +16572,19 @@ emit_store_conditional (enum machine_mode mode, rt /* Expand barriers before and after a load_locked/store_cond sequence. */ -static void -rs6000_pre_atomic_barrier (enum memmodel model) +static rtx +rs6000_pre_atomic_barrier (rtx mem, enum memmodel model) { + rtx addr = XEXP (mem, 0); + int strict_p = (reload_in_progress || reload_completed); + + if (!legitimate_indirect_address_p (addr, strict_p) + && !legitimate_indexed_address_p (addr, strict_p)) + { + addr = force_reg (Pmode, addr); + mem = replace_equiv_address_nv (mem, addr); + } + switch (model) { case MEMMODEL_RELAXED: @@ -16546,6 +16601,7 @@ emit_store_conditional (enum machine_mode mode, rt default: gcc_unreachable (); } + return mem; } static void @@ -16684,7 +16740,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operand else if (reg_overlap_mentioned_p (retval, oldval)) oldval = copy_to_reg (oldval); - rs6000_pre_atomic_barrier (mod_s); + mem = rs6000_pre_atomic_barrier (mem, mod_s); label1 = NULL_RTX; if (!is_weak) @@ -16769,7 +16825,7 @@ rs6000_expand_atomic_exchange (rtx operands[]) mode = SImode; } - rs6000_pre_atomic_barrier (model); + mem = rs6000_pre_atomic_barrier (mem, model); label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); emit_label (XEXP (label, 0)); @@ -16853,7 +16909,7 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx m mode = SImode; } - rs6000_pre_atomic_barrier 
(model); + mem = rs6000_pre_atomic_barrier (mem, model); label = gen_label_rtx (); emit_label (label); -- Alan Modra Australia Development Lab, IBM