Revised patch, which incorporates fixes from Alexander's review comments. I removed some implementation details from Alexander's description of the memory_blockage named pattern.
2017-09-05  Uros Bizjak  <ubiz...@gmail.com>

	* target-insns.def: Add memory_blockage.
	* optabs.c (expand_memory_blockage): New function.
	(expand_asm_memory_barrier): Rename ...
	(expand_asm_memory_blockage): ... to this.
	(expand_mem_thread_fence): Call expand_memory_blockage
	instead of expand_asm_memory_barrier.
	(expand_mem_signal_fence): Ditto.
	(expand_atomic_load): Ditto.
	(expand_atomic_store): Ditto.
	* doc/md.texi (Standard Pattern Names For Generation):
	Document memory_blockage instruction pattern.

Bootstrapped and regression tested together with a followup x86 patch
on x86_64-linux-gnu {,-m32}.

OK for mainline?

Uros.
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 14aab9474bc2..c4c113850fe1 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6734,6 +6734,15 @@ scheduler and other passes from moving instructions and using register
 equivalences across the boundary defined by the blockage insn.
 This needs to be an UNSPEC_VOLATILE pattern or a volatile ASM.
 
+@cindex @code{memory_blockage} instruction pattern
+@item @samp{memory_blockage}
+This pattern, if defined, represents a compiler memory barrier, and will be
+placed at points across which RTL passes may not propagate memory accesses.
+This instruction needs to read and write volatile BLKmode memory.  It does
+not need to generate any machine instruction.  If this pattern is not defined,
+the compiler falls back to emitting an instruction corresponding
+to @code{asm volatile ("" ::: "memory")}.
+
 @cindex @code{memory_barrier} instruction pattern
 @item @samp{memory_barrier}
 If the target memory model is not fully synchronous, then this pattern
diff --git a/gcc/optabs.c b/gcc/optabs.c
index b65707080eee..94060036e61f 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6276,10 +6276,10 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval,
   return true;
 }
 
-/* Generate asm volatile("" : : : "memory") as the memory barrier.  */
+/* Generate asm volatile("" : : : "memory") as the memory blockage.  */
 
 static void
-expand_asm_memory_barrier (void)
+expand_asm_memory_blockage (void)
 {
   rtx asm_op, clob;
 
@@ -6295,6 +6295,17 @@ expand_asm_memory_barrier (void)
   emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, asm_op, clob)));
 }
 
+/* Do not propagate memory accesses across this point.  */
+
+static void
+expand_memory_blockage (void)
+{
+  if (targetm.have_memory_blockage ())
+    emit_insn (targetm.gen_memory_blockage ());
+  else
+    expand_asm_memory_blockage ();
+}
+
 /* This routine will either emit the mem_thread_fence pattern or issue a
    sync_synchronize to generate a fence for memory model MEMMODEL.  */
 
@@ -6306,14 +6317,14 @@ expand_mem_thread_fence (enum memmodel model)
   if (targetm.have_mem_thread_fence ())
     {
       emit_insn (targetm.gen_mem_thread_fence (GEN_INT (model)));
-      expand_asm_memory_barrier ();
+      expand_memory_blockage ();
     }
   else if (targetm.have_memory_barrier ())
     emit_insn (targetm.gen_memory_barrier ());
   else if (synchronize_libfunc != NULL_RTX)
     emit_library_call (synchronize_libfunc, LCT_NORMAL, VOIDmode);
   else
-    expand_asm_memory_barrier ();
+    expand_memory_blockage ();
 }
 
 /* Emit a signal fence with given memory model.  */
@@ -6324,7 +6335,7 @@ expand_mem_signal_fence (enum memmodel model)
   /* No machine barrier is required to implement a signal fence, but
      a compiler memory barrier must be issued, except for relaxed MM.  */
   if (!is_mm_relaxed (model))
-    expand_asm_memory_barrier ();
+    expand_memory_blockage ();
 }
 
 /* This function expands the atomic load operation:
@@ -6346,7 +6357,7 @@ expand_atomic_load (rtx target, rtx mem, enum memmodel model)
       struct expand_operand ops[3];
       rtx_insn *last = get_last_insn ();
       if (is_mm_seq_cst (model))
-	expand_asm_memory_barrier ();
+	expand_memory_blockage ();
 
       create_output_operand (&ops[0], target, mode);
       create_fixed_operand (&ops[1], mem);
@@ -6354,7 +6365,7 @@
       if (maybe_expand_insn (icode, 3, ops))
 	{
 	  if (!is_mm_relaxed (model))
-	    expand_asm_memory_barrier ();
+	    expand_memory_blockage ();
 	  return ops[0].value;
 	}
       delete_insns_since (last);
@@ -6404,14 +6415,14 @@ expand_atomic_store (rtx mem, rtx val, enum memmodel model, bool use_release)
     {
       rtx_insn *last = get_last_insn ();
       if (!is_mm_relaxed (model))
-	expand_asm_memory_barrier ();
+	expand_memory_blockage ();
       create_fixed_operand (&ops[0], mem);
       create_input_operand (&ops[1], val, mode);
       create_integer_operand (&ops[2], model);
       if (maybe_expand_insn (icode, 3, ops))
 	{
 	  if (is_mm_seq_cst (model))
-	    expand_asm_memory_barrier ();
+	    expand_memory_blockage ();
 	  return const0_rtx;
 	}
       delete_insns_since (last);
diff --git a/gcc/target-insns.def b/gcc/target-insns.def
index 4669439c7e1d..75976b2f8d99 100644
--- a/gcc/target-insns.def
+++ b/gcc/target-insns.def
@@ -60,6 +60,7 @@ DEF_TARGET_INSN (jump, (rtx x0))
 DEF_TARGET_INSN (load_multiple, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (mem_thread_fence, (rtx x0))
 DEF_TARGET_INSN (memory_barrier, (void))
+DEF_TARGET_INSN (memory_blockage, (void))
 DEF_TARGET_INSN (movstr, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (nonlocal_goto, (rtx x0, rtx x1, rtx x2, rtx x3))
 DEF_TARGET_INSN (nonlocal_goto_receiver, (void))