Ping.

(to be clear, patch 2/2 is my previous followup in this thread, I forgot to
adjust the subject line; it should have said:
"[PATCH 2/2] x86: add compiler memory barriers when expanding atomic_load").
On Wed, 10 May 2017, Alexander Monakov wrote:

> Hi,
> 
> When expanding __atomic_thread_fence(x) to RTL, the i386 backend doesn't emit
> any instruction except for x==__ATOMIC_SEQ_CST (which emits 'mfence').  This 
> is incorrect: although no machine barrier is needed, the compiler still must
> emit a compiler barrier into the IR to prevent propagation and code motion
> across the fence.  The testcase added with the patch shows how it can lead
> to a miscompilation.
> 
> The proposed patch fixes it by handling non-seq-cst fences exactly like
> __atomic_signal_fence is expanded, by emitting asm volatile("":::"memory").
> 
> The s390 backend uses the a similar mem_thread_fence expansion, so the patch
> fixes both backends in the same manner.
> 
> Bootstrapped and regtested on x86_64; also checked that s390-linux cc1
> successfully builds after the change.  OK for trunk?
> 
> (the original source code in the PR was misusing atomic fences by doing
> something like
> 
>   void f(int *p)
>   {
>     while (*p)
>       __atomic_thread_fence(__ATOMIC_ACQUIRE);
>   }
> 
> but since *p is not atomic, a concurrent write to *p would cause a data race 
> and
> thus invoke undefined behavior; also, if *p is false prior to entering the 
> loop,
> execution does not encounter the fence; new test here has code usable without 
> UB)
> 
> Alexander
> 
>       * config/i386/sync.md (mem_thread_fence): Emit a compiler barrier for
>       non-seq-cst fences.  Adjust comment.
>       * config/s390/s390.md (mem_thread_fence): Likewise.
>       * optabs.c (expand_asm_memory_barrier): Export.
>       * optabs.h (expand_asm_memory_barrier): Declare.
> testsuite/
>       * gcc.target/i386/pr80640-1.c: New testcase.
> ---
>  gcc/config/i386/sync.md                   |  7 ++++++-
>  gcc/config/s390/s390.md                   | 11 +++++++++--
>  gcc/optabs.c                              |  2 +-
>  gcc/optabs.h                              |  3 +++
>  gcc/testsuite/gcc.target/i386/pr80640-1.c | 12 ++++++++++++
>  5 files changed, 31 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr80640-1.c
> 
> diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
> index 20d46fe..619d53b 100644
> --- a/gcc/config/i386/sync.md
> +++ b/gcc/config/i386/sync.md
> @@ -108,7 +108,7 @@ (define_expand "mem_thread_fence"
>    enum memmodel model = memmodel_from_int (INTVAL (operands[0]));
>  
>    /* Unless this is a SEQ_CST fence, the i386 memory model is strong
> -     enough not to require barriers of any kind.  */
> +     enough not to require a processor barrier of any kind.  */
>    if (is_mm_seq_cst (model))
>      {
>        rtx (*mfence_insn)(rtx);
> @@ -124,6 +124,11 @@ (define_expand "mem_thread_fence"
>  
>        emit_insn (mfence_insn (mem));
>      }
> +  else if (!is_mm_relaxed (model))
> +    {
> +      /* However, a compiler barrier is still required.  */
> +      expand_asm_memory_barrier ();
> +    }
>    DONE;
>  })
>  
> diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
> index c9fd19a..65e54c4 100644
> --- a/gcc/config/s390/s390.md
> +++ b/gcc/config/s390/s390.md
> @@ -10109,14 +10109,21 @@ (define_expand "mem_thread_fence"
>    [(match_operand:SI 0 "const_int_operand")]         ;; model
>    ""
>  {
> +  enum memmodel model = memmodel_from_int (INTVAL (operands[0]));
> +
>    /* Unless this is a SEQ_CST fence, the s390 memory model is strong
> -     enough not to require barriers of any kind.  */
> -  if (is_mm_seq_cst (memmodel_from_int (INTVAL (operands[0]))))
> +     enough not to require a processor barrier of any kind.  */
> +  if (is_mm_seq_cst (model))
>      {
>        rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
>        MEM_VOLATILE_P (mem) = 1;
>        emit_insn (gen_mem_thread_fence_1 (mem));
>      }
> +  else if (!is_mm_relaxed (model))
> +    {
> +      /* However, a compiler barrier is still required.  */
> +      expand_asm_memory_barrier ();
> +    }
>    DONE;
>  })
>  
> diff --git a/gcc/optabs.c b/gcc/optabs.c
> index 48e37f8..1f1fbc3 100644
> --- a/gcc/optabs.c
> +++ b/gcc/optabs.c
> @@ -6269,7 +6269,7 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx 
> *ptarget_oval,
>  
>  /* Generate asm volatile("" : : : "memory") as the memory barrier.  */
>  
> -static void
> +void
>  expand_asm_memory_barrier (void)
>  {
>    rtx asm_op, clob;
> diff --git a/gcc/optabs.h b/gcc/optabs.h
> index b2dd31a..aca6755 100644
> --- a/gcc/optabs.h
> +++ b/gcc/optabs.h
> @@ -322,6 +322,9 @@ extern bool expand_atomic_compare_and_swap (rtx *, rtx *, 
> rtx, rtx, rtx, bool,
>  extern void expand_mem_thread_fence (enum memmodel);
>  extern void expand_mem_signal_fence (enum memmodel);
>  
> +/* Generate a compile-time memory barrier.  */
> +extern void expand_asm_memory_barrier (void);
> +
>  rtx expand_atomic_load (rtx, rtx, enum memmodel);
>  rtx expand_atomic_store (rtx, rtx, enum memmodel, bool);
>  rtx expand_atomic_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel, 
> diff --git a/gcc/testsuite/gcc.target/i386/pr80640-1.c 
> b/gcc/testsuite/gcc.target/i386/pr80640-1.c
> new file mode 100644
> index 0000000..f1d1e55
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr80640-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fno-tree-ter" } */
> +/* { dg-final { scan-assembler-not "xorl\[\\t \]*\\\%eax,\[\\t \]*%eax" } } 
> */
> +
> +int f(int *p, volatile _Bool *p1, volatile _Bool *p2)
> +{
> +  int v = *p;
> +  *p1 = !v;
> +  while (*p2);
> +  __atomic_thread_fence (__ATOMIC_ACQUIRE);
> +  return v - *p;
> +}
> 

Reply via email to