On Wed, Sep 13, 2023 at 3:55 PM Wilco Dijkstra via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
>
> __sync_val_compare_and_swap may be used on 128-bit types and either calls the
> outline atomic code or uses an inline loop.  On AArch64 LDXP is only atomic if
> the value is stored successfully using STXP, but the current implementations
> do not perform the store if the comparison fails.  In this case the value
> returned is not read atomically.
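
For anyone else reading along, a minimal sketch of the affected pattern
(my example, not taken from the patch): a 16-byte CAS whose comparison
fails, where the caller still relies on the returned value being a
single atomic read.

/* Sketch only: __sync_val_compare_and_swap on a 128-bit type.  If the
   compare fails and no store is performed, the LDXP pair that produced
   the return value was never confirmed to be atomic.  */
#include <stdint.h>

__uint128_t
observe (__uint128_t *p)
{
  /* Compare against a value that should never match, so the CAS fails
     and simply returns the current contents of *p.  */
  return __sync_val_compare_and_swap (p, ~(__uint128_t) 0, 0);
}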

IIRC, the previous discussions in this space revolved around the
difficulty of the store writing to read-only memory, which is why I
think we went with LDXP in this form.
Has something changed since then?
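
(Context for other readers; this sketch is my reconstruction, not from
the earlier thread.  A plain 128-bit atomic load may legitimately
target read-only memory, where any implementation that unconditionally
stores would take a write fault.)

/* Sketch of the read-only case: the object lives in .rodata, so an
   atomic load implemented via a CAS or an LDXP/STXP loop that always
   stores would fault here.  */
#include <stdint.h>

static const __uint128_t ro_value = 42;

__uint128_t
load_ro (void)
{
  return __atomic_load_n (&ro_value, __ATOMIC_SEQ_CST);
}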

Reviewed-by: Ramana Radhakrishnan <ram...@gcc.gnu.org>

regards
Ramana

>
> Passes regress/bootstrap, OK for commit?
>
> gcc/ChangeLog:
>         PR target/111404
>         * config/aarch64/aarch64.cc (aarch64_split_compare_and_swap):
>         For 128-bit, store the loaded value and loop if needed.
>
> libgcc/ChangeLog:
>         PR target/111404
>         * config/aarch64/lse.S (__aarch64_cas16_acq_rel): Execute STLXP using
>         either the new value or the loaded value.
>
> ---
>
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 5e8d0a0c91bc7719de2a8c5627b354cf905a4db0..c44c0b979d0cc3755c61dcf566cfddedccebf1ea 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -23413,11 +23413,11 @@ aarch64_split_compare_and_swap (rtx operands[])
>    mem = operands[1];
>    oldval = operands[2];
>    newval = operands[3];
> -  is_weak = (operands[4] != const0_rtx);
>    model_rtx = operands[5];
>    scratch = operands[7];
>    mode = GET_MODE (mem);
>    model = memmodel_from_int (INTVAL (model_rtx));
> +  is_weak = operands[4] != const0_rtx && mode != TImode;
>
>    /* When OLDVAL is zero and we want the strong version we can emit a tighter
>      loop:
> @@ -23478,6 +23478,33 @@ aarch64_split_compare_and_swap (rtx operands[])
>    else
>      aarch64_gen_compare_reg (NE, scratch, const0_rtx);
>
> +  /* 128-bit LDAXP is not atomic unless STLXP succeeds.  So for a mismatch,
> +     store the returned value and loop if the STLXP fails.  */
> +  if (mode == TImode)
> +    {
> +      rtx_code_label *label3 = gen_label_rtx ();
> +      emit_jump_insn (gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (Pmode, label3)));
> +      emit_barrier ();
> +
> +      emit_label (label2);
> +      aarch64_emit_store_exclusive (mode, scratch, mem, rval, model_rtx);
> +
> +      if (aarch64_track_speculation)
> +       {
> +         /* Emit an explicit compare instruction, so that we can correctly
> +            track the condition codes.  */
> +         rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
> +         x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
> +       }
> +      else
> +       x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
> +      x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> +                               gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
> +      aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
> +
> +      label2 = label3;
> +    }
> +
>    emit_label (label2);
>
>    /* If we used a CBNZ in the exchange loop emit an explicit compare with RVAL
> diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
> index dde3a28e07b13669533dfc5e8fac0a9a6ac33dbd..ba05047ff02b6fc5752235bffa924fc4a2f48c04 100644
> --- a/libgcc/config/aarch64/lse.S
> +++ b/libgcc/config/aarch64/lse.S
> @@ -160,6 +160,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
>  #define tmp0   16
>  #define tmp1   17
>  #define tmp2   15
> +#define tmp3   14
> +#define tmp4   13
>
>  #define BTI_C  hint    34
>
> @@ -233,10 +235,11 @@ STARTFN   NAME(cas)
>  0:     LDXP            x0, x1, [x4]
>         cmp             x0, x(tmp0)
>         ccmp            x1, x(tmp1), #0, eq
> -       bne             1f
> -       STXP            w(tmp2), x2, x3, [x4]
> -       cbnz            w(tmp2), 0b
> -1:     BARRIER
> +       csel            x(tmp2), x2, x0, eq
> +       csel            x(tmp3), x3, x1, eq
> +       STXP            w(tmp4), x(tmp2), x(tmp3), [x4]
> +       cbnz            w(tmp4), 0b
> +       BARRIER
>         ret
>
>  #endif
>
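
To summarise the lse.S change for readers who don't think in
LDXP/STLXP: below is a C-style model of the new loop (my paraphrase;
ldxp and stlxp are toy single-threaded stand-ins for the instructions,
not real intrinsics).  The key point is that the store now always
happens, with the csel pair picking either the new value or the value
just loaded.

#include <stdint.h>
#include <stdbool.h>

typedef struct { uint64_t lo, hi; } u128;

/* Toy models of the exclusive pair; the real instructions interact
   with the exclusive monitor, which this ignores.  */
static u128 ldxp (u128 *p) { return *p; }
static bool stlxp (u128 *p, u128 v) { *p = v; return true; }

u128
cas16 (u128 *ptr, u128 expected, u128 desired)
{
  u128 old, store;
  do
    {
      old = ldxp (ptr);
      bool eq = old.lo == expected.lo && old.hi == expected.hi;
      /* The fix: on mismatch, store back OLD rather than skipping the
         store, so a successful STLXP confirms the LDXP was atomic.  */
      store = eq ? desired : old;
    }
  while (!stlxp (ptr, store));
  return old;
}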
