Because I wasn't sure whether *all* aarch64 targets support SWPAL/CASAL,
I tried an alternative to changing the JIT to produce them: adding DMB
ISH or DMB SY to the end of the generated code sequences. Surprisingly,
this did not fix the issue! So perhaps there's something fishy about the
LDAXR-STLXR sequences themselves?
So for now I'll stick with SWPAL/CASAL on my own machine, since that
does seem to work well enough to let both my own code and fibers run.
Tony
diff --git a/libguile/lightening/lightening/aarch64-cpu.c b/libguile/lightening/lightening/aarch64-cpu.c
index 13aa351e9..bff583e33 100644
--- a/libguile/lightening/lightening/aarch64-cpu.c
+++ b/libguile/lightening/lightening/aarch64-cpu.c
@@ -225,6 +225,7 @@ oxxrs(jit_state_t *_jit, int32_t Op,
#define A64_STLR 0xc89ffc00
#define A64_LDAXR 0xc85ffc00
#define A64_STLXR 0xc800fc00
+#define A64_DMB 0xd50330bf /* base DMB encoding; bits 11:8 are zero here and are filled in by DMB() */
#define A64_STRBI 0x39000000
#define A64_LDRBI 0x39400000
#define A64_LDRSBI 0x39800000
@@ -675,6 +676,31 @@ STLXR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
return oxxx(_jit, A64_STLXR, Rt, Rn, Rm);
}
+static void
+DMB(jit_state_t *_jit, int32_t CRm)
+{ // Emit one DMB barrier instruction whose option field is taken from CRm.
+ uint32_t inst = A64_DMB; // start from the base DMB encoding
+ inst = write_unsigned_bitfield(inst, CRm, 4, 8); // insert CRm; presumably a 4-bit field at bit offset 8 -- confirm against write_unsigned_bitfield
+ emit_u32_with_pool(_jit, inst); // hand the finished 32-bit word to the emitter
+}
+
+static void
+DMB_ISH(jit_state_t *_jit)
+{
+ DMB(_jit, 11);
+ // 11 (0b1011) selects the ISH ("Inner Shareable") barrier option, which
+ // is reportedly what Java's JIT uses. See:
+ // - https://gist.github.com/RaasAhsan/8e3554a41e07068536425ca0de46c9e8
+ // - https://mail.openjdk.org/pipermail/hotspot-dev/2021-March/049694.html
+ // - https://bugs.openjdk.org/browse/JDK-8262519
+}
+
+static void
+DMB_SY(jit_state_t *_jit)
+{
+ DMB(_jit, 15); // 15 (0b1111) selects the SY barrier option
+}
+
static void
LDRSB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
{
@@ -2541,6 +2567,7 @@ swap_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t val)
jit_patch_there(_jit, bnei(_jit, result, 0), retry);
if (dst == val) unget_temp_gpr(_jit);
unget_temp_gpr(_jit);
+ DMB_SY(_jit);
}
static void
@@ -2562,6 +2589,7 @@ cas_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t expected,
jit_patch_here(_jit, bad);
movr(_jit, dst, dst_or_tmp);
unget_temp_gpr(_jit);
+ DMB_SY(_jit);
}
static void