As an alternative to changing the JIT to produce SWPAL/CASAL, because I wasn't sure if *all* aarch64 targets support these, I tried adding DMB ISH or DMB SY to the end of the generated code sequences. Surprisingly, this did not fix the issue! So there's perhaps something fishy about the LDAXR-STLXR sequences themselves?

So for now I'll stick with SWPAL/CASAL on my own machine, since this does seem to work well enough to let both my own code and fibers run.

Tony
diff --git a/libguile/lightening/lightening/aarch64-cpu.c b/libguile/lightening/lightening/aarch64-cpu.c
index 13aa351e9..bff583e33 100644
--- a/libguile/lightening/lightening/aarch64-cpu.c
+++ b/libguile/lightening/lightening/aarch64-cpu.c
@@ -225,6 +225,7 @@ oxxrs(jit_state_t *_jit, int32_t Op,
 #define A64_STLR                      0xc89ffc00
 #define A64_LDAXR                     0xc85ffc00
 #define A64_STLXR                     0xc800fc00
+#define A64_DMB                       0xd50330bf
 #define A64_STRBI                     0x39000000
 #define A64_LDRBI                     0x39400000
 #define A64_LDRSBI                    0x39800000
@@ -675,6 +676,31 @@ STLXR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm)
   return oxxx(_jit, A64_STLXR, Rt, Rn, Rm);
 }
 
+static void
+DMB(jit_state_t *_jit, int32_t CRm)  // emit one DMB instruction; CRm selects the barrier option
+{
+  uint32_t inst = A64_DMB;  // base opcode, with the option field still zero
+  inst = write_unsigned_bitfield(inst, CRm, 4, 8);  // presumably a 4-bit field at bit 8 (CRm) -- confirm helper's argument order
+  emit_u32_with_pool(_jit, inst);
+}
+
+static void
+DMB_ISH(jit_state_t *_jit)
+{
+  DMB(_jit, 11);
+  // ^ 11 (0b1011) = ISH, "Inner Shareable". This is apparently what Java uses.
+  // See:
+  //  - https://gist.github.com/RaasAhsan/8e3554a41e07068536425ca0de46c9e8
+  //  - https://mail.openjdk.org/pipermail/hotspot-dev/2021-March/049694.html
+  //  - https://bugs.openjdk.org/browse/JDK-8262519
+}
+
+static void
+DMB_SY(jit_state_t *_jit)
+{
+  DMB(_jit, 15);  // 15 (0b1111) = SY, "Full system"
+}
+
 static void
 LDRSB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) 
 {
@@ -2541,6 +2567,7 @@ swap_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t val)
   jit_patch_there(_jit, bnei(_jit, result, 0), retry);
   if (dst == val) unget_temp_gpr(_jit);
   unget_temp_gpr(_jit);
+  DMB_SY(_jit);
 }
 
 static void
@@ -2562,6 +2589,7 @@ cas_atomic(jit_state_t *_jit, int32_t dst, int32_t loc, int32_t expected,
   jit_patch_here(_jit, bad);
   movr(_jit, dst, dst_or_tmp);
   unget_temp_gpr(_jit);
+  DMB_SY(_jit);
 }
 
 static void

Reply via email to