Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
---
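Notes (not part of the commit message): a quick illustration of the
intended calling convention for the new helpers.  This is only a
sketch; "desc", "set_dirty" and "FLAG_DIRTY" are hypothetical names,
assuming a 16-byte, 16-aligned descriptor word.

    /* Set flag bits in a 128-bit descriptor and return the old value. */
    static Int128 set_dirty(Int128 *desc)
    {
        Int128 mask = int128_make64(FLAG_DIRTY);  /* hypothetical flag bit */
        return atomic16_fetch_or(desc, mask);     /* contents before the OR */
    }

All three helpers return the previous contents of *ptr, matching the
__atomic_fetch_* convention.
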
 host/include/aarch64/host/atomic128-cas.h.inc | 57 +++++++++++
 host/include/generic/host/atomic128-cas.h.inc | 96 +++++++++++++++++++
 2 files changed, 153 insertions(+)

diff --git a/host/include/aarch64/host/atomic128-cas.h.inc b/host/include/aarch64/host/atomic128-cas.h.inc
index 991da4ef54..aec27df182 100644
--- a/host/include/aarch64/host/atomic128-cas.h.inc
+++ b/host/include/aarch64/host/atomic128-cas.h.inc
@@ -38,6 +38,63 @@ static inline Int128 atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
     return int128_make128(oldl, oldh);
 }
 
+static inline Int128 atomic16_xchg(Int128 *ptr, Int128 new)
+{
+    uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
+    uint64_t oldl, oldh;
+    uint32_t tmp;
+
+    asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
+        "stlxp %w[tmp], %[newl], %[newh], %[mem]\n\t"
+        "cbnz %w[tmp], 0b"
+        : [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
+          [oldl] "=&r"(oldl), [oldh] "=&r"(oldh)
+        : [newl] "r"(newl), [newh] "r"(newh)
+        : "memory");
+
+    return int128_make128(oldl, oldh);
+}
+
+static inline Int128 atomic16_fetch_and(Int128 *ptr, Int128 new)
+{
+    uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
+    uint64_t oldl, oldh, tmpl, tmph;
+    uint32_t tmp;
+
+    asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
+        "and %[tmpl], %[oldl], %[newl]\n\t"
+        "and %[tmph], %[oldh], %[newh]\n\t"
+        "stlxp %w[tmp], %[tmpl], %[tmph], %[mem]\n\t"
+        "cbnz %w[tmp], 0b"
+        : [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
+          [oldl] "=&r"(oldl), [oldh] "=&r"(oldh),
+          [tmpl] "=&r"(tmpl), [tmph] "=&r"(tmph)
+        : [newl] "r"(newl), [newh] "r"(newh)
+        : "memory");
+
+    return int128_make128(oldl, oldh);
+}
+
+static inline Int128 atomic16_fetch_or(Int128 *ptr, Int128 new)
+{
+    uint64_t newl = int128_getlo(new), newh = int128_gethi(new);
+    uint64_t oldl, oldh, tmpl, tmph;
+    uint32_t tmp;
+
+    asm("0: ldaxp %[oldl], %[oldh], %[mem]\n\t"
+        "orr %[tmpl], %[oldl], %[newl]\n\t"
+        "orr %[tmph], %[oldh], %[newh]\n\t"
+        "stlxp %w[tmp], %[tmpl], %[tmph], %[mem]\n\t"
+        "cbnz %w[tmp], 0b"
+        : [mem] "+m"(*ptr), [tmp] "=&r"(tmp),
+          [oldl] "=&r"(oldl), [oldh] "=&r"(oldh),
+          [tmpl] "=&r"(tmpl), [tmph] "=&r"(tmph)
+        : [newl] "r"(newl), [newh] "r"(newh)
+        : "memory");
+
+    return int128_make128(oldl, oldh);
+}
+
 # define CONFIG_CMPXCHG128 1
 # define HAVE_CMPXCHG128 1
 #endif
diff --git a/host/include/generic/host/atomic128-cas.h.inc b/host/include/generic/host/atomic128-cas.h.inc
index 6b40cc2271..990162c56f 100644
--- a/host/include/generic/host/atomic128-cas.h.inc
+++ b/host/include/generic/host/atomic128-cas.h.inc
@@ -23,6 +23,51 @@ atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
     r.i = qatomic_cmpxchg__nocheck(ptr_align, c.i, n.i);
     return r.s;
 }
+
+/*
+ * Since we're looping anyway, use weak compare and swap.
+ * If the host supports weak, this will eliminate a second loop hidden
+ * within the atomic operation itself; otherwise the weak parameter is
+ * ignored.
+ */
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_xchg(Int128 *ptr, Int128 new)
+{
+    __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128 old = *ptr_align;
+
+    while (!__atomic_compare_exchange_n(ptr_align, &old, new, true,
+                                        __ATOMIC_SEQ_CST, 0)) {
+        continue;
+    }
+    return old;
+}
+
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_fetch_and(Int128 *ptr, Int128 val)
+{
+    __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128 old = *ptr_align;
+
+    while (!__atomic_compare_exchange_n(ptr_align, &old, old & val, true,
+                                        __ATOMIC_SEQ_CST, 0)) {
+        continue;
+    }
+    return old;
+}
+
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_fetch_or(Int128 *ptr, Int128 val)
+{
+    __int128_t *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128 old = *ptr_align;
+
+    while (!__atomic_compare_exchange_n(ptr_align, &old, old | val, true,
+                                        __ATOMIC_SEQ_CST, 0)) {
+        continue;
+    }
+    return old;
+}
 # define HAVE_CMPXCHG128 1
 #elif defined(CONFIG_CMPXCHG128)
 static inline Int128 ATTRIBUTE_ATOMIC128_OPT
@@ -36,6 +81,57 @@ atomic16_cmpxchg(Int128 *ptr, Int128 cmp, Int128 new)
     r.i = __sync_val_compare_and_swap_16(ptr_align, c.i, n.i);
     return r.s;
 }
+
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_xchg(Int128 *ptr, Int128 new)
+{
+    Int128Aligned *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128Alias o, n;
+
+    n.s = new;
+    o.s = *ptr_align;
+    while (1) {
+        __int128 c = __sync_val_compare_and_swap_16(ptr_align, o.i, n.i);
+        if (c == o.i) {
+            return o.s;
+        }
+        o.i = c;
+    }
+}
+
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_fetch_and(Int128 *ptr, Int128 val)
+{
+    Int128Aligned *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128Alias o, v;
+
+    v.s = val;
+    o.s = *ptr_align;
+    while (1) {
+        __int128 c = __sync_val_compare_and_swap_16(ptr_align, o.i, o.i & v.i);
+        if (c == o.i) {
+            return o.s;
+        }
+        o.i = c;
+    }
+}
+
+static inline Int128 ATTRIBUTE_ATOMIC128_OPT
+atomic16_fetch_or(Int128 *ptr, Int128 val)
+{
+    Int128Aligned *ptr_align = __builtin_assume_aligned(ptr, 16);
+    Int128Alias o, v;
+
+    v.s = val;
+    o.s = *ptr_align;
+    while (1) {
+        __int128 c = __sync_val_compare_and_swap_16(ptr_align, o.i, o.i | v.i);
+        if (c == o.i) {
+            return o.s;
+        }
+        o.i = c;
+    }
+}
 # define HAVE_CMPXCHG128 1
 #else
 /* Fallback definition that must be optimized away, or error.  */
-- 
2.43.0

