The Linux Kernel Memory Model [1][2] requires RMW operations that have a
return value to be fully ordered.

BPF atomic operations with BPF_FETCH (including BPF_XCHG and
BPF_CMPXCHG) return a value and therefore need to be JITed as fully
ordered operations. The powerpc JIT currently emits relaxed operations
for these.
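
A minimal sketch of what "fully ordered" means here, assuming the usual
powerpc pattern of a full barrier on both sides of the lwarx/stwcx. loop
(the same pattern referenced via __cmpxchg_u32() in the comments below).
The function name and inline-asm constraints are illustrative, not taken
from the kernel sources:

  /* Sketch: fully ordered 32-bit fetch-and-add on powerpc. */
  static inline unsigned int fetch_add_fully_ordered(unsigned int *p, unsigned int val)
  {
          unsigned int old, tmp;

          __asm__ __volatile__(
  "       sync\n"                 /* full barrier before the RMW            */
  "1:     lwarx   %0,0,%3\n"      /* load old value, set reservation        */
  "       add     %1,%0,%4\n"     /* tmp = old + val                        */
  "       stwcx.  %1,0,%3\n"      /* store tmp if reservation is still held */
  "       bne-    1b\n"           /* retry if the reservation was lost      */
  "       sync\n"                 /* full barrier after the RMW             */
          : "=&r" (old), "=&r" (tmp), "+m" (*p)
          : "r" (p), "r" (val)
          : "cc", "memory");

          return old;
  }

Without the two sync instructions this sequence is only relaxed, which is
what the JIT currently emits for BPF_FETCH operations.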

We can show this by running the following litmus-test:

PPC SB+atomic_add+fetch

{
0:r0=x;  (* dst reg assuming offset is 0 *)
0:r1=2;  (* src reg *)
0:r2=1;
0:r4=y;  (* P0 writes to this, P1 reads this *)
0:r5=z;  (* P1 writes to this, P0 reads this *)
0:r6=0;

1:r2=1;
1:r4=y;
1:r5=z;
}

P0                      | P1            ;
stw         r2, 0(r4)   | stw  r2,0(r5) ;
                        |               ;
loop:lwarx  r3, r6, r0  |               ;
mr          r8, r3      |               ;
add         r3, r3, r1  | sync          ;
stwcx.      r3, r6, r0  |               ;
bne         loop        |               ;
mr          r1, r8      |               ;
                        |               ;
lwa         r7, 0(r5)   | lwa  r7,0(r4) ;

~exists(0:r7=0 /\ 1:r7=0)

Witnesses
Positive: 9 Negative: 3
Condition ~exists (0:r7=0 /\ 1:r7=0)
Observation SB+atomic_add+fetch Sometimes 3 9

This test shows that the older store in P0 is reordered with the newer
load to a different address, even though there is an RMW operation with
fetch between them. Adding a sync before and after the RMW fixes the
issue:

Witnesses
Positive: 9 Negative: 0
Condition ~exists (0:r7=0 /\ 1:r7=0)
Observation SB+atomic_add+fetch Never 0 9
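
For reference, the P0 column of the fixed test looks roughly like this,
with the barriers placed outside the lwarx/stwcx. retry loop (matching
where the JIT emits them in this patch); it illustrates the sync
placement rather than reproducing the re-run litmus file verbatim:

P0                      | P1            ;
stw         r2, 0(r4)   | stw  r2,0(r5) ;
sync                    |               ;
loop:lwarx  r3, r6, r0  |               ;
mr          r8, r3      |               ;
add         r3, r3, r1  | sync          ;
stwcx.      r3, r6, r0  |               ;
bne         loop        |               ;
sync                    |               ;
mr          r1, r8      |               ;
                        |               ;
lwa         r7, 0(r5)   | lwa  r7,0(r4) ;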

[1] https://www.kernel.org/doc/Documentation/memory-barriers.txt
[2] https://www.kernel.org/doc/Documentation/atomic_t.txt

Fixes: 65112709115f ("powerpc/bpf/64: add support for BPF_ATOMIC bitwise operations")
Signed-off-by: Puranjay Mohan <puran...@kernel.org>
Acked-by: Paul E. McKenney <paul...@kernel.org>
---
Changes in v2 -> v3:
v2: https://lore.kernel.org/all/20240508115404.74823-1-puran...@kernel.org/
- Emit the sync outside the loop so it doesn't get executed every time.
- Minor coding style changes.

Changes in v1 -> v2:
v1: https://lore.kernel.org/all/20240507175439.119467-1-puran...@kernel.org/
- Don't emit `sync` for non-SMP kernels as that adds unnecessary overhead.
---

 arch/powerpc/net/bpf_jit_comp32.c | 12 ++++++++++++
 arch/powerpc/net/bpf_jit_comp64.c | 12 ++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c
index 2f39c50ca729..35f64dcfa68e 100644
--- a/arch/powerpc/net/bpf_jit_comp32.c
+++ b/arch/powerpc/net/bpf_jit_comp32.c
@@ -852,6 +852,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
 
                        /* Get offset into TMP_REG */
                        EMIT(PPC_RAW_LI(tmp_reg, off));
+                       /*
+                        * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
+                        * before and after the operation.
+                        *
+                        * This is a requirement in the Linux Kernel Memory Model.
+                        * See __cmpxchg_u32() in asm/cmpxchg.h as an example.
+                        */
+                       if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP))
+                               EMIT(PPC_RAW_SYNC());
                        tmp_idx = ctx->idx * 4;
                        /* load value from memory into r0 */
                        EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0));
@@ -905,6 +914,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
 
                        /* For the BPF_FETCH variant, get old data into src_reg */
                        if (imm & BPF_FETCH) {
+                               /* Emit 'sync' to enforce full ordering */
+                               if (IS_ENABLED(CONFIG_SMP))
+                                       EMIT(PPC_RAW_SYNC());
                                EMIT(PPC_RAW_MR(ret_reg, ax_reg));
                                if (!fp->aux->verifier_zext)
                                        EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 79f23974a320..884eef1b3973 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -803,6 +803,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
 
                        /* Get offset into TMP_REG_1 */
                        EMIT(PPC_RAW_LI(tmp1_reg, off));
+                       /*
+                        * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync'
+                        * before and after the operation.
+                        *
+                        * This is a requirement in the Linux Kernel Memory Model.
+                        * See __cmpxchg_u64() in asm/cmpxchg.h as an example.
+                        */
+                       if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP))
+                               EMIT(PPC_RAW_SYNC());
                        tmp_idx = ctx->idx * 4;
                        /* load value from memory into TMP_REG_2 */
                        if (size == BPF_DW)
@@ -865,6 +874,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code
                        PPC_BCC_SHORT(COND_NE, tmp_idx);
 
                        if (imm & BPF_FETCH) {
+                               /* Emit 'sync' to enforce full ordering */
+                               if (IS_ENABLED(CONFIG_SMP))
+                                       EMIT(PPC_RAW_SYNC());
                                EMIT(PPC_RAW_MR(ret_reg, _R0));
                                /*
                                 * Skip unnecessary zero-extension for 32-bit cmpxchg.
-- 
2.40.1
