https://github.com/peilin-ye updated https://github.com/llvm/llvm-project/pull/108636
>From 885d5141f6707a0fdf4be363351083f8fdf8fd54 Mon Sep 17 00:00:00 2001
From: Peilin Ye <yepei...@google.com>
Date: Sat, 5 Oct 2024 06:44:21 +0000
Subject: [PATCH 1/3] [BPF] Rename isST*() and isLD*() functions in
 BPFMISimplifyPatchable.cpp (NFC)

We are planning to add load (specifically, atomic acquiring load, or
"load-acquire") instructions under the STX instruction class. To make
that easier, rename the isST*() and isLD*() helper functions based on
what the instructions actually do, rather than their instruction class.
---
 .../lib/Target/BPF/BPFMISimplifyPatchable.cpp | 22 +++++++++----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index 39390e8c38f8c1..4a1684ccebb793 100644
--- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -94,35 +94,35 @@ void BPFMISimplifyPatchable::initialize(MachineFunction &MFParm) {
   LLVM_DEBUG(dbgs() << "*** BPF simplify patchable insts pass ***\n\n");
 }
 
-static bool isST(unsigned Opcode) {
+static bool isStoreImm(unsigned Opcode) {
   return Opcode == BPF::STB_imm || Opcode == BPF::STH_imm ||
          Opcode == BPF::STW_imm || Opcode == BPF::STD_imm;
 }
 
-static bool isSTX32(unsigned Opcode) {
+static bool isStore32(unsigned Opcode) {
   return Opcode == BPF::STB32 || Opcode == BPF::STH32 || Opcode == BPF::STW32;
 }
 
-static bool isSTX64(unsigned Opcode) {
+static bool isStore64(unsigned Opcode) {
   return Opcode == BPF::STB || Opcode == BPF::STH || Opcode == BPF::STW ||
          Opcode == BPF::STD;
 }
 
-static bool isLDX32(unsigned Opcode) {
+static bool isLoad32(unsigned Opcode) {
   return Opcode == BPF::LDB32 || Opcode == BPF::LDH32 || Opcode == BPF::LDW32;
 }
 
-static bool isLDX64(unsigned Opcode) {
+static bool isLoad64(unsigned Opcode) {
   return Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW ||
          Opcode == BPF::LDD;
 }
 
-static bool isLDSX(unsigned Opcode) {
+static bool isLoadSext(unsigned Opcode) {
   return Opcode == BPF::LDBSX || Opcode == BPF::LDHSX || Opcode == BPF::LDWSX;
 }
 
 bool BPFMISimplifyPatchable::isLoadInst(unsigned Opcode) {
-  return isLDX32(Opcode) || isLDX64(Opcode) || isLDSX(Opcode);
+  return isLoad32(Opcode) || isLoad64(Opcode) || isLoadSext(Opcode);
 }
 
 void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
@@ -143,11 +143,11 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
     MachineInstr *DefInst = MO.getParent();
     unsigned Opcode = DefInst->getOpcode();
     unsigned COREOp;
-    if (isLDX64(Opcode) || isLDSX(Opcode))
+    if (isLoad64(Opcode) || isLoadSext(Opcode))
       COREOp = BPF::CORE_LD64;
-    else if (isLDX32(Opcode))
+    else if (isLoad32(Opcode))
       COREOp = BPF::CORE_LD32;
-    else if (isSTX64(Opcode) || isSTX32(Opcode) || isST(Opcode))
+    else if (isStore64(Opcode) || isStore32(Opcode) || isStoreImm(Opcode))
       COREOp = BPF::CORE_ST;
     else
       continue;
@@ -160,7 +160,7 @@ void BPFMISimplifyPatchable::checkADDrr(MachineRegisterInfo *MRI,
       // Reject the form:
       //   %1 = ADD_rr %2, %3
       //   *(type *)(%2 + 0) = %1
-      if (isSTX64(Opcode) || isSTX32(Opcode)) {
+      if (isStore64(Opcode) || isStore32(Opcode)) {
         const MachineOperand &Opnd = DefInst->getOperand(0);
         if (Opnd.isReg() && Opnd.getReg() == MO.getReg())
           continue;

>From cd4e9647f942a2f8d57fcd95219744d89f3e2cf3 Mon Sep 17 00:00:00 2001
From: Peilin Ye <yepei...@google.com>
Date: Sat, 5 Oct 2024 07:31:54 +0000
Subject: [PATCH 2/3] [BPF] Add load-acquire and store-release instructions
 under -mcpu=v4

As discussed in [1], introduce BPF instructions with load-acquire and
store-release semantics under -mcpu=v4. The following new flags are
defined:

  BPF_ATOMIC_LOAD   0x10
  BPF_ATOMIC_STORE  0x20

  BPF_RELAXED       0x0
  BPF_ACQUIRE       0x1
  BPF_RELEASE       0x2
  BPF_ACQ_REL       0x3
  BPF_SEQ_CST       0x4

A "load-acquire" is a BPF_STX | BPF_ATOMIC instruction with the 'imm'
field set to BPF_ATOMIC_LOAD | BPF_ACQUIRE (0x11). Similarly, a
"store-release" is a BPF_STX | BPF_ATOMIC instruction with the 'imm'
field set to BPF_ATOMIC_STORE | BPF_RELEASE (0x22).

Unlike existing atomic operations, which only support the BPF_W
(32-bit) and BPF_DW (64-bit) size modifiers, load-acquires and
store-releases also support BPF_B (8-bit) and BPF_H (16-bit). An 8- or
16-bit load-acquire zero-extends the value before writing it to a
32-bit register, just like the ARM64 instruction LDAPRH and friends.

As an example, for -march=bpfel (little-endian):

  long foo(long *ptr) {
      return __atomic_load_n(ptr, __ATOMIC_ACQUIRE);
  }

foo() can be compiled to:

  db 10 00 00 11 00 00 00  r0 = load_acquire((u64 *)(r1 + 0x0))
  95 00 00 00 00 00 00 00  exit

  opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX
  imm (0x00000011): BPF_ATOMIC_LOAD | BPF_ACQUIRE

Similarly:

  void bar(short *ptr, short val) {
      __atomic_store_n(ptr, val, __ATOMIC_RELEASE);
  }

bar() can be compiled to:

  cb 21 00 00 22 00 00 00  store_release((u16 *)(r1 + 0x0), w2)
  95 00 00 00 00 00 00 00  exit

  opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX
  imm (0x00000022): BPF_ATOMIC_STORE | BPF_RELEASE

Inline assembly is also supported. For example:

  asm volatile("%0 = load_acquire((u64 *)(%1 + 0x0))" :
               "=r"(ret) : "r"(ptr) : "memory");

Add two macros, __BPF_FEATURE_LOAD_ACQUIRE and
__BPF_FEATURE_STORE_RELEASE, to let developers detect these new
features in source code. The instructions can also be disabled using
two new llc options, -disable-load-acquire and -disable-store-release,
respectively.
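As an aside (not part of this patch), BPF C code could use the new
feature-test macro to guard its use of these instructions. A minimal
sketch, in which the fallback branch is purely hypothetical and not a
guaranteed equivalent of a real load-acquire:

  #ifdef __BPF_FEATURE_LOAD_ACQUIRE
  /* cpu v4 or newer: emits an actual load-acquire instruction */
  #define load_acquire_u64(p) __atomic_load_n((p), __ATOMIC_ACQUIRE)
  #else
  /* hypothetical fallback: a volatile load plus a compiler barrier;
   * adjust to your own portability requirements */
  #define load_acquire_u64(p)                                           \
      ({                                                                \
          unsigned long long __v = *(volatile unsigned long long *)(p); \
          __asm__ __volatile__("" ::: "memory");                        \
          __v;                                                          \
      })
  #endif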
Using __ATOMIC_RELAXED for __atomic_store{,_n}() will generate a
"plain" store (BPF_MEM | BPF_STX) instruction:

  void foo(short *ptr, short val) {
      __atomic_store_n(ptr, val, __ATOMIC_RELAXED);
  }

  6b 21 00 00 00 00 00 00  *(u16 *)(r1 + 0x0) = w2
  95 00 00 00 00 00 00 00  exit

Similarly, using __ATOMIC_RELAXED for __atomic_load{,_n}() will
generate a zero-extending, "plain" load (BPF_MEM | BPF_LDX)
instruction:

  int foo(char *ptr) {
      return __atomic_load_n(ptr, __ATOMIC_RELAXED);
  }

  71 11 00 00 00 00 00 00  w1 = *(u8 *)(r1 + 0x0)
  bc 10 08 00 00 00 00 00  w0 = (s8)w1
  95 00 00 00 00 00 00 00  exit

Currently __ATOMIC_CONSUME is an alias for __ATOMIC_ACQUIRE. Finally,
using __ATOMIC_SEQ_CST ("sequentially consistent") is not supported yet
and will cause an error.

[1] https://lore.kernel.org/all/20240729183246.4110549-1-yepei...@google.com/
---
 clang/lib/Basic/Targets/BPF.cpp               |   2 +
 .../test/Preprocessor/bpf-predefined-macros.c |   9 ++
 .../lib/Target/BPF/AsmParser/BPFAsmParser.cpp |   2 +
 llvm/lib/Target/BPF/BPFInstrFormats.td        |  17 +++
 llvm/lib/Target/BPF/BPFInstrInfo.td           | 128 ++++++++++++++++++
 .../lib/Target/BPF/BPFMISimplifyPatchable.cpp |  12 +-
 llvm/lib/Target/BPF/BPFSubtarget.cpp          |  10 ++
 llvm/lib/Target/BPF/BPFSubtarget.h            |   5 +-
 llvm/test/CodeGen/BPF/acquire-release.ll      |  80 +++++++++++
 .../CodeGen/BPF/assembler-disassembler-v4.s   |  20 +++
 10 files changed, 280 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/CodeGen/BPF/acquire-release.ll

diff --git a/clang/lib/Basic/Targets/BPF.cpp b/clang/lib/Basic/Targets/BPF.cpp
index f4684765b7ffb3..090f6bf38d6b45 100644
--- a/clang/lib/Basic/Targets/BPF.cpp
+++ b/clang/lib/Basic/Targets/BPF.cpp
@@ -67,6 +67,8 @@ void BPFTargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__BPF_FEATURE_SDIV_SMOD");
     Builder.defineMacro("__BPF_FEATURE_GOTOL");
     Builder.defineMacro("__BPF_FEATURE_ST");
+    Builder.defineMacro("__BPF_FEATURE_LOAD_ACQUIRE");
+    Builder.defineMacro("__BPF_FEATURE_STORE_RELEASE");
   }
 }
 
diff --git a/clang/test/Preprocessor/bpf-predefined-macros.c b/clang/test/Preprocessor/bpf-predefined-macros.c
index 8c2143f767c40c..8fae8d6ddea090 100644
--- a/clang/test/Preprocessor/bpf-predefined-macros.c
+++ b/clang/test/Preprocessor/bpf-predefined-macros.c
@@ -67,6 +67,12 @@ int t;
 #ifdef __BPF_FEATURE_MAY_GOTO
 int u;
 #endif
+#ifdef __BPF_FEATURE_LOAD_ACQUIRE
+int v;
+#endif
+#ifdef __BPF_FEATURE_STORE_RELEASE
+int w;
+#endif
 
 // CHECK: int b;
 // CHECK: int c;
@@ -106,6 +112,9 @@ int u;
 // CPU_V3: int u;
 // CPU_V4: int u;
 
+// CPU_V4: int v;
+// CPU_V4: int w;
+
 // CPU_GENERIC: int g;
 
 // CPU_PROBE: int f;
diff --git a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
index 32ddf11ec31968..8823435faf37d5 100644
--- a/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
+++ b/llvm/lib/Target/BPF/AsmParser/BPFAsmParser.cpp
@@ -237,6 +237,7 @@ struct BPFOperand : public MCParsedAsmOperand {
         .Case("exit", true)
         .Case("lock", true)
         .Case("ld_pseudo", true)
+        .Case("store_release", true)
         .Default(false);
   }
 
@@ -273,6 +274,7 @@ struct BPFOperand : public MCParsedAsmOperand {
         .Case("cmpxchg_64", true)
         .Case("cmpxchg32_32", true)
         .Case("addr_space_cast", true)
+        .Case("load_acquire", true)
         .Default(false);
   }
 };
diff --git a/llvm/lib/Target/BPF/BPFInstrFormats.td b/llvm/lib/Target/BPF/BPFInstrFormats.td
index feffdbc69465ea..3a1286bf83810f 100644
--- a/llvm/lib/Target/BPF/BPFInstrFormats.td
+++ b/llvm/lib/Target/BPF/BPFInstrFormats.td
@@ -48,6 +48,23 @@ def BPF_END : BPFArithOp<0xd>;
 def BPF_XCHG : BPFArithOp<0xe>;
 def BPF_CMPXCHG : BPFArithOp<0xf>;
 
+class BPFAtomicLoadStoreOp<bits<4> val> {
+  bits<4> Value = val;
+}
+
+def BPF_ATOMIC_LOAD : BPFAtomicLoadStoreOp<0x1>;
+def BPF_ATOMIC_STORE : BPFAtomicLoadStoreOp<0x2>;
+
+class BPFAtomicOrdering<bits<4> val> {
+  bits<4> Value = val;
+}
+
+def BPF_RELAXED : BPFAtomicOrdering<0x0>;
+def BPF_ACQUIRE : BPFAtomicOrdering<0x1>;
+def BPF_RELEASE : BPFAtomicOrdering<0x2>;
+def BPF_ACQ_REL : BPFAtomicOrdering<0x3>;
+def BPF_SEQ_CST : BPFAtomicOrdering<0x4>;
+
 class BPFEndDir<bits<1> val> {
   bits<1> Value = val;
 }
diff --git a/llvm/lib/Target/BPF/BPFInstrInfo.td b/llvm/lib/Target/BPF/BPFInstrInfo.td
index 62d6e25f83b59f..40f108077e8f65 100644
--- a/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -60,6 +60,8 @@ def BPFHasSdivSmod : Predicate<"Subtarget->hasSdivSmod()">;
 def BPFNoMovsx : Predicate<"!Subtarget->hasMovsx()">;
 def BPFNoBswap : Predicate<"!Subtarget->hasBswap()">;
 def BPFHasStoreImm : Predicate<"Subtarget->hasStoreImm()">;
+def BPFHasLoadAcquire : Predicate<"Subtarget->hasLoadAcquire()">;
+def BPFHasStoreRelease : Predicate<"Subtarget->hasStoreRelease()">;
 
 class ImmediateAsmOperand<string name> : AsmOperandClass {
   let Name = name;
@@ -566,6 +568,48 @@ let Predicates = [BPFHasALU32, BPFHasStoreImm] in {
             (STB_imm (imm_to_i64 imm:$src), ADDRri:$dst)>;
 }
 
+class STORE_RELEASE<BPFWidthModifer SizeOp, string OpcodeStr, RegisterClass RegTp>
+    : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
+                 (outs),
+                 (ins RegTp:$src, MEMri:$addr),
+                 "store_release(("#OpcodeStr#" *)($addr), $src)",
+                 []> {
+  bits<4> src;
+  bits<20> addr;
+
+  let Inst{51-48} = addr{19-16}; // base reg
+  let Inst{55-52} = src;
+  let Inst{47-32} = addr{15-0}; // offset
+  let Inst{7-4} = BPF_ATOMIC_STORE.Value;
+  let Inst{3-0} = BPF_RELEASE.Value;
+  let BPFClass = BPF_STX;
+}
+
+class STORE_RELEASEi64<BPFWidthModifer Opc, string OpcodeStr>
+    : STORE_RELEASE<Opc, OpcodeStr, GPR>;
+
+class relaxed_store<PatFrag base>
+    : PatFrag<(ops node:$val, node:$ptr), (base node:$val, node:$ptr)> {
+  let IsAtomic = 1;
+  let IsAtomicOrderingReleaseOrStronger = 0;
+}
+
+class releasing_store<PatFrag base>
+    : PatFrag<(ops node:$val, node:$ptr), (base node:$val, node:$ptr)> {
+  let IsAtomic = 1;
+  let IsAtomicOrderingRelease = 1;
+}
+
+let Predicates = [BPFHasStoreRelease] in {
+  def STDREL : STORE_RELEASEi64<BPF_DW, "u64">;
+
+  foreach P = [[relaxed_store<atomic_store_64>, STD],
+               [releasing_store<atomic_store_64>, STDREL],
+              ] in {
+    def : Pat<(P[0] GPR:$val, ADDRri:$addr), (P[1] GPR:$val, ADDRri:$addr)>;
+  }
+}
+
 // LOAD instructions
 class LOAD<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
     : TYPE_LD_ST<ModOp.Value, SizeOp.Value,
@@ -622,6 +666,48 @@ let Predicates = [BPFHasLdsx] in {
 
 def LDD : LOADi64<BPF_DW, BPF_MEM, "u64", load>;
 
+class LOAD_ACQUIRE<BPFWidthModifer SizeOp, string OpcodeStr, RegisterClass RegTp>
+    : TYPE_LD_ST<BPF_ATOMIC.Value, SizeOp.Value,
+                 (outs RegTp:$dst),
+                 (ins MEMri:$addr),
+                 "$dst = load_acquire(("#OpcodeStr#" *)($addr))",
+                 []> {
+  bits<4> dst;
+  bits<20> addr;
+
+  let Inst{51-48} = dst;
+  let Inst{55-52} = addr{19-16}; // base reg
+  let Inst{47-32} = addr{15-0}; // offset
+  let Inst{7-4} = BPF_ATOMIC_LOAD.Value;
+  let Inst{3-0} = BPF_ACQUIRE.Value;
+  let BPFClass = BPF_STX;
+}
+
+class LOAD_ACQUIREi64<BPFWidthModifer SizeOp, string OpcodeStr>
+    : LOAD_ACQUIRE<SizeOp, OpcodeStr, GPR>;
+
+class relaxed_load<PatFrags base>
+    : PatFrag<(ops node:$ptr), (base node:$ptr)> {
+  let IsAtomic = 1;
+  let IsAtomicOrderingAcquireOrStronger = 0;
+}
+
+class acquiring_load<PatFrags base>
+    : PatFrag<(ops node:$ptr), (base node:$ptr)> {
+  let IsAtomic = 1;
+  let IsAtomicOrderingAcquire = 1;
+}
+
+let Predicates = [BPFHasLoadAcquire] in {
+  def LDDACQ : LOAD_ACQUIREi64<BPF_DW, "u64">;
+
+  foreach P = [[relaxed_load<atomic_load_64>, LDD],
+               [acquiring_load<atomic_load_64>, LDDACQ],
+              ] in {
+    def : Pat<(P[0] ADDRri:$addr), (P[1] ADDRri:$addr)>;
+  }
+}
+
 class BRANCH<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
     : TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
                    (outs),
@@ -1181,10 +1267,19 @@ class STORE32<BPFWidthModifer SizeOp, string OpcodeStr, list<dag> Pattern>
 class STOREi32<BPFWidthModifer Opc, string OpcodeStr, PatFrag OpNode>
     : STORE32<Opc, OpcodeStr, [(OpNode GPR32:$src, ADDRri:$addr)]>;
 
+class STORE_RELEASEi32<BPFWidthModifer Opc, string OpcodeStr>
+    : STORE_RELEASE<Opc, OpcodeStr, GPR32>;
+
 let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
   def STW32 : STOREi32<BPF_W, "u32", store>;
   def STH32 : STOREi32<BPF_H, "u16", truncstorei16>;
   def STB32 : STOREi32<BPF_B, "u8", truncstorei8>;
+
+  let Predicates = [BPFHasStoreRelease] in {
+    def STWREL32 : STORE_RELEASEi32<BPF_W, "u32">;
+    def STHREL32 : STORE_RELEASEi32<BPF_H, "u16">;
+    def STBREL32 : STORE_RELEASEi32<BPF_B, "u8">;
+  }
 }
 
 class LOAD32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, list<dag> Pattern>
@@ -1205,10 +1300,19 @@ class LOAD32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, lis
 class LOADi32<BPFWidthModifer SizeOp, BPFModeModifer ModOp, string OpcodeStr, PatFrag OpNode>
     : LOAD32<SizeOp, ModOp, OpcodeStr, [(set i32:$dst, (OpNode ADDRri:$addr))]>;
 
+class LOAD_ACQUIREi32<BPFWidthModifer SizeOp, string OpcodeStr>
+    : LOAD_ACQUIRE<SizeOp, OpcodeStr, GPR32>;
+
 let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
   def LDW32 : LOADi32<BPF_W, BPF_MEM, "u32", load>;
   def LDH32 : LOADi32<BPF_H, BPF_MEM, "u16", zextloadi16>;
   def LDB32 : LOADi32<BPF_B, BPF_MEM, "u8", zextloadi8>;
+
+  let Predicates = [BPFHasLoadAcquire] in {
+    def LDWACQ32 : LOAD_ACQUIREi32<BPF_W, "u32">;
+    def LDHACQ32 : LOAD_ACQUIREi32<BPF_H, "u16">;
+    def LDBACQ32 : LOAD_ACQUIREi32<BPF_B, "u8">;
+  }
 }
 
 let Predicates = [BPFHasALU32] in {
@@ -1238,6 +1342,30 @@ let Predicates = [BPFHasALU32] in {
             (SUBREG_TO_REG (i64 0), (LDH32 ADDRri:$src), sub_32)>;
   def : Pat<(i64 (extloadi32 ADDRri:$src)),
             (SUBREG_TO_REG (i64 0), (LDW32 ADDRri:$src), sub_32)>;
+
+  let Predicates = [BPFHasLoadAcquire] in {
+    foreach P = [[relaxed_load<atomic_load_32>, LDW32],
+                 [relaxed_load<atomic_load_az_16>, LDH32],
+                 [relaxed_load<atomic_load_az_8>, LDB32],
+                 [acquiring_load<atomic_load_32>, LDWACQ32],
+                 [acquiring_load<atomic_load_az_16>, LDHACQ32],
+                 [acquiring_load<atomic_load_az_8>, LDBACQ32],
+                ] in {
+      def : Pat<(P[0] ADDRri:$addr), (P[1] ADDRri:$addr)>;
+    }
+  }
+
+  let Predicates = [BPFHasStoreRelease] in {
+    foreach P = [[relaxed_store<atomic_store_32>, STW32],
+                 [relaxed_store<atomic_store_16>, STH32],
+                 [relaxed_store<atomic_store_8>, STB32],
+                 [releasing_store<atomic_store_32>, STWREL32],
+                 [releasing_store<atomic_store_16>, STHREL32],
+                 [releasing_store<atomic_store_8>, STBREL32],
+                ] in {
+      def : Pat<(P[0] GPR32:$val, ADDRri:$addr), (P[1] GPR32:$val, ADDRri:$addr)>;
+    }
+  }
 }
 
 let usesCustomInserter = 1, isCodeGenOnly = 1 in {
diff --git a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
index 4a1684ccebb793..f6735adbde6400 100644
--- a/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
+++ b/llvm/lib/Target/BPF/BPFMISimplifyPatchable.cpp
@@ -100,21 +100,25 @@ static bool isStoreImm(unsigned Opcode) {
 }
 
 static bool isStore32(unsigned Opcode) {
-  return Opcode == BPF::STB32 || Opcode == BPF::STH32 || Opcode == BPF::STW32;
+  return Opcode == BPF::STB32 || Opcode == BPF::STH32 || Opcode == BPF::STW32 ||
+         Opcode == BPF::STBREL32 || Opcode == BPF::STHREL32 ||
+         Opcode == BPF::STWREL32;
 }
 
 static bool isStore64(unsigned Opcode) {
   return Opcode == BPF::STB || Opcode == BPF::STH || Opcode == BPF::STW ||
-         Opcode == BPF::STD;
+         Opcode == BPF::STD || Opcode == BPF::STDREL;
 }
 
 static bool isLoad32(unsigned Opcode) {
-  return Opcode == BPF::LDB32 || Opcode == BPF::LDH32 || Opcode == BPF::LDW32;
+  return Opcode == BPF::LDB32 || Opcode == BPF::LDH32 || Opcode == BPF::LDW32 ||
+         Opcode == BPF::LDBACQ32 || Opcode == BPF::LDHACQ32 ||
+         Opcode == BPF::LDWACQ32;
 }
 
 static bool isLoad64(unsigned Opcode) {
   return Opcode == BPF::LDB || Opcode == BPF::LDH || Opcode == BPF::LDW ||
-         Opcode == BPF::LDD;
+         Opcode == BPF::LDD || Opcode == BPF::LDDACQ;
 }
 
 static bool isLoadSext(unsigned Opcode) {
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.cpp b/llvm/lib/Target/BPF/BPFSubtarget.cpp
index 305e9a2bf2cda3..c3b24659552179 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.cpp
+++ b/llvm/lib/Target/BPF/BPFSubtarget.cpp
@@ -40,6 +40,12 @@ static cl::opt<bool> Disable_gotol("disable-gotol", cl::Hidden, cl::init(false),
 static cl::opt<bool>
     Disable_StoreImm("disable-storeimm", cl::Hidden, cl::init(false),
                      cl::desc("Disable BPF_ST (immediate store) insn"));
+static cl::opt<bool>
+    Disable_load_acquire("disable-load-acquire", cl::Hidden, cl::init(false),
+                         cl::desc("Disable load-acquire insns"));
+static cl::opt<bool>
+    Disable_store_release("disable-store-release", cl::Hidden, cl::init(false),
+                          cl::desc("Disable store-release insns"));
 
 void BPFSubtarget::anchor() {}
 
@@ -62,6 +68,8 @@ void BPFSubtarget::initializeEnvironment() {
   HasSdivSmod = false;
   HasGotol = false;
   HasStoreImm = false;
+  HasLoadAcquire = false;
+  HasStoreRelease = false;
 }
 
 void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
@@ -91,6 +99,8 @@ void BPFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
     HasSdivSmod = !Disable_sdiv_smod;
     HasGotol = !Disable_gotol;
    HasStoreImm = !Disable_StoreImm;
+    HasLoadAcquire = !Disable_load_acquire;
+    HasStoreRelease = !Disable_store_release;
     return;
   }
 }
diff --git a/llvm/lib/Target/BPF/BPFSubtarget.h b/llvm/lib/Target/BPF/BPFSubtarget.h
index 33747546eadc3b..a3e8e1f017085e 100644
--- a/llvm/lib/Target/BPF/BPFSubtarget.h
+++ b/llvm/lib/Target/BPF/BPFSubtarget.h
@@ -64,7 +64,8 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
   bool UseDwarfRIS;
 
   // whether cpu v4 insns are enabled.
-  bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm;
+  bool HasLdsx, HasMovsx, HasBswap, HasSdivSmod, HasGotol, HasStoreImm,
+      HasLoadAcquire, HasStoreRelease;
 
   std::unique_ptr<CallLowering> CallLoweringInfo;
   std::unique_ptr<InstructionSelector> InstSelector;
@@ -92,6 +93,8 @@ class BPFSubtarget : public BPFGenSubtargetInfo {
   bool hasSdivSmod() const { return HasSdivSmod; }
   bool hasGotol() const { return HasGotol; }
   bool hasStoreImm() const { return HasStoreImm; }
+  bool hasLoadAcquire() const { return HasLoadAcquire; }
+  bool hasStoreRelease() const { return HasStoreRelease; }
 
   bool isLittleEndian() const { return IsLittleEndian; }
 
diff --git a/llvm/test/CodeGen/BPF/acquire-release.ll b/llvm/test/CodeGen/BPF/acquire-release.ll
new file mode 100644
index 00000000000000..e1b52f9bb3c8d5
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/acquire-release.ll
@@ -0,0 +1,80 @@
+; RUN: llc < %s -march=bpfel -mcpu=v4 -verify-machineinstrs -show-mc-encoding \
+; RUN:   | FileCheck -check-prefixes=CHECK-LE %s
+; RUN: llc < %s -march=bpfeb -mcpu=v4 -verify-machineinstrs -show-mc-encoding \
+; RUN:   | FileCheck -check-prefixes=CHECK-BE %s
+
+define dso_local i8 @load_acquire_i8(ptr nocapture noundef readonly %p) local_unnamed_addr {
+; CHECK-LABEL: load_acquire_i8
+; CHECK-LE: w0 = load_acquire((u8 *)(r1 + 0)) # encoding: [0xd3,0x10,0x00,0x00,0x11,0x00,0x00,0x00]
+; CHECK-BE: w0 = load_acquire((u8 *)(r1 + 0)) # encoding: [0xd3,0x01,0x00,0x00,0x00,0x00,0x00,0x11]
+entry:
+  %0 = load atomic i8, ptr %p acquire, align 1
+  ret i8 %0
+}
+
+define dso_local i16 @load_acquire_i16(ptr nocapture noundef readonly %p) local_unnamed_addr {
+; CHECK-LABEL: load_acquire_i16
+; CHECK-LE: w0 = load_acquire((u16 *)(r1 + 0)) # encoding: [0xcb,0x10,0x00,0x00,0x11,0x00,0x00,0x00]
+; CHECK-BE: w0 = load_acquire((u16 *)(r1 + 0)) # encoding: [0xcb,0x01,0x00,0x00,0x00,0x00,0x00,0x11]
+entry:
+  %0 = load atomic i16, ptr %p acquire, align 2
+  ret i16 %0
+}
+
+define dso_local i32 @load_acquire_i32(ptr nocapture noundef readonly %p) local_unnamed_addr {
+; CHECK-LABEL: load_acquire_i32
+; CHECK-LE: w0 = load_acquire((u32 *)(r1 + 0)) # encoding: [0xc3,0x10,0x00,0x00,0x11,0x00,0x00,0x00]
+; CHECK-BE: w0 = load_acquire((u32 *)(r1 + 0)) # encoding: [0xc3,0x01,0x00,0x00,0x00,0x00,0x00,0x11]
+entry:
+  %0 = load atomic i32, ptr %p acquire, align 4
+  ret i32 %0
+}
+
+define dso_local i64 @load_acquire_i64(ptr nocapture noundef readonly %p) local_unnamed_addr {
+; CHECK-LABEL: load_acquire_i64
+; CHECK-LE: r0 = load_acquire((u64 *)(r1 + 0)) # encoding: [0xdb,0x10,0x00,0x00,0x11,0x00,0x00,0x00]
+; CHECK-BE: r0 = load_acquire((u64 *)(r1 + 0)) # encoding: [0xdb,0x01,0x00,0x00,0x00,0x00,0x00,0x11]
+entry:
+  %0 = load atomic i64, ptr %p acquire, align 8
+  ret i64 %0
+}
+
+define void @store_release_i8(ptr nocapture noundef writeonly %p,
+                              i8 noundef signext %v) local_unnamed_addr {
+; CHECK-LABEL: store_release_i8
+; CHECK-LE: store_release((u8 *)(r1 + 0), w2) # encoding: [0xd3,0x21,0x00,0x00,0x22,0x00,0x00,0x00]
+; CHECK-BE: store_release((u8 *)(r1 + 0), w2) # encoding: [0xd3,0x12,0x00,0x00,0x00,0x00,0x00,0x22]
+entry:
+  store atomic i8 %v, ptr %p release, align 1
+  ret void
+}
+
+define void @store_release_i16(ptr nocapture noundef writeonly %p,
+                               i16 noundef signext %v) local_unnamed_addr {
+; CHECK-LABEL: store_release_i16
+; CHECK-LE: store_release((u16 *)(r1 + 0), w2) # encoding: [0xcb,0x21,0x00,0x00,0x22,0x00,0x00,0x00]
+; CHECK-BE: store_release((u16 *)(r1 + 0), w2) # encoding: [0xcb,0x12,0x00,0x00,0x00,0x00,0x00,0x22]
+entry:
+  store atomic i16 %v, ptr %p release, align 2
+  ret void
+}
+
+define void @store_release_i32(ptr nocapture noundef writeonly %p,
+                               i32 noundef %v) local_unnamed_addr {
+; CHECK-LABEL: store_release_i32
+; CHECK-LE: store_release((u32 *)(r1 + 0), w2) # encoding: [0xc3,0x21,0x00,0x00,0x22,0x00,0x00,0x00]
+; CHECK-BE: store_release((u32 *)(r1 + 0), w2) # encoding: [0xc3,0x12,0x00,0x00,0x00,0x00,0x00,0x22]
+entry:
+  store atomic i32 %v, ptr %p release, align 4
+  ret void
+}
+
+define void @store_release_i64(ptr nocapture noundef writeonly %p,
+                               i64 noundef %v) local_unnamed_addr {
+; CHECK-LABEL: store_release_i64
+; CHECK-LE: store_release((u64 *)(r1 + 0), r2) # encoding: [0xdb,0x21,0x00,0x00,0x22,0x00,0x00,0x00]
+; CHECK-BE: store_release((u64 *)(r1 + 0), r2) # encoding: [0xdb,0x12,0x00,0x00,0x00,0x00,0x00,0x22]
+entry:
+  store atomic i64 %v, ptr %p release, align 8
+  ret void
+}
diff --git a/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s b/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s
index d52985986bdc36..388bab78a28283 100644
--- a/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s
+++ b/llvm/test/CodeGen/BPF/assembler-disassembler-v4.s
@@ -42,3 +42,23 @@ r2 s%= r4
 // CHECK: 9c 42 01 00 00 00 00 00	w2 s%= w4
 w1 s/= w3
 w2 s%= w4
+
+// CHECK: d3 10 00 00 11 00 00 00	w0 = load_acquire((u8 *)(r1 + 0x0))
+// CHECK: cb 10 00 00 11 00 00 00	w0 = load_acquire((u16 *)(r1 + 0x0))
+// CHECK: c3 10 00 00 11 00 00 00	w0 = load_acquire((u32 *)(r1 + 0x0))
+w0 = load_acquire((u8 *)(r1 + 0))
+w0 = load_acquire((u16 *)(r1 + 0))
+w0 = load_acquire((u32 *)(r1 + 0))
+
+// CHECK: db 10 00 00 11 00 00 00	r0 = load_acquire((u64 *)(r1 + 0x0))
+r0 = load_acquire((u64 *)(r1 + 0))
+
+// CHECK: d3 21 00 00 22 00 00 00	store_release((u8 *)(r1 + 0x0), w2)
+// CHECK: cb 21 00 00 22 00 00 00	store_release((u16 *)(r1 + 0x0), w2)
+// CHECK: c3 21 00 00 22 00 00 00	store_release((u32 *)(r1 + 0x0), w2)
+store_release((u8 *)(r1 + 0), w2)
+store_release((u16 *)(r1 + 0), w2)
+store_release((u32 *)(r1 + 0), w2)
+
+// CHECK: db 21 00 00 22 00 00 00	store_release((u64 *)(r1 + 0x0), r2)
+store_release((u64 *)(r1 + 0), r2)

>From a5ea07b54fe3dddbd47d840cc3623b5444db0914 Mon Sep 17 00:00:00 2001
From: Peilin Ye <yepei...@google.com>
Date: Sat, 21 Sep 2024 04:00:40 +0000
Subject: [PATCH 3/3] [BPF] Improve error message for seq_cst atomic load and
 store

Sequentially consistent (seq_cst) atomic load and store are not
supported yet for BPF. Right now, calling __atomic_{load,store}{,_n}()
with __ATOMIC_SEQ_CST will cause an error:

  $ cat bar.c
  int foo(int *ptr) { return __atomic_load_n(ptr, __ATOMIC_SEQ_CST); }
  $ clang --target=bpf -mcpu=v4 -c bar.c > /dev/null
  fatal error: error in backend: Cannot select: t8: i32,ch = AtomicLoad<(load seq_cst (s32) from %ir.0)> t7:1, t7
  ...

Which isn't very useful. Just like commit 379d90884807 ("BPF: provide
better error message for unsupported atomic operations"), make it
generate an error message saying that the requested operation isn't
supported, before triggering that "fatal error":

  $ clang --target=bpf -mcpu=v4 -c bar.c > /dev/null
  bar.c:1:5: error: sequentially consistent (seq_cst) atomic load/store is not supported
      1 | int foo(int *ptr) { return __atomic_load_n(ptr, __ATOMIC_SEQ_CST); }
        |     ^
  ...
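For reference (not part of this patch), the store side behaves
analogously; assuming -mcpu=v4, something like the following is
expected to compile for release ordering but error out for seq_cst:

  /* release maps to a store_release instruction (see PATCH 2/3) */
  void ok(int *ptr, int val) {
      __atomic_store_n(ptr, val, __ATOMIC_RELEASE);
  }

  /* seq_cst triggers the new error message shown above */
  void not_ok(int *ptr, int val) {
      __atomic_store_n(ptr, val, __ATOMIC_SEQ_CST);
  }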
---
 llvm/lib/Target/BPF/BPFISelLowering.cpp | 25 +++++++++++++++++++++++++
 llvm/lib/Target/BPF/BPFISelLowering.h   |  2 +-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/BPF/BPFISelLowering.cpp b/llvm/lib/Target/BPF/BPFISelLowering.cpp
index ff23d3b055d0d5..90b5f42aa7e5cd 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.cpp
+++ b/llvm/lib/Target/BPF/BPFISelLowering.cpp
@@ -93,6 +93,11 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
   }
 
+  for (auto VT : {MVT::i32, MVT::i64}) {
+    setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
+    setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
+  }
+
   for (auto VT : { MVT::i32, MVT::i64 }) {
     if (VT == MVT::i32 && !STI.getHasAlu32())
       continue;
@@ -291,6 +296,9 @@ void BPFTargetLowering::ReplaceNodeResults(
     else
       Msg = "unsupported atomic operation, please use 64 bit version";
     break;
+  case ISD::ATOMIC_LOAD:
+  case ISD::ATOMIC_STORE:
+    return;
   }
 
   SDLoc DL(N);
@@ -316,6 +324,9 @@ SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
     return LowerSDIVSREM(Op, DAG);
   case ISD::DYNAMIC_STACKALLOC:
     return LowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::ATOMIC_LOAD:
+  case ISD::ATOMIC_STORE:
+    return LowerATOMIC_LOAD_STORE(Op, DAG);
   }
 }
 
@@ -703,6 +714,20 @@ SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
   return DAG.getNode(BPFISD::SELECT_CC, DL, VTs, Ops);
 }
 
+SDValue BPFTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  SDNode *N = Op.getNode();
+  SDLoc DL(N);
+
+  if (cast<AtomicSDNode>(N)->getMergedOrdering() ==
+      AtomicOrdering::SequentiallyConsistent)
+    fail(
+        DL, DAG,
+        "sequentially consistent (seq_cst) atomic load/store is not supported");
+
+  return Op;
+}
+
 const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((BPFISD::NodeType)Opcode) {
   case BPFISD::FIRST_NUMBER:
diff --git a/llvm/lib/Target/BPF/BPFISelLowering.h b/llvm/lib/Target/BPF/BPFISelLowering.h
index d59098f9f569ba..ad048ad05e6dd0 100644
--- a/llvm/lib/Target/BPF/BPFISelLowering.h
+++ b/llvm/lib/Target/BPF/BPFISelLowering.h
@@ -77,7 +77,7 @@ class BPFTargetLowering : public TargetLowering {
   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
-
+  SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits