nemanjai created this revision. nemanjai added reviewers: PowerPC, RKSimon, pengfei, arsenm, t.p.northover. Herald added subscribers: steven.zhang, kbarton, hiraditya. Herald added a project: All. nemanjai requested review of this revision. Herald added subscribers: jdoerfert, wdng. Herald added projects: clang, LLVM.
This is a proposal patch that shows one possible direction for extending the `__builtin_cpu_supports` and `__builtin_cpu_is` builtins to targets other than X86, together with an implementation for PowerPC. It adds a new pseudo-instruction, `LOAD_FIXED_ADDR`, similar to `LOAD_STACK_GUARD`, that loads values which the system library provides at fixed addresses (for example the HWCAP, HWCAP2 and CPUID words that glibc provides on PPC Linux). The semantics of the new instruction are a superset of those of `LOAD_STACK_GUARD`, so the two could be folded into one. My hope with this patch is to collect feedback from the community about the direction, so please provide any feedback you may have. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D152914 Files: clang/include/clang/Basic/Builtins.def clang/include/clang/Basic/BuiltinsX86.def clang/include/clang/Basic/TargetInfo.h clang/lib/Basic/Targets/PPC.cpp clang/lib/Basic/Targets/PPC.h clang/lib/Basic/Targets/X86.h clang/lib/CodeGen/CGBuiltin.cpp clang/lib/Sema/SemaChecking.cpp clang/test/CodeGen/builtin-cpu-supports.c clang/test/Sema/builtin-cpu-supports.c llvm/include/llvm/Analysis/TargetLibraryInfo.h llvm/include/llvm/CodeGen/TargetLowering.h llvm/include/llvm/IR/Intrinsics.td llvm/include/llvm/Support/TargetOpcodes.def llvm/include/llvm/Target/Target.td llvm/include/llvm/TargetParser/PPCTargetParser.def llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/lib/Target/PowerPC/PPCISelLowering.h llvm/lib/Target/PowerPC/PPCInstrInfo.cpp llvm/lib/Target/PowerPC/PPCSubtarget.h llvm/lib/Target/PowerPC/PPCTargetMachine.h llvm/test/CodeGen/PowerPC/cpu-supports.ll
Index: llvm/test/CodeGen/PowerPC/cpu-supports.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/cpu-supports.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-linux-gnu < %s | FileCheck %s \ +; RUN: -check-prefix=BE64 +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc-linux-gnu < %s | FileCheck %s \ +; RUN: -check-prefix=BE32 +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-linux-gnu < %s | FileCheck %s \ +; RUN: -check-prefix=LE +define dso_local signext i32 @test(i32 noundef signext %a) local_unnamed_addr #0 { +; BE64-LABEL: test: +; BE64: # %bb.0: # %entry +; BE64-NEXT: lwz r4, -28772(r13) +; BE64-NEXT: andis. r4, r4, 128 +; BE64-NEXT: bne cr0, .LBB0_3 +; BE64-NEXT: # %bb.1: # %if.else +; BE64-NEXT: lwz r4, -28776(r13) +; BE64-NEXT: andis. r4, r4, 1024 +; BE64-NEXT: bne cr0, .LBB0_4 +; BE64-NEXT: # %bb.2: # %if.else3 +; BE64-NEXT: lwz r4, -28764(r13) +; BE64-NEXT: cmplwi r4, 39 +; BE64-NEXT: addi r4, r3, 5 +; BE64-NEXT: slwi r3, r3, 1 +; BE64-NEXT: iseleq r3, r3, r4 +; BE64-NEXT: .LBB0_3: # %return +; BE64-NEXT: extsw r3, r3 +; BE64-NEXT: blr +; BE64-NEXT: .LBB0_4: # %if.then2 +; BE64-NEXT: addi r3, r3, -5 +; BE64-NEXT: extsw r3, r3 +; BE64-NEXT: blr +; +; BE32-LABEL: test: +; BE32: # %bb.0: # %entry +; BE32-NEXT: lwz r4, -28732(r2) +; BE32-NEXT: andis. r4, r4, 128 +; BE32-NEXT: bnelr cr0 +; BE32-NEXT: # %bb.1: # %if.else +; BE32-NEXT: lwz r4, -28736(r2) +; BE32-NEXT: andis. 
r4, r4, 1024 +; BE32-NEXT: bne cr0, .LBB0_3 +; BE32-NEXT: # %bb.2: # %if.else3 +; BE32-NEXT: lwz r4, -28724(r2) +; BE32-NEXT: cmplwi r4, 39 +; BE32-NEXT: addi r4, r3, 5 +; BE32-NEXT: slwi r3, r3, 1 +; BE32-NEXT: iseleq r3, r3, r4 +; BE32-NEXT: blr +; BE32-NEXT: .LBB0_3: # %if.then2 +; BE32-NEXT: addi r3, r3, -5 +; BE32-NEXT: blr +; +; LE-LABEL: test: +; LE: # %bb.0: # %entry +; LE-NEXT: lwz r4, -28776(r13) +; LE-NEXT: andis. r4, r4, 128 +; LE-NEXT: bne cr0, .LBB0_3 +; LE-NEXT: # %bb.1: # %if.else +; LE-NEXT: lwz r4, -28772(r13) +; LE-NEXT: andis. r4, r4, 1024 +; LE-NEXT: bne cr0, .LBB0_4 +; LE-NEXT: # %bb.2: # %if.else3 +; LE-NEXT: lwz r4, -28764(r13) +; LE-NEXT: cmplwi r4, 39 +; LE-NEXT: addi r4, r3, 5 +; LE-NEXT: slwi r3, r3, 1 +; LE-NEXT: iseleq r3, r3, r4 +; LE-NEXT: .LBB0_3: # %return +; LE-NEXT: extsw r3, r3 +; LE-NEXT: blr +; LE-NEXT: .LBB0_4: # %if.then2 +; LE-NEXT: addi r3, r3, -5 +; LE-NEXT: extsw r3, r3 +; LE-NEXT: blr +entry: + %cpu_supports = tail call i32 @llvm.fixed.addr.ld(i32 1) + %0 = and i32 %cpu_supports, 8388608 + %.not = icmp eq i32 %0, 0 + br i1 %.not, label %if.else, label %return + +if.else: ; preds = %entry + %cpu_supports1 = tail call i32 @llvm.fixed.addr.ld(i32 0) + %1 = and i32 %cpu_supports1, 67108864 + %.not12 = icmp eq i32 %1, 0 + br i1 %.not12, label %if.else3, label %if.then2 + +if.then2: ; preds = %if.else + %sub = add nsw i32 %a, -5 + br label %return + +if.else3: ; preds = %if.else + %cpu_is = tail call i32 @llvm.fixed.addr.ld(i32 2) + %2 = icmp eq i32 %cpu_is, 39 + br i1 %2, label %if.then4, label %if.end6 + +if.then4: ; preds = %if.else3 + %add = shl nsw i32 %a, 1 + br label %return + +if.end6: ; preds = %if.else3 + %add7 = add nsw i32 %a, 5 + br label %return + +return: ; preds = %entry, %if.end6, %if.then4, %if.then2 + %retval.0 = phi i32 [ %sub, %if.then2 ], [ %add, %if.then4 ], [ %add7, %if.end6 ], [ %a, %entry ] + ret i32 %retval.0 +} + +declare i32 @llvm.fixed.addr.ld(i32 immarg) #1 Index: 
llvm/lib/Target/PowerPC/PPCTargetMachine.h =================================================================== --- llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -32,6 +32,7 @@ std::unique_ptr<TargetLoweringObjectFile> TLOF; PPCABI TargetABI; Endian Endianness = Endian::NOT_DETECTED; + mutable bool HasGlibcHWCAPAccess = false; mutable StringMap<std::unique_ptr<PPCSubtarget>> SubtargetMap; @@ -64,6 +65,8 @@ const TargetSubtargetInfo *STI) const override; bool isELFv2ABI() const { return TargetABI == PPC_ABI_ELFv2; } + bool hasGlibcHWCAPAccess() const { return HasGlibcHWCAPAccess; } + void setGlibcHWCAPAccess(bool Val = true) const { HasGlibcHWCAPAccess = Val; } bool isPPC64() const { const Triple &TT = getTargetTriple(); return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le); Index: llvm/lib/Target/PowerPC/PPCSubtarget.h =================================================================== --- llvm/lib/Target/PowerPC/PPCSubtarget.h +++ llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -210,6 +210,7 @@ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } + bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); } bool isAIXABI() const { return TargetTriple.isOSAIX(); } bool isSVR4ABI() const { return !isAIXABI(); } Index: llvm/lib/Target/PowerPC/PPCInstrInfo.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1102,6 +1102,7 @@ case PPC::ADDIStocHA8: case PPC::ADDItocL: case PPC::LOAD_STACK_GUARD: + case PPC::LOAD_FIXED_ADDR: case PPC::XXLXORz: case PPC::XXLXORspz: case PPC::XXLXORdpz: @@ -3127,6 +3128,32 @@ .addReg(Reg); return true; } + case TargetOpcode::LOAD_FIXED_ADDR: { + assert(Subtarget.isTargetLinux() && + "Only Linux target is expected 
to contain LOAD_FIXED_ADDR"); + int64_t Offset = 0; + const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2; + MI.setDesc(get(PPC::LWZ)); + uint64_t FAType = MI.getOperand(1).getImm(); + // The HWCAP and HWCAP2 word offsets are reversed on big endian Linux. + if ((FAType == TargetLibraryInfo::FA_HWCAP && Subtarget.isLittleEndian()) || + (FAType == TargetLibraryInfo::FA_HWCAP2 && !Subtarget.isLittleEndian())) + Offset = Subtarget.isPPC64() ? -0x7064 : -0x703C; + else if ((FAType == TargetLibraryInfo::FA_HWCAP2 && + Subtarget.isLittleEndian()) || + (FAType == TargetLibraryInfo::FA_HWCAP && + !Subtarget.isLittleEndian())) + Offset = Subtarget.isPPC64() ? -0x7068 : -0x7040; + else if (FAType == TargetLibraryInfo::FA_CPUID) + Offset = Subtarget.isPPC64() ? -0x705C : -0x7034; + assert(Offset && "Do not know the offset for this fixed addr load"); + MI.removeOperand(1); + Subtarget.getTargetMachine().setGlibcHWCAPAccess(); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(Offset) + .addReg(Reg); + return true; + } case PPC::DFLOADf32: case PPC::DFLOADf64: case PPC::DFSTOREf32: Index: llvm/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1122,6 +1122,7 @@ /// Override to support customized stack guard loading. bool useLoadStackGuardNode() const override; + bool useLoadFixedAddr(TargetLibraryInfo::FixedAddrType Type) const override; void insertSSPDeclarations(Module &M) const override; Value *getSDagStackGuard(const Module &M) const override; Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1341,6 +1341,8 @@ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? 
Legal : Custom); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Legal); + setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Legal); if (!isPPC64) { setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); @@ -17370,6 +17372,22 @@ return true; } +// Enable LOAD_FIXED_ADDR for the values that are available from +// libc. +bool PPCTargetLowering::useLoadFixedAddr( + TargetLibraryInfo::FixedAddrType Type) const { + if (!Subtarget.isTargetGlibc()) + return false; + + switch (Type) { + case TargetLibraryInfo::FA_HWCAP: + case TargetLibraryInfo::FA_HWCAP2: + case TargetLibraryInfo::FA_CPUID: + return true; + } + return false; +} + // Override to disable global variable loading on Linux and insert AIX canary // word declaration. void PPCTargetLowering::insertSSPDeclarations(Module &M) const { Index: llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp =================================================================== --- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1782,6 +1782,10 @@ PPCTargetStreamer *TS = static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer()); + if (static_cast<const PPCTargetMachine &>(TM).hasGlibcHWCAPAccess()) + OutStreamer->emitSymbolValue( + GetExternalSymbolSymbol("__parse_hwcap_and_convert_at_platform"), + MAI->getCodePointerSize()); emitGNUAttributes(M); if (!TOC.empty()) { Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2736,6 +2736,17 @@ return SDValue(Node, 0); } +/// Create a LOAD_FIXED_ADDR node whose target-constant operand identifies +/// which fixed-address value (a TargetLibraryInfo::FixedAddrType) to load. 
+static SDValue getLoadFixedAddr(SelectionDAG &DAG, const SDLoc &DL, + SDValue &Chain, + TargetLibraryInfo::FixedAddrType Type) { + MachineSDNode *Node = + DAG.getMachineNode(TargetOpcode::LOAD_FIXED_ADDR, DL, MVT::i32, + DAG.getTargetConstant(Type, DL, MVT::i32), Chain); + return SDValue(Node, 0); +} + /// Codegen a new tail for a stack protector check ParentMBB which has had its /// tail spliced into a stack protector check success bb. /// @@ -6773,6 +6784,15 @@ DAG.setRoot(Res); return; } + case Intrinsic::fixed_addr_ld: { + SDValue Chain = getRoot(); + uint64_t ConstArg = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(); + Res = getLoadFixedAddr(DAG, sdl, Chain, + (TargetLibraryInfo::FixedAddrType)ConstArg); + DAG.setRoot(Chain); + setValue(&I, Res); + return; + } case Intrinsic::objectsize: llvm_unreachable("llvm.objectsize.* should have been lowered already"); Index: llvm/include/llvm/TargetParser/PPCTargetParser.def =================================================================== --- /dev/null +++ llvm/include/llvm/TargetParser/PPCTargetParser.def @@ -0,0 +1,71 @@ +#ifndef PPC_FEATURE +#define PPC_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN) +#endif +#ifndef PPC_CPU +#define PPC_CPU(NAME, NUM) +#endif + +// PPC_FEATURE(Name, Description, EnumName, BitMask, FA_WORD) +PPC_FEATURE("4xxmac","4xx CPU has a Multiply Accumulator",PPCF_4XXMAC,0x02000000,FA_HWCAP) +PPC_FEATURE("altivec","CPU has a SIMD/Vector Unit",PPCF_ALTIVEC,0x10000000,FA_HWCAP) +PPC_FEATURE("arch_2_05","CPU supports ISA 205 (eg, POWER6)",PPCF_ARCH205,0x00001000,FA_HWCAP) +PPC_FEATURE("arch_2_06","CPU supports ISA 206 (eg, POWER7)",PPCF_ARCH206,0x00000100,FA_HWCAP) +PPC_FEATURE("arch_2_07","CPU supports ISA 207 (eg, POWER8)",PPCF_ARCH207,0x80000000,FA_HWCAP2) +PPC_FEATURE("arch_3_00","CPU supports ISA 30 (eg, POWER9)",PPCF_ARCH30,0x00800000,FA_HWCAP2) +PPC_FEATURE("arch_3_1","CPU supports ISA 31 (eg, POWER10)",PPCF_ARCH31,0x00040000,FA_HWCAP2) +PPC_FEATURE("archpmu","CPU supports the set 
of compatible performance monitoring events",PPCF_ARCHPMU,0x00000040,FA_HWCAP) +PPC_FEATURE("booke","CPU supports the Embedded ISA category",PPCF_BOOKE,0x00008000,FA_HWCAP) +PPC_FEATURE("cellbe","CPU has a CELL broadband engine",PPCF_CELLBE,0x00010000,FA_HWCAP) +PPC_FEATURE("darn","CPU supports the darn (deliver a random number) instruction",PPCF_DARN,0x00200000,FA_HWCAP2) +PPC_FEATURE("dfp","CPU has a decimal floating point unit",PPCF_DFP,0x00000400,FA_HWCAP) +PPC_FEATURE("dscr","CPU supports the data stream control register",PPCF_DSCR,0x20000000,FA_HWCAP2) +PPC_FEATURE("ebb","CPU supports event base branching",PPCF_EBB,0x10000000,FA_HWCAP2) +PPC_FEATURE("efpdouble","CPU has a SPE double precision floating point unit",PPCF_EFPDOUBLE,0x00200000,FA_HWCAP) +PPC_FEATURE("efpsingle","CPU has a SPE single precision floating point unit",PPCF_EFPSINGLE,0x00400000,FA_HWCAP) +PPC_FEATURE("fpu","CPU has a floating point unit",PPCF_FPU,0x08000000,FA_HWCAP) +PPC_FEATURE("htm","CPU has hardware transaction memory instructions",PPCF_HTM,0x40000000,FA_HWCAP2) +PPC_FEATURE("htm-nosc","Kernel aborts hardware transactions when a syscall is made",PPCF_HTM_NOSC,0x01000000,FA_HWCAP2) +PPC_FEATURE("htm-no-suspend","CPU supports hardware transaction memory but does not support the tsuspend instruction.",PPCF_HTM_NO_SUSPEND,0x00080000,FA_HWCAP2) +PPC_FEATURE("ic_snoop","CPU supports icache snooping capabilities",PPCF_IC_SNOOP,0x00002000,FA_HWCAP) +PPC_FEATURE("ieee128","CPU supports 128-bit IEEE binary floating point instructions",PPCF_IEEE128,0x00400000,FA_HWCAP2) +PPC_FEATURE("isel","CPU supports the integer select instruction",PPCF_ISEL,0x08000000,FA_HWCAP2) +PPC_FEATURE("mma","CPU supports the matrix-multiply assist instructions",PPCF_MMA,0x00020000,FA_HWCAP2) +PPC_FEATURE("mmu","CPU has a memory management unit",PPCF_MMU,0x04000000,FA_HWCAP) +PPC_FEATURE("notb","CPU does not have a timebase (eg, 601 and 403gx)",PPCF_NOTB,0x00100000,FA_HWCAP) +PPC_FEATURE("pa6t","CPU supports the PA 
Semi 6T CORE ISA",PPCF_PA6T,0x00000800,FA_HWCAP) +PPC_FEATURE("power4","CPU supports ISA 200 (eg, POWER4)",PPCF_POWER4,0x00080000,FA_HWCAP) +PPC_FEATURE("power5","CPU supports ISA 202 (eg, POWER5)",PPCF_POWER5,0x00040000,FA_HWCAP) +PPC_FEATURE("power5+","CPU supports ISA 203 (eg, POWER5+)",PPCF_POWER5P,0x00020000,FA_HWCAP) +PPC_FEATURE("power6x","CPU supports ISA 205 (eg, POWER6) extended opcodes mffgpr and mftgpr.",PPCF_POWER6X,0x00000200,FA_HWCAP) +PPC_FEATURE("ppc32","CPU supports 32-bit mode execution",PPCF_PPC32,0x80000000,FA_HWCAP) +PPC_FEATURE("ppc601","CPU supports the old POWER ISA (eg, 601)",PPCF_PPC601,0x20000000,FA_HWCAP) +PPC_FEATURE("ppc64","CPU supports 64-bit mode execution",PPCF_PPC64,0x40000000,FA_HWCAP) +PPC_FEATURE("ppcle","CPU supports a little-endian mode that uses address swizzling",PPCF_PPCLE,0x00000001,FA_HWCAP) +PPC_FEATURE("scv","Kernel supports system call vectored",PPCF_SCV,0x00100000,FA_HWCAP2) +PPC_FEATURE("smt","CPU support simultaneous multi-threading",PPCF_SMT,0x00004000,FA_HWCAP) +PPC_FEATURE("spe","CPU has a signal processing extension unit",PPCF_SPE,0x00800000,FA_HWCAP) +PPC_FEATURE("tar","CPU supports the target address register",PPCF_TAR,0x04000000,FA_HWCAP2) +PPC_FEATURE("true_le","CPU supports true little-endian mode",PPCF_TRUE_LE,0x00000002,FA_HWCAP) +PPC_FEATURE("ucache","CPU has unified I/D cache",PPCF_UCACHE,0x01000000,FA_HWCAP) +PPC_FEATURE("vcrypto","CPU supports the vector cryptography instructions",PPCF_VCRYPTO,0x02000000,FA_HWCAP2) +PPC_FEATURE("vsx","CPU supports the vector-scalar extension",PPCF_VSX,0x00000080,FA_HWCAP) + +// PPC_CPU(Name, NumericID) +PPC_CPU("power4",32) +PPC_CPU("ppc970",33) +PPC_CPU("power5",34) +PPC_CPU("power5+",35) +PPC_CPU("power6",36) +PPC_CPU("ppc-cell-be",37) +PPC_CPU("power6x",38) +PPC_CPU("power7",39) +PPC_CPU("ppca2",40) +PPC_CPU("ppc405",41) +PPC_CPU("ppc440",42) +PPC_CPU("ppc464",43) +PPC_CPU("ppc476",44) +PPC_CPU("power8",45) +PPC_CPU("power9",46) +PPC_CPU("power10",47) +#undef 
PPC_FEATURE +#undef PPC_CPU Index: llvm/include/llvm/Target/Target.td =================================================================== --- llvm/include/llvm/Target/Target.td +++ llvm/include/llvm/Target/Target.td @@ -1295,6 +1295,14 @@ let hasSideEffects = false; bit isPseudo = true; } +def LOAD_FIXED_ADDR : StandardPseudoInstruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins i32imm:$a); + let mayLoad = true; + bit isReMaterializable = true; + let hasSideEffects = false; + bit isPseudo = true; +} def PREALLOCATED_SETUP : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$a); Index: llvm/include/llvm/Support/TargetOpcodes.def =================================================================== --- llvm/include/llvm/Support/TargetOpcodes.def +++ llvm/include/llvm/Support/TargetOpcodes.def @@ -145,6 +145,11 @@ /// additionally expand this pseudo after register allocation. HANDLE_TARGET_OPCODE(LOAD_STACK_GUARD) +/// This pseudo-instruction loads a value provided by libc at a fixed address. +/// For example, GLIBC provides HWCAP, HWCAP2 and CPUID at a fixed address +/// on some targets (for example PPC Linux). +HANDLE_TARGET_OPCODE(LOAD_FIXED_ADDR) + /// These are used to support call sites that must have the stack adjusted /// before the call (e.g. to initialize an argument passed by value). /// See llvm.call.preallocated.{setup,arg} in the LangRef for more details. Index: llvm/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/include/llvm/IR/Intrinsics.td +++ llvm/include/llvm/IR/Intrinsics.td @@ -900,6 +900,12 @@ def int_stackprotector : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>; def int_stackguard : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], []>; +// Load of a value provided by the system library at a fixed address. Used for +// accessing things like the HWCAP word provided by GLIBC. 
+def int_fixed_addr_ld + : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], + [IntrInaccessibleMemOnly, ImmArg<ArgIndex<0>>]>; + // A cover for instrumentation based profiling. def int_instrprof_cover : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty]>; Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -27,6 +27,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/ComplexDeinterleavingPass.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -85,7 +86,6 @@ class MCExpr; class Module; class ProfileSummaryInfo; -class TargetLibraryInfo; class TargetMachine; class TargetRegisterClass; class TargetRegisterInfo; @@ -5238,6 +5238,12 @@ return false; } + /// If this function returns true, SelectionDAGBuilder emits a + /// LOAD_FIXED_ADDR for the particular value. + virtual bool useLoadFixedAddr(TargetLibraryInfo::FixedAddrType Type) const { + return false; + } + virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, const SDLoc &DL) const { llvm_unreachable("not implemented for this target"); Index: llvm/include/llvm/Analysis/TargetLibraryInfo.h =================================================================== --- llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -246,6 +246,16 @@ BitVector OverrideAsUnavailable; public: + /// Possible values provided by the system library at a fixed address. These + /// are values that are set up by the OS, system library or dynamic loader + /// that usually provide information about the CPU/System/Environment within + /// which the program is being executed. + enum FixedAddrType { + FA_HWCAP, // The HWCAP word provided by libc. 
+ FA_HWCAP2, // The HWCAP2 word provided by libc. + FA_CPUID // The CPUID word (or string) provided by libc. + }; + explicit TargetLibraryInfo(const TargetLibraryInfoImpl &Impl, std::optional<const Function *> F = std::nullopt) : Impl(&Impl), OverrideAsUnavailable(NumLibFuncs) { Index: clang/test/Sema/builtin-cpu-supports.c =================================================================== --- clang/test/Sema/builtin-cpu-supports.c +++ clang/test/Sema/builtin-cpu-supports.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -fsyntax-only -triple x86_64-pc-linux-gnu -verify %s -// RUN: %clang_cc1 -fsyntax-only -triple powerpc64le-linux-gnu -verify %s +// RUN: %clang_cc1 -fsyntax-only -triple aarch64-linux-gnu -verify %s extern void a(const char *); @@ -21,11 +21,11 @@ (void)__builtin_cpu_is("x86-64-v3"); // expected-error {{invalid cpu name for builtin}} (void)__builtin_cpu_is("x86-64-v4"); // expected-error {{invalid cpu name for builtin}} #else - if (__builtin_cpu_supports("vsx")) // expected-error {{use of unknown builtin}} - a("vsx"); + if (__builtin_cpu_supports("aes")) // expected-error {{use of unknown builtin}} + a("aes"); - if (__builtin_cpu_is("pwr9")) // expected-error {{use of unknown builtin}} - a("pwr9"); + if (__builtin_cpu_is("cortex-x3")) // expected-error {{use of unknown builtin}} + a("cortex-x3"); #endif return 0; Index: clang/test/CodeGen/builtin-cpu-supports.c =================================================================== --- clang/test/CodeGen/builtin-cpu-supports.c +++ clang/test/CodeGen/builtin-cpu-supports.c @@ -1,32 +1,55 @@ -// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm < %s| FileCheck %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm < %s | FileCheck %s \ +// RUN: --check-prefix=CHECK-X86 +// RUN: %clang_cc1 -triple ppc64le-linux-gnu -emit-llvm < %s | FileCheck %s \ +// RUN: --check-prefix=CHECK-PPC +#ifndef __PPC__ // Test that we have the structure definition, the gep offsets, the name of the // global, the bit grab, and 
the icmp correct. extern void a(const char *); -// CHECK: @__cpu_model = external dso_local global { i32, i32, i32, [1 x i32] } -// CHECK: @__cpu_features2 = external dso_local global i32 +// CHECK-X86: @__cpu_model = external dso_local global { i32, i32, i32, [1 x i32] } +// CHECK-X86: @__cpu_features2 = external dso_local global i32 int main(void) { __builtin_cpu_init(); - // CHECK: call void @__cpu_indicator_init + // CHECK-X86: call void @__cpu_indicator_init if (__builtin_cpu_supports("sse4.2")) a("sse4.2"); - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0) - // CHECK: [[AND:%[^ ]+]] = and i32 [[LOAD]], 256 - // CHECK: = icmp eq i32 [[AND]], 256 +// CHECK-X86: [[LOAD:%[^ ]+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0) +// CHECK-X86: [[AND:%[^ ]+]] = and i32 [[LOAD]], 256 +// CHECK-X86: = icmp eq i32 [[AND]], 256 if (__builtin_cpu_supports("gfni")) a("gfni"); - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr @__cpu_features2 - // CHECK: [[AND:%[^ ]+]] = and i32 [[LOAD]], 1 - // CHECK: = icmp eq i32 [[AND]], 1 +// CHECK-X86: [[LOAD:%[^ ]+]] = load i32, ptr @__cpu_features2 +// CHECK-X86: [[AND:%[^ ]+]] = and i32 [[LOAD]], 1 +// CHECK-X86: = icmp eq i32 [[AND]], 1 return 0; } -// CHECK: declare dso_local void @__cpu_indicator_init() +// CHECK-X86: declare dso_local void @__cpu_indicator_init() +#else +int test(int a) { +// CHECK-PPC: [[CPUSUP:%[^ ]+]] = call i32 @llvm.fixed.addr.ld(i32 1) +// CHECK-PPC: [[AND:%[^ ]+]] = and i32 [[CPUSUP]], 8388608 +// CHECK-PPC: icmp ne i32 [[AND]], 0 +// CHECK-PPC: [[CPUSUP2:%[^ ]+]] = call i32 @llvm.fixed.addr.ld(i32 0) +// CHECK-PPC: [[AND2:%[^ ]+]] = and i32 [[CPUSUP2]], 67108864 +// CHECK-PPC: icmp ne i32 [[AND2]], 0 +// CHECK-PPC: [[CPUID:%[^ ]+]] = call i32 @llvm.fixed.addr.ld(i32 2) +// CHECK-PPC: icmp eq i32 [[CPUID]], 39 + if (__builtin_cpu_supports("arch_3_00")) // HWCAP2 + 
return a; + else if (__builtin_cpu_supports("mmu")) // HWCAP + return a - 5; + else if (__builtin_cpu_is("power7")) // CPUID + return a + a; + return a + 5; +} +#endif Index: clang/lib/Sema/SemaChecking.cpp =================================================================== --- clang/lib/Sema/SemaChecking.cpp +++ clang/lib/Sema/SemaChecking.cpp @@ -2061,6 +2061,66 @@ return false; } +/// SemaBuiltinCpuSupports - Handle __builtin_cpu_supports(char *). +/// This checks that the target supports __builtin_cpu_supports and +/// that the string argument is constant and valid. +static bool SemaBuiltinCpuSupports(Sema &S, const TargetInfo &TI, + const TargetInfo *AuxTI, CallExpr *TheCall) { + Expr *Arg = TheCall->getArg(0); + + const TargetInfo *TheTI = nullptr; + if (TI.supportsCpuSupports()) + TheTI = &TI; + else if (AuxTI && AuxTI->supportsCpuSupports()) + TheTI = AuxTI; + else + return S.Diag(TheCall->getBeginLoc(), diag::warn_builtin_unknown) + << "__builtin_cpu_supports"; + + // Check if the argument is a string literal. + if (!isa<StringLiteral>(Arg->IgnoreParenImpCasts())) + return S.Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal) + << Arg->getSourceRange(); + + // Check the contents of the string. + StringRef Feature = + cast<StringLiteral>(Arg->IgnoreParenImpCasts())->getString(); + if (!TheTI->validateCpuSupports(Feature)) + return S.Diag(TheCall->getBeginLoc(), diag::err_invalid_cpu_supports) + << Arg->getSourceRange(); + return false; +} + +/// SemaBuiltinCpuIs - Handle __builtin_cpu_is(char *). +/// This checks that the target supports __builtin_cpu_is and +/// that the string argument is constant and valid. 
+static bool SemaBuiltinCpuIs(Sema &S, const TargetInfo &TI, + const TargetInfo *AuxTI, CallExpr *TheCall) { + Expr *Arg = TheCall->getArg(0); + + const TargetInfo *TheTI = nullptr; + if (TI.supportsCpuIs()) + TheTI = &TI; + else if (AuxTI && AuxTI->supportsCpuIs()) + TheTI = AuxTI; + else + return S.Diag(TheCall->getBeginLoc(), diag::warn_builtin_unknown) + << "__builtin_cpu_is"; + + // Check if the argument is a string literal. + if (!isa<StringLiteral>(Arg->IgnoreParenImpCasts())) + return S.Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal) + << Arg->getSourceRange(); + + // Check the contents of the string. + StringRef Feature = + cast<StringLiteral>(Arg->IgnoreParenImpCasts())->getString(); + if (!TheTI->validateCpuIs(Feature)) + return S.Diag(TheCall->getBeginLoc(), diag::err_invalid_cpu_is) + << Arg->getSourceRange(); + return false; +} + ExprResult Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, CallExpr *TheCall) { @@ -2088,6 +2148,16 @@ } switch (BuiltinID) { + case Builtin::BI__builtin_cpu_supports: + if (SemaBuiltinCpuSupports(*this, Context.getTargetInfo(), + Context.getAuxTargetInfo(), TheCall)) + return ExprError(); + break; + case Builtin::BI__builtin_cpu_is: + if (SemaBuiltinCpuIs(*this, Context.getTargetInfo(), + Context.getAuxTargetInfo(), TheCall)) + return ExprError(); + break; case Builtin::BI__builtin___CFStringMakeConstantString: // CFStringMakeConstantString is currently not implemented for GOFF (i.e., // on z/OS) and for XCOFF (i.e., on AIX). Emit unsupported @@ -4838,47 +4908,6 @@ return false; } -/// SemaBuiltinCpuSupports - Handle __builtin_cpu_supports(char *). -/// This checks that the target supports __builtin_cpu_supports and -/// that the string argument is constant and valid. -static bool SemaBuiltinCpuSupports(Sema &S, const TargetInfo &TI, - CallExpr *TheCall) { - Expr *Arg = TheCall->getArg(0); - - // Check if the argument is a string literal. 
- if (!isa<StringLiteral>(Arg->IgnoreParenImpCasts())) - return S.Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal) - << Arg->getSourceRange(); - - // Check the contents of the string. - StringRef Feature = - cast<StringLiteral>(Arg->IgnoreParenImpCasts())->getString(); - if (!TI.validateCpuSupports(Feature)) - return S.Diag(TheCall->getBeginLoc(), diag::err_invalid_cpu_supports) - << Arg->getSourceRange(); - return false; -} - -/// SemaBuiltinCpuIs - Handle __builtin_cpu_is(char *). -/// This checks that the target supports __builtin_cpu_is and -/// that the string argument is constant and valid. -static bool SemaBuiltinCpuIs(Sema &S, const TargetInfo &TI, CallExpr *TheCall) { - Expr *Arg = TheCall->getArg(0); - - // Check if the argument is a string literal. - if (!isa<StringLiteral>(Arg->IgnoreParenImpCasts())) - return S.Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal) - << Arg->getSourceRange(); - - // Check the contents of the string. - StringRef Feature = - cast<StringLiteral>(Arg->IgnoreParenImpCasts())->getString(); - if (!TI.validateCpuIs(Feature)) - return S.Diag(TheCall->getBeginLoc(), diag::err_invalid_cpu_is) - << Arg->getSourceRange(); - return false; -} - // Check if the rounding mode is legal. bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) { // Indicates if this instruction has rounding control or just SAE. @@ -5353,12 +5382,6 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall) { - if (BuiltinID == X86::BI__builtin_cpu_supports) - return SemaBuiltinCpuSupports(*this, TI, TheCall); - - if (BuiltinID == X86::BI__builtin_cpu_is) - return SemaBuiltinCpuIs(*this, TI, TheCall); - // Check for 32-bit only builtins on a 64-bit target. 
const llvm::Triple &TT = TI.getTriple(); if (TT.getArch() != llvm::Triple::x86 && isX86_32Builtin(BuiltinID)) Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -13190,9 +13191,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { - if (BuiltinID == X86::BI__builtin_cpu_is) + if (BuiltinID == Builtin::BI__builtin_cpu_is) return EmitX86CpuIs(E); - if (BuiltinID == X86::BI__builtin_cpu_supports) + if (BuiltinID == Builtin::BI__builtin_cpu_supports) return EmitX86CpuSupports(E); if (BuiltinID == X86::BI__builtin_cpu_init) return EmitX86CpuInit(); @@ -15692,6 +15693,38 @@ // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we // call __builtin_readcyclecounter. 
+ case Builtin::BI__builtin_cpu_is: { + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); + unsigned NumCPUID = StringSwitch<unsigned>(CPUStr) +#define PPC_CPU(Name, NumericID) .Case(Name, NumericID) +#include "llvm/TargetParser/PPCTargetParser.def" + .Default(-1U); + Value *Op0 = + llvm::ConstantInt::get(Int32Ty, (unsigned)TargetLibraryInfo::FA_CPUID); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::fixed_addr_ld); + Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is"); + return Builder.CreateICmpEQ(TheCall, + llvm::ConstantInt::get(Int32Ty, NumCPUID)); + } + case Builtin::BI__builtin_cpu_supports: { + unsigned FeatureWord; + unsigned BitMask; + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString(); + std::tie(FeatureWord, BitMask) = + StringSwitch<std::pair<unsigned, unsigned>>(CPUStr) +#define PPC_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \ + .Case(Name, {(unsigned)TargetLibraryInfo::FA_WORD, Bitmask}) +#include "llvm/TargetParser/PPCTargetParser.def" + .Default({0, 0}); + Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::fixed_addr_ld); + Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports"); + Value *Mask = + Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask)); + return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty)); + } case PPC::BI__builtin_ppc_get_timebase: return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); Index: clang/lib/Basic/Targets/X86.h =================================================================== --- clang/lib/Basic/Targets/X86.h +++ clang/lib/Basic/Targets/X86.h @@ -206,6 +206,10 @@ return RegName.equals("esp") || RegName.equals("rsp"); } + bool supportsCpuSupports() const override { return true; } + + bool supportsCpuIs() const override { return true; } 
+ bool validateCpuSupports(StringRef FeatureStr) const override; bool validateCpuIs(StringRef FeatureStr) const override; Index: clang/lib/Basic/Targets/PPC.h =================================================================== --- clang/lib/Basic/Targets/PPC.h +++ clang/lib/Basic/Targets/PPC.h @@ -356,6 +356,12 @@ bool isSPRegName(StringRef RegName) const override { return RegName.equals("r1") || RegName.equals("x1"); } + // We support __builtin_cpu_supports/__builtin_cpu_is on targets that + // have GLIBC since it is GLIBC that provides the HWCAP[2] in the auxv. + bool supportsCpuSupports() const override { return getTriple().isOSGlibc(); } + bool supportsCpuIs() const override { return getTriple().isOSGlibc(); } + bool validateCpuSupports(StringRef Feature) const override; + bool validateCpuIs(StringRef Name) const override; }; class LLVM_LIBRARY_VISIBILITY PPC32TargetInfo : public PPCTargetInfo { Index: clang/lib/Basic/Targets/PPC.cpp =================================================================== --- clang/lib/Basic/Targets/PPC.cpp +++ clang/lib/Basic/Targets/PPC.cpp @@ -858,3 +858,17 @@ return llvm::ArrayRef(BuiltinInfo, clang::PPC::LastTSBuiltin - Builtin::FirstTSBuiltin); } + +bool PPCTargetInfo::validateCpuSupports(StringRef FeatureStr) const { +#define PPC_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN) .Case(NAME, true) + return llvm::StringSwitch<bool>(FeatureStr) +#include "llvm/TargetParser/PPCTargetParser.def" + .Default(false); +} + +bool PPCTargetInfo::validateCpuIs(StringRef CPUName) const { +#define PPC_CPU(NAME, NUM) .Case(NAME, true) + return llvm::StringSwitch<bool>(CPUName) +#include "llvm/TargetParser/PPCTargetParser.def" + .Default(false); +} Index: clang/include/clang/Basic/TargetInfo.h =================================================================== --- clang/include/clang/Basic/TargetInfo.h +++ clang/include/clang/Basic/TargetInfo.h @@ -1398,6 +1398,11 @@ return getTriple().isOSBinFormatELF() && !getTriple().isOSFuchsia(); } + // 
Identify whether this target supports __builtin_cpu_supports and + // __builtin_cpu_is. + virtual bool supportsCpuSupports() const { return false; } + virtual bool supportsCpuIs() const { return false; } + // Validate the contents of the __builtin_cpu_supports(const char*) // argument. virtual bool validateCpuSupports(StringRef Name) const { return false; } Index: clang/include/clang/Basic/BuiltinsX86.def =================================================================== --- clang/include/clang/Basic/BuiltinsX86.def +++ clang/include/clang/Basic/BuiltinsX86.def @@ -26,12 +26,9 @@ # define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANG, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif -// Miscellaneous builtin for checking x86 cpu features. // TODO: Make this somewhat generic so that other backends // can use it? BUILTIN(__builtin_cpu_init, "v", "n") -BUILTIN(__builtin_cpu_supports, "bcC*", "nc") -BUILTIN(__builtin_cpu_is, "bcC*", "nc") // Undefined Values // Index: clang/include/clang/Basic/Builtins.def =================================================================== --- clang/include/clang/Basic/Builtins.def +++ clang/include/clang/Basic/Builtins.def @@ -116,6 +116,10 @@ # define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) #endif +// Builtins for checking CPU features based on the GCC builtins. +BUILTIN(__builtin_cpu_supports, "bcC*", "nc") +BUILTIN(__builtin_cpu_is, "bcC*", "nc") + // Standard libc/libm functions: BUILTIN(__builtin_atan2 , "ddd" , "Fne") BUILTIN(__builtin_atan2f, "fff" , "Fne")
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits