https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/92338
>From 41fbc18c7a4a26b11bc4b772bbe2e384ad9d9dbc Mon Sep 17 00:00:00 2001 From: Freddy Ye <freddy...@intel.com> Date: Fri, 10 May 2024 16:29:55 +0800 Subject: [PATCH 1/5] [X86] Support EGPR for inline assembly. "jR": explicitly enables EGPR "r": enables/disables EGPR with/without -mapx-inline-asm-use-gpr32 -mapx-inline-asm-use-gpr32 will also define a new Macro: __APX_INLINE_ASM_USE_GPR32__ --- clang/include/clang/Driver/Options.td | 2 + clang/lib/Basic/Targets/X86.cpp | 26 +++++++++ clang/lib/Basic/Targets/X86.h | 1 + clang/lib/Driver/ToolChains/Arch/X86.cpp | 2 + .../Driver/x86-apx-inline-asm-use-gpr32.cpp | 3 + clang/test/Preprocessor/x86_target_features.c | 3 + llvm/lib/Target/X86/X86.td | 3 + llvm/lib/Target/X86/X86ISelLowering.cpp | 57 +++++++++++++++++-- .../CodeGen/X86/inline-asm-jR-constraint.ll | 19 +++++++ .../CodeGen/X86/inline-asm-r-constraint.ll | 16 ++++++ 10 files changed, 127 insertions(+), 5 deletions(-) create mode 100644 clang/test/Driver/x86-apx-inline-asm-use-gpr32.cpp create mode 100644 llvm/test/CodeGen/X86/inline-asm-jR-constraint.ll create mode 100644 llvm/test/CodeGen/X86/inline-asm-r-constraint.ll diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 73a2518480e9b..20a7c482bbf06 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6281,6 +6281,8 @@ def mno_apx_features_EQ : CommaJoined<["-"], "mno-apx-features=">, Group<m_x86_F // we will add it to -mapxf. 
def mapxf : Flag<["-"], "mapxf">, Alias<mapx_features_EQ>, AliasArgs<["egpr","push2pop2","ppx", "ndd"]>; def mno_apxf : Flag<["-"], "mno-apxf">, Alias<mno_apx_features_EQ>, AliasArgs<["egpr","push2pop2","ppx","ndd"]>; +def mapx_inline_asm_use_gpr32 : Flag<["-"], "mapx-inline-asm-use-gpr32">, Group<m_Group>, + HelpText<"Enable use of GPR32 in inline assembly for APX">; } // let Flags = [TargetSpecific] // VE feature flags diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 67e2126cf766b..9e61b6e6d6441 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -450,6 +450,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasFullBFloat16 = true; } else if (Feature == "+egpr") { HasEGPR = true; + } else if (Feature == "+inline-asm-use-gpr32") { + HasInlineAsmUseGPR32 = true; } else if (Feature == "+push2pop2") { HasPush2Pop2 = true; } else if (Feature == "+ppx") { @@ -974,6 +976,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, // Condition here is aligned with the feature set of mapxf in Options.td if (HasEGPR && HasPush2Pop2 && HasPPX && HasNDD) Builder.defineMacro("__APX_F__"); + if (HasInlineAsmUseGPR32) + Builder.defineMacro("__APX_INLINE_ASM_USE_GPR32__"); // Each case falls through to the previous one here. switch (SSELevel) { @@ -1493,6 +1497,15 @@ bool X86TargetInfo::validateAsmConstraint( case 'C': // SSE floating point constant. case 'G': // x87 floating point constant. return true; + case 'j': + Name++; + switch (*Name) { + default: + return false; + case 'R': + Info.setAllowsRegister(); + return true; + } case '@': // CC condition changes. if (auto Len = matchAsmCCConstraint(Name)) { @@ -1764,6 +1777,19 @@ std::string X86TargetInfo::convertConstraint(const char *&Constraint) const { // to the next constraint. 
return std::string("^") + std::string(Constraint++, 2); } + case 'j': + switch (Constraint[1]) { + default: + // Break from inner switch and fall through (copy single char), + // continue parsing after copying the current constraint into + // the return string. + break; + case 'R': + // "^" hints llvm that this is a 2 letter constraint. + // "Constraint++" is used to promote the string iterator + // to the next constraint. + return std::string("^") + std::string(Constraint++, 2); + } [[fallthrough]]; default: return std::string(1, *Constraint); diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index c14e4d5f433d8..69c68ee80f3ba 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -174,6 +174,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { bool HasNDD = false; bool HasCCMP = false; bool HasCF = false; + bool HasInlineAsmUseGPR32 = false; protected: llvm::X86::CPUKind CPU = llvm::X86::CK_None; diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp index 53e26a9f8e229..085ff4824a9b0 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -309,4 +309,6 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, Features.push_back("+prefer-no-gather"); if (Args.hasArg(options::OPT_mno_scatter)) Features.push_back("+prefer-no-scatter"); + if (Args.hasArg(options::OPT_mapx_inline_asm_use_gpr32)) + Features.push_back("+inline-asm-use-gpr32"); } diff --git a/clang/test/Driver/x86-apx-inline-asm-use-gpr32.cpp b/clang/test/Driver/x86-apx-inline-asm-use-gpr32.cpp new file mode 100644 index 0000000000000..a45140d96e66c --- /dev/null +++ b/clang/test/Driver/x86-apx-inline-asm-use-gpr32.cpp @@ -0,0 +1,3 @@ +/// Tests -mapx-inline-asm-use-gpr32 +// RUN: %clang -target x86_64-unknown-linux-gnu -c -mapx-inline-asm-use-gpr32 -### %s 2>&1 | FileCheck --check-prefix=GPR32 %s +// GPR32: "-target-feature" 
"+inline-asm-use-gpr32" diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c index 5602c59158fe5..7fe19598fbd4e 100644 --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -811,3 +811,6 @@ // NDD: #define __NDD__ 1 // PPX: #define __PPX__ 1 // PUSH2POP2: #define __PUSH2POP2__ 1 + +// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-inline-asm-use-gpr32 -x c -E -dM -o - %s | FileCheck --check-prefixes=USEGPR32 %s +// USEGPR32: #define __APX_INLINE_ASM_USE_GPR32__ 1 diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 9f5b58d78fcce..90a0c878a1101 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -353,6 +353,9 @@ def FeatureCCMP : SubtargetFeature<"ccmp", "HasCCMP", "true", "Support conditional cmp & test instructions">; def FeatureCF : SubtargetFeature<"cf", "HasCF", "true", "Support conditional faulting">; +def FeatureUseGPR32InInlineAsm + : SubtargetFeature<"inline-asm-use-gpr32", "UseInlineAsmGPR32", "false", + "Enable use of GPR32 in inline assembly for APX">; // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka // "string operations"). 
See "REP String Enhancement" in the Intel Software diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 0410cc33ca337..2f381a3fc117c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -57574,6 +57574,13 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const { case '2': return C_RegisterClass; } + case 'j': + switch (Constraint[1]) { + default: + break; + case 'R': + return C_RegisterClass; + } } } else if (parseConstraintCode(Constraint) != X86::COND_INVALID) return C_Other; @@ -57653,6 +57660,18 @@ X86TargetLowering::getSingleConstraintMatchWeight( break; } break; + case 'j': + if (StringRef(Constraint).size() != 2) + break; + switch (Constraint[1]) { + default: + return CW_Invalid; + case 'R': + if (CallOperandVal->getType()->isIntegerTy()) + Wt = CW_SpecificReg; + break; + } + break; case 'v': if ((Ty->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512()) Wt = CW_Register; @@ -58015,16 +58034,28 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, &X86::GR64_ABCDRegClass); break; case 'r': // GENERAL_REGS - case 'l': // INDEX_REGS + case 'l': // INDEX_REGS{} + if (Subtarget.useInlineAsmGPR32()) { + if (VT == MVT::i8 || VT == MVT::i1) + return std::make_pair(0U, &X86::GR8_NOREX2RegClass); + if (VT == MVT::i16) + return std::make_pair(0U, &X86::GR16_NOREX2RegClass); + if (VT == MVT::i32 || VT == MVT::f32 || + (!VT.isVector() && !Subtarget.is64Bit())) + return std::make_pair(0U, &X86::GR32_NOREX2RegClass); + if (VT != MVT::f80 && !VT.isVector()) + return std::make_pair(0U, &X86::GR64_NOREX2RegClass); + break; + } if (VT == MVT::i8 || VT == MVT::i1) - return std::make_pair(0U, &X86::GR8_NOREX2RegClass); + return std::make_pair(0U, &X86::GR8RegClass); if (VT == MVT::i16) - return std::make_pair(0U, &X86::GR16_NOREX2RegClass); + return std::make_pair(0U, &X86::GR16RegClass); if (VT == MVT::i32 || VT 
== MVT::f32 || (!VT.isVector() && !Subtarget.is64Bit())) - return std::make_pair(0U, &X86::GR32_NOREX2RegClass); + return std::make_pair(0U, &X86::GR32RegClass); if (VT != MVT::f80 && !VT.isVector()) - return std::make_pair(0U, &X86::GR64_NOREX2RegClass); + return std::make_pair(0U, &X86::GR64RegClass); break; case 'R': // LEGACY_REGS if (VT == MVT::i8 || VT == MVT::i1) @@ -58248,6 +58279,22 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, } break; } + } else if (Constraint.size() == 2 && Constraint[0] == 'j') { + switch (Constraint[1]) { + default: + break; + case 'R': + if (VT == MVT::i8 || VT == MVT::i1) + return std::make_pair(0U, &X86::GR8RegClass); + if (VT == MVT::i16) + return std::make_pair(0U, &X86::GR16RegClass); + if (VT == MVT::i32 || VT == MVT::f32 || + (!VT.isVector() && !Subtarget.is64Bit())) + return std::make_pair(0U, &X86::GR32RegClass); + if (VT != MVT::f80 && !VT.isVector()) + return std::make_pair(0U, &X86::GR64RegClass); + break; + } } if (parseConstraintCode(Constraint) != X86::COND_INVALID) diff --git a/llvm/test/CodeGen/X86/inline-asm-jR-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-jR-constraint.ll new file mode 100644 index 0000000000000..2348e75433798 --- /dev/null +++ b/llvm/test/CodeGen/X86/inline-asm-jR-constraint.ll @@ -0,0 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: not llc -mtriple=x86_64 %s 2>&1 | FileCheck %s --check-prefix=ERR +; RUN: llc -mtriple=x86_64 -mattr=+egpr < %s | FileCheck %s --check-prefix=EGPR +; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s --check-prefix=EGPRUSEGPR32 + +; ERR: error: inline assembly requires more registers than available + +define void @constraint_jR_test() nounwind "frame-pointer"="all" { +; EGPR-LABEL: constraint_jR_test: +; EGPR: addq %r16, %rax +; +; EGPRUSEGPR32-LABEL: constraint_jR_test: +; EGPRUSEGPR32: addq %r16, %rax +entry: + %reg = alloca i64, align 8 + %0 = load 
i64, ptr %reg, align 8 + call void asm sideeffect "add $0, %rax", "^jR,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0) + ret void +} diff --git a/llvm/test/CodeGen/X86/inline-asm-r-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-r-constraint.ll new file mode 100644 index 0000000000000..158dcdf9be272 --- /dev/null +++ b/llvm/test/CodeGen/X86/inline-asm-r-constraint.ll @@ -0,0 +1,16 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: not llc -mtriple=x86_64 < %s 2>&1 | FileCheck %s --check-prefix=ERR +; RUN: not llc -mtriple=x86_64 -mattr=+egpr < %s 2>&1 | FileCheck %s --check-prefix=ERR +; RUN: llc -mtriple=x86_64 -mattr=+egpr,+inline-asm-use-gpr32 < %s | FileCheck %s --check-prefix=USEGPR32 + +; ERR: error: inline assembly requires more registers than available + +define void @constraint_r_test() nounwind "frame-pointer"="all" { +; USEGPR32-LABEL: constraint_r_test: +; USEGPR32: addq %r16, %rax +entry: + %reg = alloca i64, align 8 + %0 = load i64, ptr %reg, align 8 + call void asm sideeffect "add $0, %rax", "r,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0) + ret void +} >From c1daf204376b734bb2b527d646769d807f6b668c Mon Sep 17 00:00:00 2001 From: Freddy Ye <freddy...@intel.com> Date: Thu, 16 May 2024 11:15:03 +0800 Subject: [PATCH 2/5] refine --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2f381a3fc117c..5c7fd89299e9d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -58034,7 +58034,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, return std::make_pair(0U, &X86::GR64_ABCDRegClass); break; case 'r': // GENERAL_REGS - case 'l': 
// INDEX_REGS{} + case 'l': // INDEX_REGS if (Subtarget.useInlineAsmGPR32()) { if (VT == MVT::i8 || VT == MVT::i1) return std::make_pair(0U, &X86::GR8_NOREX2RegClass); >From 9f43b45b2f5985e7d6de5b2130710427dbf50ab9 Mon Sep 17 00:00:00 2001 From: Freddy Ye <freddy...@intel.com> Date: Thu, 16 May 2024 14:52:15 +0800 Subject: [PATCH 3/5] clang-format --- llvm/lib/Target/X86/X86ISelLowering.cpp | 26 ++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5c7fd89299e9d..36466876163c3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -58281,19 +58281,19 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, } } else if (Constraint.size() == 2 && Constraint[0] == 'j') { switch (Constraint[1]) { - default: - break; - case 'R': - if (VT == MVT::i8 || VT == MVT::i1) - return std::make_pair(0U, &X86::GR8RegClass); - if (VT == MVT::i16) - return std::make_pair(0U, &X86::GR16RegClass); - if (VT == MVT::i32 || VT == MVT::f32 || - (!VT.isVector() && !Subtarget.is64Bit())) - return std::make_pair(0U, &X86::GR32RegClass); - if (VT != MVT::f80 && !VT.isVector()) - return std::make_pair(0U, &X86::GR64RegClass); - break; + default: + break; + case 'R': + if (VT == MVT::i8 || VT == MVT::i1) + return std::make_pair(0U, &X86::GR8RegClass); + if (VT == MVT::i16) + return std::make_pair(0U, &X86::GR16RegClass); + if (VT == MVT::i32 || VT == MVT::f32 || + (!VT.isVector() && !Subtarget.is64Bit())) + return std::make_pair(0U, &X86::GR32RegClass); + if (VT != MVT::f80 && !VT.isVector()) + return std::make_pair(0U, &X86::GR64RegClass); + break; } } >From 1fdd2f23a05f679d8777176970217a68543a4a62 Mon Sep 17 00:00:00 2001 From: Freddy Ye <freddy...@intel.com> Date: Fri, 17 May 2024 09:26:37 +0800 Subject: [PATCH 4/5] add egpr restriction to the new option. 
--- clang/lib/Basic/Targets/X86.cpp | 2 +- clang/test/Preprocessor/x86_target_features.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 9e61b6e6d6441..0114974637f7c 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -976,7 +976,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, // Condition here is aligned with the feature set of mapxf in Options.td if (HasEGPR && HasPush2Pop2 && HasPPX && HasNDD) Builder.defineMacro("__APX_F__"); - if (HasInlineAsmUseGPR32) + if (HasEGPR && HasInlineAsmUseGPR32) Builder.defineMacro("__APX_INLINE_ASM_USE_GPR32__"); // Each case falls through to the previous one here. diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c index 7fe19598fbd4e..d2603fde01786 100644 --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -812,5 +812,7 @@ // PPX: #define __PPX__ 1 // PUSH2POP2: #define __PUSH2POP2__ 1 -// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-inline-asm-use-gpr32 -x c -E -dM -o - %s | FileCheck --check-prefixes=USEGPR32 %s +// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-inline-asm-use-gpr32 -x c -E -dM -o - %s | FileCheck --check-prefixes=NOUSEGPR32 %s +// RUN: %clang -target x86_64-unknown-unknown -march=x86-64 -mapx-features=egpr -mapx-inline-asm-use-gpr32 -x c -E -dM -o - %s | FileCheck --check-prefixes=USEGPR32 %s +// NOUSEGPR32-NOT: #define __APX_INLINE_ASM_USE_GPR32__ 1 // USEGPR32: #define __APX_INLINE_ASM_USE_GPR32__ 1 >From 4d1ad3090416cda320c88f1ddc0937b5749e64b4 Mon Sep 17 00:00:00 2001 From: Freddy Ye <freddy...@intel.com> Date: Tue, 21 May 2024 10:21:08 +0800 Subject: [PATCH 5/5] address comments. 
--- .../X86/{ => apx}/inline-asm-jR-constraint.ll | 0 .../X86/{ => apx}/inline-asm-r-constraint.ll | 13 +++++++++++++ 2 files changed, 13 insertions(+) rename llvm/test/CodeGen/X86/{ => apx}/inline-asm-jR-constraint.ll (100%) rename llvm/test/CodeGen/X86/{ => apx}/inline-asm-r-constraint.ll (63%) diff --git a/llvm/test/CodeGen/X86/inline-asm-jR-constraint.ll b/llvm/test/CodeGen/X86/apx/inline-asm-jR-constraint.ll similarity index 100% rename from llvm/test/CodeGen/X86/inline-asm-jR-constraint.ll rename to llvm/test/CodeGen/X86/apx/inline-asm-jR-constraint.ll diff --git a/llvm/test/CodeGen/X86/inline-asm-r-constraint.ll b/llvm/test/CodeGen/X86/apx/inline-asm-r-constraint.ll similarity index 63% rename from llvm/test/CodeGen/X86/inline-asm-r-constraint.ll rename to llvm/test/CodeGen/X86/apx/inline-asm-r-constraint.ll index 158dcdf9be272..955372dbc5f20 100644 --- a/llvm/test/CodeGen/X86/inline-asm-r-constraint.ll +++ b/llvm/test/CodeGen/X86/apx/inline-asm-r-constraint.ll @@ -14,3 +14,16 @@ entry: call void asm sideeffect "add $0, %rax", "r,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0) ret void } + +define void @constraint_jR_test() nounwind "frame-pointer"="all" { +; EGPR-LABEL: constraint_jR_test: +; EGPR: addq %r16, %rax +; +; EGPRUSEGPR32-LABEL: constraint_jR_test: +; EGPRUSEGPR32: addq %r16, %rax +entry: + %reg = alloca i64, align 8 + %0 = load i64, ptr %reg, align 8 + call void asm sideeffect "add $0, %rax", "^jR,~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rsi},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"(i64 %0) + ret void +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits