r340390 - [clang-tblgen] Add -print-records and -dump-json modes.
Author: statham Date: Wed Aug 22 02:20:39 2018 New Revision: 340390 URL: http://llvm.org/viewvc/llvm-project?rev=340390&view=rev Log: [clang-tblgen] Add -print-records and -dump-json modes. Currently, if clang-tblgen is run without a mode option, it defaults to the first mode in its 'enum Action', which happens to be -gen-clang-attr-classes. I think it makes more sense for it to behave the same way as llvm-tblgen, i.e. print a diagnostic dump if it's not given any more specific instructions. I've also added the same -dump-json that llvm-tblgen supports. This means any tblgen command line (whether llvm- or clang-) can be mechanically turned into one that processes the same input into JSON. Reviewers: nhaehnle Reviewed By: nhaehnle Subscribers: cfe-commits Differential Revision: https://reviews.llvm.org/D50771 Modified: cfe/trunk/utils/TableGen/TableGen.cpp Modified: cfe/trunk/utils/TableGen/TableGen.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/TableGen.cpp?rev=340390&r1=340389&r2=340390&view=diff == --- cfe/trunk/utils/TableGen/TableGen.cpp (original) +++ cfe/trunk/utils/TableGen/TableGen.cpp Wed Aug 22 02:20:39 2018 @@ -23,6 +23,8 @@ using namespace llvm; using namespace clang; enum ActionType { + PrintRecords, + DumpJSON, GenClangAttrClasses, GenClangAttrParserStringSwitches, GenClangAttrSubjectMatchRulesParserStringSwitches, @@ -66,6 +68,10 @@ namespace { cl::opt Action( cl::desc("Action to perform:"), cl::values( +clEnumValN(PrintRecords, "print-records", + "Print all records to stdout (default)"), +clEnumValN(DumpJSON, "dump-json", + "Dump all records as machine-readable JSON"), clEnumValN(GenClangAttrClasses, "gen-clang-attr-classes", "Generate clang attribute clases"), clEnumValN(GenClangAttrParserStringSwitches, @@ -164,6 +170,12 @@ ClangComponent("clang-component", bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) { switch (Action) { + case PrintRecords: +OS << Records; // No argument, dump all contents +break; + case 
DumpJSON: +EmitJSON(Records, OS); +break; case GenClangAttrClasses: EmitClangAttrClass(Records, OS); break; ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r333513 - Support __iso_volatile_load8 etc on aarch64-win32.
Author: statham Date: Wed May 30 00:54:05 2018 New Revision: 333513 URL: http://llvm.org/viewvc/llvm-project?rev=333513&view=rev Log: Support __iso_volatile_load8 etc on aarch64-win32. These intrinsics are used by MSVC's header files on AArch64 Windows as well as AArch32, so we should support them for both targets. I've factored them out of CodeGenFunction::EmitARMBuiltinExpr into separate functions that EmitAArch64BuiltinExpr can call as well. Reviewers: javed.absar, mstorsjo Reviewed By: mstorsjo Subscribers: kristof.beyls, cfe-commits Differential Revision: https://reviews.llvm.org/D47476 Added: cfe/trunk/test/CodeGen/ms-volatile-aarch64.c (with props) Modified: cfe/trunk/include/clang/Basic/BuiltinsAArch64.def cfe/trunk/lib/CodeGen/CGBuiltin.cpp cfe/trunk/lib/CodeGen/CodeGenFunction.h Modified: cfe/trunk/include/clang/Basic/BuiltinsAArch64.def URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAArch64.def?rev=333513&r1=333512&r2=333513&view=diff == --- cfe/trunk/include/clang/Basic/BuiltinsAArch64.def (original) +++ cfe/trunk/include/clang/Basic/BuiltinsAArch64.def Wed May 30 00:54:05 2018 @@ -69,5 +69,15 @@ LANGBUILTIN(__dmb, "vUi", "nc", ALL_MS_L LANGBUILTIN(__dsb, "vUi", "nc", ALL_MS_LANGUAGES) LANGBUILTIN(__isb, "vUi", "nc", ALL_MS_LANGUAGES) +// MSVC intrinsics for volatile but non-acquire/release loads and stores +LANGBUILTIN(__iso_volatile_load8, "ccCD*", "n", ALL_MS_LANGUAGES) +LANGBUILTIN(__iso_volatile_load16, "ssCD*", "n", ALL_MS_LANGUAGES) +LANGBUILTIN(__iso_volatile_load32, "iiCD*", "n", ALL_MS_LANGUAGES) +LANGBUILTIN(__iso_volatile_load64, "LLiLLiCD*", "n", ALL_MS_LANGUAGES) +LANGBUILTIN(__iso_volatile_store8, "vcD*c", "n", ALL_MS_LANGUAGES) +LANGBUILTIN(__iso_volatile_store16, "vsD*s", "n", ALL_MS_LANGUAGES) +LANGBUILTIN(__iso_volatile_store32, "viD*i", "n", ALL_MS_LANGUAGES) +LANGBUILTIN(__iso_volatile_store64, "vLLiD*LLi", "n", ALL_MS_LANGUAGES) + #undef BUILTIN #undef LANGBUILTIN Modified: 
cfe/trunk/lib/CodeGen/CGBuiltin.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=333513&r1=333512&r2=333513&view=diff == --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original) +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed May 30 00:54:05 2018 @@ -5179,6 +5179,34 @@ static bool HasExtraNeonArgument(unsigne return true; } +Value *CodeGenFunction::EmitISOVolatileLoad(const CallExpr *E) { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + QualType ElTy = E->getArg(0)->getType()->getPointeeType(); + CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); + llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), + LoadSize.getQuantity() * 8); + Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); + llvm::LoadInst *Load = +Builder.CreateAlignedLoad(Ptr, LoadSize); + Load->setVolatile(true); + return Load; +} + +Value *CodeGenFunction::EmitISOVolatileStore(const CallExpr *E) { + Value *Ptr = EmitScalarExpr(E->getArg(0)); + Value *Value = EmitScalarExpr(E->getArg(1)); + QualType ElTy = E->getArg(0)->getType()->getPointeeType(); + CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy); + llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), + StoreSize.getQuantity() * 8); + Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo()); + llvm::StoreInst *Store = +Builder.CreateAlignedStore(Value, Ptr, + StoreSize); + Store->setVolatile(true); + return Store; +} + Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch) { @@ -5421,35 +5449,13 @@ Value *CodeGenFunction::EmitARMBuiltinEx case ARM::BI__iso_volatile_load8: case ARM::BI__iso_volatile_load16: case ARM::BI__iso_volatile_load32: - case ARM::BI__iso_volatile_load64: { -Value *Ptr = EmitScalarExpr(E->getArg(0)); -QualType ElTy = E->getArg(0)->getType()->getPointeeType(); -CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy); -llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(), - LoadSize.getQuantity() * 8); -Ptr = 
Builder.CreateBitCast(Ptr, ITy->getPointerTo()); -llvm::LoadInst *Load = - Builder.CreateAlignedLoad(Ptr, LoadSize); -Load->setVolatile(true); -return Load; - } + case ARM::BI__iso_volatile_load64: +return EmitISOVolatileLoad(E); case ARM::BI__iso_volatile_store8: case ARM::BI__iso_volatile_store16: case ARM::BI__iso_volatile_store32: - case ARM::BI__iso_volatile_store64: { -Value *Ptr = EmitScalarExpr(E->getArg(0)); -Value *Value = EmitScalarExpr(E->getArg(1
r362380 - [ARM] Fix recent breakage of -mfpu=none.
Author: statham Date: Mon Jun 3 04:02:53 2019 New Revision: 362380 URL: http://llvm.org/viewvc/llvm-project?rev=362380&view=rev Log: [ARM] Fix recent breakage of -mfpu=none. The recent change D60691 introduced a bug in clang when handling option combinations such as `-mcpu=cortex-m4 -mfpu=none`. Those options together should select Cortex-M4 but disable all use of hardware FP, but in fact, now hardware FP instructions can still be generated in that mode. The reason is because the handling of FPUVersion::NONE disables all the same feature names it used to, of which the base one is `vfp2`. But now there are further features below that, like `vfp2d16fp` and (following D60694) `fpregs`, which also need to be turned off to disable hardware FP completely. Added a tiny test which double-checks that compiling a simple FP function doesn't access the FP registers. Reviewers: SjoerdMeijer, dmgreen Reviewed By: dmgreen Subscribers: lebedev.ri, javed.absar, kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D62729 Added: cfe/trunk/test/CodeGen/arm-mfpu-none.c (with props) Modified: cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp cfe/trunk/test/Driver/arm-mfpu.c Modified: cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp?rev=362380&r1=362379&r2=362380&view=diff == --- cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp Mon Jun 3 04:02:53 2019 @@ -430,8 +430,8 @@ fp16_fml_fallthrough: llvm::ARM::getFPUFeatures(llvm::ARM::FK_NONE, Features); // Disable hardware FP features which have been enabled. -// FIXME: Disabling vfp2 and neon should be enough as all the other -//features are dependent on these 2 features in LLVM. However +// FIXME: Disabling fpregs should be enough all by itself, since all +//the other FP features are dependent on it. 
However //there is currently no easy way to test this in clang, so for //now just be explicit and disable all known dependent features //as well. @@ -439,6 +439,11 @@ fp16_fml_fallthrough: "neon", "crypto", "dotprod", "fp16fml"}) if (std::find(std::begin(Features), std::end(Features), "+" + Feature) != std::end(Features)) Features.push_back(Args.MakeArgString("-" + Feature)); + +// Disable the base feature unconditionally, even if it was not +// explicitly in the features list (e.g. if we had +vfp3, which +// implies it). +Features.push_back("-fpregs"); } // En/disable crc code generation. Added: cfe/trunk/test/CodeGen/arm-mfpu-none.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-mfpu-none.c?rev=362380&view=auto == --- cfe/trunk/test/CodeGen/arm-mfpu-none.c (added) +++ cfe/trunk/test/CodeGen/arm-mfpu-none.c Mon Jun 3 04:02:53 2019 @@ -0,0 +1,8 @@ +// REQUIRES: arm-registered-target +// RUN: %clang -target arm-none-eabi -mcpu=cortex-m4 -mfpu=none -S -o - %s | FileCheck %s + +// CHECK-LABEL: compute +// CHECK-NOT: {{s[0-9]}} +float compute(float a, float b) { + return (a+b) * (a-b); +} Propchange: cfe/trunk/test/CodeGen/arm-mfpu-none.c -- svn:eol-style = native Propchange: cfe/trunk/test/CodeGen/arm-mfpu-none.c -- svn:keywords = Rev Date Author URL Id Modified: cfe/trunk/test/Driver/arm-mfpu.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/arm-mfpu.c?rev=362380&r1=362379&r2=362380&view=diff == --- cfe/trunk/test/Driver/arm-mfpu.c (original) +++ cfe/trunk/test/Driver/arm-mfpu.c Mon Jun 3 04:02:53 2019 @@ -318,6 +318,7 @@ // RUN: | FileCheck --check-prefix=CHECK-NO-FP %s // CHECK-NO-FP-NOT: "-target-feature" "+soft-float" // CHECK-NO-FP: "-target-feature" "+soft-float-abi" +// CHECK-NO-FP: "-target-feature" "-fpregs" // CHECK-NO-FP: "-target-feature" "-vfp2" // CHECK-NO-FP: "-target-feature" "-vfp3" // CHECK-NO-FP: "-target-feature" "-vfp4" @@ -363,6 +364,7 @@ // CHECK-SOFT-ABI-FP: "-target-feature" "-fp-armv8" // 
CHECK-SOFT-ABI-FP: "-target-feature" "-neon" // CHECK-SOFT-ABI-FP: "-target-feature" "-crypto" +// CHECK-SOFT-ABI-FP: "-target-feature" "-fpregs" // RUN: %clang -target arm-linux-androideabi21 %s -### -c 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-ARM5-ANDROID-FP-DEFAULT %s ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r362791 - [ARM] Fix bugs introduced by the fp64/d32 rework.
Author: statham Date: Fri Jun 7 05:42:54 2019 New Revision: 362791 URL: http://llvm.org/viewvc/llvm-project?rev=362791&view=rev Log: [ARM] Fix bugs introduced by the fp64/d32 rework. Change D60691 caused some knock-on failures that weren't caught by the existing tests. Firstly, selecting a CPU that should have had a restricted FPU (e.g. `-mcpu=cortex-m4`, which should have 16 d-regs and no double precision) could give the unrestricted version, because `ARM::getFPUFeatures` returned a list of features including subtracted ones (here `-fp64`,`-d32`), but `ARMTargetInfo::initFeatureMap` threw away all the ones that didn't start with `+`. Secondly, the preprocessor macros didn't reliably match the actual compilation settings: for example, `-mfpu=softvfp` could still set `__ARM_FP` as if hardware FP was available, because the list of features on the cc1 command line would include things like `+vfp4`,`-vfp4d16` and clang didn't realise that one of those cancelled out the other. I've fixed both of these issues by rewriting `ARM::getFPUFeatures` so that it returns a list that enables every FP-related feature compatible with the selected FPU and disables every feature not compatible, which is more verbose but means clang doesn't have to understand the dependency relationships between the backend features. Meanwhile, `ARMTargetInfo::handleTargetFeatures` is testing for all the various forms of the FP feature names, so that it won't miss cases where it should have set `HW_FP` to feed into feature test macros. That in turn caused an ordering problem when handling `-mcpu=foo+bar` together with `-mfpu=something_that_turns_off_bar`. To fix that, I've arranged that the `+bar` suffixes on the end of `-mcpu` and `-march` cause feature names to be put into a separate vector which is concatenated after the output of `getFPUFeatures`. 
Another side effect of all this is to fix a bug where `clang -target armv8-eabi` by itself would fail to set `__ARM_FEATURE_FMA`, even though `armv8` (aka Arm v8-A) implies FP-Armv8 which has FMA. That was because `HW_FP` was being set to a value including only the `FPARMV8` bit, but that feature test macro was testing only the `VFP4FPU` bit. Now `HW_FP` ends up with all the bits set, so it gives the right answer. Changes to tests included in this patch: * `arm-target-features.c`: I had to change basically all the expected results. (The Cortex-M4 test in there should function as a regression test for the accidental double-precision bug.) * `arm-mfpu.c`, `armv8.1m.main.c`: switched to using `CHECK-DAG` everywhere so that those tests are no longer sensitive to the order of cc1 feature options on the command line. * `arm-acle-6.5.c`: updated to expect the right answer to that FMA test. * `Preprocessor/arm-target-features.c`: added a regression test for the `mfpu=softvfp` issue. Reviewers: SjoerdMeijer, dmgreen, ostannard, samparker, JamesNagurne Reviewed By: ostannard Subscribers: srhines, javed.absar, kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D62998 Modified: cfe/trunk/lib/Basic/Targets/ARM.cpp cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp cfe/trunk/test/CodeGen/arm-target-features.c cfe/trunk/test/Driver/arm-mfpu.c cfe/trunk/test/Driver/armv8.1m.main.c cfe/trunk/test/Preprocessor/arm-acle-6.5.c cfe/trunk/test/Preprocessor/arm-target-features.c Modified: cfe/trunk/lib/Basic/Targets/ARM.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/ARM.cpp?rev=362791&r1=362790&r2=362791&view=diff == --- cfe/trunk/lib/Basic/Targets/ARM.cpp (original) +++ cfe/trunk/lib/Basic/Targets/ARM.cpp Fri Jun 7 05:42:54 2019 @@ -408,18 +408,30 @@ bool ARMTargetInfo::handleTargetFeatures SoftFloat = true; } else if (Feature == "+soft-float-abi") { SoftFloatABI = true; -} else if (Feature == 
"+vfp2") { +} else if (Feature == "+vfp2sp" || Feature == "+vfp2d16sp" || + Feature == "+vfp2" || Feature == "+vfp2d16") { FPU |= VFP2FPU; HW_FP |= HW_FP_SP; -} else if (Feature == "+vfp3") { + if (Feature == "+vfp2" || Feature == "+vfp2d16") + HW_FP |= HW_FP_DP; +} else if (Feature == "+vfp3sp" || Feature == "+vfp3d16sp" || + Feature == "+vfp3" || Feature == "+vfp3d16") { FPU |= VFP3FPU; HW_FP |= HW_FP_SP; -} else if (Feature == "+vfp4") { + if (Feature == "+vfp3" || Feature == "+vfp3d16") + HW_FP |= HW_FP_DP; +} else if (Feature == "+vfp4sp" || Feature == "+vfp4d16sp" || + Feature == "+vfp4" || Feature == "+vfp4d16") { FPU |= VFP4FPU; HW_FP |= HW_FP_SP | HW_FP_HP; -} else if (Feature == "+fp-armv8") { + if (Feature == "+vfp4" || Feature == "+vfp4d16") + HW_FP |= HW_FP_DP; +} else if (Feature == "+fp-armv8sp" || Feature == "+fp-ar
r364331 - [ARM] Support inline assembler constraints for MVE.
Author: statham Date: Tue Jun 25 09:49:32 2019 New Revision: 364331 URL: http://llvm.org/viewvc/llvm-project?rev=364331&view=rev Log: [ARM] Support inline assembler constraints for MVE. "To" selects an odd-numbered GPR, and "Te" an even one. There are some 8.1-M instructions that have one too few bits in their register fields and require registers of particular parity, without necessarily using a consecutive even/odd pair. Also, the constraint letter "t" should select an MVE q-register, when MVE is present. This didn't need any source changes, but some extra tests have been added. Reviewers: dmgreen, samparker, SjoerdMeijer Subscribers: javed.absar, eraman, kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D60709 Modified: cfe/trunk/lib/Basic/Targets/ARM.cpp cfe/trunk/test/CodeGen/arm-asm.c Modified: cfe/trunk/lib/Basic/Targets/ARM.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/ARM.cpp?rev=364331&r1=364330&r2=364331&view=diff == --- cfe/trunk/lib/Basic/Targets/ARM.cpp (original) +++ cfe/trunk/lib/Basic/Targets/ARM.cpp Tue Jun 25 09:49:32 2019 @@ -900,6 +900,16 @@ bool ARMTargetInfo::validateAsmConstrain case 'Q': // A memory address that is a single base register. Info.setAllowsMemory(); return true; + case 'T': +switch (Name[1]) { +default: + break; +case 'e': // Even general-purpose register +case 'o': // Odd general-purpose register + Info.setAllowsRegister(); + Name++; + return true; +} case 'U': // a memory reference... switch (Name[1]) { case 'q': // ...ARMV4 ldrsb @@ -923,6 +933,7 @@ std::string ARMTargetInfo::convertConstr std::string R; switch (*Constraint) { case 'U': // Two-character constraint; add "^" hint for later parsing. 
+ case 'T': R = std::string("^") + std::string(Constraint, 2); Constraint++; break; Modified: cfe/trunk/test/CodeGen/arm-asm.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-asm.c?rev=364331&r1=364330&r2=364331&view=diff == --- cfe/trunk/test/CodeGen/arm-asm.c (original) +++ cfe/trunk/test/CodeGen/arm-asm.c Tue Jun 25 09:49:32 2019 @@ -6,3 +6,21 @@ int t1() { __asm__ volatile ("flds s15, %[k] \n" :: [k] "Uv" (k) : "s15"); return 0; } + +// CHECK-LABEL: @even_reg_constraint_Te +int even_reg_constraint_Te(void) { + int acc = 0; + // CHECK: vaddv{{.*\^Te}} + asm("vaddv.s8 %0, Q0" + : "+Te" (acc)); + return acc; +} + +// CHECK-LABEL: @odd_reg_constraint_To +int odd_reg_constraint_To(void) { + int eacc = 0, oacc = 0; + // CHECK: vaddlv{{.*\^To}} + asm("vaddlv.s8 %0, %1, Q0" + : "+Te" (eacc), "+To" (oacc)); + return oacc; +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r361845 - [ARM] Replace fp-only-sp and d16 with fp64 and d32.
Author: statham Date: Tue May 28 09:13:20 2019 New Revision: 361845 URL: http://llvm.org/viewvc/llvm-project?rev=361845&view=rev Log: [ARM] Replace fp-only-sp and d16 with fp64 and d32. Those two subtarget features were awkward because their semantics are reversed: each one indicates the _lack_ of support for something in the architecture, rather than the presence. As a consequence, you don't get the behavior you want if you combine two sets of feature bits. Each SubtargetFeature for an FP architecture version now comes in four versions, one for each combination of those options. So you can still say (for example) '+vfp2' in a feature string and it will mean what it's always meant, but there's a new string '+vfp2d16sp' meaning the version without those extra options. A lot of this change is just mechanically replacing positive checks for the old features with negative checks for the new ones. But one more interesting change is that I've rearranged getFPUFeatures() so that the main FPU feature is appended to the output list *before* rather than after the features derived from the Restriction field, so that -fp64 and -d32 can override defaults added by the main feature. 
Reviewers: dmgreen, samparker, SjoerdMeijer Subscribers: srhines, javed.absar, eraman, kristof.beyls, hiraditya, zzheng, Petar.Avramovic, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D60691 Modified: cfe/trunk/lib/Basic/Targets/ARM.cpp cfe/trunk/test/CodeGen/arm-target-features.c cfe/trunk/test/Driver/arm-mfpu.c Modified: cfe/trunk/lib/Basic/Targets/ARM.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/ARM.cpp?rev=361845&r1=361844&r2=361845&view=diff == --- cfe/trunk/lib/Basic/Targets/ARM.cpp (original) +++ cfe/trunk/lib/Basic/Targets/ARM.cpp Tue May 28 09:13:20 2019 @@ -400,8 +400,7 @@ bool ARMTargetInfo::handleTargetFeatures HasFloat16 = true; // This does not diagnose illegal cases like having both - // "+vfpv2" and "+vfpv3" or having "+neon" and "+fp-only-sp". - uint32_t HW_FP_remove = 0; + // "+vfpv2" and "+vfpv3" or having "+neon" and "-fp64". for (const auto &Feature : Features) { if (Feature == "+soft-float") { SoftFloat = true; @@ -409,19 +408,19 @@ bool ARMTargetInfo::handleTargetFeatures SoftFloatABI = true; } else if (Feature == "+vfp2") { FPU |= VFP2FPU; - HW_FP |= HW_FP_SP | HW_FP_DP; + HW_FP |= HW_FP_SP; } else if (Feature == "+vfp3") { FPU |= VFP3FPU; - HW_FP |= HW_FP_SP | HW_FP_DP; + HW_FP |= HW_FP_SP; } else if (Feature == "+vfp4") { FPU |= VFP4FPU; - HW_FP |= HW_FP_SP | HW_FP_DP | HW_FP_HP; + HW_FP |= HW_FP_SP | HW_FP_HP; } else if (Feature == "+fp-armv8") { FPU |= FPARMV8; - HW_FP |= HW_FP_SP | HW_FP_DP | HW_FP_HP; + HW_FP |= HW_FP_SP | HW_FP_HP; } else if (Feature == "+neon") { FPU |= NeonFPU; - HW_FP |= HW_FP_SP | HW_FP_DP; + HW_FP |= HW_FP_SP; } else if (Feature == "+hwdiv") { HWDiv |= HWDivThumb; } else if (Feature == "+hwdiv-arm") { @@ -432,8 +431,8 @@ bool ARMTargetInfo::handleTargetFeatures Crypto = 1; } else if (Feature == "+dsp") { DSP = 1; -} else if (Feature == "+fp-only-sp") { - HW_FP_remove |= HW_FP_DP; +} else if (Feature == "+fp64") { + HW_FP |= HW_FP_DP; } 
else if (Feature == "+8msecext") { if (CPUProfile != "M" || ArchVersion != 8) { Diags.Report(diag::err_target_unsupported_mcmse) << CPU; @@ -449,7 +448,6 @@ bool ARMTargetInfo::handleTargetFeatures DotProd = true; } } - HW_FP &= ~HW_FP_remove; switch (ArchVersion) { case 6: Modified: cfe/trunk/test/CodeGen/arm-target-features.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-target-features.c?rev=361845&r1=361844&r2=361845&view=diff == --- cfe/trunk/test/CodeGen/arm-target-features.c (original) +++ cfe/trunk/test/CodeGen/arm-target-features.c Tue May 28 09:13:20 2019 @@ -1,23 +1,23 @@ // REQUIRES: arm-registered-target // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a8 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3 -// CHECK-VFP3: "target-features"="+armv7-a,+dsp,+neon,+thumb-mode,+vfp3" +// CHECK-VFP3: "target-features"="+armv7-a,+d32,+dsp,+fp64,+neon,+thumb-mode,+vfp3" // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a5 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4 -// CHECK-VFP4: "target-features"="+armv7-a,+dsp,+neon,+thumb-mode,+vfp4" +// CHECK-VFP4: "target-features"="+armv7-a,+d32,+dsp,+fp64,+neon,+thumb-mode,+vfp4" // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a7 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV // RUN
r373744 - [clang] Prevent false positives in arm-mfpu-none codegen test.
Author: statham Date: Fri Oct 4 06:01:41 2019 New Revision: 373744 URL: http://llvm.org/viewvc/llvm-project?rev=373744&view=rev Log: [clang] Prevent false positives in arm-mfpu-none codegen test. A user pointed out to me in private email that this test will fail if it sees the letter 's' followed by a digit in any part of clang's assembly output after the function label. That includes the .ident at the end, which can include a full pathname or hostname or both from the system clang was built on. So if that path or hostname includes any text like 's5' then it will cause the test to fail. Fixed by adding a check for `.fnend`, to limit the scope of the `CHECK-NOT` to only the actual generated code for the test function. (Committed without prior review on the basis that it's a simple and obvious pure test-suite fix and also in a test I contributed myself.) Modified: cfe/trunk/test/CodeGen/arm-mfpu-none.c Modified: cfe/trunk/test/CodeGen/arm-mfpu-none.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-mfpu-none.c?rev=373744&r1=373743&r2=373744&view=diff == --- cfe/trunk/test/CodeGen/arm-mfpu-none.c (original) +++ cfe/trunk/test/CodeGen/arm-mfpu-none.c Fri Oct 4 06:01:41 2019 @@ -3,6 +3,7 @@ // CHECK-LABEL: compute // CHECK-NOT: {{s[0-9]}} +// CHECK: .fnend float compute(float a, float b) { return (a+b) * (a-b); } ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
r375001 - [Driver,ARM] Make -mfloat-abi=soft turn off MVE.
Author: statham Date: Wed Oct 16 06:23:39 2019 New Revision: 375001 URL: http://llvm.org/viewvc/llvm-project?rev=375001&view=rev Log: [Driver,ARM] Make -mfloat-abi=soft turn off MVE. Since `-mfloat-abi=soft` is taken to mean turning off all uses of the FP registers, it should turn off the MVE vector instructions as well as NEON and scalar FP. But it wasn't doing so. So the options `-march=armv8.1-m.main+mve.fp+fp.dp -mfloat-abi=soft` would cause the underlying LLVM to //not// support MVE (because it knows the real target feature relationships and turned off MVE when the `fpregs` feature was removed), but the clang layer still thought it //was// supported, and would misleadingly define the feature macro `__ARM_FEATURE_MVE`. The ARM driver code already has a long list of feature names to turn off when `-mfloat-abi=soft` is selected. The fix is to add the missing entries `mve` and `mve.fp` to that list. Reviewers: dmgreen Subscribers: kristof.beyls, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D69025 Modified: cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp cfe/trunk/test/Driver/arm-mfpu.c Modified: cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp?rev=375001&r1=375000&r2=375001&view=diff == --- cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp (original) +++ cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp Wed Oct 16 06:23:39 2019 @@ -465,6 +465,7 @@ fp16_fml_fallthrough: "vfp4", "vfp4sp", "vfp4d16", "vfp4d16sp", "fp-armv8", "fp-armv8sp", "fp-armv8d16", "fp-armv8d16sp", "fullfp16", "neon", "crypto", "dotprod", "fp16fml", +"mve", "mve.fp", "fp64", "d32", "fpregs"}) Features.push_back(Args.MakeArgString("-" + Feature)); } Modified: cfe/trunk/test/Driver/arm-mfpu.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/arm-mfpu.c?rev=375001&r1=375000&r2=375001&view=diff == --- cfe/trunk/test/Driver/arm-mfpu.c (original) +++ cfe/trunk/test/Driver/arm-mfpu.c Wed Oct 16 
06:23:39 2019 @@ -397,3 +397,9 @@ // CHECK-ARM7-ANDROID-FP-D16-NOT: "-target-feature" "+fp-armv8" // CHECK-ARM7-ANDROID-FP-D16-NOT: "-target-feature" "+neon" // CHECK-ARM7-ANDROID-FP-D16-NOT: "-target-feature" "+crypto" + +// RUN: %clang -target arm-none-none-eabi %s -march=armv8.1-m.main+mve.fp+fp.dp -mfloat-abi=soft -### -c 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-SOFTFLOATABI-INHIBITS-MVE %s +// CHECK-SOFTFLOATABI-INHIBITS-MVE-NOT: "-target-feature" "+mve" +// CHECK-SOFTFLOATABI-INHIBITS-MVE-DAG: "-target-feature" "-mve" +// CHECK-SOFTFLOATABI-INHIBITS-MVE-DAG: "-target-feature" "-mve.fp" ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 24ef631 - Fix file-ordering nit in D67161.
Author: Simon Tatham Date: 2019-10-25T09:22:07+01:00 New Revision: 24ef631f4333120abd6b66c1e8466a582b60779f URL: https://github.com/llvm/llvm-project/commit/24ef631f4333120abd6b66c1e8466a582b60779f DIFF: https://github.com/llvm/llvm-project/commit/24ef631f4333120abd6b66c1e8466a582b60779f.diff LOG: Fix file-ordering nit in D67161. Re-sorted the module names in clang/utils/TableGen/CMakeLists.txt back into alphabetical order. Added: Modified: clang/utils/TableGen/CMakeLists.txt Removed: diff --git a/clang/utils/TableGen/CMakeLists.txt b/clang/utils/TableGen/CMakeLists.txt index 407cf8a57f9a..c685a2c0c076 100644 --- a/clang/utils/TableGen/CMakeLists.txt +++ b/clang/utils/TableGen/CMakeLists.txt @@ -13,8 +13,8 @@ add_tablegen(clang-tblgen CLANG ClangOptionDocEmitter.cpp ClangSACheckersEmitter.cpp ClangTypeNodesEmitter.cpp - NeonEmitter.cpp MveEmitter.cpp + NeonEmitter.cpp TableGen.cpp ) set_target_properties(clang-tblgen PROPERTIES FOLDER "Clang tablegenning") ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 11ce19d - [clang] Switch arm-mve-intrinsics tests to use %clang_cc1.
Author: Simon Tatham Date: 2019-10-25T12:00:38+01:00 New Revision: 11ce19d2119e0870b2bf53eb23d215aa83cd5540 URL: https://github.com/llvm/llvm-project/commit/11ce19d2119e0870b2bf53eb23d215aa83cd5540 DIFF: https://github.com/llvm/llvm-project/commit/11ce19d2119e0870b2bf53eb23d215aa83cd5540.diff LOG: [clang] Switch arm-mve-intrinsics tests to use %clang_cc1. It isn't really necessary for them to run the clang driver, and it's more efficient not to (and also more stable against driver changes). Now they invoke cc1 directly, more like the analogous NEON tests. Reviewers: dmgreen Subscribers: kristof.beyls, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D69426 Added: Modified: clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c clang/test/CodeGen/arm-mve-intrinsics/vadc.c clang/test/CodeGen/arm-mve-intrinsics/vaddq.c clang/test/CodeGen/arm-mve-intrinsics/vcvt.c clang/test/CodeGen/arm-mve-intrinsics/vld24.c clang/test/CodeGen/arm-mve-intrinsics/vldr.c clang/test/CodeGen/arm-mve-intrinsics/vminvq.c Removed: diff --git a/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c b/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c index ec9a47f18eb9..0eead7a973f0 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c @@ -1,5 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -fno-discard-value-names -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vadc.c b/clang/test/CodeGen/arm-mve-intrinsics/vadc.c index 6b77eac9ca54..58a47fc42bcb 100644 --- 
a/clang/test/CodeGen/arm-mve-intrinsics/vadc.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vadc.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -fno-discard-value-names -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -DPOLYMORPHIC -O0 -Xclang -disable-O0-optnone -fno-discard-value-names -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c index 970ac53cefc6..1f18d5b57880 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -fno-discard-value-names -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -DPOLYMORPHIC -O0 -Xclang -disable-O0-optnone -fno-discard-value-names -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #include diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c index 1aae36619dfa..ed3ecd3ee62e 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c @@ -1,5 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp -mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -fno-discard-value-names -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s #inc
[clang] 9e6f19f - Fix missing build dependency on omp_gen.
Author: Simon Tatham Date: 2020-07-02T09:16:15+01:00 New Revision: 9e6f19fd8390d39a0351941da1582f888d18c369 URL: https://github.com/llvm/llvm-project/commit/9e6f19fd8390d39a0351941da1582f888d18c369 DIFF: https://github.com/llvm/llvm-project/commit/9e6f19fd8390d39a0351941da1582f888d18c369.diff LOG: Fix missing build dependency on omp_gen. Summary: `include/llvm/Frontend/OpenMP/CMakeLists.txt` creates a new target called `omp_gen` which builds the generated include file `OMP.h.inc`. This target must therefore be a dependency of every compilation step whose transitive #include dependencies contain `OMP.h.inc`, or else it's possible for builds to fail if Ninja (or make or whatever) schedules that compilation step before building `OMP.h.inc` at all. A few of those dependencies are currently missing, which leads to intermittent build failures, depending on the order that Ninja (or whatever) happens to schedule its commands. As far as I can see, compiles in `clang/lib/CodeGen`, `clang/lib/Frontend`, and `clang/examples` all depend transitively on `OMP.h.inc` (usually via `clang/AST/AST.h`), but don't have the formal dependency in the ninja graph. Adding `omp_gen` to the dependencies of `clang-tablegen-targets` seems to be the way to get the missing dependency into the `clang/examples` subdirectory. This also fixes the other two clang subdirectories, as far as I can see. Reviewers: clementval, thakis, chandlerc, jdoerfert Reviewed By: clementval Subscribers: cfe-commits, jdenny, mgorny, sstefan1, llvm-commits Tags: #llvm, #clang Differential Revision: https://reviews.llvm.org/D82659 Added: Modified: clang/CMakeLists.txt Removed: diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 5a5e34aacbeb..83c30528499c 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -517,7 +517,10 @@ add_subdirectory(include) # All targets below may depend on all tablegen'd files. 
get_property(CLANG_TABLEGEN_TARGETS GLOBAL PROPERTY CLANG_TABLEGEN_TARGETS) -add_custom_target(clang-tablegen-targets DEPENDS ${CLANG_TABLEGEN_TARGETS}) +add_custom_target(clang-tablegen-targets + DEPENDS + omp_gen + ${CLANG_TABLEGEN_TARGETS}) set_target_properties(clang-tablegen-targets PROPERTIES FOLDER "Misc") list(APPEND LLVM_COMMON_DEPENDS clang-tablegen-targets) ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] ed0e4c7 - [clang][ARM] Add name-mangling test for direct __fp16 arguments.
Author: Simon Tatham Date: 2020-08-03T13:30:50+01:00 New Revision: ed0e4c70c99d3afd87fb202ab03bda40512677e7 URL: https://github.com/llvm/llvm-project/commit/ed0e4c70c99d3afd87fb202ab03bda40512677e7 DIFF: https://github.com/llvm/llvm-project/commit/ed0e4c70c99d3afd87fb202ab03bda40512677e7.diff LOG: [clang][ARM] Add name-mangling test for direct __fp16 arguments. `clang/test/CodeGenCXX/fp16-mangle.cpp` tests pointers to __fp16, but if you give the `-fallow-half-arguments-and-returns` option, then clang can also leave an __fp16 unmodified as a function argument or return type. This regression test checks the name-mangling of that. Reviewed By: miyuki Differential Revision: https://reviews.llvm.org/D85010 Added: clang/test/CodeGenCXX/fp16-mangle-arg-return.cpp Modified: Removed: diff --git a/clang/test/CodeGenCXX/fp16-mangle-arg-return.cpp b/clang/test/CodeGenCXX/fp16-mangle-arg-return.cpp new file mode 100644 index ..15214e13ad8a --- /dev/null +++ b/clang/test/CodeGenCXX/fp16-mangle-arg-return.cpp @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -emit-llvm -o - -triple arm-arm-none-eabi -fallow-half-arguments-and-returns %s | FileCheck %s +// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-arm-none-eabi -fallow-half-arguments-and-returns %s | FileCheck %s + +// Test name-mangling of __fp16 passed directly as a function argument +// (when that is permitted). + +// CHECK: define {{.*}}void @_Z13fp16_argumentDh(half %{{.*}}) +void fp16_argument(__fp16 arg) {} + +// Test name-mangling of __fp16 as a return type. The return type of +// fp16_return itself isn't mentioned in the mangled name, so to test +// this, we have to pass it a function pointer and make __fp16 the +// return type of that. + +// CHECK: define {{.*}}void @_Z11fp16_returnPFDhvE(half ()* %{{.*}}) +void fp16_return(__fp16 (*func)(void)) {} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 1d78294 - [Sema][BFloat] Forbid arithmetic on vectors of bfloat.
Author: Simon Tatham Date: 2020-08-07T11:25:19+01:00 New Revision: 1d782942500b2cbc9765ccf16264bb498850cefb URL: https://github.com/llvm/llvm-project/commit/1d782942500b2cbc9765ccf16264bb498850cefb DIFF: https://github.com/llvm/llvm-project/commit/1d782942500b2cbc9765ccf16264bb498850cefb.diff LOG: [Sema][BFloat] Forbid arithmetic on vectors of bfloat. Vectors of bfloat are a storage format only; you're supposed to explicitly convert them to a wider type to do arithmetic on them. But currently, if you write something like bfloat16x4_t test(bfloat16x4_t a, bfloat16x4_t b) { return a + b; } then the clang frontend accepts it without error, and (ARM or AArch64) isel fails to generate code for it. Added a rule in Sema that forbids the attempt from even being made, and tests that check it. In particular, we also outlaw arithmetic between vectors of bfloat and any other vector type. Patch by Luke Cheeseman. Reviewed By: LukeGeeson Differential Revision: https://reviews.llvm.org/D85009 Added: Modified: clang/lib/Sema/SemaExpr.cpp clang/test/Sema/arm-bfloat.cpp Removed: diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 4931cf46cffd..b681c930b2a7 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -9789,6 +9789,10 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS, const VectorType *RHSVecType = RHSType->getAs(); assert(LHSVecType || RHSVecType); + if ((LHSVecType && LHSVecType->getElementType()->isBFloat16Type()) || + (RHSVecType && RHSVecType->getElementType()->isBFloat16Type())) +return InvalidOperands(Loc, LHS, RHS); + // AltiVec-style "vector bool op vector bool" combinations are allowed // for some operators but not others. 
if (!AllowBothBool && diff --git a/clang/test/Sema/arm-bfloat.cpp b/clang/test/Sema/arm-bfloat.cpp index f7ee3c596eb8..ce3fc44baa39 100644 --- a/clang/test/Sema/arm-bfloat.cpp +++ b/clang/test/Sema/arm-bfloat.cpp @@ -27,3 +27,21 @@ void test(bool b) { fp16 = bf16; // expected-error {{assigning to '__fp16' from incompatible type '__bf16'}} bf16 + (b ? fp16 : bf16); // expected-error {{incompatible operand types ('__fp16' and '__bf16')}} } + +#include + +void test_vector(bfloat16x4_t a, bfloat16x4_t b, float16x4_t c) { + a + b; // expected-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'bfloat16x4_t')}} + a - b; // expected-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'bfloat16x4_t')}} + a * b; // expected-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'bfloat16x4_t')}} + a / b; // expected-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'bfloat16x4_t')}} + + a + c; // expected-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'float16x4_t' (vector of 4 'float16_t' values))}} + a - c; // expected-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'float16x4_t' (vector of 4 'float16_t' values))}} + a * c; // expected-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'float16x4_t' (vector of 4 'float16_t' values))}} + a / c; // expected-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'float16x4_t' (vector of 4 'float16_t' values))}} + c + b; // expected-error {{invalid operands to binary expression ('float16x4_t' (vector of 4 'float16_t' values) and 'bfloat16x4_t' (vector of 4 'bfloat16_t' values))}} + c - b; // expected-error {{invalid operands to binary 
expression ('float16x4_t' (vector of 4 'float16_t' values) and 'bfloat16x4_t' (vector of 4 'bfloat16_t' values))}} + c * b; // expected-error {{invalid operands to binary expression ('float16x4_t' (vector of 4 'float16_t' values) and 'bfloat16x4_t' (vector of 4 'bfloat16_t' values))}} + c / b; // expected-error {{invalid operands to binary expression ('float16x4_t' (vector of 4 'float16_t' values) and 'bfloat16x4_t' (vector of 4 'bfloat16_t' values))}} +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
statham-arm wrote: @petrhosek, do you have any further comments? I'll merge this change based on @MaskRay's approval if I haven't heard back in another week. https://github.com/llvm/llvm-project/pull/69447 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
statham-arm wrote: > my only concern is to make sure we don't unintentionally make it harder to > integrate potential future extensions such as the mutually dependent groups. Hmmm. So if you had both ME and MD groups, you might also need a _group_ to be able to be a member of another group? That way you could specify hierarchies such as "must have all of: A, B, and exactly one of C,D" (a MD group one of whose members is a ME group), or "must have at most one of: (all of A,B,C) or (all of U,V,W)" (a ME group containing MD groups). I suppose that makes sense, and the only change it needs to your structure is that maybe later a group record might also need to have a `Group:` or `Parent:` header. But there's no need to put that part in now, only to make sure there's room to add it in future if needed. Would you accept `Type: Exclusive` instead of `Exclusive: True`? It seems more plausible to me that there might be three kinds of group that _can't_ go together than three group-type flags that you can have in any combination. > although that may not necessarily be a bad thing since you could also warn if > someone accidentally tries to use a group that wasn't previously defined > (e.g. when making a typo). That is true. https://github.com/llvm/llvm-project/pull/69447 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
https://github.com/statham-arm updated https://github.com/llvm/llvm-project/pull/69447 >From 2a65ae75e8c8e62e7275a439849837919599e896 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 14 Sep 2023 14:51:17 +0100 Subject: [PATCH 1/4] [Driver] Add ExclusiveGroup feature to multilib.yaml. This allows a YAML-based multilib configuration to specify explicitly that a subset of its library directories are alternatives to each other, i.e. at most one of that subset should be selected. So if you have multiple sysroots each including a full set of headers and libraries, you can mark them as members of the same ExclusiveGroup, and then you'll be sure that only one of them is selected, even if two or more are compatible with the compile options. This is particularly important in multilib setups including the libc++ headers, where selecting the include directories from two different sysroots can cause an actual build failure. This occurs when including <stdio.h>, for example: libc++'s stdio.h is included first, and will try to use `#include_next` to fetch the underlying libc's version. But if there are two include directories from separate multilibs, then both of their C++ include directories will end up on the include path first, followed by both the C directories. So the `#include_next` from the first libc++ stdio.h will include the second libc++ stdio.h, which will do nothing because it has the same include guard macro, and the libc header won't ever be included at all. If more than one of the options in an ExclusiveGroup matches the given flags, the last one wins. 
--- clang/include/clang/Driver/Multilib.h | 16 - clang/lib/Driver/Multilib.cpp | 49 ++--- .../baremetal-multilib-exclusive-group.yaml | 69 +++ 3 files changed, 122 insertions(+), 12 deletions(-) create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml diff --git a/clang/include/clang/Driver/Multilib.h b/clang/include/clang/Driver/Multilib.h index 1416559414f894b..6a9533e6dd831f1 100644 --- a/clang/include/clang/Driver/Multilib.h +++ b/clang/include/clang/Driver/Multilib.h @@ -39,13 +39,22 @@ class Multilib { std::string IncludeSuffix; flags_list Flags; + // Optionally, a multilib can be assigned a string tag indicating that it's + // part of a group of mutually exclusive possibilities. If two or more + // multilibs have the same non-empty value of ExclusiveGroup, then only the + // last matching one of them will be selected. + // + // Setting this to the empty string is a special case, indicating that the + // directory is not mutually exclusive with anything else. + std::string ExclusiveGroup; + public: /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the /// sysroot string so they must either be empty or begin with a '/' character. /// This is enforced with an assert in the constructor. Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {}, - StringRef IncludeSuffix = {}, - const flags_list &Flags = flags_list()); + StringRef IncludeSuffix = {}, const flags_list &Flags = flags_list(), + StringRef ExclusiveGroup = {}); /// Get the detected GCC installation path suffix for the multi-arch /// target variant. Always starts with a '/', unless empty @@ -63,6 +72,9 @@ class Multilib { /// All elements begin with either '-' or '!' const flags_list &flags() const { return Flags; } + /// Get the exclusive group label. 
+ const std::string &exclusiveGroup() const { return ExclusiveGroup; } + LLVM_DUMP_METHOD void dump() const; /// print summary of the Multilib void print(raw_ostream &OS) const; diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp index 48a494d9fa38db5..085ccee7b25752e 100644 --- a/clang/lib/Driver/Multilib.cpp +++ b/clang/lib/Driver/Multilib.cpp @@ -29,9 +29,10 @@ using namespace driver; using namespace llvm::sys; Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix, - StringRef IncludeSuffix, const flags_list &Flags) + StringRef IncludeSuffix, const flags_list &Flags, + StringRef ExclusiveGroup) : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix), - Flags(Flags) { + Flags(Flags), ExclusiveGroup(ExclusiveGroup) { assert(GCCSuffix.empty() || (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1)); assert(OSSuffix.empty() || @@ -96,13 +97,39 @@ bool MultilibSet::select(const Multilib::flags_list &Flags, llvm::SmallVector &Selected) const { llvm::StringSet<> FlagSet(expandFlags(Flags)); Selected.clear(); - llvm::copy_if(Multilibs, std::back_inserter(Selected), -[&FlagSet](const Multilib &M) { - for (const std::string &F : M.flags()) -if (!FlagSet.contains(F)) - return false; - return
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
statham-arm wrote: OK, here's a version with the syntax that way. I've added another test to demonstrate the new error checks. The implementation of exclusion is still done by having an `ExclusiveGroup` field in the actual `Multilib` class. Implementing mutually-dependent groups or nested groups is enough extra effort that I'd rather leave it until we actually need it! But now the user-facing syntax in `multilib.yaml` is futureproof against wanting to add those features later, so _only_ the implementation should need to change. https://github.com/llvm/llvm-project/pull/69447 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
statham-arm wrote: (btw, that `squash!` commit contains the revised commit message I plan to put on the final version, so I need to not forget to do the squash by hand to get that right) https://github.com/llvm/llvm-project/pull/69447 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
https://github.com/statham-arm updated https://github.com/llvm/llvm-project/pull/69447 >From 2a65ae75e8c8e62e7275a439849837919599e896 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 14 Sep 2023 14:51:17 +0100 Subject: [PATCH 1/3] [Driver] Add ExclusiveGroup feature to multilib.yaml. This allows a YAML-based multilib configuration to specify explicitly that a subset of its library directories are alternatives to each other, i.e. at most one of that subset should be selected. So if you have multiple sysroots each including a full set of headers and libraries, you can mark them as members of the same ExclusiveGroup, and then you'll be sure that only one of them is selected, even if two or more are compatible with the compile options. This is particularly important in multilib setups including the libc++ headers, where selecting the include directories from two different sysroots can cause an actual build failure. This occurs when including <stdio.h>, for example: libc++'s stdio.h is included first, and will try to use `#include_next` to fetch the underlying libc's version. But if there are two include directories from separate multilibs, then both of their C++ include directories will end up on the include path first, followed by both the C directories. So the `#include_next` from the first libc++ stdio.h will include the second libc++ stdio.h, which will do nothing because it has the same include guard macro, and the libc header won't ever be included at all. If more than one of the options in an ExclusiveGroup matches the given flags, the last one wins. 
--- clang/include/clang/Driver/Multilib.h | 16 - clang/lib/Driver/Multilib.cpp | 49 ++--- .../baremetal-multilib-exclusive-group.yaml | 69 +++ 3 files changed, 122 insertions(+), 12 deletions(-) create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml diff --git a/clang/include/clang/Driver/Multilib.h b/clang/include/clang/Driver/Multilib.h index 1416559414f894b..6a9533e6dd831f1 100644 --- a/clang/include/clang/Driver/Multilib.h +++ b/clang/include/clang/Driver/Multilib.h @@ -39,13 +39,22 @@ class Multilib { std::string IncludeSuffix; flags_list Flags; + // Optionally, a multilib can be assigned a string tag indicating that it's + // part of a group of mutually exclusive possibilities. If two or more + // multilibs have the same non-empty value of ExclusiveGroup, then only the + // last matching one of them will be selected. + // + // Setting this to the empty string is a special case, indicating that the + // directory is not mutually exclusive with anything else. + std::string ExclusiveGroup; + public: /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the /// sysroot string so they must either be empty or begin with a '/' character. /// This is enforced with an assert in the constructor. Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {}, - StringRef IncludeSuffix = {}, - const flags_list &Flags = flags_list()); + StringRef IncludeSuffix = {}, const flags_list &Flags = flags_list(), + StringRef ExclusiveGroup = {}); /// Get the detected GCC installation path suffix for the multi-arch /// target variant. Always starts with a '/', unless empty @@ -63,6 +72,9 @@ class Multilib { /// All elements begin with either '-' or '!' const flags_list &flags() const { return Flags; } + /// Get the exclusive group label. 
+ const std::string &exclusiveGroup() const { return ExclusiveGroup; } + LLVM_DUMP_METHOD void dump() const; /// print summary of the Multilib void print(raw_ostream &OS) const; diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp index 48a494d9fa38db5..085ccee7b25752e 100644 --- a/clang/lib/Driver/Multilib.cpp +++ b/clang/lib/Driver/Multilib.cpp @@ -29,9 +29,10 @@ using namespace driver; using namespace llvm::sys; Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix, - StringRef IncludeSuffix, const flags_list &Flags) + StringRef IncludeSuffix, const flags_list &Flags, + StringRef ExclusiveGroup) : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix), - Flags(Flags) { + Flags(Flags), ExclusiveGroup(ExclusiveGroup) { assert(GCCSuffix.empty() || (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1)); assert(OSSuffix.empty() || @@ -96,13 +97,39 @@ bool MultilibSet::select(const Multilib::flags_list &Flags, llvm::SmallVector &Selected) const { llvm::StringSet<> FlagSet(expandFlags(Flags)); Selected.clear(); - llvm::copy_if(Multilibs, std::back_inserter(Selected), -[&FlagSet](const Multilib &M) { - for (const std::string &F : M.flags()) -if (!FlagSet.contains(F)) - return false; - return
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
@@ -96,13 +97,39 @@ bool MultilibSet::select(const Multilib::flags_list &Flags, llvm::SmallVector &Selected) const { llvm::StringSet<> FlagSet(expandFlags(Flags)); Selected.clear(); - llvm::copy_if(Multilibs, std::back_inserter(Selected), -[&FlagSet](const Multilib &M) { - for (const std::string &F : M.flags()) -if (!FlagSet.contains(F)) - return false; - return true; -}); + + // Decide which multilibs we're going to select at all + std::vector IsSelected(Multilibs.size(), false); + std::map ExclusiveGroupMembers; + for (size_t i = 0, e = Multilibs.size(); i < e; ++i) { +const Multilib &M = Multilibs[i]; + +// If this multilib doesn't match all our flags, don't select it +if (!llvm::all_of(M.flags(), [&FlagSet](const std::string &F) { + return FlagSet.contains(F); +})) + continue; + +// If this multilib has the same ExclusiveGroup as one we've already +// selected, de-select the previous one +const std::string &group = M.exclusiveGroup(); +if (!group.empty()) { statham-arm wrote: `insert`, actually – `try_emplace` appears in `DenseMap` but not `DenseSet`. But otherwise, done. https://github.com/llvm/llvm-project/pull/69447 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
https://github.com/statham-arm edited https://github.com/llvm/llvm-project/pull/69447 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
@@ -138,10 +164,34 @@ static const VersionTuple MultilibVersionCurrent(1, 0); struct MultilibSerialization { std::string Dir; std::vector Flags; + std::string Group; +}; + +struct MultilibGroupSerialization { + /* + * Future directions: + * + * If it's needed in future, we could introduce additional group types by + * permitting Type to contain strings other than "Exclusive". Another + * possibility is a group of library directories that are mutually + * _dependent_ rather than mutually exclusive: if you include one you must + * include them all. + * + * It might also be useful to allow groups to be members of other groups, so + * that a mutually exclusive group could contain a mutually dependent set of + * library directories, or vice versa. + * + * These additional features would need changes in the implementation, but + * the YAML schema is set up so they can be added without requiring changes + * in existing users' multilib.yaml files. + */ + std::string Name; + std::string Type; statham-arm wrote: Yes, apparently we can. I hadn't found that part of the `llvm::yaml` API yet, but defining a `ScalarEnumerationTraits` for the enum type seems to be the way to make it Just Work during decoding. Thanks. https://github.com/llvm/llvm-project/pull/69447 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
https://github.com/statham-arm updated https://github.com/llvm/llvm-project/pull/69447 >From 1140903195e555643ee1a6b9f671b47b0c307f9e Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 14 Sep 2023 14:51:17 +0100 Subject: [PATCH] [Driver] Add ExclusiveGroup feature to multilib.yaml. This allows a YAML-based multilib configuration to specify explicitly that a subset of its library directories are alternatives to each other, i.e. at most one of that subset should be selected. So if you have multiple sysroots each including a full set of headers and libraries, you can mark them as members of the same mutually exclusive group, and then you'll be sure that only one of them is selected, even if two or more are compatible with the compile options. This is particularly important in multilib setups including the libc++ headers, where selecting the include directories from two different sysroots can cause an actual build failure. This occurs when including <stdio.h>, for example: libc++'s stdio.h is included first, and will try to use `#include_next` to fetch the underlying libc's version. But if there are two include directories from separate multilibs, then both of their C++ include directories will end up on the include path first, followed by both the C directories. So the `#include_next` from the first libc++ stdio.h will include the second libc++ stdio.h, which will do nothing because it has the same include guard macro, and the libc header won't ever be included at all. If more than one of the options in an exclusive group matches the given flags, the last one wins. The syntax for specifying this in multilib.yaml is to define a Groups section in which you specify your group names, and for each one, declare it to have Type: Exclusive. (This reserves space in the syntax for maybe adding other group types later, such as a group of mutually _dependent_ things that you must have all or none of.) 
Then each Variant record that's a member of a group has a Group: property giving that group's name. --- clang/include/clang/Driver/Multilib.h | 16 ++- clang/lib/Driver/Multilib.cpp | 108 -- .../baremetal-multilib-exclusive-group.yaml | 79 + .../baremetal-multilib-group-error.yaml | 27 + 4 files changed, 218 insertions(+), 12 deletions(-) create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml create mode 100644 clang/test/Driver/baremetal-multilib-group-error.yaml diff --git a/clang/include/clang/Driver/Multilib.h b/clang/include/clang/Driver/Multilib.h index 1416559414f894b..6a9533e6dd831f1 100644 --- a/clang/include/clang/Driver/Multilib.h +++ b/clang/include/clang/Driver/Multilib.h @@ -39,13 +39,22 @@ class Multilib { std::string IncludeSuffix; flags_list Flags; + // Optionally, a multilib can be assigned a string tag indicating that it's + // part of a group of mutually exclusive possibilities. If two or more + // multilibs have the same non-empty value of ExclusiveGroup, then only the + // last matching one of them will be selected. + // + // Setting this to the empty string is a special case, indicating that the + // directory is not mutually exclusive with anything else. + std::string ExclusiveGroup; + public: /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the /// sysroot string so they must either be empty or begin with a '/' character. /// This is enforced with an assert in the constructor. Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {}, - StringRef IncludeSuffix = {}, - const flags_list &Flags = flags_list()); + StringRef IncludeSuffix = {}, const flags_list &Flags = flags_list(), + StringRef ExclusiveGroup = {}); /// Get the detected GCC installation path suffix for the multi-arch /// target variant. Always starts with a '/', unless empty @@ -63,6 +72,9 @@ class Multilib { /// All elements begin with either '-' or '!' 
const flags_list &flags() const { return Flags; } + /// Get the exclusive group label. + const std::string &exclusiveGroup() const { return ExclusiveGroup; } + LLVM_DUMP_METHOD void dump() const; /// print summary of the Multilib void print(raw_ostream &OS) const; diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp index 48a494d9fa38db5..7681c1a3ce6756f 100644 --- a/clang/lib/Driver/Multilib.cpp +++ b/clang/lib/Driver/Multilib.cpp @@ -9,6 +9,7 @@ #include "clang/Driver/Multilib.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/Version.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" @@ -29,9 +30,10 @@ using namespace driver; using namespace llvm::sys; Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix, - StringRef IncludeSuffix, const flags_list &Flags) + StringRef IncludeSuffix, const flags_list &Flags,
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
statham-arm wrote: (This final force-push is the squashed version of the previous stack, rebased to the current head of `main`, so that the builder can run a last test. Thanks both for the approvals; I'll merge it once the tests have finished.) https://github.com/llvm/llvm-project/pull/69447 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
https://github.com/statham-arm closed https://github.com/llvm/llvm-project/pull/69447 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[libunwind] 43c84e4 - [libunwind, EHABI, ARM] Fix get/set of RA_AUTH_CODE.
Author: Simon Tatham Date: 2022-06-27T09:36:21+01:00 New Revision: 43c84e463426ca35fe9fc2d38063d75fed944f23 URL: https://github.com/llvm/llvm-project/commit/43c84e463426ca35fe9fc2d38063d75fed944f23 DIFF: https://github.com/llvm/llvm-project/commit/43c84e463426ca35fe9fc2d38063d75fed944f23.diff LOG: [libunwind,EHABI,ARM] Fix get/set of RA_AUTH_CODE. According to EHABI32 §8.5.2, the PAC for the return address of a function described in an exception table is supposed to be addressed in the _Unwind_VRS_{Get,Set} API by setting regclass=_UVRSC_PSEUDO and regno=0. (The space of 'regno' values is independent for each regclass, and for _UVRSC_PSEUDO, there is only one valid regno so far.) That is indeed what libunwind's _Unwind_VRS_{Get,Set} functions expect to receive. But at two call sites, the wrong values are passed in: regno is being set to UNW_ARM_RA_AUTH_CODE (0x8F) instead of 0, and in one case, regclass is _UVRSC_CORE instead of _UVRSC_PSEUDO. As a result, those calls to _Unwind_VRS_{Get,Set} return _UVRSR_FAILED, which their callers ignore. So if you compile in the AUTG instruction that actually validates the PAC, it will try to validate what's effectively an uninitialised register as an authentication code, and trigger a CPU fault even on correct exception unwinding. 
Reviewed By: danielkiss Differential Revision: https://reviews.llvm.org/D128522 Added: Modified: libunwind/src/Unwind-EHABI.cpp Removed: diff --git a/libunwind/src/Unwind-EHABI.cpp b/libunwind/src/Unwind-EHABI.cpp index 6ac09adfb8fe..f203887567b6 100644 --- a/libunwind/src/Unwind-EHABI.cpp +++ b/libunwind/src/Unwind-EHABI.cpp @@ -432,8 +432,7 @@ _Unwind_VRS_Interpret(_Unwind_Context *context, const uint32_t *data, uint32_t sp; uint32_t pac; _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); - _Unwind_VRS_Get(context, _UVRSC_PSEUDO, UNW_ARM_RA_AUTH_CODE, - _UVRSD_UINT32, &pac); + _Unwind_VRS_Get(context, _UVRSC_PSEUDO, 0, _UVRSD_UINT32, &pac); __asm__ __volatile__("autg %0, %1, %2" : : "r"(pac), "r"(lr), "r"(sp) :); } #else @@ -1138,8 +1137,7 @@ _Unwind_VRS_Pop(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, } uint32_t pac = *sp++; _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp); - return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_RA_AUTH_CODE, - _UVRSD_UINT32, &pac); + return _Unwind_VRS_Set(context, _UVRSC_PSEUDO, 0, _UVRSD_UINT32, &pac); } } _LIBUNWIND_ABORT("unsupported register class"); ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] cef56d5 - [clang] Change set type used for SourceLocation.
Author: Simon Tatham Date: 2021-07-19T13:36:36+01:00 New Revision: cef56d58dbbb3bc993531c14af5e3edd2841029d URL: https://github.com/llvm/llvm-project/commit/cef56d58dbbb3bc993531c14af5e3edd2841029d DIFF: https://github.com/llvm/llvm-project/commit/cef56d58dbbb3bc993531c14af5e3edd2841029d.diff LOG: [clang] Change set type used for SourceLocation. This is part of a patch series working towards the ability to make SourceLocation into a 64-bit type to handle larger translation units. If clang is built for a 32-bit platform and SourceLocation is 64 bits wide, then a SourceLocation will be larger than a pointer, so it won't be possible to keep them in a SmallPtrSet any more. Switch to SmallDenseSet instead. Patch originally by Mikhail Maltsev. Differential Revision: https://reviews.llvm.org/D105493 Added: Modified: clang/include/clang/Basic/SourceLocation.h clang/include/clang/Lex/Preprocessor.h Removed: diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h index fc722b1d563db..0ba0f9bd3ddf2 100644 --- a/clang/include/clang/Basic/SourceLocation.h +++ b/clang/include/clang/Basic/SourceLocation.h @@ -16,7 +16,6 @@ #include "clang/Basic/LLVM.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Support/PointerLikeTypeTraits.h" #include #include #include @@ -510,20 +509,6 @@ namespace llvm { static void Profile(const clang::SourceLocation &X, FoldingSetNodeID &ID); }; - // Teach SmallPtrSet how to handle SourceLocation. 
- template<> - struct PointerLikeTypeTraits { -static constexpr int NumLowBitsAvailable = 0; - -static void *getAsVoidPointer(clang::SourceLocation L) { - return L.getPtrEncoding(); -} - -static clang::SourceLocation getFromVoidPointer(void *P) { - return clang::SourceLocation::getFromRawEncoding((unsigned)(uintptr_t)P); -} - }; - } // namespace llvm #endif // LLVM_CLANG_BASIC_SOURCELOCATION_H diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index be345d4f5b4ea..7ab13640ce2c0 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -783,8 +783,7 @@ class Preprocessor { /// deserializing from PCH, we don't need to deserialize identifier & macros /// just so that we can report that they are unused, we just warn using /// the SourceLocations of this set (that will be filled by the ASTReader). - /// We are using SmallPtrSet instead of a vector for faster removal. - using WarnUnusedMacroLocsTy = llvm::SmallPtrSet; + using WarnUnusedMacroLocsTy = llvm::SmallDenseSet; WarnUnusedMacroLocsTy WarnUnusedMacroLocs; /// A "freelist" of MacroArg objects that can be ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 21401a7 - [clang] Introduce SourceLocation::[U]IntTy typedefs.
Author: Simon Tatham Date: 2021-07-21T10:45:46+01:00 New Revision: 21401a72629cc591bab7ec6816f03e6c550f3fb3 URL: https://github.com/llvm/llvm-project/commit/21401a72629cc591bab7ec6816f03e6c550f3fb3 DIFF: https://github.com/llvm/llvm-project/commit/21401a72629cc591bab7ec6816f03e6c550f3fb3.diff LOG: [clang] Introduce SourceLocation::[U]IntTy typedefs. This is part of a patch series working towards the ability to make SourceLocation into a 64-bit type to handle larger translation units. NFC: this patch introduces typedefs for the integer type used by SourceLocation and makes all the boring changes to use the typedefs everywhere, but for the moment, they are unconditionally defined to uint32_t. Patch originally by Mikhail Maltsev. Reviewed By: tmatheson Differential Revision: https://reviews.llvm.org/D105492 Added: Modified: clang/include/clang/AST/DeclarationName.h clang/include/clang/Basic/SourceLocation.h clang/include/clang/Basic/SourceManager.h clang/include/clang/Lex/Token.h clang/include/clang/Serialization/ASTBitCodes.h clang/include/clang/Serialization/ASTReader.h clang/include/clang/Serialization/ASTWriter.h clang/include/clang/Serialization/ModuleFile.h clang/lib/ARCMigrate/TransEmptyStatementsAndDealloc.cpp clang/lib/AST/NestedNameSpecifier.cpp clang/lib/Basic/SourceLocation.cpp clang/lib/Basic/SourceManager.cpp clang/lib/CodeGen/CGOpenMPRuntime.cpp clang/lib/Lex/Lexer.cpp clang/lib/Lex/ModuleMap.cpp clang/lib/Lex/PPCaching.cpp clang/lib/Lex/TokenLexer.cpp clang/lib/Serialization/ASTReader.cpp clang/lib/Serialization/ASTWriter.cpp clang/tools/libclang/CIndex.cpp Removed: diff --git a/clang/include/clang/AST/DeclarationName.h b/clang/include/clang/AST/DeclarationName.h index acf7e243da46b..38da6fc727fbd 100644 --- a/clang/include/clang/AST/DeclarationName.h +++ b/clang/include/clang/AST/DeclarationName.h @@ -660,13 +660,13 @@ class DeclarationNameLoc { // The location (if any) of the operator keyword is stored elsewhere. 
struct CXXOpName { -unsigned BeginOpNameLoc; -unsigned EndOpNameLoc; +SourceLocation::UIntTy BeginOpNameLoc; +SourceLocation::UIntTy EndOpNameLoc; }; // The location (if any) of the operator keyword is stored elsewhere. struct CXXLitOpName { -unsigned OpNameLoc; +SourceLocation::UIntTy OpNameLoc; }; // struct {} CXXUsingDirective; diff --git a/clang/include/clang/Basic/SourceLocation.h b/clang/include/clang/Basic/SourceLocation.h index 0ba0f9bd3ddf2..540de23b9f55e 100644 --- a/clang/include/clang/Basic/SourceLocation.h +++ b/clang/include/clang/Basic/SourceLocation.h @@ -91,11 +91,14 @@ class SourceLocation { friend class SourceManager; friend struct llvm::FoldingSetTrait; - unsigned ID = 0; +public: + using UIntTy = uint32_t; + using IntTy = int32_t; - enum : unsigned { -MacroIDBit = 1U << 31 - }; +private: + UIntTy ID = 0; + + enum : UIntTy { MacroIDBit = 1ULL << (8 * sizeof(UIntTy) - 1) }; public: bool isFileID() const { return (ID & MacroIDBit) == 0; } @@ -111,18 +114,16 @@ class SourceLocation { private: /// Return the offset into the manager's global input view. - unsigned getOffset() const { -return ID & ~MacroIDBit; - } + UIntTy getOffset() const { return ID & ~MacroIDBit; } - static SourceLocation getFileLoc(unsigned ID) { + static SourceLocation getFileLoc(UIntTy ID) { assert((ID & MacroIDBit) == 0 && "Ran out of source locations!"); SourceLocation L; L.ID = ID; return L; } - static SourceLocation getMacroLoc(unsigned ID) { + static SourceLocation getMacroLoc(UIntTy ID) { assert((ID & MacroIDBit) == 0 && "Ran out of source locations!"); SourceLocation L; L.ID = MacroIDBit | ID; @@ -132,7 +133,7 @@ class SourceLocation { public: /// Return a source location with the specified offset from this /// SourceLocation. 
- SourceLocation getLocWithOffset(int Offset) const { + SourceLocation getLocWithOffset(IntTy Offset) const { assert(((getOffset()+Offset) & MacroIDBit) == 0 && "offset overflow"); SourceLocation L; L.ID = ID+Offset; @@ -144,13 +145,13 @@ class SourceLocation { /// /// This should only be passed to SourceLocation::getFromRawEncoding, it /// should not be inspected directly. - unsigned getRawEncoding() const { return ID; } + UIntTy getRawEncoding() const { return ID; } /// Turn a raw encoding of a SourceLocation object into /// a real SourceLocation. /// /// \see getRawEncoding. - static SourceLocation getFromRawEncoding(unsigned Encoding) { + static SourceLocation getFromRawEncoding(UIntTy Encoding) { SourceLocation X; X.ID = Encoding; return X; @@ -170,7 +171,7 @@ class SourceLocation { /// Turn a pointer encoding of a SourceLocation object back /// into a real SourceL
[clang] bd41136 - [clang] Use i64 for the !srcloc metadata on asm IR nodes.
Author: Simon Tatham Date: 2021-07-22T10:24:52+01:00 New Revision: bd41136746a0b47882914cee5a8d1ac6714288d1 URL: https://github.com/llvm/llvm-project/commit/bd41136746a0b47882914cee5a8d1ac6714288d1 DIFF: https://github.com/llvm/llvm-project/commit/bd41136746a0b47882914cee5a8d1ac6714288d1.diff LOG: [clang] Use i64 for the !srcloc metadata on asm IR nodes. This is part of a patch series working towards the ability to make SourceLocation into a 64-bit type to handle larger translation units. !srcloc is generated in clang codegen, and pulled back out by llvm functions like AsmPrinter::emitInlineAsm that need to report errors in the inline asm. From there it goes to LLVMContext::emitError, is stored in DiagnosticInfoInlineAsm, and ends up back in clang, at BackendConsumer::InlineAsmDiagHandler(), which reconstitutes a true clang::SourceLocation from the integer cookie. Throughout this code path, it's now 64-bit rather than 32, which means that if SourceLocation is expanded to a 64-bit type, this error report won't lose half of the data. The compiler will tolerate both of i32 and i64 !srcloc metadata in input IR without faulting. Test added in llvm/MC. (The semantic accuracy of the metadata is another matter, but I don't know of any situation where that matters: if you're reading an IR file written by a previous run of clang, you don't have the SourceManager that can relate those source locations back to the original source files.) Original version of the patch by Mikhail Maltsev. 
Reviewed By: dexonsmith Differential Revision: https://reviews.llvm.org/D105491 Added: Modified: clang/lib/CodeGen/CGStmt.cpp llvm/include/llvm/IR/DiagnosticInfo.h llvm/include/llvm/IR/LLVMContext.h llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp llvm/lib/CodeGen/MachineInstr.cpp llvm/lib/IR/LLVMContext.cpp llvm/test/MC/ARM/inline-asm-srcloc.ll Removed: diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index 6f6dcfa58a7f1..aeb319ca15819 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -2158,7 +2158,7 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str, SmallVector Locs; // Add the location of the first line to the MDNode. Locs.push_back(llvm::ConstantAsMetadata::get(llvm::ConstantInt::get( - CGF.Int32Ty, Str->getBeginLoc().getRawEncoding(; + CGF.Int64Ty, Str->getBeginLoc().getRawEncoding(; StringRef StrVal = Str->getString(); if (!StrVal.empty()) { const SourceManager &SM = CGF.CGM.getContext().getSourceManager(); @@ -2173,7 +2173,7 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str, SourceLocation LineLoc = Str->getLocationOfByte( i + 1, SM, LangOpts, CGF.getTarget(), &StartToken, &ByteOffset); Locs.push_back(llvm::ConstantAsMetadata::get( - llvm::ConstantInt::get(CGF.Int32Ty, LineLoc.getRawEncoding(; + llvm::ConstantInt::get(CGF.Int64Ty, LineLoc.getRawEncoding(; } } @@ -2210,8 +2210,8 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect, getAsmSrcLocInfo(gccAsmStmt->getAsmString(), CGF)); else { // At least put the line number on MS inline asm blobs. 
-llvm::Constant *Loc = llvm::ConstantInt::get(CGF.Int32Ty, -S.getAsmLoc().getRawEncoding()); +llvm::Constant *Loc = +llvm::ConstantInt::get(CGF.Int64Ty, S.getAsmLoc().getRawEncoding()); Result.setMetadata("srcloc", llvm::MDNode::get(CGF.getLLVMContext(), llvm::ConstantAsMetadata::get(Loc))); diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h index 9134ca12600b2..5064f4f4edf77 100644 --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -131,7 +131,7 @@ using DiagnosticHandlerFunction = std::function; class DiagnosticInfoInlineAsm : public DiagnosticInfo { private: /// Optional line information. 0 if not set. - unsigned LocCookie = 0; + uint64_t LocCookie = 0; /// Message to be reported. const Twine &MsgStr; /// Optional origin of the problem. @@ -149,7 +149,7 @@ class DiagnosticInfoInlineAsm : public DiagnosticInfo { /// \p MsgStr gives the message. /// This class does not copy \p MsgStr, therefore the reference must be valid /// for the whole life time of the Diagnostic. - DiagnosticInfoInlineAsm(unsigned LocCookie, const Twine &MsgStr, + DiagnosticInfoInlineAsm(uint64_t LocCookie, const Twine &MsgStr, DiagnosticSeverity Severity = DS_Error) : DiagnosticInfo(DK_InlineAsm, Severity), LocCookie(LocCookie), MsgStr(MsgStr) {} @@ -162,7 +162,7 @@ class DiagnosticInfoInlineAsm : public DiagnosticInfo { DiagnosticInfoInlineAsm(const Instructi
[clang] [Modules] No transitive source location change (PR #86912)
statham-arm wrote: > Let's see if @statham-arm (who introduced the `SourceLocation::[U]IntTy` > typedefs) wants to weigh in here. I'm afraid my knowledge of C++ modules is very close to zero. They were mentioned in a training course I did last year, but not in much detail. On 64-bit SourceLocation in general: our patch series to implement those as an option in clang was never fully landed, because the second half of it stalled in review. I'd still like to see it finished off, though I'm sure it would need some vigorous rebasing and retesting by now. The 32-bit SourceLocation limit is a problem for at least some of our users, apparently because there's a library of header files that break the limit all by themselves. (I'm not sure how; I haven't seen them. Maybe by including each other multiple times with different `#define`s?) But I have no idea whether the same considerations would apply to modules, because I don't really know enough about modules, sorry! https://github.com/llvm/llvm-project/pull/86912 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
https://github.com/statham-arm created https://github.com/llvm/llvm-project/pull/69447 This allows a YAML-based multilib configuration to specify explicitly that a subset of its library directories are alternatives to each other, i.e. at most one of that subset should be selected. So if you have multiple sysroots each including a full set of headers and libraries, you can mark them as members of the same ExclusiveGroup, and then you'll be sure that only one of them is selected, even if two or more are compatible with the compile options. This is particularly important in multilib setups including the libc++ headers, where selecting the include directories from two different sysroots can cause an actual build failure. This occurs when including `<stdio.h>`, for example: libc++'s stdio.h is included first, and will try to use `#include_next` to fetch the underlying libc's version. But if there are two include directories from separate multilibs, then both of their C++ include directories will end up on the include path first, followed by both the C directories. So the `#include_next` from the first libc++ stdio.h will include the second libc++ stdio.h, which will do nothing because it has the same include guard macro, and the libc header won't ever be included at all. If more than one of the options in an ExclusiveGroup matches the given flags, the last one wins. >From 5b3289a7ad40850cbe1c438345a181b01c500639 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 14 Sep 2023 14:51:17 +0100 Subject: [PATCH] [Driver] Add ExclusiveGroup feature to multilib.yaml. This allows a YAML-based multilib configuration to specify explicitly that a subset of its library directories are alternatives to each other, i.e. at most one of that subset should be selected. 
So if you have multiple sysroots each including a full set of headers and libraries, you can mark them as members of the same ExclusiveGroup, and then you'll be sure that only one of them is selected, even if two or more are compatible with the compile options. This is particularly important in multilib setups including the libc++ headers, where selecting the include directories from two different sysroots can cause an actual build failure. This occurs when including `<stdio.h>`, for example: libc++'s stdio.h is included first, and will try to use `#include_next` to fetch the underlying libc's version. But if there are two include directories from separate multilibs, then both of their C++ include directories will end up on the include path first, followed by both the C directories. So the `#include_next` from the first libc++ stdio.h will include the second libc++ stdio.h, which will do nothing because it has the same include guard macro, and the libc header won't ever be included at all. If more than one of the options in an ExclusiveGroup matches the given flags, the last one wins. --- clang/include/clang/Driver/Multilib.h | 15 +++- clang/lib/Driver/Multilib.cpp | 49 ++--- .../baremetal-multilib-exclusive-group.yaml | 69 +++ 3 files changed, 122 insertions(+), 11 deletions(-) create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml diff --git a/clang/include/clang/Driver/Multilib.h b/clang/include/clang/Driver/Multilib.h index 1416559414f894b..46f23a2ff5fabac 100644 --- a/clang/include/clang/Driver/Multilib.h +++ b/clang/include/clang/Driver/Multilib.h @@ -39,13 +39,23 @@ class Multilib { std::string IncludeSuffix; flags_list Flags; + // Optionally, a multilib can be assigned a string tag indicating that it's + // part of a group of mutually exclusive possibilities. If two or more + // multilibs have the same non-empty value of ExclusiveGroup, then only the + // last matching one of them will be selected. 
+ // + // Setting this to the empty string is a special case, indicating that the + // directory is not mutually exclusive with anything else. + std::string ExclusiveGroup; + public: /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the /// sysroot string so they must either be empty or begin with a '/' character. /// This is enforced with an assert in the constructor. Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {}, StringRef IncludeSuffix = {}, - const flags_list &Flags = flags_list()); + const flags_list &Flags = flags_list(), + StringRef ExclusiveGroup = {}); /// Get the detected GCC installation path suffix for the multi-arch /// target variant. Always starts with a '/', unless empty @@ -63,6 +73,9 @@ class Multilib { /// All elements begin with either '-' or '!' const flags_list &flags() const { return Flags; } + /// Get the exclusive group label. + const std::string &exclusiveGroup() const { return ExclusiveGroup; } + LLVM_DUMP_METHOD void dump() const; /// print summary of the Multilib void print(raw_ostream &OS) const; diff --git a/clang/lib/
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
https://github.com/statham-arm updated https://github.com/llvm/llvm-project/pull/69447 >From 3a0481134343339ce8132419fde875ac9977b734 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 14 Sep 2023 14:51:17 +0100 Subject: [PATCH] [Driver] Add ExclusiveGroup feature to multilib.yaml. This allows a YAML-based multilib configuration to specify explicitly that a subset of its library directories are alternatives to each other, i.e. at most one of that subset should be selected. So if you have multiple sysroots each including a full set of headers and libraries, you can mark them as members of the same ExclusiveGroup, and then you'll be sure that only one of them is selected, even if two or more are compatible with the compile options. This is particularly important in multilib setups including the libc++ headers, where selecting the include directories from two different sysroots can cause an actual build failure. This occurs when including `<stdio.h>`, for example: libc++'s stdio.h is included first, and will try to use `#include_next` to fetch the underlying libc's version. But if there are two include directories from separate multilibs, then both of their C++ include directories will end up on the include path first, followed by both the C directories. So the `#include_next` from the first libc++ stdio.h will include the second libc++ stdio.h, which will do nothing because it has the same include guard macro, and the libc header won't ever be included at all. If more than one of the options in an ExclusiveGroup matches the given flags, the last one wins. 
--- clang/include/clang/Driver/Multilib.h | 16 - clang/lib/Driver/Multilib.cpp | 49 ++--- .../baremetal-multilib-exclusive-group.yaml | 69 +++ 3 files changed, 122 insertions(+), 12 deletions(-) create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml diff --git a/clang/include/clang/Driver/Multilib.h b/clang/include/clang/Driver/Multilib.h index 1416559414f894b..6a9533e6dd831f1 100644 --- a/clang/include/clang/Driver/Multilib.h +++ b/clang/include/clang/Driver/Multilib.h @@ -39,13 +39,22 @@ class Multilib { std::string IncludeSuffix; flags_list Flags; + // Optionally, a multilib can be assigned a string tag indicating that it's + // part of a group of mutually exclusive possibilities. If two or more + // multilibs have the same non-empty value of ExclusiveGroup, then only the + // last matching one of them will be selected. + // + // Setting this to the empty string is a special case, indicating that the + // directory is not mutually exclusive with anything else. + std::string ExclusiveGroup; + public: /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the /// sysroot string so they must either be empty or begin with a '/' character. /// This is enforced with an assert in the constructor. Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {}, - StringRef IncludeSuffix = {}, - const flags_list &Flags = flags_list()); + StringRef IncludeSuffix = {}, const flags_list &Flags = flags_list(), + StringRef ExclusiveGroup = {}); /// Get the detected GCC installation path suffix for the multi-arch /// target variant. Always starts with a '/', unless empty @@ -63,6 +72,9 @@ class Multilib { /// All elements begin with either '-' or '!' const flags_list &flags() const { return Flags; } + /// Get the exclusive group label. 
+ const std::string &exclusiveGroup() const { return ExclusiveGroup; } + LLVM_DUMP_METHOD void dump() const; /// print summary of the Multilib void print(raw_ostream &OS) const; diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp index ba466af39e2dcaf..a8eff30f1416852 100644 --- a/clang/lib/Driver/Multilib.cpp +++ b/clang/lib/Driver/Multilib.cpp @@ -29,9 +29,10 @@ using namespace driver; using namespace llvm::sys; Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix, - StringRef IncludeSuffix, const flags_list &Flags) + StringRef IncludeSuffix, const flags_list &Flags, + StringRef ExclusiveGroup) : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix), - Flags(Flags) { + Flags(Flags), ExclusiveGroup(ExclusiveGroup) { assert(GCCSuffix.empty() || (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1)); assert(OSSuffix.empty() || @@ -96,13 +97,39 @@ bool MultilibSet::select(const Multilib::flags_list &Flags, llvm::SmallVector &Selected) const { llvm::StringSet<> FlagSet(expandFlags(Flags)); Selected.clear(); - llvm::copy_if(Multilibs, std::back_inserter(Selected), -[&FlagSet](const Multilib &M) { - for (const std::string &F : M.flags()) -if (!FlagSet.contains(F)) - return false; - return true
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
https://github.com/statham-arm updated https://github.com/llvm/llvm-project/pull/69447 >From e4d860c2968e4bf2e0ca198bdfe00dad4e985d40 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 14 Sep 2023 14:51:17 +0100 Subject: [PATCH] [Driver] Add ExclusiveGroup feature to multilib.yaml. This allows a YAML-based multilib configuration to specify explicitly that a subset of its library directories are alternatives to each other, i.e. at most one of that subset should be selected. So if you have multiple sysroots each including a full set of headers and libraries, you can mark them as members of the same ExclusiveGroup, and then you'll be sure that only one of them is selected, even if two or more are compatible with the compile options. This is particularly important in multilib setups including the libc++ headers, where selecting the include directories from two different sysroots can cause an actual build failure. This occurs when including `<stdio.h>`, for example: libc++'s stdio.h is included first, and will try to use `#include_next` to fetch the underlying libc's version. But if there are two include directories from separate multilibs, then both of their C++ include directories will end up on the include path first, followed by both the C directories. So the `#include_next` from the first libc++ stdio.h will include the second libc++ stdio.h, which will do nothing because it has the same include guard macro, and the libc header won't ever be included at all. If more than one of the options in an ExclusiveGroup matches the given flags, the last one wins. 
--- clang/include/clang/Driver/Multilib.h | 16 - clang/lib/Driver/Multilib.cpp | 49 ++--- .../baremetal-multilib-exclusive-group.yaml | 69 +++ 3 files changed, 122 insertions(+), 12 deletions(-) create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml diff --git a/clang/include/clang/Driver/Multilib.h b/clang/include/clang/Driver/Multilib.h index 1416559414f894b..6a9533e6dd831f1 100644 --- a/clang/include/clang/Driver/Multilib.h +++ b/clang/include/clang/Driver/Multilib.h @@ -39,13 +39,22 @@ class Multilib { std::string IncludeSuffix; flags_list Flags; + // Optionally, a multilib can be assigned a string tag indicating that it's + // part of a group of mutually exclusive possibilities. If two or more + // multilibs have the same non-empty value of ExclusiveGroup, then only the + // last matching one of them will be selected. + // + // Setting this to the empty string is a special case, indicating that the + // directory is not mutually exclusive with anything else. + std::string ExclusiveGroup; + public: /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the /// sysroot string so they must either be empty or begin with a '/' character. /// This is enforced with an assert in the constructor. Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {}, - StringRef IncludeSuffix = {}, - const flags_list &Flags = flags_list()); + StringRef IncludeSuffix = {}, const flags_list &Flags = flags_list(), + StringRef ExclusiveGroup = {}); /// Get the detected GCC installation path suffix for the multi-arch /// target variant. Always starts with a '/', unless empty @@ -63,6 +72,9 @@ class Multilib { /// All elements begin with either '-' or '!' const flags_list &flags() const { return Flags; } + /// Get the exclusive group label. 
+ const std::string &exclusiveGroup() const { return ExclusiveGroup; } + LLVM_DUMP_METHOD void dump() const; /// print summary of the Multilib void print(raw_ostream &OS) const; diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp index 48a494d9fa38db5..085ccee7b25752e 100644 --- a/clang/lib/Driver/Multilib.cpp +++ b/clang/lib/Driver/Multilib.cpp @@ -29,9 +29,10 @@ using namespace driver; using namespace llvm::sys; Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix, - StringRef IncludeSuffix, const flags_list &Flags) + StringRef IncludeSuffix, const flags_list &Flags, + StringRef ExclusiveGroup) : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix), - Flags(Flags) { + Flags(Flags), ExclusiveGroup(ExclusiveGroup) { assert(GCCSuffix.empty() || (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1)); assert(OSSuffix.empty() || @@ -96,13 +97,39 @@ bool MultilibSet::select(const Multilib::flags_list &Flags, llvm::SmallVector &Selected) const { llvm::StringSet<> FlagSet(expandFlags(Flags)); Selected.clear(); - llvm::copy_if(Multilibs, std::back_inserter(Selected), -[&FlagSet](const Multilib &M) { - for (const std::string &F : M.flags()) -if (!FlagSet.contains(F)) - return false; - return true
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
https://github.com/statham-arm updated https://github.com/llvm/llvm-project/pull/69447 >From 2a65ae75e8c8e62e7275a439849837919599e896 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 14 Sep 2023 14:51:17 +0100 Subject: [PATCH] [Driver] Add ExclusiveGroup feature to multilib.yaml. This allows a YAML-based multilib configuration to specify explicitly that a subset of its library directories are alternatives to each other, i.e. at most one of that subset should be selected. So if you have multiple sysroots each including a full set of headers and libraries, you can mark them as members of the same ExclusiveGroup, and then you'll be sure that only one of them is selected, even if two or more are compatible with the compile options. This is particularly important in multilib setups including the libc++ headers, where selecting the include directories from two different sysroots can cause an actual build failure. This occurs when including `<stdio.h>`, for example: libc++'s stdio.h is included first, and will try to use `#include_next` to fetch the underlying libc's version. But if there are two include directories from separate multilibs, then both of their C++ include directories will end up on the include path first, followed by both the C directories. So the `#include_next` from the first libc++ stdio.h will include the second libc++ stdio.h, which will do nothing because it has the same include guard macro, and the libc header won't ever be included at all. If more than one of the options in an ExclusiveGroup matches the given flags, the last one wins. 
--- clang/include/clang/Driver/Multilib.h | 16 - clang/lib/Driver/Multilib.cpp | 49 ++--- .../baremetal-multilib-exclusive-group.yaml | 69 +++ 3 files changed, 122 insertions(+), 12 deletions(-) create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml diff --git a/clang/include/clang/Driver/Multilib.h b/clang/include/clang/Driver/Multilib.h index 1416559414f894b..6a9533e6dd831f1 100644 --- a/clang/include/clang/Driver/Multilib.h +++ b/clang/include/clang/Driver/Multilib.h @@ -39,13 +39,22 @@ class Multilib { std::string IncludeSuffix; flags_list Flags; + // Optionally, a multilib can be assigned a string tag indicating that it's + // part of a group of mutually exclusive possibilities. If two or more + // multilibs have the same non-empty value of ExclusiveGroup, then only the + // last matching one of them will be selected. + // + // Setting this to the empty string is a special case, indicating that the + // directory is not mutually exclusive with anything else. + std::string ExclusiveGroup; + public: /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the /// sysroot string so they must either be empty or begin with a '/' character. /// This is enforced with an assert in the constructor. Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {}, - StringRef IncludeSuffix = {}, - const flags_list &Flags = flags_list()); + StringRef IncludeSuffix = {}, const flags_list &Flags = flags_list(), + StringRef ExclusiveGroup = {}); /// Get the detected GCC installation path suffix for the multi-arch /// target variant. Always starts with a '/', unless empty @@ -63,6 +72,9 @@ class Multilib { /// All elements begin with either '-' or '!' const flags_list &flags() const { return Flags; } + /// Get the exclusive group label. 
+ const std::string &exclusiveGroup() const { return ExclusiveGroup; } + LLVM_DUMP_METHOD void dump() const; /// print summary of the Multilib void print(raw_ostream &OS) const; diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp index 48a494d9fa38db5..085ccee7b25752e 100644 --- a/clang/lib/Driver/Multilib.cpp +++ b/clang/lib/Driver/Multilib.cpp @@ -29,9 +29,10 @@ using namespace driver; using namespace llvm::sys; Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix, - StringRef IncludeSuffix, const flags_list &Flags) + StringRef IncludeSuffix, const flags_list &Flags, + StringRef ExclusiveGroup) : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix), - Flags(Flags) { + Flags(Flags), ExclusiveGroup(ExclusiveGroup) { assert(GCCSuffix.empty() || (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1)); assert(OSSuffix.empty() || @@ -96,13 +97,39 @@ bool MultilibSet::select(const Multilib::flags_list &Flags, llvm::SmallVector &Selected) const { llvm::StringSet<> FlagSet(expandFlags(Flags)); Selected.clear(); - llvm::copy_if(Multilibs, std::back_inserter(Selected), -[&FlagSet](const Multilib &M) { - for (const std::string &F : M.flags()) -if (!FlagSet.contains(F)) - return false; - return true
[clang] [Driver] Add `--` to some test clang-cl command lines. (PR #70055)
https://github.com/statham-arm created https://github.com/llvm/llvm-project/pull/70055 If clang/test/Driver/cl-offload.cu is run on Unix in a directory whose absolute pathname starts with `/w`, such as the `/workspace` used by at least some Jenkins CI setups, then the file name on the clang command line is misinterpreted as some kind of MSVC warning-control option, and ignored by the catch-all `_SLASH_w` option in Options.td. Other clang-cl tests take care to put a `--` before the input file name, to force clang to treat it as a filename even if it starts with a / and accidentally looks like a cl option. Do the same here. >From e5b90488cdb8b2d8865c3ce434bd1adba16e0992 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Tue, 24 Oct 2023 15:52:38 +0100 Subject: [PATCH] [Driver] Add `--` to some test clang-cl command lines. If clang/test/Driver/cl-offload.cu is run on Unix in a directory whose absolute pathname starts with `/w`, such as the `/workspace` used by at least some Jenkins CI setups, then the file name on the clang command line is misinterpreted as some kind of MSVC warning-control option, and ignored by the catch-all `_SLASH_w` option in Options.td. Other clang-cl tests take care to put a `--` before the input file name, to force clang to treat it as a filename even if it starts with a / and accidentally looks like a cl option. Do the same here. 
--- clang/test/Driver/cl-offload.cu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/Driver/cl-offload.cu b/clang/test/Driver/cl-offload.cu index 650c13da15b5b58..eaa4b58afa8878b 100644 --- a/clang/test/Driver/cl-offload.cu +++ b/clang/test/Driver/cl-offload.cu @@ -5,11 +5,11 @@ // RUN: %clang_cl -### -target x86_64-pc-windows-msvc --offload-arch=sm_35 -fgpu-rdc \ // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ -// RUN: /Wall -x cuda %s 2>&1 \ +// RUN: /Wall -x cuda -- %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=CUDA // RUN: %clang_cl -### -target x86_64-pc-windows-msvc --offload-arch=gfx1010 -fgpu-rdc --hip-link \ -// RUN: --rocm-path=%S/Inputs/rocm /Wall -x hip %s 2>&1 \ +// RUN: --rocm-path=%S/Inputs/rocm /Wall -x hip -- %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=HIP // CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-pc-windows-msvc" ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] Let clang-cl support CUDA/HIP (PR #68921)
statham-arm wrote: @yxsamliu I've just raised https://github.com/llvm/llvm-project/pull/70055 which fixes an issue with the new test here. Perhaps it might also allow you to remove the exclusion for `system-darwin`? https://github.com/llvm/llvm-project/pull/68921 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Add `--` to some test clang-cl command lines. (PR #70055)
https://github.com/statham-arm updated https://github.com/llvm/llvm-project/pull/70055 >From 029eecc71b94130bb6d058c9f9d0779e32cd45f1 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Tue, 24 Oct 2023 15:52:38 +0100 Subject: [PATCH] [Driver] Add `--` to some test clang-cl command lines. If clang/test/Driver/cl-offload.cu is run on Unix in a directory whose absolute pathname starts with `/w`, such as the `/workspace` used by at least some Jenkins CI setups, then the file name on the clang command line is misinterpreted as some kind of MSVC warning-control option, and ignored by the catch-all `_SLASH_w` option in Options.td. Other clang-cl tests take care to put a `--` before the input file name, to force clang to treat it as a filename even if it starts with a / and accidentally looks like a cl option. Do the same here. This also allows the exclusion for `system-darwin` to be removed, because that was trying to avoid a similar filename/option clash involving `/Users`. --- clang/test/Driver/cl-offload.cu | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/test/Driver/cl-offload.cu b/clang/test/Driver/cl-offload.cu index 650c13da15b5b58..b05bf3b97b7eb71 100644 --- a/clang/test/Driver/cl-offload.cu +++ b/clang/test/Driver/cl-offload.cu @@ -1,15 +1,14 @@ -// REQUIRES: !system-darwin // REQUIRES: !system-solaris // The test cannot be run on Darwin because /Users will be treated as a MSVC option. 
// RUN: %clang_cl -### -target x86_64-pc-windows-msvc --offload-arch=sm_35 -fgpu-rdc \ // RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda \ -// RUN: /Wall -x cuda %s 2>&1 \ +// RUN: /Wall -x cuda -- %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=CUDA // RUN: %clang_cl -### -target x86_64-pc-windows-msvc --offload-arch=gfx1010 -fgpu-rdc --hip-link \ -// RUN: --rocm-path=%S/Inputs/rocm /Wall -x hip %s 2>&1 \ +// RUN: --rocm-path=%S/Inputs/rocm /Wall -x hip -- %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=HIP // CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" "x86_64-pc-windows-msvc" ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Add `--` to some test clang-cl command lines. (PR #70055)
https://github.com/statham-arm closed https://github.com/llvm/llvm-project/pull/70055 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
@@ -152,6 +180,7 @@ template <> struct llvm::yaml::MappingTraits { static void mapping(llvm::yaml::IO &io, MultilibSerialization &V) { io.mapRequired("Dir", V.Dir); io.mapRequired("Flags", V.Flags); +io.mapOptional("ExclusiveGroup", V.ExclusiveGroup); statham-arm wrote: I'll rename it if you like, but I worry that that might be ambiguous, or at least unclear. Within the general context of linking and libraries, "group" need not mean a _mutually exclusive_ group; it could mean a grouping of libraries for other purposes too, like a mutually _dependent_ group (you must select all of these or none). https://github.com/llvm/llvm-project/pull/69447 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
https://github.com/statham-arm updated https://github.com/llvm/llvm-project/pull/69447 >From 2a65ae75e8c8e62e7275a439849837919599e896 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 14 Sep 2023 14:51:17 +0100 Subject: [PATCH 1/2] [Driver] Add ExclusiveGroup feature to multilib.yaml. This allows a YAML-based multilib configuration to specify explicitly that a subset of its library directories are alternatives to each other, i.e. at most one of that subset should be selected. So if you have multiple sysroots each including a full set of headers and libraries, you can mark them as members of the same ExclusiveGroup, and then you'll be sure that only one of them is selected, even if two or more are compatible with the compile options. This is particularly important in multilib setups including the libc++ headers, where selecting the include directories from two different sysroots can cause an actual build failure. This occurs when including <stdio.h>, for example: libc++'s stdio.h is included first, and will try to use `#include_next` to fetch the underlying libc's version. But if there are two include directories from separate multilibs, then both of their C++ include directories will end up on the include path first, followed by both the C directories. So the `#include_next` from the first libc++ stdio.h will include the second libc++ stdio.h, which will do nothing because it has the same include guard macro, and the libc header won't ever be included at all. If more than one of the options in an ExclusiveGroup matches the given flags, the last one wins. 
--- clang/include/clang/Driver/Multilib.h | 16 - clang/lib/Driver/Multilib.cpp | 49 ++--- .../baremetal-multilib-exclusive-group.yaml | 69 +++ 3 files changed, 122 insertions(+), 12 deletions(-) create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml diff --git a/clang/include/clang/Driver/Multilib.h b/clang/include/clang/Driver/Multilib.h index 1416559414f894b..6a9533e6dd831f1 100644 --- a/clang/include/clang/Driver/Multilib.h +++ b/clang/include/clang/Driver/Multilib.h @@ -39,13 +39,22 @@ class Multilib { std::string IncludeSuffix; flags_list Flags; + // Optionally, a multilib can be assigned a string tag indicating that it's + // part of a group of mutually exclusive possibilities. If two or more + // multilibs have the same non-empty value of ExclusiveGroup, then only the + // last matching one of them will be selected. + // + // Setting this to the empty string is a special case, indicating that the + // directory is not mutually exclusive with anything else. + std::string ExclusiveGroup; + public: /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the /// sysroot string so they must either be empty or begin with a '/' character. /// This is enforced with an assert in the constructor. Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {}, - StringRef IncludeSuffix = {}, - const flags_list &Flags = flags_list()); + StringRef IncludeSuffix = {}, const flags_list &Flags = flags_list(), + StringRef ExclusiveGroup = {}); /// Get the detected GCC installation path suffix for the multi-arch /// target variant. Always starts with a '/', unless empty @@ -63,6 +72,9 @@ class Multilib { /// All elements begin with either '-' or '!' const flags_list &flags() const { return Flags; } + /// Get the exclusive group label. 
+ const std::string &exclusiveGroup() const { return ExclusiveGroup; } + LLVM_DUMP_METHOD void dump() const; /// print summary of the Multilib void print(raw_ostream &OS) const; diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp index 48a494d9fa38db5..085ccee7b25752e 100644 --- a/clang/lib/Driver/Multilib.cpp +++ b/clang/lib/Driver/Multilib.cpp @@ -29,9 +29,10 @@ using namespace driver; using namespace llvm::sys; Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix, - StringRef IncludeSuffix, const flags_list &Flags) + StringRef IncludeSuffix, const flags_list &Flags, + StringRef ExclusiveGroup) : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix), - Flags(Flags) { + Flags(Flags), ExclusiveGroup(ExclusiveGroup) { assert(GCCSuffix.empty() || (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1)); assert(OSSuffix.empty() || @@ -96,13 +97,39 @@ bool MultilibSet::select(const Multilib::flags_list &Flags, llvm::SmallVector &Selected) const { llvm::StringSet<> FlagSet(expandFlags(Flags)); Selected.clear(); - llvm::copy_if(Multilibs, std::back_inserter(Selected), -[&FlagSet](const Multilib &M) { - for (const std::string &F : M.flags()) -if (!FlagSet.contains(F)) - return false; - return
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
@@ -0,0 +1,69 @@ +# REQUIRES: shell +# UNSUPPORTED: system-windows + +# RUN: rm -rf %t + +# RUN: mkdir -p %t/baremetal_multilib/bin +# RUN: ln -s %clang %t/baremetal_multilib/bin/clang + +# RUN: mkdir -p %t/baremetal_multilib/lib/clang-runtimes +# RUN: ln -s %s %t/baremetal_multilib/lib/clang-runtimes/multilib.yaml + +# RUN: %t/baremetal_multilib/bin/clang -no-canonical-prefixes -x c++ %s -### -o %t.out --target=thumbv7em-none-unknown-eabi --sysroot= 2>%t.err + +# RUN: FileCheck -DSYSROOT=%t/baremetal_multilib %s < %t.err --check-prefix=TESTDIR1_NON_EXCLUSIVE +# RUN: FileCheck -DSYSROOT=%t/baremetal_multilib %s < %t.err --check-prefix=TESTDIR2_NON_EXCLUSIVE +# RUN: FileCheck -DSYSROOT=%t/baremetal_multilib %s < %t.err --check-prefix=TESTDIR1_EXCLUSIVE +# RUN: FileCheck -DSYSROOT=%t/baremetal_multilib %s < %t.err --check-prefix=TESTDIR2_EXCLUSIVE +# RUN: FileCheck -DSYSROOT=%t/baremetal_multilib %s < %t.err --check-prefix=TESTDIR1_OWN_GROUP +# RUN: FileCheck -DSYSROOT=%t/baremetal_multilib %s < %t.err --check-prefix=TESTDIR2_OWN_GROUP + +# Expected results: +# +# Due to the Mappings section, all six of these library directories should +# match the command-line flag --target=thumbv7em-none-unknown-eabi. +# +# The two "non_exclusive" directories, which don't have an ExclusiveGroup at +# all, should both be selected. So should the two "own_group", each of which +# specifies a different value of ExclusiveGroup. But the two "exclusive", which +# have the _same_ ExclusiveGroup value, should not: the second one wins. So we +# expect five of these six directories to show up in the clang-cc1 command +# line, but not testdir1_exclusive. 
+ +# TESTDIR1_NON_EXCLUSIVE: "-internal-isystem" "[[SYSROOT]]/bin/../lib/clang-runtimes/testdir1_non_exclusive/include/c++/v1" +# TESTDIR2_NON_EXCLUSIVE: "-internal-isystem" "[[SYSROOT]]/bin/../lib/clang-runtimes/testdir2_non_exclusive/include/c++/v1" +# TESTDIR2_EXCLUSIVE: "-internal-isystem" "[[SYSROOT]]/bin/../lib/clang-runtimes/testdir2_exclusive/include/c++/v1" +# TESTDIR1_OWN_GROUP: "-internal-isystem" "[[SYSROOT]]/bin/../lib/clang-runtimes/testdir1_own_group/include/c++/v1" +# TESTDIR2_OWN_GROUP: "-internal-isystem" "[[SYSROOT]]/bin/../lib/clang-runtimes/testdir2_own_group/include/c++/v1" statham-arm wrote: Thanks. Yes, I agree it would be nice to have a convenient way to share headers in cases where they don't have to vary. But for the cases where they do have to, this feature is still vital. (And for the cases where we just haven't got round to it yet it's still _useful_ :-) https://github.com/llvm/llvm-project/pull/69447 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
@@ -152,6 +180,7 @@ template <> struct llvm::yaml::MappingTraits { static void mapping(llvm::yaml::IO &io, MultilibSerialization &V) { io.mapRequired("Dir", V.Dir); io.mapRequired("Flags", V.Flags); +io.mapOptional("ExclusiveGroup", V.ExclusiveGroup); statham-arm wrote: Thanks, @MaskRay. I've left it as `ExclusiveGroup` for now. https://github.com/llvm/llvm-project/pull/69447 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)
https://github.com/statham-arm updated https://github.com/llvm/llvm-project/pull/69447 >From 2a65ae75e8c8e62e7275a439849837919599e896 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Thu, 14 Sep 2023 14:51:17 +0100 Subject: [PATCH 1/2] [Driver] Add ExclusiveGroup feature to multilib.yaml. This allows a YAML-based multilib configuration to specify explicitly that a subset of its library directories are alternatives to each other, i.e. at most one of that subset should be selected. So if you have multiple sysroots each including a full set of headers and libraries, you can mark them as members of the same ExclusiveGroup, and then you'll be sure that only one of them is selected, even if two or more are compatible with the compile options. This is particularly important in multilib setups including the libc++ headers, where selecting the include directories from two different sysroots can cause an actual build failure. This occurs when including <stdio.h>, for example: libc++'s stdio.h is included first, and will try to use `#include_next` to fetch the underlying libc's version. But if there are two include directories from separate multilibs, then both of their C++ include directories will end up on the include path first, followed by both the C directories. So the `#include_next` from the first libc++ stdio.h will include the second libc++ stdio.h, which will do nothing because it has the same include guard macro, and the libc header won't ever be included at all. If more than one of the options in an ExclusiveGroup matches the given flags, the last one wins. 
--- clang/include/clang/Driver/Multilib.h | 16 - clang/lib/Driver/Multilib.cpp | 49 ++--- .../baremetal-multilib-exclusive-group.yaml | 69 +++ 3 files changed, 122 insertions(+), 12 deletions(-) create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml diff --git a/clang/include/clang/Driver/Multilib.h b/clang/include/clang/Driver/Multilib.h index 1416559414f894b..6a9533e6dd831f1 100644 --- a/clang/include/clang/Driver/Multilib.h +++ b/clang/include/clang/Driver/Multilib.h @@ -39,13 +39,22 @@ class Multilib { std::string IncludeSuffix; flags_list Flags; + // Optionally, a multilib can be assigned a string tag indicating that it's + // part of a group of mutually exclusive possibilities. If two or more + // multilibs have the same non-empty value of ExclusiveGroup, then only the + // last matching one of them will be selected. + // + // Setting this to the empty string is a special case, indicating that the + // directory is not mutually exclusive with anything else. + std::string ExclusiveGroup; + public: /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the /// sysroot string so they must either be empty or begin with a '/' character. /// This is enforced with an assert in the constructor. Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {}, - StringRef IncludeSuffix = {}, - const flags_list &Flags = flags_list()); + StringRef IncludeSuffix = {}, const flags_list &Flags = flags_list(), + StringRef ExclusiveGroup = {}); /// Get the detected GCC installation path suffix for the multi-arch /// target variant. Always starts with a '/', unless empty @@ -63,6 +72,9 @@ class Multilib { /// All elements begin with either '-' or '!' const flags_list &flags() const { return Flags; } + /// Get the exclusive group label. 
+ const std::string &exclusiveGroup() const { return ExclusiveGroup; } + LLVM_DUMP_METHOD void dump() const; /// print summary of the Multilib void print(raw_ostream &OS) const; diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp index 48a494d9fa38db5..085ccee7b25752e 100644 --- a/clang/lib/Driver/Multilib.cpp +++ b/clang/lib/Driver/Multilib.cpp @@ -29,9 +29,10 @@ using namespace driver; using namespace llvm::sys; Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix, - StringRef IncludeSuffix, const flags_list &Flags) + StringRef IncludeSuffix, const flags_list &Flags, + StringRef ExclusiveGroup) : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix), - Flags(Flags) { + Flags(Flags), ExclusiveGroup(ExclusiveGroup) { assert(GCCSuffix.empty() || (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1)); assert(OSSuffix.empty() || @@ -96,13 +97,39 @@ bool MultilibSet::select(const Multilib::flags_list &Flags, llvm::SmallVector &Selected) const { llvm::StringSet<> FlagSet(expandFlags(Flags)); Selected.clear(); - llvm::copy_if(Multilibs, std::back_inserter(Selected), -[&FlagSet](const Multilib &M) { - for (const std::string &F : M.flags()) -if (!FlagSet.contains(F)) - return false; - return
[clang] 4978296 - [ARM, MVE] Support -ve offsets in gather-load intrinsics.
Author: Simon Tatham Date: 2020-01-06T16:33:07Z New Revision: 4978296cd8e4d10724cfa41f0308d256c0fd490c URL: https://github.com/llvm/llvm-project/commit/4978296cd8e4d10724cfa41f0308d256c0fd490c DIFF: https://github.com/llvm/llvm-project/commit/4978296cd8e4d10724cfa41f0308d256c0fd490c.diff LOG: [ARM,MVE] Support -ve offsets in gather-load intrinsics. Summary: The ACLE intrinsics with `gather_base` or `scatter_base` in the name are wrappers on the MVE load/store instructions that take a vector of base addresses and an immediate offset. The immediate offset can be up to 127 times the alignment unit, and it can be positive or negative. At the MC layer, we got that right. But in the Sema error checking for the wrapping intrinsics, the offset was erroneously constrained to be positive. To fix this I've adjusted the `imm_mem7bit` class in the Tablegen that defines the intrinsics. But that causes integer literals like `0xfffffffffffffe04` to appear in the autogenerated calls to `SemaBuiltinConstantArgRange`, which provokes a compiler warning because that's out of the non-overflowing range of an `int64_t`. So I've also tweaked `MveEmitter` to emit that as `-0x1fc` instead. Updated the tests of the Sema checks themselves, and also adjusted a random sample of the CodeGen tests to actually use negative offsets and prove they get all the way through code generation without causing a crash. 
Reviewers: dmgreen, miyuki, MarkMurrayARM Reviewed By: dmgreen Subscribers: kristof.beyls, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D72268 Added: Modified: clang/include/clang/Basic/arm_mve_defs.td clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c clang/test/Sema/arm-mve-immediates.c clang/utils/TableGen/MveEmitter.cpp llvm/test/CodeGen/Thumb2/mve-intrinsics/scatter-gather.ll Removed: diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index 939d5eb0cd6b..6fba88df34bf 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -345,9 +345,10 @@ def imm_1248 : Immediate> { // imm_mem7bit is a valid immediate offset for a load/store intrinsic whose // memory access size is n bytes (e.g. 1 for vldrb_[whatever], 2 for vldrh, -// ...). The set of valid immediates for these is {0*n, 1*n, ..., 127*n}. +// ...). The set of valid immediates for these is {-127*n, ..., -1*n, 0*n, 1*n, +// ..., 127*n}. 
class imm_mem7bit - : Immediate> { + : Immediate> { let extra = !if(!eq(membytes, 1), ?, "Multiple"); let extraarg = !cast(membytes); } diff --git a/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c b/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c index 8bf2111a9e63..564965acc04d 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c @@ -196,12 +196,12 @@ int64x2_t test_vldrdq_gather_base_s64(uint64x2_t addr) // CHECK-LABEL: @test_vldrdq_gather_base_u64( // CHECK-NEXT: entry: -// CHECK-NEXT:[[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> [[ADDR:%.*]], i32 336) +// CHECK-NEXT:[[TMP0:%.*]] = call <2 x i64> @llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> [[ADDR:%.*]], i32 -336) // CHECK-NEXT:ret <2 x i64> [[TMP0]] // uint64x2_t test_vldrdq_gather_base_u64(uint64x2_t addr) { -return vldrdq_gather_base_u64(addr, 0x150); +return vldrdq_gather_base_u64(addr, -0x150); } // CHECK-LABEL: @test_vldrdq_gather_base_wb_s64( @@ -221,7 +221,7 @@ int64x2_t test_vldrdq_gather_base_wb_s64(uint64x2_t *addr) // CHECK-LABEL: @test_vldrdq_gather_base_wb_u64( // CHECK-NEXT: entry: // CHECK-NEXT:[[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], align 8 -// CHECK-NEXT:[[TMP1:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> [[TMP0]], i32 328) +// CHECK-NEXT:[[TMP1:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> [[TMP0]], i32 -328) // CHECK-NEXT:[[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP1]], 1 // CHECK-NEXT:store <2 x i64> [[TMP2]], <2 x i64>* [[ADDR]], align 8 // CHECK-NEXT:[[TMP3:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP1]], 0 @@ -229,7 +229,7 @@ int64x2_t test_vldrdq_gather_base_wb_s64(uint64x2_t *addr) // uint64x2_t test_vldrdq_gather_base_wb_u64(uint64x2_t *addr) { -return vldrdq_gather_base_wb_u64(addr, 0x148); +return 
vldrdq_gather_base_wb_u64(addr, -0x148); } // CHECK-LABEL: @test_vldrdq_gather_base_wb_z_s64( @@ -280,12 +280,12 @@ int64x2_t test_vldrdq_gather_base_z_s64(uint64x2_t addr, mve_pred16_t p) // CHECK-NEXT: entry: // CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT:[[TMP1:%.*]] = call <4 x i1> @llvm.ar
[clang] d857e11 - [ARM,MVE] Fix valid immediate range for vsliq_n.
Author: Simon Tatham Date: 2020-01-09T15:04:47Z New Revision: d857e114b5e04f5143485a5aea7ad9b283768692 URL: https://github.com/llvm/llvm-project/commit/d857e114b5e04f5143485a5aea7ad9b283768692 DIFF: https://github.com/llvm/llvm-project/commit/d857e114b5e04f5143485a5aea7ad9b283768692.diff LOG: [ARM,MVE] Fix valid immediate range for vsliq_n. In common with most MVE immediate shift instructions, the left shift takes an immediate in the range [0,n-1], while the right shift takes one in the range [1,n]. I had absent-mindedly made them both the latter. While I'm here, I've added a set of regression tests checking both ends of the immediate range for a representative sample of the immediate shifts. Added: Modified: clang/include/clang/Basic/arm_mve.td clang/test/Sema/arm-mve-immediates.c Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 87091a325071..86a04e33ce76 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -684,7 +684,7 @@ let params = [s16, s32], pnt = PNT_NType in { defm vqrshrun : VSHRN; } let params = T.Int, pnt = PNT_NType in { - defm vsli : DyadicImmShift; + defm vsli : DyadicImmShift; defm vsri : DyadicImmShift; } diff --git a/clang/test/Sema/arm-mve-immediates.c b/clang/test/Sema/arm-mve-immediates.c index 54cdb96efcd3..b8106fbb7028 100644 --- a/clang/test/Sema/arm-mve-immediates.c +++ b/clang/test/Sema/arm-mve-immediates.c @@ -110,3 +110,96 @@ void test_lane_indices(uint8x16_t v16, uint16x8_t v8, vsetq_lane_u64(23, v2, 1); vsetq_lane_u64(23, v2, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}} } + +void test_immediate_shifts(uint8x16_t vb, uint16x8_t vh, uint32x4_t vw) +{ + vshlq_n(vb, 0); + vshlq_n(vb, 7); + vshlq_n(vh, 0); + vshlq_n(vh, 15); + vshlq_n(vw, 0); + vshlq_n(vw, 31); + + vshlq_n(vb, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}} + vshlq_n(vb, 8); // expected-error {{argument value 8 is 
outside the valid range [0, 7]}} + vshlq_n(vh, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}} + vshlq_n(vh, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + vshlq_n(vw, -1); // expected-error {{argument value -1 is outside the valid range [0, 31]}} + vshlq_n(vw, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + + vqshlq_n(vb, 0); + vqshlq_n(vb, 7); + vqshlq_n(vh, 0); + vqshlq_n(vh, 15); + vqshlq_n(vw, 0); + vqshlq_n(vw, 31); + + vqshlq_n(vb, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}} + vqshlq_n(vb, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + vqshlq_n(vh, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}} + vqshlq_n(vh, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + vqshlq_n(vw, -1); // expected-error {{argument value -1 is outside the valid range [0, 31]}} + vqshlq_n(vw, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + + vsliq(vb, vb, 0); + vsliq(vb, vb, 7); + vsliq(vh, vh, 0); + vsliq(vh, vh, 15); + vsliq(vw, vw, 0); + vsliq(vw, vw, 31); + + vsliq(vb, vb, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}} + vsliq(vb, vb, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}} + vsliq(vh, vh, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}} + vsliq(vh, vh, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}} + vsliq(vw, vw, -1); // expected-error {{argument value -1 is outside the valid range [0, 31]}} + vsliq(vw, vw, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}} + + vshllbq(vb, 1); + vshllbq(vb, 8); + vshllbq(vh, 1); + vshllbq(vh, 16); + + vshllbq(vb, 0); // expected-error {{argument value 0 is outside the valid range [1, 8]}} + vshllbq(vb, 9); // expected-error 
{{argument value 9 is outside the valid range [1, 8]}} + vshllbq(vh, 0); // expected-error {{argument value 0 is outside the valid range [1, 16]}} + vshllbq(vh, 17); // expected-error {{argument value 17 is outside the valid range [1, 16]}} + + vshrq(vb, 1); + vshrq(vb, 8); + vshrq(vh, 1); + vshrq(vh, 16); + vshrq(vw, 1); + vshrq(vw, 32); + + vshrq(vb, 0); // expected-error {{argument value 0 is outside the valid range [1, 8]}} + vshrq(vb, 9); // expected-error {{argument value 9 is outside the valid range [1, 8]}} + vshrq(vh, 0); // expected-error {{argument value 0 is outside the valid range [1, 16]}} + vshrq(vh, 17); // expected-error {{argument value 17 is outside the valid range [1, 16]}} + vshrq(vw, 0); // expected-error {{argument value 0 is outside the valid rang
[clang] 06d07ec - [Clang] Handle target-specific builtins returning aggregates.
Author: Simon Tatham Date: 2020-01-09T17:28:37Z New Revision: 06d07ec4a372b55e6fb77bf0b97964bde16a3184 URL: https://github.com/llvm/llvm-project/commit/06d07ec4a372b55e6fb77bf0b97964bde16a3184 DIFF: https://github.com/llvm/llvm-project/commit/06d07ec4a372b55e6fb77bf0b97964bde16a3184.diff LOG: [Clang] Handle target-specific builtins returning aggregates. Summary: A few of the ARM MVE builtins directly return a structure type. This causes an assertion failure at code-gen time if you try to assign the result of the builtin to a variable, because the `RValue` created in `EmitBuiltinExpr` from the `llvm::Value` produced by codegen is always made by `RValue::get()`, which creates a non-aggregate `RValue` that will fail an assertion when `AggExprEmitter::withReturnValueSlot` calls `Src.getAggregatePointer()`. A similar failure occurs if you try to use the struct return value directly to extract one field, e.g. `vld2q(address).val[0]`. The existing code-gen tests for those MVE builtins pass the returned structure type directly to the C `return` statement, which apparently managed to avoid that particular code path, so we didn't notice the crash. Now `EmitBuiltinExpr` checks the evaluation kind of the builtin's return value, and does the necessary handling for aggregate returns. I've added two extra test cases, both of which crashed before this change. 
Reviewers: dmgreen, rjmccall Reviewed By: rjmccall Subscribers: kristof.beyls, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D72271 Added: Modified: clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/arm-mve-intrinsics/vld24.c Removed: diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 3fadf09c460d..2842fe826636 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4332,9 +4332,29 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(V); } - // See if we have a target specific builtin that needs to be lowered. - if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) -return RValue::get(V); + // Some target-specific builtins can have aggregate return values, e.g. + // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force + // ReturnValue to be non-null, so that the target-specific emission code can + // always just emit into it. + TypeEvaluationKind EvalKind = getEvaluationKind(E->getType()); + if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) { +Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp"); +ReturnValue = ReturnValueSlot(DestPtr, false); + } + + // Now see if we can emit a target-specific builtin. 
+ if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) { +switch (EvalKind) { +case TEK_Scalar: + return RValue::get(V); +case TEK_Aggregate: + return RValue::getAggregate(ReturnValue.getValue(), + ReturnValue.isVolatile()); +case TEK_Complex: + llvm_unreachable("No current target builtin returns complex"); +} +llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr"); + } ErrorUnsupported(E, "builtin function"); diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vld24.c b/clang/test/CodeGen/arm-mve-intrinsics/vld24.c index 984d5989217e..a0f37fe65d3d 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vld24.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vld24.c @@ -98,3 +98,45 @@ void test_vst2q_f16(float16_t *addr, float16x8x2_t value) vst2q_f16(addr, value); #endif /* POLYMORPHIC */ } + +// CHECK-LABEL: @load_into_variable( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.arm.mve.vld2q.v8i16.p0i16(i16* [[ADDR:%.*]]) +// CHECK-NEXT:[[TMP1:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP0]], 0 +// CHECK-NEXT:[[TMP2:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T:%.*]] undef, <8 x i16> [[TMP1]], 0, 0 +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP0]], 1 +// CHECK-NEXT:[[TMP4:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] [[TMP2]], <8 x i16> [[TMP3]], 0, 1 +// CHECK-NEXT:store <8 x i16> [[TMP1]], <8 x i16>* [[VALUES:%.*]], align 8 +// CHECK-NEXT:[[ARRAYIDX4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[VALUES]], i32 1 +// CHECK-NEXT:store <8 x i16> [[TMP3]], <8 x i16>* [[ARRAYIDX4]], align 8 +// CHECK-NEXT:ret void +// +void load_into_variable(const uint16_t *addr, uint16x8_t *values) +{ +uint16x8x2_t v; +#ifdef POLYMORPHIC +v = vld2q(addr); +#else /* POLYMORPHIC */ +v = vld2q_u16(addr); +#endif /* POLYMORPHIC */ +values[0] = v.val[0]; +values[1] = v.val[1]; +} + +// CHECK-LABEL: @extract_one_vector( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call { <4 x i32>, <4 x i32> } 
@llvm.arm.mve.vld2q.v4i32.
[clang] 1ccee0e - [ARM, MVE] Make `vqrshrun` generate the right instruction.
Author: Simon Tatham Date: 2020-01-10T11:25:05Z New Revision: 1ccee0e86386762bd742fd067391b6c4be089806 URL: https://github.com/llvm/llvm-project/commit/1ccee0e86386762bd742fd067391b6c4be089806 DIFF: https://github.com/llvm/llvm-project/commit/1ccee0e86386762bd742fd067391b6c4be089806.diff LOG: [ARM,MVE] Make `vqrshrun` generate the right instruction. Summary: A copy-paste error in `arm_mve.td` meant that the MVE `vqrshrun` intrinsic family was generating the `vqshrun` machine instruction, because in the IR intrinsic call, the rounding flag argument was set to 0 rather than 1. Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard Reviewed By: dmgreen Subscribers: kristof.beyls, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D72496 Added: Modified: clang/include/clang/Basic/arm_mve.td clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm-dyadic.c Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 86a04e33ce76..6d0bb96cba6f 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -681,7 +681,7 @@ let params = [s16, s32, u16, u32], pnt = PNT_NType in { } let params = [s16, s32], pnt = PNT_NType in { defm vqshrun : VSHRN; - defm vqrshrun : VSHRN; + defm vqrshrun : VSHRN; } let params = T.Int, pnt = PNT_NType in { defm vsli : DyadicImmShift; diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm-dyadic.c b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm-dyadic.c index 3d4f77b99d74..c5591392e373 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm-dyadic.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm-dyadic.c @@ -1086,7 +1086,7 @@ uint16x8_t test_vqrshrntq_m_n_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p) // CHECK-LABEL: @test_vqrshrunbq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 7, i32 1, i32 0, 
i32 1, i32 0, i32 0) +// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 7, i32 1, i32 1, i32 1, i32 0, i32 0) // CHECK-NEXT:ret <16 x i8> [[TMP0]] // uint8x16_t test_vqrshrunbq_n_s16(uint8x16_t a, int16x8_t b) @@ -1100,7 +1100,7 @@ uint8x16_t test_vqrshrunbq_n_s16(uint8x16_t a, int16x8_t b) // CHECK-LABEL: @test_vqrshrunbq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, i32 1, i32 0, i32 1, i32 0, i32 0) +// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, i32 1, i32 1, i32 1, i32 0, i32 0) // CHECK-NEXT:ret <8 x i16> [[TMP0]] // uint16x8_t test_vqrshrunbq_n_s32(uint16x8_t a, int32x4_t b) @@ -1114,7 +1114,7 @@ uint16x8_t test_vqrshrunbq_n_s32(uint16x8_t a, int32x4_t b) // CHECK-LABEL: @test_vqrshruntq_n_s16( // CHECK-NEXT: entry: -// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 1, i32 0, i32 1, i32 0, i32 1) +// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 1, i32 1, i32 1, i32 0, i32 1) // CHECK-NEXT:ret <16 x i8> [[TMP0]] // uint8x16_t test_vqrshruntq_n_s16(uint8x16_t a, int16x8_t b) @@ -1128,7 +1128,7 @@ uint8x16_t test_vqrshruntq_n_s16(uint8x16_t a, int16x8_t b) // CHECK-LABEL: @test_vqrshruntq_n_s32( // CHECK-NEXT: entry: -// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 3, i32 1, i32 0, i32 1, i32 0, i32 1) +// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 3, i32 1, i32 1, i32 1, i32 0, i32 1) // CHECK-NEXT:ret <8 x i16> [[TMP0]] // uint16x8_t test_vqrshruntq_n_s32(uint16x8_t a, int32x4_t b) @@ -1144,7 +1144,7 
@@ uint16x8_t test_vqrshruntq_n_s32(uint16x8_t a, int32x4_t b) // CHECK-NEXT: entry: // CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 // CHECK-NEXT:[[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT:[[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 4, i32 1, i32 0, i32 1, i32 0, i32 0, <8 x i1> [[TMP1]]) +// CHECK-NEXT:[[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 4, i32 1, i32 1, i32 1, i32 0, i32 0, <8 x i1> [[TMP1]]) // CHECK-NEXT:ret <16 x i8> [[TMP2]] // uint8x16_t test_vqrshrunbq_m_n_s16(uint8x16_t a, int16x8_t b, mve_pred16_t p
[clang] 71d5454 - [ARM, MVE] Use the new Tablegen `defvar` and `if` statements.
Author: Simon Tatham Date: 2020-01-14T12:08:03Z New Revision: 71d5454b377239213874a0d762860e6a3e60bf54 URL: https://github.com/llvm/llvm-project/commit/71d5454b377239213874a0d762860e6a3e60bf54 DIFF: https://github.com/llvm/llvm-project/commit/71d5454b377239213874a0d762860e6a3e60bf54.diff LOG: [ARM,MVE] Use the new Tablegen `defvar` and `if` statements. Summary: This cleans up a lot of ugly `foreach` bodges that I've been using to work around the lack of those two language features. Now they both exist, I can make them all into something more legible! In particular, in the common pattern in `ARMInstrMVE.td` where a multiclass defines an `Instruction` instance plus one or more `Pat` that select it, I've used a `defvar` to wrap `!cast<Instruction>(NAME)` so that the patterns themselves become a little more legible. Replacing a `foreach` with a `defvar` removes a level of block structure, so several pieces of code have their indentation changed by this patch. Best viewed with whitespace ignored. NFC: the output of `llvm-tblgen -print-records` on the two affected Tablegen sources is exactly identical before and after this change, so there should be no effect at all on any of the other generated files. 
Reviewers: MarkMurrayARM, miyuki Reviewed By: MarkMurrayARM Subscribers: kristof.beyls, hiraditya, dmgreen, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D72690 Added: Modified: clang/include/clang/Basic/arm_mve.td llvm/lib/Target/ARM/ARMInstrMVE.td Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 6d0bb96cba6f..0e023b85459c 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -212,20 +212,17 @@ def vmaxvq: Intrinsic $prev, $vec))>; } -foreach half = [ "b", "t" ] in -foreach halfconst = [ !if(!eq(half, "b"), 0, 1) ] in { - -let params = [f32], pnt = PNT_None in { - -def vcvt#half#q_f16: Intrinsic< -VecOf, (args VecOf:$inactive, Vector:$a), -(IRInt<"vcvt_narrow"> $inactive, $a, halfconst)>; -def vcvt#half#q_m_f16: Intrinsic< -VecOf, (args VecOf:$inactive, Vector:$a, PredOf:$pred), -(IRInt<"vcvt_narrow_predicated"> $inactive, $a, halfconst, $pred)>; - -} // params = [f32], pnt = PNT_None - +foreach half = [ "b", "t" ] in { + defvar halfconst = !if(!eq(half, "b"), 0, 1); + + let params = [f32], pnt = PNT_None in { +def vcvt#half#q_f16: Intrinsic< + VecOf, (args VecOf:$inactive, Vector:$a), + (IRInt<"vcvt_narrow"> $inactive, $a, halfconst)>; +def vcvt#half#q_m_f16: Intrinsic< + VecOf, (args VecOf:$inactive, Vector:$a, PredOf:$pred), + (IRInt<"vcvt_narrow_predicated"> $inactive, $a, halfconst, $pred)>; + } // params = [f32], pnt = PNT_None } // loop over half = "b", "t" multiclass compare_with_pred; multiclass DyadicImmShift { - foreach intparams = [!if(!eq(!cast(outtype), !cast(Vector)), - [Vector], [outtype, Vector])] in { -def q_n: Intrinsic< -outtype, (args outtype:$a, Vector:$b, imm:$sh), -!con((IRInt $a, $b, $sh), extraargs)>; - -def q_m_n: Intrinsic< -outtype, (args outtype:$a, Vector:$b, imm:$sh, Predicate:$pred), -!con((IRInt - $a, $b, $sh), extraargs, (? 
$pred))>; - } + defvar intparams = !if(!eq(!cast(outtype), !cast(Vector)), + [Vector], [outtype, Vector]); + + def q_n: Intrinsic< + outtype, (args outtype:$a, Vector:$b, imm:$sh), + !con((IRInt $a, $b, $sh), extraargs)>; + + def q_m_n: Intrinsic< + outtype, (args outtype:$a, Vector:$b, imm:$sh, Predicate:$pred), + !con((IRInt + $a, $b, $sh), extraargs, (? $pred))>; } multiclass VSHRN { @@ -672,12 +669,11 @@ multiclass VSHRN { } let params = [s16, s32, u16, u32], pnt = PNT_NType in { - foreach U = [(unsignedflag Scalar)] in { -defm vshrn : VSHRN; -defm vqshrn : VSHRN; -defm vrshrn : VSHRN; -defm vqrshrn : VSHRN; - } + defvar U = (unsignedflag Scalar); + defm vshrn : VSHRN; + defm vqshrn : VSHRN; + defm vrshrn : VSHRN; + defm vqrshrn : VSHRN; } let params = [s16, s32], pnt = PNT_NType in { defm vqshrun : VSHRN; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 325c9153491d..604291be822c 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -594,25 +594,24 @@ class MVE_VABAV size> multiclass MVE_VABAV_m { def "" : MVE_VABAV; + defvar Inst = !cast(NAME); let Predicates = [HasMVEInt] in { def : Pat<(i32 (int_arm_mve_vabav -(i32 VTI.Unsigned), -(i32 rGPR:$Rda_src), -(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))), - (i32 (!cast(NAME) -(i32 rGPR:$Rda_src), -
[clang] ada01d1 - [clang] New __attribute__((__clang_arm_mve_strict_polymorphism)).
Author: Simon Tatham Date: 2020-01-15T15:04:10Z New Revision: ada01d1b869763f7d5d3438dcfce02066b06ab0a URL: https://github.com/llvm/llvm-project/commit/ada01d1b869763f7d5d3438dcfce02066b06ab0a DIFF: https://github.com/llvm/llvm-project/commit/ada01d1b869763f7d5d3438dcfce02066b06ab0a.diff LOG: [clang] New __attribute__((__clang_arm_mve_strict_polymorphism)). This is applied to the vector types defined in <arm_mve.h> for use with the intrinsics for the ARM MVE vector architecture. Its purpose is to inhibit lax vector conversions, but only in the context of overload resolution of the MVE polymorphic intrinsic functions. This solves an ambiguity problem with polymorphic MVE intrinsics that take a vector and a scalar argument: the scalar argument can often have the wrong integer type due to default integer promotions or unsuffixed literals, and therefore, the type of the vector argument should be considered trustworthy when resolving MVE polymorphism. As part of the same change, I've added the new attribute to the declarations generated by the MveEmitter Tablegen backend (and corrected a namespace issue with the other attribute while I was there). 
Reviewers: aaron.ballman, dmgreen Reviewed By: aaron.ballman Subscribers: kristof.beyls, JDevlieghere, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D72518 Added: clang/test/Sema/overload-arm-mve.c Modified: clang/include/clang/Basic/Attr.td clang/include/clang/Basic/AttrDocs.td clang/include/clang/Basic/DiagnosticSemaKinds.td clang/lib/AST/TypePrinter.cpp clang/lib/Sema/SemaOverload.cpp clang/lib/Sema/SemaType.cpp clang/utils/TableGen/MveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 16556b5f0745..10db2a868dce 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1479,6 +1479,11 @@ def NeonVectorType : TypeAttr { let ASTNode = 0; } +def ArmMveStrictPolymorphism : TypeAttr, TargetSpecificAttr { + let Spellings = [Clang<"__clang_arm_mve_strict_polymorphism">]; + let Documentation = [ArmMveStrictPolymorphismDocs]; +} + def NoUniqueAddress : InheritableAttr, TargetSpecificAttr { let Spellings = [CXX11<"", "no_unique_address", 201803>]; let Subjects = SubjectList<[NonBitField], ErrorDiag>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 03d36ae7ab32..456edd1daafc 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -4789,3 +4789,45 @@ close the handle. It is also assumed to require an open handle to work with. zx_status_t zx_handle_close(zx_handle_t handle [[clang::release_handle]]); }]; } + +def ArmMveStrictPolymorphismDocs : Documentation { +let Category = DocCatType; +let Content = [{ +This attribute is used in the implementation of the ACLE intrinsics for the Arm +MVE instruction set. It is used to define the vector types used by the MVE +intrinsics. + +Its effect is to modify the behavior of a vector type with respect to function +overloading. 
If a candidate function for overload resolution has a parameter +type with this attribute, then the selection of that candidate function will be +disallowed if the actual argument can only be converted via a lax vector +conversion. The aim is to prevent spurious ambiguity in ARM MVE polymorphic +intrinsics. + +.. code-block:: c++ + + void overloaded(uint16x8_t vector, uint16_t scalar); + void overloaded(int32x4_t vector, int32_t scalar); + uint16x8_t myVector; + uint16_t myScalar; + + // myScalar is promoted to int32_t as a side effect of the addition, + // so if lax vector conversions are considered for myVector, then + // the two overloads are equally good (one argument conversion + // each). But if the vector has the __clang_arm_mve_strict_polymorphism + // attribute, only the uint16x8_t,uint16_t overload will match. + overloaded(myVector, myScalar + 1); + +However, this attribute does not prohibit lax vector conversions in contexts +other than overloading. + +.. code-block:: c++ + + uint16x8_t function(); + + // This is still permitted with lax vector conversion enabled, even + // if the vector types have __clang_arm_mve_strict_polymorphism + int32x4_t result = function(); + +}]; +} diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 7d8231d140e4..ffa326932a1c 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6593,6 +6593,8 @@ def note_objc_unsafe_perform_selector_method_declared_here : Note< "method %0 that returns %1 declared here">; def err_attribute_arm_mve_alias : Error< "'__clang_arm_mve_alias' attribute can only be applied to an ARM MVE builtin">; +def err_attribute_arm_mv
[clang] fd569a1 - [libclang] Fix error handler in translateSourceLocation.
Author: Simon Tatham Date: 2021-06-18T13:43:14+01:00 New Revision: fd569a11b585d13cdceac2d890c2beda0fa5f0eb URL: https://github.com/llvm/llvm-project/commit/fd569a11b585d13cdceac2d890c2beda0fa5f0eb DIFF: https://github.com/llvm/llvm-project/commit/fd569a11b585d13cdceac2d890c2beda0fa5f0eb.diff LOG: [libclang] Fix error handler in translateSourceLocation. Given an invalid SourceLocation, translateSourceLocation will call clang_getNullLocation, and then do nothing with the result. But clang_getNullLocation has no side effects: it just constructs and returns a null CXSourceLocation value. Surely the intention was to //return// that null CXSourceLocation to the caller, instead of throwing it away and pressing on anyway. Reviewed By: miyuki Differential Revision: https://reviews.llvm.org/D104442 Added: Modified: clang/tools/libclang/CXSourceLocation.h Removed: diff --git a/clang/tools/libclang/CXSourceLocation.h b/clang/tools/libclang/CXSourceLocation.h index ce3d09e1c9eb8..c86f6850375bb 100644 --- a/clang/tools/libclang/CXSourceLocation.h +++ b/clang/tools/libclang/CXSourceLocation.h @@ -29,7 +29,7 @@ static inline CXSourceLocation translateSourceLocation(const SourceManager &SM, const LangOptions &LangOpts, SourceLocation Loc) { if (Loc.isInvalid()) -clang_getNullLocation(); +return clang_getNullLocation(); CXSourceLocation Result = { { &SM, &LangOpts, }, Loc.getRawEncoding() }; ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] e49985b - Remove unused parameter from parseMSInlineAsm.
Author: Simon Tatham Date: 2021-07-12T15:07:03+01:00 New Revision: e49985bb6065d4f5ea69fe578e326ec6d43a6b24 URL: https://github.com/llvm/llvm-project/commit/e49985bb6065d4f5ea69fe578e326ec6d43a6b24 DIFF: https://github.com/llvm/llvm-project/commit/e49985bb6065d4f5ea69fe578e326ec6d43a6b24.diff LOG: Remove unused parameter from parseMSInlineAsm. No implementation uses the `LocCookie` parameter at all. Errors are reported from inside that function by `llvm::SourceMgr`, and the instance of that at the clang call site arranges to pass the error messages back to a `ClangAsmParserCallback`, which is where the clang SourceLocation for the error is computed. (This is part of a patch series working towards the ability to make SourceLocation into a 64-bit type to handle larger translation units. But this particular change seems beneficial in its own right.) Reviewed By: miyuki Differential Revision: https://reviews.llvm.org/D105490 Added: Modified: clang/lib/Parse/ParseStmtAsm.cpp llvm/include/llvm/MC/MCParser/MCAsmParser.h llvm/lib/MC/MCParser/AsmParser.cpp llvm/lib/MC/MCParser/MasmParser.cpp Removed: diff --git a/clang/lib/Parse/ParseStmtAsm.cpp b/clang/lib/Parse/ParseStmtAsm.cpp index 9037895a3bbfc..e520151dcad76 100644 --- a/clang/lib/Parse/ParseStmtAsm.cpp +++ b/clang/lib/Parse/ParseStmtAsm.cpp @@ -633,9 +633,9 @@ StmtResult Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) { SmallVector, 4> OpExprs; SmallVector Constraints; SmallVector Clobbers; - if (Parser->parseMSInlineAsm(AsmLoc.getPtrEncoding(), AsmStringIR, NumOutputs, - NumInputs, OpExprs, Constraints, Clobbers, - MII.get(), IP.get(), Callback)) + if (Parser->parseMSInlineAsm(AsmStringIR, NumOutputs, NumInputs, OpExprs, + Constraints, Clobbers, MII.get(), IP.get(), + Callback)) return StmtError(); // Filter out "fpsw" and "mxcsr". 
They aren't valid GCC asm clobber diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h index 56188b7ebaec7..c9b3ab3256da8 100644 --- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h +++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h @@ -202,8 +202,8 @@ class MCAsmParser { /// Parse MS-style inline assembly. virtual bool parseMSInlineAsm( - void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, - unsigned &NumInputs, SmallVectorImpl> &OpDecls, + std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, + SmallVectorImpl> &OpDecls, SmallVectorImpl &Constraints, SmallVectorImpl &Clobbers, const MCInstrInfo *MII, const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) = 0; diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index 3bc668e699cbc..45e6dfee4ca4b 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -258,9 +258,9 @@ class AsmParser : public MCAsmParser { return LTODiscardSymbols.contains(Name); } - bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString, -unsigned &NumOutputs, unsigned &NumInputs, -SmallVectorImpl> &OpDecls, + bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs, +unsigned &NumInputs, +SmallVectorImpl> &OpDecls, SmallVectorImpl &Constraints, SmallVectorImpl &Clobbers, const MCInstrInfo *MII, const MCInstPrinter *IP, @@ -5927,8 +5927,8 @@ static int rewritesSort(const AsmRewrite *AsmRewriteA, } bool AsmParser::parseMSInlineAsm( -void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, -unsigned &NumInputs, SmallVectorImpl> &OpDecls, +std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs, +SmallVectorImpl> &OpDecls, SmallVectorImpl &Constraints, SmallVectorImpl &Clobbers, const MCInstrInfo *MII, const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) { diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index a91623770116a..a5a48d4b1e9d8 100644 --- 
a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -514,9 +514,9 @@ class MasmParser : public MCAsmParser { bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override; - bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString, -unsigned &NumOutputs, unsigned &NumInputs, -SmallVectorImpl> &OpDecls, + bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs, +unsigned &NumInputs, +SmallVectorImpl> &OpDecls, SmallVectorImpl &Constraints,
[clang] 60ea6f3 - [ARM] Allow selecting hard-float ABI in integer-only MVE.
Author: Simon Tatham Date: 2023-02-01T09:05:12Z New Revision: 60ea6f35a270d11c91770a2fc366888e7d3859f4 URL: https://github.com/llvm/llvm-project/commit/60ea6f35a270d11c91770a2fc366888e7d3859f4 DIFF: https://github.com/llvm/llvm-project/commit/60ea6f35a270d11c91770a2fc366888e7d3859f4.diff LOG: [ARM] Allow selecting hard-float ABI in integer-only MVE. Armv8.1-M can be configured to support the integer subset of the MVE vector instructions, and no floating point. In that situation, the FP and vector registers still exist, and so do the load, store and move instructions that transfer data in and out of them. So there's no reason the hard floating point ABI can't be supported, and you might reasonably want to use it, for the sake of intrinsics-based code passing explicit MVE vector types between functions. But the selection of the hard float ABI in the backend was gated on Subtarget->hasVFP2Base(), which is false in the case of integer MVE and no FP. As a result, you'd silently get the soft float ABI even if you deliberately tried to select the hard float ABI, e.g. with clang options such as --target=arm-none-eabi -mfloat-abi=hard -march=armv8.1m.main+nofp+mve The hard float ABI should have been gated on the weaker condition Subtarget->hasFPRegs(), because the only requirement for being able to pass arguments in the FP registers is that the registers themselves should exist. I haven't added a new test, because changing the existing CodeGen/Thumb2/float-ops.ll test seemed sufficient. But I've added a comment explaining why the results are expected to be what they are. 
Reviewed By: lenary Differential Revision: https://reviews.llvm.org/D142703 Added: Modified: clang/docs/ReleaseNotes.rst llvm/docs/ReleaseNotes.rst llvm/lib/Target/ARM/ARMFastISel.cpp llvm/lib/Target/ARM/ARMISelLowering.cpp llvm/test/CodeGen/Thumb2/float-ops.ll Removed: diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index c6139252e0c34..2b2ca8b2987f0 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -163,6 +163,13 @@ DWARF Support in Clang Arm and AArch64 Support in Clang +* The hard-float ABI is now available in Armv8.1-M configurations that + have integer MVE instructions (and therefore have FP registers) but + no scalar or vector floating point computation. Previously, trying + to select the hard-float ABI on such a target (via + ``-mfloat-abi=hard`` or a triple ending in ``hf``) would silently + use the soft-float ABI instead. + Floating Point Support in Clang --- diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index d628257a76904..a3f02992048a4 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -71,6 +71,10 @@ Changes to the AMDGPU Backend Changes to the ARM Backend -- +- The hard-float ABI is now available in Armv8.1-M configurations that + have integer MVE instructions (and therefore have FP registers) but + no scalar or vector floating point computation. + Changes to the AVR Backend -- diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp index 62a090f4bca81..60a6e9ade9234 100644 --- a/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -1842,7 +1842,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, case CallingConv::CXX_FAST_TLS: // Use target triple & subtarget features to do actual dispatch. if (Subtarget->isAAPCS_ABI()) { - if (Subtarget->hasVFP2Base() && + if (Subtarget->hasFPRegs() && TM.Options.FloatABIType == FloatABI::Hard && !isVarArg) return (Return ? 
RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); else diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 8a28e6b4e4fd2..07fa829731563 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2081,7 +2081,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, case CallingConv::Tail: if (!Subtarget->isAAPCS_ABI()) return CallingConv::ARM_APCS; -else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && +else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() && getTargetMachine().Options.FloatABIType == FloatABI::Hard && !isVarArg) return CallingConv::ARM_AAPCS_VFP; diff --git a/llvm/test/CodeGen/Thumb2/float-ops.ll b/llvm/test/CodeGen/Thumb2/float-ops.ll index 51f18afaf0a46..d2b1dd6f05a3f 100644 --- a/llvm/test/CodeGen/Thumb2/float-ops.ll +++ b/llvm/test/CodeGen/Thumb2/float-ops.ll @@ -83,7 +83,7 @@ entry: define float @rem_f(float %a, float %b) { entry: ; CHECK-LABEL: rem_f: -; NONE: bl fmodf +; NONE: {{b|bl}} fmodf ; HARD: b fmod
[clang] ceb21fa - [ARM] Fix how size-0 bitfields affect homogeneous aggregates.
Author: Simon Tatham Date: 2022-06-10T11:27:24+01:00 New Revision: ceb21fa4e49ddc8478371b41250f206082c5c67e URL: https://github.com/llvm/llvm-project/commit/ceb21fa4e49ddc8478371b41250f206082c5c67e DIFF: https://github.com/llvm/llvm-project/commit/ceb21fa4e49ddc8478371b41250f206082c5c67e.diff LOG: [ARM] Fix how size-0 bitfields affect homogeneous aggregates. By both AAPCS32 and AAPCS64, the test for whether an aggregate qualifies as homogeneous (either HFA or HVA) is based on the data layout alone. So any logical member of the structure that does not affect the data layout also should not affect homogeneity. In particular, an empty bitfield ('int : 0') should make no difference. In fact, clang considered it to make a difference in C but not in C++, and justified that policy as compatible with gcc. But that's considered a bug in gcc as well (at least for Arm targets), and it's fixed in gcc 12.1. This fix mimics gcc's: zero-sized bitfields are now ignored in all languages for the Arm (32- and 64-bit) ABIs. But I've left the previous behaviour unchanged in other ABIs, by means of adding an ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate query method which the Arm subclasses override. 
Reviewed By: lenary Differential Revision: https://reviews.llvm.org/D127197 Added: clang/test/CodeGen/homogeneous-aggregates.c Modified: clang/lib/CodeGen/ABIInfo.h clang/lib/CodeGen/TargetInfo.cpp Removed: diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h index 0d12183055e18..6214148adab93 100644 --- a/clang/lib/CodeGen/ABIInfo.h +++ b/clang/lib/CodeGen/ABIInfo.h @@ -100,6 +100,7 @@ namespace swiftcall { virtual bool isHomogeneousAggregateSmallEnough(const Type *Base, uint64_t Members) const; +virtual bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const; bool isHomogeneousAggregate(QualType Ty, const Type *&Base, uint64_t &Members) const; diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 5e97a946782ca..d481d1c2857bc 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -240,6 +240,11 @@ bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, return false; } +bool ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() const { + // For compatibility with GCC, ignore empty bitfields in C++ mode. + return getContext().getLangOpts().CPlusPlus; +} + LLVM_DUMP_METHOD void ABIArgInfo::dump() const { raw_ostream &OS = llvm::errs(); OS << "(ABIArgInfo Kind="; @@ -5213,8 +5218,7 @@ bool ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base, if (isEmptyRecord(getContext(), FT, true)) continue; - // For compatibility with GCC, ignore empty bitfields in C++ mode. 
- if (getContext().getLangOpts().CPlusPlus && + if (isZeroLengthBitfieldPermittedInHomogeneousAggregate() && FD->isZeroLengthBitField(getContext())) continue; @@ -5511,6 +5515,7 @@ class AArch64ABIInfo : public SwiftABIInfo { bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; + bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override; bool isIllegalVectorType(QualType Ty) const; @@ -5970,6 +5975,16 @@ bool AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, return Members <= 4; } +bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() +const { + // AAPCS64 says that the rule for whether something is a homogeneous + // aggregate is applied to the output of the data layout decision. So + // anything that doesn't affect the data layout also does not affect + // homogeneity. In particular, zero-length bitfields don't stop a struct + // being homogeneous. + return true; +} + Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF) const { ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true, @@ -6339,6 +6354,7 @@ class ARMABIInfo : public SwiftABIInfo { bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; + bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override; bool isEffectivelyAAPCS_VFP(unsigned callConvention, bool acceptHalf) const; @@ -7002,6 +7018,15 @@ bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base, return Members <= 4; } +bool ARMABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() const { + // AAPCS32 says that the rule for w
[clang] 9073b53 - [Clang,ARM] Add release note for D127197.
Author: Simon Tatham Date: 2022-06-10T15:19:33+01:00 New Revision: 9073b53e5d7f0bdc603a5c816300ac27644bc6a8 URL: https://github.com/llvm/llvm-project/commit/9073b53e5d7f0bdc603a5c816300ac27644bc6a8 DIFF: https://github.com/llvm/llvm-project/commit/9073b53e5d7f0bdc603a5c816300ac27644bc6a8.diff LOG: [Clang,ARM] Add release note for D127197. I should have put that in the original commit, but @lenary only just reminded me that it needed to be there. Added: Modified: clang/docs/ReleaseNotes.rst Removed: diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 1ead55633a09..5905fa2e917e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -458,6 +458,10 @@ ABI Changes in Clang such packing. Clang now matches the gcc behavior (except on Darwin and PS4). You can switch back to the old ABI behavior with the flag: ``-fclang-abi-compat=14.0``. +- When compiling C for ARM or AArch64, a zero-length bitfield in a ``struct`` + (e.g. ``int : 0``) no longer prevents the structure from being considered a + homogeneous floating-point or vector aggregate. The new behavior agrees with + the AAPCS specification, and matches the similar bug fix in GCC 12.1. OpenMP Support in Clang --- ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 45a9945 - [ARM, MVE] Add ACLE intrinsics for the vminv/vmaxv family.
Author: Simon Tatham Date: 2020-03-20T15:42:33Z New Revision: 45a9945b9ea95bd065d3c4e08d9089a309b24a23 URL: https://github.com/llvm/llvm-project/commit/45a9945b9ea95bd065d3c4e08d9089a309b24a23 DIFF: https://github.com/llvm/llvm-project/commit/45a9945b9ea95bd065d3c4e08d9089a309b24a23.diff LOG: [ARM,MVE] Add ACLE intrinsics for the vminv/vmaxv family. Summary: I've implemented these as target-specific IR intrinsics, because they're not //quite// enough like @llvm.experimental.vector.reduce.min (which doesn't take the extra scalar parameter). Also this keeps the predicated and unpredicated versions looking similar, and the floating-point minnm/maxnm versions fold into the same schema. We had a couple of min/max reductions already implemented, from the initial pathfinding exercise in D67158. Those were done by having separate IR intrinsic names for the signed and unsigned integer versions; as part of this commit, I've changed them to use a flag parameter indicating signedness, which is how we ended up deciding that the rest of the MVE intrinsics family ought to work. So now hopefully the whole lot is consistent. In the new llc test, the output code from the `v8f16` test functions looks quite unpleasant, but most of it is PCS lowering (you can't pass a `half` directly in or out of a function). In other circumstances, where you do something else with your `half` in the same function, it doesn't look nearly as nasty. 
Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard Reviewed By: MarkMurrayARM Subscribers: kristof.beyls, hiraditya, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D76490 Added: Modified: clang/include/clang/Basic/arm_mve.td clang/test/CodeGen/arm-mve-intrinsics/vminvq.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMISelLowering.cpp llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-intrinsics/vminvq.ll Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 45e45899de5f..d32f7fd92f2c 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -536,11 +536,42 @@ let params = T.Float in { (IRInt<"vmaxnma_predicated", [Vector,Predicate]> $a, $b, $pred)>; } +multiclass Reduction basetypes, + bit needSign = 0, + dag postCG = (seq (id $ret)), + dag accArg = (args Accumulator:$prev), + dag preCG = (seq)> { + defvar intArgsBase = (? $prev, $vec); + defvar intArgsUnpred = !con(intArgsBase, + !if(needSign, (? (unsignedflag Scalar)), (?))); + defvar intArgsPred = !con(intArgsUnpred, (? 
$pred)); + defvar intUnpred = !setop(intArgsUnpred, IRInt); + defvar intPred = !setop(intArgsPred, IRInt< +basename#"_predicated", !listconcat(basetypes, [Predicate])>); + + def "": Intrinsic< +Accumulator, !con(accArg, (args Vector:$vec)), +!con(preCG, (seq intUnpred:$ret), postCG)>; + def _p: Intrinsic< +Accumulator, !con(accArg, (args Vector:$vec, Predicate:$pred)), +!con(preCG, (seq intPred:$ret), postCG)>; +} + let params = T.Int in { -def vminvq: Intrinsic $prev, $vec))>; -def vmaxvq: Intrinsic $prev, $vec))>; +defm vminvq: Reduction; +defm vmaxvq: Reduction; +} + +let params = T.Signed in { +defm vminavq: Reduction; +defm vmaxavq: Reduction; +} + +let params = T.Float in { +defm vminnmvq: Reduction; +defm vmaxnmvq: Reduction; +defm vminnmavq: Reduction; +defm vmaxnmavq: Reduction; } foreach half = [ "b", "t" ] in { diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c index 1cf4d0ee198e..0d484bf98f7a 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c @@ -1,97 +1,853 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa | FileCheck %s
[clang] 1adfa4c - [ARM, MVE] Add ACLE intrinsics for the vaddv/vaddlv family.
Author: Simon Tatham Date: 2020-03-20T15:42:33Z New Revision: 1adfa4c99169733dedb67b4f7ab03d2fbb196162 URL: https://github.com/llvm/llvm-project/commit/1adfa4c99169733dedb67b4f7ab03d2fbb196162 DIFF: https://github.com/llvm/llvm-project/commit/1adfa4c99169733dedb67b4f7ab03d2fbb196162.diff LOG: [ARM,MVE] Add ACLE intrinsics for the vaddv/vaddlv family. Summary: I've implemented them as target-specific IR intrinsics rather than using `@llvm.experimental.vector.reduce.add`, on the grounds that the 'experimental' intrinsic doesn't currently have much code generation benefit, and my replacements encapsulate the sign- or zero-extension so that you don't expose the illegal MVE vector type (`<4 x i64>`) in IR. The machine instructions come in two versions: with and without an input accumulator. My new IR intrinsics, like the 'experimental' one, don't take an accumulator parameter: we represent that by just adding on the input value using an ordinary i32 or i64 add. So if you write the `vaddvaq` C-language intrinsic with an input accumulator of zero, it can be optimised to VADDV, and conversely, if you write something like `x += vaddvq(y)` then that can be combined into VADDVA. Most of this is achieved in isel lowering, by converting these IR intrinsics into the existing `ARMISD::VADDV` family of custom SDNode types. For the difficult case (64-bit accumulators), isel lowering already implements the optimization of folding an addition into a VADDLV to make a VADDLVA; so once we've made a VADDLV, our job is already done, except that I had to introduce a parallel set of ARMISD nodes for the //predicated// forms of VADDLV. For the simpler VADDV, we handle the predicated form by just leaving the IR intrinsic alone and matching it in an ordinary dag pattern. 
Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard Reviewed By: dmgreen Subscribers: kristof.beyls, hiraditya, danielkiss, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D76491 Added: clang/test/CodeGen/arm-mve-intrinsics/vaddv.c llvm/test/CodeGen/Thumb2/mve-intrinsics/vaddv.ll Modified: clang/include/clang/Basic/arm_mve.td llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMISelLowering.cpp llvm/lib/Target/ARM/ARMISelLowering.h llvm/lib/Target/ARM/ARMInstrMVE.td Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index d32f7fd92f2c..25daae2a0a25 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -1445,6 +1445,33 @@ multiclass MVEBinaryVectorHoriz64R { "vrmlldavha">; } +multiclass VADDV { + defvar accArg = !if(acc, (args Scalar:$acc), (args)); + defvar predArg = !if(pred, (args Predicate:$pred), (args)); + defvar intrinsic = !if(pred, + IRInt, + IRInt); + defvar intCG = !con((intrinsic $v, (unsignedflag Scalar)), + !if(pred, (? 
$pred), (?))); + defvar accCG = !if(acc, (add intCG, $acc), intCG); + + def "": Intrinsic; +} + +let params = T.Int in { +defm vaddvq: VADDV<0, 0, "addv", Scalar32>; +defm vaddvaq : VADDV<1, 0, "addv", Scalar32>; +defm vaddvq_p : VADDV<0, 1, "addv", Scalar32>; +defm vaddvaq_p : VADDV<1, 1, "addv", Scalar32>; +} + +let params = [s32, u32] in { +defm vaddlvq: VADDV<0, 0, "addlv", Scalar64>; +defm vaddlvaq : VADDV<1, 0, "addlv", Scalar64>; +defm vaddlvq_p : VADDV<0, 1, "addlv", Scalar64>; +defm vaddlvaq_p : VADDV<1, 1, "addlv", Scalar64>; +} + let params = T.Int in { def vabavq : Intrinsic (unsignedflag Scalar), $a, $b, $c)>; diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vaddv.c b/clang/test/CodeGen/arm-mve-intrinsics/vaddv.c new file mode 100644 index ..6bacc2775881 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vaddv.c @@ -0,0 +1,470 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + // RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + // RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vaddvq_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> [[A:%.*]], i32 0) +// CHECK-NEXT:ret i32 [[TMP0]] +// +int32_t test_vaddvq_s8(int8x16_t a) { +#ifdef POLYMORPHIC + return vaddvq(a); +#else /* POLYMORPHIC */ + return vaddvq_s8(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vaddvq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call i32 @llvm.arm.mve.addv.v8i16(<8 x i16> [[A:%.*]], i32 0) +// CHECK-NEXT:ret i32 [[TMP0]] +// +int32_t test_vaddvq_s16(in
[clang] f282b6a - [ReleaseNotes, ARM] MVE intrinsics are all implemented!
Author: Simon Tatham Date: 2020-03-24T11:42:25Z New Revision: f282b6ab23a0f6ede0f1c8b6ccb5ad3c17a5ed2f URL: https://github.com/llvm/llvm-project/commit/f282b6ab23a0f6ede0f1c8b6ccb5ad3c17a5ed2f DIFF: https://github.com/llvm/llvm-project/commit/f282b6ab23a0f6ede0f1c8b6ccb5ad3c17a5ed2f.diff LOG: [ReleaseNotes,ARM] MVE intrinsics are all implemented! Summary: The next release of LLVM will support the full ACLE spec for MVE intrinsics, so it's worth saying so in the release notes. Reviewers: kristof.beyls Reviewed By: kristof.beyls Subscribers: cfe-commits, hans, dmgreen, llvm-commits Tags: #llvm, #clang Differential Revision: https://reviews.llvm.org/D76513 Added: Modified: clang/docs/ReleaseNotes.rst llvm/docs/ReleaseNotes.rst Removed: diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 9f20c271b50e..ad13fb1b3e95 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -57,6 +57,10 @@ Improvements to Clang's diagnostics Non-comprehensive list of changes in this release - +- For the ARM target, C-language intrinsics are now provided for the full Arm + v8.1-M MVE instruction set. ``<arm_mve.h>`` supports the complete API defined + in the Arm C Language Extensions. + New Compiler Flags -- diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index bbfcc6076c01..4f6e759bbeb3 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -72,6 +72,9 @@ Changes to the ARM Backend During this release ... +* Implemented C-language intrinsics for the full Arm v8.1-M MVE instruction + set. ``<arm_mve.h>`` now supports the complete API defined in the Arm C + Language Extensions. Changes to the MIPS Target -- ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 8f1651c - [ARM, MVE] Add missing tests for vqdmlash intrinsics.
Author: Simon Tatham Date: 2020-03-25T09:46:16Z New Revision: 8f1651ccead149fbd2e6fe692fb8a7f787a222bd URL: https://github.com/llvm/llvm-project/commit/8f1651ccead149fbd2e6fe692fb8a7f787a222bd DIFF: https://github.com/llvm/llvm-project/commit/8f1651ccead149fbd2e6fe692fb8a7f787a222bd.diff LOG: [ARM,MVE] Add missing tests for vqdmlash intrinsics. Summary: These were accidentally left out of D76123. I added tests for the other three instructions in this small cross-product family (vqdmlah, vqrdmlah, vqrdmlash) but missed this one. Reviewers: miyuki Reviewed By: miyuki Subscribers: kristof.beyls, dmgreen, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D76714 Added: Modified: clang/test/CodeGen/arm-mve-intrinsics/ternary.c llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll Removed: diff --git a/clang/test/CodeGen/arm-mve-intrinsics/ternary.c b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c index 90e258715d26..77eb8d41fe58 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/ternary.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c @@ -357,6 +357,47 @@ int32x4_t test_vqdmlahq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { #endif /* POLYMORPHIC */ } +// CHECK-LABEL: @test_vqdmlashq_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = zext i8 [[ADD:%.*]] to i32 +// CHECK-NEXT:[[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlash.v16i8(<16 x i8> [[M1:%.*]], <16 x i8> [[M2:%.*]], i32 [[TMP0]]) +// CHECK-NEXT:ret <16 x i8> [[TMP1]] +// +int8x16_t test_vqdmlashq_n_s8(int8x16_t m1, int8x16_t m2, int8_t add) { +#ifdef POLYMORPHIC + return vqdmlashq(m1, m2, add); +#else /* POLYMORPHIC */ + return vqdmlashq_n_s8(m1, m2, add); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqdmlashq_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[ADD:%.*]] to i32 +// CHECK-NEXT:[[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlash.v8i16(<8 x i16> [[M1:%.*]], <8 x i16> [[M2:%.*]], i32 [[TMP0]]) +// CHECK-NEXT:ret <8 x i16> [[TMP1]] +// 
+int16x8_t test_vqdmlashq_n_s16(int16x8_t m1, int16x8_t m2, int16_t add) { +#ifdef POLYMORPHIC + return vqdmlashq(m1, m2, add); +#else /* POLYMORPHIC */ + return vqdmlashq_n_s16(m1, m2, add); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqdmlashq_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlash.v4i32(<4 x i32> [[M1:%.*]], <4 x i32> [[M2:%.*]], i32 [[ADD:%.*]]) +// CHECK-NEXT:ret <4 x i32> [[TMP0]] +// +int32x4_t test_vqdmlashq_n_s32(int32x4_t m1, int32x4_t m2, int32_t add) { +#ifdef POLYMORPHIC + return vqdmlashq(m1, m2, add); +#else /* POLYMORPHIC */ + return vqdmlashq_n_s32(m1, m2, add); +#endif /* POLYMORPHIC */ +} + // CHECK-LABEL: @test_vqrdmlahq_n_s8( // CHECK-NEXT: entry: // CHECK-NEXT:[[TMP0:%.*]] = zext i8 [[C:%.*]] to i32 @@ -810,6 +851,53 @@ int32x4_t test_vqdmlahq_m_n_s32(int32x4_t a, int32x4_t b, int32_t c, mve_pred16_ #endif /* POLYMORPHIC */ } +// CHECK-LABEL: @test_vqdmlashq_m_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = zext i8 [[ADD:%.*]] to i32 +// CHECK-NEXT:[[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT:[[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]]) +// CHECK-NEXT:[[TMP3:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlash.predicated.v16i8.v16i1(<16 x i8> [[M1:%.*]], <16 x i8> [[M2:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]]) +// CHECK-NEXT:ret <16 x i8> [[TMP3]] +// +int8x16_t test_vqdmlashq_m_n_s8(int8x16_t m1, int8x16_t m2, int8_t add, mve_pred16_t p) { +#ifdef POLYMORPHIC + return vqdmlashq_m(m1, m2, add, p); +#else /* POLYMORPHIC */ + return vqdmlashq_m_n_s8(m1, m2, add, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqdmlashq_m_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[ADD:%.*]] to i32 +// CHECK-NEXT:[[TMP1:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT:[[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]]) +// CHECK-NEXT:[[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqdmlash.predicated.v8i16.v8i1(<8 x i16> [[M1:%.*]], <8 x i16> [[M2:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]]) +// CHECK-NEXT:ret <8 x i16> [[TMP3]] +// +int16x8_t test_vqdmlashq_m_n_s16(int16x8_t m1, int16x8_t m2, int16_t add, mve_pred16_t p) { +#ifdef POLYMORPHIC + return vqdmlashq_m(m1, m2, add, p); +#else /* POLYMORPHIC */ + return vqdmlashq_m_n_s16(m1, m2, add, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqdmlashq_m_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT:[[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT:[[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlash.predicated.v4i32.v4i1(<4 x i32> [[M1:%.*]], <4 x i32> [[M2:%.*]], i32 [[ADD:%.*]], <4 x
[clang] 8c26f42 - [clang, ARM, MVE] Remove redundant #includes in test file.
Author: Simon Tatham Date: 2020-02-27T09:39:35Z New Revision: 8c26f42fe90e3f8612d2f57a3c9c5e7fcff5e91e URL: https://github.com/llvm/llvm-project/commit/8c26f42fe90e3f8612d2f57a3c9c5e7fcff5e91e DIFF: https://github.com/llvm/llvm-project/commit/8c26f42fe90e3f8612d2f57a3c9c5e7fcff5e91e.diff LOG: [clang,ARM,MVE] Remove redundant #includes in test file. I made that file by pasting together several pieces, and forgot to take out the #include from the tops of the later ones, so the test was pointlessly including the same header five times. NFC. Added: Modified: clang/test/CodeGen/arm-mve-intrinsics/absneg.c Removed: diff --git a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c index 94339c834809..4f888093d8b8 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c @@ -527,7 +527,6 @@ int32x4_t test_vqnegq_s32(int32x4_t a) return vqnegq_s32(a); #endif /* POLYMORPHIC */ } -#include // CHECK-LABEL: @test_vnegq_m_f16( // CHECK-NEXT: entry: @@ -689,8 +688,6 @@ int32x4_t test_vnegq_x_s32(int32x4_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -#include - // CHECK-LABEL: @test_vabsq_m_f16( // CHECK-NEXT: entry: // CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 @@ -851,8 +848,6 @@ int32x4_t test_vabsq_x_s32(int32x4_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -#include - // CHECK-LABEL: @test_vqnegq_m_s8( // CHECK-NEXT: entry: // CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 @@ -901,8 +896,6 @@ int32x4_t test_vqnegq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) #endif /* POLYMORPHIC */ } -#include - // CHECK-LABEL: @test_vqabsq_m_s8( // CHECK-NEXT: entry: // CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] a41ecf0 - [ARM, MVE] Add ACLE intrinsics for VQMOV[U]N family.
Author: Simon Tatham Date: 2020-03-02T10:33:30Z New Revision: a41ecf0eb05190c8597f98b8d41d7a6e678aec0b URL: https://github.com/llvm/llvm-project/commit/a41ecf0eb05190c8597f98b8d41d7a6e678aec0b DIFF: https://github.com/llvm/llvm-project/commit/a41ecf0eb05190c8597f98b8d41d7a6e678aec0b.diff LOG: [ARM,MVE] Add ACLE intrinsics for VQMOV[U]N family. Summary: These instructions work like VMOVN (narrowing a vector of wide values to half size, and overwriting every other lane of an output register with the result), except that the narrowing conversion is saturating. They come in three signedness flavours: signed to signed, unsigned to unsigned, and signed to unsigned. All are represented in IR by a target-specific intrinsic that takes two separate 'unsigned' flags. Reviewers: MarkMurrayARM, dmgreen, miyuki, ostannard Reviewed By: dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D75252 Added: clang/test/CodeGen/arm-mve-intrinsics/vqmovn.c llvm/test/CodeGen/Thumb2/mve-intrinsics/vqmovn.ll Modified: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index efc6be1158b8..c64a75ffeb8e 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -514,6 +514,33 @@ defm vmovntq: vmovn<1, (zip (vreinterpret $inactive, Vector), $a)>; defm vmovnbq: vmovn<0, (zip $a, (vreinterpret (vrev $inactive, (bitsize Scalar)), Vector))>; +multiclass vqmovn { + defvar RetVector = VecOf; + + let params = [s16, u16, s32, u32] in { +def : Intrinsic< + RetVector, (args RetVector:$inactive, Vector:$a), + (IRInt<"vqmovn", [RetVector, Vector]> + $inactive, $a, (unsignedflag RetScalar), (unsignedflag Scalar), top)>, + NameOverride; +def: Intrinsic< + RetVector, (args 
RetVector:$inactive, Vector:$a, Predicate:$pred), + (IRInt<"vqmovn_predicated", [RetVector, Vector, Predicate]> + $inactive, $a, (unsignedflag RetScalar), (unsignedflag Scalar), + top, $pred)>, + NameOverride; + } +} + +let params = [s16, s32, u16, u32] in { + defm vqmovntq: vqmovn<1, HalfScalar>; + defm vqmovnbq: vqmovn<0, HalfScalar>; +} +let params = [s16, s32] in { + defm vqmovuntq: vqmovn<1, UHalfScalar>; + defm vqmovunbq: vqmovn<0, UHalfScalar>; +} + multiclass vrnd { let params = T.Float in { def "": Intrinsic; diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index dbcad78cce75..daf73871f052 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -323,8 +323,10 @@ def SVector: VecOf; // UHalfVector is a vector of half-sized _unsigned integers_. def DblVector: VecOf>; def DblPredicate: PredOf>; -def HalfVector: VecOf>; -def UHalfVector: VecOf>>; +def HalfScalar: HalfSize; +def HalfVector: VecOf; +def UHalfScalar: Unsigned>; +def UHalfVector: VecOf; // Expands to the 32-bit integer of the same signedness as Scalar. 
def Scalar32: CopyKind; diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vqmovn.c b/clang/test/CodeGen/arm-mve-intrinsics/vqmovn.c new file mode 100644 index ..24c3fd550bf4 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vqmovn.c @@ -0,0 +1,366 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -DPOLYMORPHIC -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vqmovnbq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, i32 0, i32 0) +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +int8x16_t test_vqmovnbq_s16(int8x16_t a, int16x8_t b) +{ +#ifdef POLYMORPHIC +return vqmovnbq(a, b); +#else /* POLYMORPHIC */ +return vqmovnbq_s16(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqmovnbq_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, i32 0, i32 0) +// CHECK-NEXT:ret <8 x i16> [[TMP0]] +// +int16x8_t test_vqmovnbq_s32(int16x8_t a, int32x4_t b) +{ +#ifdef POLYMORPHIC +return vqmovnbq(a, b); +#else /* POLYMORPHIC */ +return
[clang] 1a8cbfa - [ARM, MVE] Add ACLE intrinsics for VCVT[ANPM] family.
Author: Simon Tatham Date: 2020-03-02T10:33:30Z New Revision: 1a8cbfa514ff83ac62c20deec0d9ea2c6606bbdf URL: https://github.com/llvm/llvm-project/commit/1a8cbfa514ff83ac62c20deec0d9ea2c6606bbdf DIFF: https://github.com/llvm/llvm-project/commit/1a8cbfa514ff83ac62c20deec0d9ea2c6606bbdf.diff LOG: [ARM,MVE] Add ACLE intrinsics for VCVT[ANPM] family. Summary: These instructions convert a vector of floats to a vector of integers of the same size, with assorted non-default rounding modes. Implemented in IR as target-specific intrinsics, because as far as I can see there are no matches for that functionality in the standard IR intrinsics list. Reviewers: MarkMurrayARM, dmgreen, miyuki, ostannard Reviewed By: dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D75255 Added: clang/test/CodeGen/arm-mve-intrinsics/vcvt_anpm.c llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt_anpm.ll Modified: clang/include/clang/Basic/arm_mve.td llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index dfdb101d587f..c1cc10b09dc6 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -482,11 +482,25 @@ multiclass float_int_conversions, NameOverride<"vcvtq_" # IScalar>; + + foreach suffix = ["a","n","p","m"] in +def : Intrinsic +(unsignedflag IScalar), $a)>, + NameOverride<"vcvt"#suffix#"q_" # IScalar>; } defm vcvtq: IntrinsicMX $a, (unsignedflag IScalar), $pred, $inactive), 1, "_" # IScalar, PNT_2Type, PNT_None>; + +foreach suffix = ["a","n","p","m"] in { + defm "vcvt"#suffix#"q" : IntrinsicMX< + IVector, (args FVector:$a, Predicate:$pred), + (IRInt<"vcvt"#suffix#"_predicated", [IVector, FVector, Predicate]> + (unsignedflag IScalar), $inactive, $a, $pred), + 1, "_" # IScalar, PNT_2Type, PNT_None>; +} } } diff --git 
a/clang/test/CodeGen/arm-mve-intrinsics/vcvt_anpm.c b/clang/test/CodeGen/arm-mve-intrinsics/vcvt_anpm.c new file mode 100644 index ..e5dbd4c8f68b --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt_anpm.c @@ -0,0 +1,614 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -DPOLYMORPHIC -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vcvtaq_s16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcvta.v8i16.v8f16(i32 0, <8 x half> [[A:%.*]]) +// CHECK-NEXT:ret <8 x i16> [[TMP0]] +// +int16x8_t test_vcvtaq_s16_f16(float16x8_t a) +{ +return vcvtaq_s16_f16(a); +} + +// CHECK-LABEL: @test_vcvtaq_s32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcvta.v4i32.v4f32(i32 0, <4 x float> [[A:%.*]]) +// CHECK-NEXT:ret <4 x i32> [[TMP0]] +// +int32x4_t test_vcvtaq_s32_f32(float32x4_t a) +{ +return vcvtaq_s32_f32(a); +} + +// CHECK-LABEL: @test_vcvtaq_u16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcvta.v8i16.v8f16(i32 1, <8 x half> [[A:%.*]]) +// CHECK-NEXT:ret <8 x i16> [[TMP0]] +// +uint16x8_t test_vcvtaq_u16_f16(float16x8_t a) +{ +return vcvtaq_u16_f16(a); +} + +// CHECK-LABEL: @test_vcvtaq_u32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcvta.v4i32.v4f32(i32 1, <4 x float> [[A:%.*]]) +// CHECK-NEXT:ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vcvtaq_u32_f32(float32x4_t a) +{ +return vcvtaq_u32_f32(a); +} + +// CHECK-LABEL: @test_vcvtmq_s16_f16( +// CHECK-NEXT: entry: 
+// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcvtm.v8i16.v8f16(i32 0, <8 x half> [[A:%.*]]) +// CHECK-NEXT:ret <8 x i16> [[TMP0]] +// +int16x8_t test_vcvtmq_s16_f16(float16x8_t a) +{ +return vcvtmq_s16_f16(a); +} + +// CHECK-LABEL: @test_vcvtmq_s32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vcvtm.v4i32.v4f32(i32 0, <4 x float> [[A:%.*]]) +// CHECK-NEXT:ret <4 x i32> [[TMP0]] +// +int32x4_t test_vcvtmq_s32_f32(float32x4_t a) +{ +return vcvtmq_s32_f32(a); +} + +// CHECK-LABEL: @test_vcvtmq_u16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vcvtm.v
[clang] b08d2dd - [ARM, MVE] Add ACLE intrinsics for VCVT.F32.F16 family.
Author: Simon Tatham Date: 2020-03-02T10:33:30Z New Revision: b08d2ddd69b4a2209930b31fe456b4d7c1ce148f URL: https://github.com/llvm/llvm-project/commit/b08d2ddd69b4a2209930b31fe456b4d7c1ce148f DIFF: https://github.com/llvm/llvm-project/commit/b08d2ddd69b4a2209930b31fe456b4d7c1ce148f.diff LOG: [ARM,MVE] Add ACLE intrinsics for VCVT.F32.F16 family. Summary: These instructions make a vector of `<4 x float>` by widening every other lane of a vector of `<8 x half>`. I wondered about representing these using standard IR, along the lines of a shufflevector to extract elements of the input into a `<4 x half>` followed by an `fpext` to turn that into `<4 x float>`. But it looks as if that would take a lot of work in isel lowering to make it match any pattern I could sensibly write in Tablegen, and also I haven't been able to think of any other case where that pattern might be generated in IR, so there wouldn't be any extra code generation win from doing it that way. Therefore, I've just used another target-specific intrinsic. We can always change it to the other way later if anyone thinks of a good reason. (In order to put the intrinsic definition near similar things in `IntrinsicsARM.td`, I've also lifted the definition of the `MVEMXPredicated` multiclass higher up the file, without changing it.) 
Reviewers: MarkMurrayARM, dmgreen, miyuki, ostannard Reviewed By: miyuki Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D75254 Added: Modified: clang/include/clang/Basic/arm_mve.td clang/test/CodeGen/arm-mve-intrinsics/vcvt.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt.ll Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index c64a75ffeb8e..dfdb101d587f 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -453,6 +453,15 @@ foreach half = [ "b", "t" ] in { VecOf, (args VecOf:$inactive, Vector:$a, PredOf:$pred), (IRInt<"vcvt_narrow_predicated"> $inactive, $a, halfconst, $pred)>; } // params = [f32], pnt = PNT_None + + let params = [f16], pnt = PNT_None in { +def vcvt#half#q_f32: Intrinsic, (args Vector:$a), + (IRInt<"vcvt_widen"> $a, halfconst)>; +defm vcvt#half#q: IntrinsicMX< + VecOf, (args Vector:$a, PredOf:$pred), + (IRInt<"vcvt_widen_predicated"> $inactive, $a, halfconst, $pred), + 1, "_f32">; + } // params = [f16], pnt = PNT_None } // loop over half = "b", "t" multiclass float_int_conversions { diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c index 0391b77e365f..d03ac31a8024 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c @@ -697,3 +697,71 @@ uint32x4_t test_vcvtq_x_n_u32_f32(float32x4_t a, mve_pred16_t p) { return vcvtq_x_n_u32_f32(a, 32, p); } + +// CHECK-LABEL: @test_vcvtbq_f32_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> [[A:%.*]], i32 0) +// CHECK-NEXT:ret <4 x float> [[TMP0]] +// +float32x4_t test_vcvtbq_f32_f16(float16x8_t a) +{ +return vcvtbq_f32_f16(a); +} + +// CHECK-LABEL: @test_vcvttq_f32_f16( +// CHECK-NEXT: entry: +// 
CHECK-NEXT:[[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen(<8 x half> [[A:%.*]], i32 1) +// CHECK-NEXT:ret <4 x float> [[TMP0]] +// +float32x4_t test_vcvttq_f32_f16(float16x8_t a) +{ +return vcvttq_f32_f16(a); +} + +// CHECK-LABEL: @test_vcvtbq_m_f32_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT:[[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT:[[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 0, <4 x i1> [[TMP1]]) +// CHECK-NEXT:ret <4 x float> [[TMP2]] +// +float32x4_t test_vcvtbq_m_f32_f16(float32x4_t inactive, float16x8_t a, mve_pred16_t p) +{ +return vcvtbq_m_f32_f16(inactive, a, p); +} + +// CHECK-LABEL: @test_vcvttq_m_f32_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT:[[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT:[[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen.predicated(<4 x float> [[INACTIVE:%.*]], <8 x half> [[A:%.*]], i32 1, <4 x i1> [[TMP1]]) +// CHECK-NEXT:ret <4 x float> [[TMP2]] +// +float32x4_t test_vcvttq_m_f32_f16(float32x4_t inactive, float16x8_t a, mve_pred16_t p) +{ +return vcvttq_m_f32_f16(inactive, a, p); +} + +// CHECK-LABEL: @test_vcvtbq_x_f32_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]]
[clang] 810127f - [ARM,MVE] Add the `vsbciq` intrinsics.
Author: Simon Tatham Date: 2020-03-04T08:49:27Z New Revision: 810127f6ab5d5d7e7d6b8c3ae0b96f2027437ca8 URL: https://github.com/llvm/llvm-project/commit/810127f6ab5d5d7e7d6b8c3ae0b96f2027437ca8 DIFF: https://github.com/llvm/llvm-project/commit/810127f6ab5d5d7e7d6b8c3ae0b96f2027437ca8.diff LOG: [ARM,MVE] Add the `vsbciq` intrinsics. Summary: These are exactly parallel to the existing `vadciq` intrinsics, which we implemented last year as part of the original MVE intrinsics framework setup. Just like VADC/VADCI, the MVE VSBC/VSBCI instructions deliver two outputs, both of which the intrinsic exposes: a modified vector register and a carry flag. So they have to be instruction-selected in C++ rather than Tablegen. However, in this case, that's trivial: the same C++ isel routine we already have for VADC works unchanged, and all we have to do is to pass it a different instruction id. Reviewers: MarkMurrayARM, dmgreen, miyuki, ostannard Reviewed By: miyuki Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D75444 Added: Modified: clang/include/clang/Basic/arm_mve.td clang/test/CodeGen/arm-mve-intrinsics/vadc.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/test/CodeGen/Thumb2/mve-intrinsics/vadc.ll Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index c1cc10b09dc6..fd04dfde95c2 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -1139,27 +1139,31 @@ defm sqrshr: ScalarSaturatingShiftReg; def lsll: LongScalarShift $lo, $hi, $sh)>; def asrl: LongScalarShift $lo, $hi, $sh)>; +multiclass vadcsbc { + def q: Intrinsic:$carry), + (seq (IRInt $a, $b, (shl (load $carry), 29)):$pair, + (store (and 1, (lshr (xval $pair, 1), 29)), $carry), + (xval $pair, 0))>; + def iq: Intrinsic:$carry), + (seq (IRInt $a, $b, 0):$pair, + (store (and 1, (lshr (xval $pair, 1), 29)), $carry), + 
(xval $pair, 0))>; + def q_m: Intrinsic:$carry, Predicate:$pred), + (seq (IRInt $inactive, $a, $b, + (shl (load $carry), 29), $pred):$pair, + (store (and 1, (lshr (xval $pair, 1), 29)), $carry), + (xval $pair, 0))>; + def iq_m: Intrinsic:$carry, Predicate:$pred), + (seq (IRInt $inactive, $a, $b, + 0, $pred):$pair, + (store (and 1, (lshr (xval $pair, 1), 29)), $carry), + (xval $pair, 0))>; +} let params = T.Int32 in { -def vadcq: Intrinsic:$carry), -(seq (IRInt<"vadc", [Vector]> $a, $b, (shl (load $carry), 29)):$pair, - (store (and 1, (lshr (xval $pair, 1), 29)), $carry), - (xval $pair, 0))>; -def vadciq: Intrinsic:$carry), -(seq (IRInt<"vadc", [Vector]> $a, $b, 0):$pair, - (store (and 1, (lshr (xval $pair, 1), 29)), $carry), - (xval $pair, 0))>; -def vadcq_m: Intrinsic:$carry, Predicate:$pred), -(seq (IRInt<"vadc_predicated", [Vector, Predicate]> $inactive, $a, $b, - (shl (load $carry), 29), $pred):$pair, - (store (and 1, (lshr (xval $pair, 1), 29)), $carry), - (xval $pair, 0))>; -def vadciq_m: Intrinsic:$carry, Predicate:$pred), -(seq (IRInt<"vadc_predicated", [Vector, Predicate]> $inactive, $a, $b, - 0, $pred):$pair, - (store (and 1, (lshr (xval $pair, 1), 29)), $carry), - (xval $pair, 0))>; + defm vadc: vadcsbc; + defm vsbc: vadcsbc; } multiclass VectorComplexAddPred { diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vadc.c b/clang/test/CodeGen/arm-mve-intrinsics/vadc.c index 94fa1d1b00f2..f5e6c7d33983 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vadc.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vadc.c @@ -87,3 +87,163 @@ int32x4_t test_vadcq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, unsigne return vadcq_m_s32(inactive, a, b, carry, p); #endif /* POLYMORPHIC */ } + +// CHECK-LABEL: @test_vsbciq_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0) +// CHECK-NEXT:[[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 1 +// 
CHECK-NEXT:[[TMP2:%.*]] = lshr i32 [[TMP1]], 29 +// CHECK-NEXT:[[TMP3:%.*]] = and i32 1, [[TMP2]] +// CHECK-NEXT:store i32 [[TMP3]], i32* [[CARRY_OUT:%.*]], align 4 +// CHECK-NEXT:[[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0 +// CHECK-NEXT:ret <4 x i32> [[TMP4]] +// +int32x4_t test_vsbciq_s32(int32x4_t a, int32x4_t b, unsigned *carry_out) { +#ifdef POLYMORPHIC + return vsbciq(a, b, carry_out); +#else /* POLYMORPHIC */ + return vsbciq_s32(a, b, carry_out); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vsbciq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]]
[clang] 068b2f3 - [ARM,MVE] Add the `vshlcq` intrinsics.
Author: Simon Tatham Date: 2020-03-04T08:49:27Z New Revision: 068b2f313c7d27d9f6445df12d4d45d2d8c00898 URL: https://github.com/llvm/llvm-project/commit/068b2f313c7d27d9f6445df12d4d45d2d8c00898 DIFF: https://github.com/llvm/llvm-project/commit/068b2f313c7d27d9f6445df12d4d45d2d8c00898.diff LOG: [ARM,MVE] Add the `vshlcq` intrinsics. Summary: The VSHLC instruction performs a left shift of a whole vector register by an immediate shift count up to 32, shifting in new bits at the low end from a GPR and delivering the shifted-out bits from the high end back into the same GPR. Since the instruction produces two outputs (the shifted vector register and the output GPR of shifted-out bits), it has to be instruction-selected in C++ rather than Tablegen. Reviewers: MarkMurrayARM, dmgreen, miyuki, ostannard Reviewed By: miyuki Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D75445 Added: clang/test/CodeGen/arm-mve-intrinsics/vshlc.c llvm/test/CodeGen/Thumb2/mve-intrinsics/vshlc.ll Modified: clang/include/clang/Basic/arm_mve.td llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index fd04dfde95c2..d9a2035e8a0e 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -1166,6 +1166,22 @@ let params = T.Int32 in { defm vsbc: vadcsbc; } +let params = T.Int in { + def vshlcq: Intrinsic< +Vector, (args Vector:$v, Ptr:$ps, imm_1to32:$imm), +(seq (load $ps):$s, + (IRInt<"vshlc", [Vector]> $v, $s, $imm):$pair, + (store (xval $pair, 0), $ps), + (xval $pair, 1))>; + def vshlcq_m: Intrinsic< +Vector, (args Vector:$v, Ptr:$ps, imm_1to32:$imm, Predicate:$pred), +(seq (load $ps):$s, + (IRInt<"vshlc_predicated", [Vector, Predicate]> + $v, $s, $imm, $pred):$pair, + (store (xval $pair, 0), $ps), + (xval $pair, 1))>; +} + multiclass VectorComplexAddPred 
{ def "" : Intrinsic not_halving, angle, $a, $b)>; diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vshlc.c b/clang/test/CodeGen/arm-mve-intrinsics/vshlc.c new file mode 100644 index ..1a53a90f26fa --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vshlc.c @@ -0,0 +1,221 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vshlcq_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4 +// CHECK-NEXT:[[TMP1:%.*]] = call { i32, <16 x i8> } @llvm.arm.mve.vshlc.v16i8(<16 x i8> [[A:%.*]], i32 [[TMP0]], i32 18) +// CHECK-NEXT:[[TMP2:%.*]] = extractvalue { i32, <16 x i8> } [[TMP1]], 0 +// CHECK-NEXT:store i32 [[TMP2]], i32* [[B]], align 4 +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { i32, <16 x i8> } [[TMP1]], 1 +// CHECK-NEXT:ret <16 x i8> [[TMP3]] +// +int8x16_t test_vshlcq_s8(int8x16_t a, uint32_t *b) { +#ifdef POLYMORPHIC + return vshlcq(a, b, 18); +#else /* POLYMORPHIC */ + return vshlcq_s8(a, b, 18); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlcq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4 +// CHECK-NEXT:[[TMP1:%.*]] = call { i32, <8 x i16> } @llvm.arm.mve.vshlc.v8i16(<8 x i16> [[A:%.*]], i32 [[TMP0]], i32 16) +// CHECK-NEXT:[[TMP2:%.*]] = extractvalue { i32, <8 x i16> } [[TMP1]], 0 +// CHECK-NEXT:store i32 [[TMP2]], i32* [[B]], align 4 +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { i32, <8 x i16> } [[TMP1]], 1 +// CHECK-NEXT:ret <8 x i16> [[TMP3]] +// +int16x8_t 
test_vshlcq_s16(int16x8_t a, uint32_t *b) { +#ifdef POLYMORPHIC + return vshlcq(a, b, 16); +#else /* POLYMORPHIC */ + return vshlcq_s16(a, b, 16); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlcq_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4 +// CHECK-NEXT:[[TMP1:%.*]] = call { i32, <4 x i32> } @llvm.arm.mve.vshlc.v4i32(<4 x i32> [[A:%.*]], i32 [[TMP0]], i32 4) +// CHECK-NEXT:[[TMP2:%.*]] = extractvalue { i32, <4 x i32> } [[TMP1]], 0 +// CHECK-NEXT:store i32 [[TMP2]], i32* [[B]], align 4 +// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { i32, <4 x i32> } [[TMP1]], 1 +// CHECK-NEXT:ret <4 x i32> [[TMP3]] +// +int32x4_t test_vshlcq_s
[clang] 26bc7cb - [clang,MveEmitter] Fix sign/zero extension in range limits.
Author: Simon Tatham Date: 2019-11-06T09:01:42Z New Revision: 26bc7cb05edd6bea4b9a1593baf0fbe9e45f54e4 URL: https://github.com/llvm/llvm-project/commit/26bc7cb05edd6bea4b9a1593baf0fbe9e45f54e4 DIFF: https://github.com/llvm/llvm-project/commit/26bc7cb05edd6bea4b9a1593baf0fbe9e45f54e4.diff LOG: [clang,MveEmitter] Fix sign/zero extension in range limits. In the code that generates Sema range checks on constant arguments, I had a piece of code that checks the bounds specified in the Tablegen intrinsic description against the range of the integer type being tested. If the bounds are large enough to permit any value of the integer type, you can omit the compile-time range check. (This case is expected to come up in some of the bitwise operation intrinsics.) But somehow I got my signed/unsigned check backwards (asking for the signed min/max of an unsigned type and vice versa), and also made a sign extension error in which a signed negative value gets zero-extended. Now rewritten more sensibly, and it should get its first sensible test from the next batch of intrinsics I'm planning to add in D69791. 
Reviewers: dmgreen Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D69789 Added: Modified: clang/utils/TableGen/MveEmitter.cpp Removed: diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp index 9c3328e3bbfb..1f9752261fbf 100644 --- a/clang/utils/TableGen/MveEmitter.cpp +++ b/clang/utils/TableGen/MveEmitter.cpp @@ -782,15 +782,14 @@ class ACLEIntrinsic { } llvm::APInt typelo, typehi; - if (cast(IA.ArgType)->kind() == ScalarTypeKind::UnsignedInt) { -typelo = llvm::APInt::getSignedMinValue(IA.ArgType->sizeInBits()); -typehi = llvm::APInt::getSignedMaxValue(IA.ArgType->sizeInBits()); + unsigned Bits = IA.ArgType->sizeInBits(); + if (cast(IA.ArgType)->kind() == ScalarTypeKind::SignedInt) { +typelo = llvm::APInt::getSignedMinValue(Bits).sext(128); +typehi = llvm::APInt::getSignedMaxValue(Bits).sext(128); } else { -typelo = llvm::APInt::getMinValue(IA.ArgType->sizeInBits()); -typehi = llvm::APInt::getMaxValue(IA.ArgType->sizeInBits()); +typelo = llvm::APInt::getMinValue(Bits).zext(128); +typehi = llvm::APInt::getMaxValue(Bits).zext(128); } - typelo = typelo.sext(128); - typehi = typehi.sext(128); std::string Index = utostr(kv.first); ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 38f0165 - [ARM MVE] Remove accidental 64-bit vst2/vld2 intrinsics.
Author: Simon Tatham Date: 2019-11-06T09:01:42Z New Revision: 38f016520f6edbfa7d059b60ac54e80dd955ada5 URL: https://github.com/llvm/llvm-project/commit/38f016520f6edbfa7d059b60ac54e80dd955ada5 DIFF: https://github.com/llvm/llvm-project/commit/38f016520f6edbfa7d059b60ac54e80dd955ada5.diff LOG: [ARM MVE] Remove accidental 64-bit vst2/vld2 intrinsics. ACLE defines no such intrinsic as vst2q_u64, and the MVE instruction set has no corresponding instruction. But I had accidentally added them to the fledgling <arm_mve.h> anyway, and if you used them, you'd get a compiler crash. Reviewers: dmgreen Subscribers: kristof.beyls, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D69788 Added: Modified: clang/include/clang/Basic/arm_mve.td Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index a760fdd87b1a..30a76511a3cd 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -18,7 +18,7 @@ include "arm_mve_defs.td" -let params = T.All in +let params = T.Usual in foreach n = [ 2, 4 ] in { def "vst"#n#"q": Intrinsic, MultiVector), (CustomCodegen<"VST24"> n:$NumVectors, ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 902e845 - [ARM,MVE] Add intrinsics for 'administrative' vector operations.
Author: Simon Tatham Date: 2019-11-15T09:53:43Z New Revision: 902e84556a51c70d95088aaa059ab9c494ab3516 URL: https://github.com/llvm/llvm-project/commit/902e84556a51c70d95088aaa059ab9c494ab3516 DIFF: https://github.com/llvm/llvm-project/commit/902e84556a51c70d95088aaa059ab9c494ab3516.diff LOG: [ARM,MVE] Add intrinsics for 'administrative' vector operations. This batch of intrinsics includes lots of things that move vector data around or change its type without really affecting its value very much. It includes the `vreinterpretq` family (cast one vector type to another); `vuninitializedq` (create a vector of a given type with don't-care contents); and `vcreateq` (make a 128-bit vector out of two `uint64_t` halves). These are all implemented using completely standard IR that's already tested in existing LLVM unit tests, so I've just written a clang test to check the IR is correct, and left it at that. I've also added some richer infrastructure to the MveEmitter Tablegen backend, to make it specify the exact integer type of integer arguments passed to IR construction functions, and wrap those arguments in a `static_cast` in the autogenerated C++. That was necessary to prevent an overloading ambiguity when passing the integer literal `0` to `IRBuilder::CreateInsertElement`, because otherwise, it could mean either a null pointer `llvm::Value *` or a zero `uint64_t`. 
Reviewers: ostannard, MarkMurrayARM, dmgreen Subscribers: kristof.beyls, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70133 Added: clang/test/CodeGen/arm-mve-intrinsics/admin.c Modified: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td clang/utils/TableGen/MveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index d2f877dda28e..c8501813d264 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -373,3 +373,44 @@ def vadciq_m: Intrinsic; } + +foreach desttype = T.All in { + // We want a vreinterpretq between every pair of supported vector types + // _except_ that there shouldn't be one from a type to itself. + // + // So this foldl expression implements what you'd write in Python as + // [srctype for srctype in T.All if srctype != desttype] + let params = !foldl([], T.All, tlist, srctype, !listconcat(tlist, + !if(!eq(!cast(desttype),!cast(srctype)),[],[srctype]))) + in { +def "vreinterpretq_" # desttype: Intrinsic< +VecOf, (args Vector:$x), (bitcast $x, VecOf)>; + } +} + +let params = T.All in { + let pnt = PNT_None in { +def vcreateq: Intrinsic), $a, 0), + $b, 1), Vector)>; +def vuninitializedq: Intrinsic; + } + + // This is the polymorphic form of vuninitializedq, which takes no type + // suffix, but takes an _unevaluated_ vector parameter and returns an + // uninitialized vector of the same vector type. + // + // This intrinsic has no _non_-polymorphic form exposed to the user. But each + // separately typed version of it still has to have its own clang builtin id, + // which can't be called vuninitializedq_u32 or similar because that would + // collide with the explicit nullary versions above. 
So I'm calling them + // vuninitializedq_polymorphic_u32 (and so on) for builtin id purposes; that + // full name never appears in the header file due to the polymorphicOnly + // flag, and the _polymorphic suffix is omitted from the shortened name by + // the custom PolymorphicNameType here. + let polymorphicOnly = 1, nonEvaluating = 1, + pnt = PolymorphicNameType<1, "polymorphic"> in { +def vuninitializedq_polymorphic: Intrinsic< +Vector, (args Vector), (undef Vector)>; + } +} diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index da6928fc137a..911c2c129db9 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -29,6 +29,11 @@ def args; // - // Family of nodes for use in the codegen dag for an intrinsic, corresponding // to function calls that return LLVM IR nodes. +class IRBuilderParam { int index = index_; } +class IRBuilderAddrParam : IRBuilderParam; +class IRBuilderIntParam : IRBuilderParam { + string type = type_; +} class IRBuilderBase { // The prefix of the function call, including an open parenthesis. string prefix; @@ -36,8 +41,7 @@ class IRBuilderBase { // Any parameters that have types that have to be treated specially by the // Tablegen back end. Generally these will be types other than llvm::Value *, // although not all other types need special treatment (e.g. llvm::Type *). - list address_params = []; // indices of parameters with type Address - list int_constant_params =
[clang] 9e37892 - [ARM,MVE] Add intrinsics for vector get/set lane.
Author: Simon Tatham Date: 2019-11-15T09:53:58Z New Revision: 9e37892773c0954a15f84b011223da1e707ab3bf URL: https://github.com/llvm/llvm-project/commit/9e37892773c0954a15f84b011223da1e707ab3bf DIFF: https://github.com/llvm/llvm-project/commit/9e37892773c0954a15f84b011223da1e707ab3bf.diff LOG: [ARM,MVE] Add intrinsics for vector get/set lane. This adds the `vgetq_lane` and `vsetq_lane` families, to copy between a scalar and a specified lane of a vector. One of the new `vgetq_lane` intrinsics returns a `float16_t`, which causes a compile error if `%clang_cc1` doesn't get the option `-fallow-half-arguments-and-returns`. The driver passes that option to cc1 already, but I've had to edit all the explicit cc1 command lines in the existing MVE intrinsics tests. A couple of fixes are included for the code I wrote up front in MveEmitter to support lane-index immediates (and which nothing has tested until now): the type was wrong (`uint32_t` instead of `int`) and the range was off by one. I've also added a method of bypassing the default promotion to `i32` that is done by the MveEmitter code generation: it's sensible to promote short scalars like `i16` to `i32` if they're going to be passed to custom IR intrinsics representing a machine instruction operating on GPRs, but not if they're going to be passed to standard IR operations like `insertelement` which expect the exact type. 
Reviewers: ostannard, MarkMurrayARM, dmgreen Reviewed By: dmgreen Subscribers: kristof.beyls, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D70188 Added: clang/test/CodeGen/arm-mve-intrinsics/get-set-lane.c Modified: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td clang/test/CodeGen/arm-mve-intrinsics/load-store.c clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c clang/test/CodeGen/arm-mve-intrinsics/vadc.c clang/test/CodeGen/arm-mve-intrinsics/vaddq.c clang/test/CodeGen/arm-mve-intrinsics/vcvt.c clang/test/CodeGen/arm-mve-intrinsics/vld24.c clang/test/CodeGen/arm-mve-intrinsics/vldr.c clang/test/CodeGen/arm-mve-intrinsics/vminvq.c clang/test/Sema/arm-mve-immediates.c clang/utils/TableGen/MveEmitter.cpp Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index c8501813d264..b72a8303ba3e 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -413,4 +413,9 @@ let params = T.All in { def vuninitializedq_polymorphic: Intrinsic< Vector, (args Vector), (undef Vector)>; } + + def vgetq_lane: Intrinsic; + def vsetq_lane: Intrinsic:$e, Vector:$v, imm_lane:$lane), +(ielt_var $v, $e, $lane)>; } diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index 911c2c129db9..4a1d6ed92664 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -77,6 +77,8 @@ def xval: IRBuilder<"CreateExtractValue"> { def ielt_const: IRBuilder<"CreateInsertElement"> { let special_params = [IRBuilderIntParam<2, "uint64_t">]; } +def ielt_var: IRBuilder<"CreateInsertElement">; +def xelt_var: IRBuilder<"CreateExtractElement">; def trunc: IRBuilder<"CreateTrunc">; def bitcast: IRBuilder<"CreateBitCast">; def extend: CGHelperFn<"SignOrZeroExtend"> { @@ -172,6 +174,10 @@ def CTO_CopyKind: ComplexTypeOp; // of _s32 
/ _f16 / _u8 suffix. def Void : Type; +// A wrapper you can put on an intrinsic's argument type to prevent it from +// being automatically promoted to i32 from a smaller integer type. +class unpromoted : Type { Type underlying_type = t; } + // Primitive types: base class, and an instance for the set of scalar integer // and floating types that MVE uses. class PrimitiveType: Type { @@ -285,7 +291,7 @@ def imm_0toNm1 : Immediate>; // imm_lane has to be the index of a vector lane in the main vector type, i.e // it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane) -def imm_lane : Immediate; +def imm_lane : Immediate; // imm_1to32 can be in the range 1 to 32, unconditionally. (e.g. scalar shift // intrinsics) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/get-set-lane.c b/clang/test/CodeGen/arm-mve-intrinsics/get-set-lane.c new file mode 100644 index ..6eaf0f8a71f5 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/get-set-lane.c @@ -0,0 +1,291 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-
[clang] 254b4f2 - [ARM,MVE] Add intrinsics for scalar shifts.
Author: Simon Tatham Date: 2019-11-19T14:47:29Z New Revision: 254b4f250007ef9f2d2377eb912963beafa39754 URL: https://github.com/llvm/llvm-project/commit/254b4f250007ef9f2d2377eb912963beafa39754 DIFF: https://github.com/llvm/llvm-project/commit/254b4f250007ef9f2d2377eb912963beafa39754.diff LOG: [ARM,MVE] Add intrinsics for scalar shifts. This fills in the small family of MVE intrinsics that have nothing to do with vectors: they implement bit-shift operations on 32- or 64-bit values held in one or two general-purpose registers. Most of these shift operations saturate if shifting left, and round to nearest if shifting right, although LSLL and ASRL behave like ordinary shifts. When these instructions take a variable shift count in a register, they pay attention to its sign, so that (for example) LSLL or UQRSHLL will shift left if given a positive number but right if given a negative one. That makes even LSLL and ASRL different enough from standard LLVM IR shift semantics that I couldn't see any better alternative than to simply model the whole family as a set of MVE-specific IR intrinsics. (The //immediate// forms of LSLL and ASRL, on the other hand, do behave exactly like a standard IR shift of a 64-bit value. In fact, those forms don't have ACLE intrinsics defined at all, because you can just write an ordinary C shift operation if you want one of those.) The 64-bit shifts have to be instruction-selected in C++, because they deliver two output values. But the 32-bit ones are simple enough that I could write a DAG isel pattern directly into each Instruction record. 
Reviewers: ostannard, MarkMurrayARM, dmgreen Reviewed By: dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D70319 Added: Modified: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-intrinsics/scalar-shifts.ll Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index e227d95f9735..d8d199f464d9 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -388,13 +388,56 @@ defm vstrhq: scatter_offset_both; defm vstrwq: scatter_offset_both; defm vstrdq: scatter_offset_both; -let params = [Void], pnt = PNT_None in -def urshrl: Intrinsic $lo, $hi, $shift):$pair, - (or (shl (u64 (xval $pair, 1)), (u64 32)), - (u64 (xval $pair, 0>; +// Base class for the scalar shift intrinsics. +class ScalarShift: + Intrinsic { + let params = [Void]; + let pnt = PNT_None; +} + +// Subclass that includes the machinery to take a 64-bit input apart +// into halves, retrieve the two halves of a shifted output as a pair, +// and glue the pieces of the pair back into an i64 for output. +class LongScalarShift: + ScalarShift; + +// The family of saturating/rounding scalar shifts that take an +// immediate shift count. They come in matched 32- and 64-bit pairs. +multiclass ScalarSaturatingShiftImm { + def "": ScalarShift $value, $sh)>; + def l: LongScalarShift $lo, $hi, $sh)>; +} +defm uqshl: ScalarSaturatingShiftImm; +defm urshr: ScalarSaturatingShiftImm; +defm sqshl: ScalarSaturatingShiftImm; +defm srshr: ScalarSaturatingShiftImm; + +// The family of saturating/rounding scalar shifts that take a +// register shift count. 
They also have 32- and 64-bit forms, but the +// 64-bit form also has a version that saturates to 48 bits, so the IR +// intrinsic takes an extra saturation-type operand. +multiclass ScalarSaturatingShiftReg { + def "": ScalarShift $value, $sh)>; + def l: LongScalarShift $lo, $hi, $sh, 64)>; + def l_sat48: LongScalarShift $lo, $hi, $sh, 48)>; +} +defm uqrshl: ScalarSaturatingShiftReg; +defm sqrshr: ScalarSaturatingShiftReg; + +// The intrinsics for LSLL and ASRL come in 64-bit versions only, with +// no saturation count. +def lsll: LongScalarShift $lo, $hi, $sh)>; +def asrl: LongScalarShift $lo, $hi, $sh)>; let params = T.Int32 in { def vadcq: Intrinsic:$carry), diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index a4ba4ed87de3..27cdada02ec4 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -312,7 +312,7 @@ def imm_lane : Immediate; // imm_1to32 can be in the range 1 to 32, unconditionally. (e.g. scalar shift // intrinsics) -def imm_1to32 : Immediate>; +def imm_1to32 : Immediate>; // imm_1248 can be 1, 2, 4 or 8. (e.g. vidupq) def imm_1248 : Immediate> { diff --git a/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
[clang] d97b3e3 - [ARM][MVE] Add intrinsics for immediate shifts.
Author: Simon Tatham Date: 2019-12-09T15:44:09Z New Revision: d97b3e3e65cd77a81b39732af84a1a4229e95091 URL: https://github.com/llvm/llvm-project/commit/d97b3e3e65cd77a81b39732af84a1a4229e95091 DIFF: https://github.com/llvm/llvm-project/commit/d97b3e3e65cd77a81b39732af84a1a4229e95091.diff LOG: [ARM][MVE] Add intrinsics for immediate shifts. Summary: This adds the family of `vshlq_n` and `vshrq_n` ACLE intrinsics, which shift every lane of a vector left or right by a compile-time immediate. They mostly work by expanding to the IR `shl`, `lshr` and `ashr` operations, with their second operand being a vector splat of the immediate. There's a fiddly special case, though. ACLE specifies that the immediate in `vshrq_n` can take values up to //and including// the bit size of the vector lane. But LLVM IR thinks that shifting right by the full size of the lane is UB, and feels free to replace the `lshr` with an `undef` half way through the optimization pipeline. Hence, to keep this legal in source code, I have to detect it at codegen time. Logical (unsigned) right shifts by the element size are handled by simply emitting the zero vector; arithmetic ones are converted into a shift of one bit less, which will always give the same output. In order to do that check, I also had to enhance the tablegen MveEmitter so that it can cope with converting a builtin function's operand into a bare integer to pass to a code-generating subfunction. Previously the only bare integers it knew how to handle were flags generated from within `arm_mve.td`. 
Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard Reviewed By: MarkMurrayARM Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71065 Added: clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll Modified: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td clang/lib/CodeGen/CGBuiltin.cpp clang/utils/TableGen/MveEmitter.cpp llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 19852702c1bc..cc4b6d9e8234 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -522,6 +522,33 @@ defm vstrhq: scatter_offset_both; defm vstrwq: scatter_offset_both; defm vstrdq: scatter_offset_both; +multiclass PredicatedImmediateVectorShift< +Immediate immtype, string predIntrName, list unsignedFlag = []> { + foreach predIntr = [IRInt] in { +def _m_n: Intrinsic; +def _x_n: Intrinsic; + } +} + +let params = T.Int in { + def vshlq_n: Intrinsic; + defm vshlq: PredicatedImmediateVectorShift; + + let pnt = PNT_NType in { +def vshrq_n: Intrinsic; +defm vshrq: PredicatedImmediateVectorShift; + } +} + // Base class for the scalar shift intrinsics. 
class ScalarShift: Intrinsic { diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index d837a1d33d00..5aa10f250eda 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -66,6 +66,10 @@ def xor: IRBuilder<"CreateXor">; def sub: IRBuilder<"CreateSub">; def shl: IRBuilder<"CreateShl">; def lshr: IRBuilder<"CreateLShr">; +def immshr: CGHelperFn<"MVEImmediateShr"> { + let special_params = [IRBuilderIntParam<1, "unsigned">, +IRBuilderIntParam<2, "bool">]; +} def fadd: IRBuilder<"CreateFAdd">; def fmul: IRBuilder<"CreateFMul">; def fsub: IRBuilder<"CreateFSub">; @@ -308,8 +312,8 @@ def imm_simd_vmvn : Immediate { // // imm_0toNm1 is the same but with the range offset by 1, i.e. 0 to N-1 // inclusive. -def imm_1toN : Immediate>; -def imm_0toNm1 : Immediate>; +def imm_1toN : Immediate>; +def imm_0toNm1 : Immediate>; // imm_lane has to be the index of a vector lane in the main vector type, i.e // it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index b5b0c3e61d47..94d10a1aedf2 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6801,6 +6801,14 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } } +template +static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) { + llvm::APSInt IntVal; + bool IsConst = E->isIntegerConstantExpr(IntVal, Context); + assert(IsConst && "Sema should have checked this was a constant"); + return IntVal.getExtValue(); +} + static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned) { // Helper function called by Tablegen-constructed ARM MVE bui
[clang] bd0f271 - [ARM][MVE] Add intrinsics for immediate shifts. (reland)
Author: Simon Tatham Date: 2019-12-11T10:10:09Z New Revision: bd0f271c9e55ab69b45258e4922869099ed18307 URL: https://github.com/llvm/llvm-project/commit/bd0f271c9e55ab69b45258e4922869099ed18307 DIFF: https://github.com/llvm/llvm-project/commit/bd0f271c9e55ab69b45258e4922869099ed18307.diff LOG: [ARM][MVE] Add intrinsics for immediate shifts. (reland) This adds the family of `vshlq_n` and `vshrq_n` ACLE intrinsics, which shift every lane of a vector left or right by a compile-time immediate. They mostly work by expanding to the IR `shl`, `lshr` and `ashr` operations, with their second operand being a vector splat of the immediate. There's a fiddly special case, though. ACLE specifies that the immediate in `vshrq_n` can take values up to //and including// the bit size of the vector lane. But LLVM IR thinks that shifting right by the full size of the lane is UB, and feels free to replace the `lshr` with an `undef` half way through the optimization pipeline. Hence, to keep this legal in source code, I have to detect it at codegen time. Logical (unsigned) right shifts by the element size are handled by simply emitting the zero vector; arithmetic ones are converted into a shift of one bit less, which will always give the same output. In order to do that check, I also had to enhance the tablegen MveEmitter so that it can cope with converting a builtin function's operand into a bare integer to pass to a code-generating subfunction. Previously the only bare integers it knew how to handle were flags generated from within `arm_mve.td`. 
Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard Reviewed By: dmgreen, MarkMurrayARM Subscribers: echristo, hokein, rdhindsa, kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71065 Added: clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll Modified: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td clang/lib/CodeGen/CGBuiltin.cpp clang/utils/TableGen/MveEmitter.cpp llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 618a087d6275..9b6053e57861 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -609,6 +609,33 @@ defm vstrhq: scatter_offset_both; defm vstrwq: scatter_offset_both; defm vstrdq: scatter_offset_both; +multiclass PredicatedImmediateVectorShift< +Immediate immtype, string predIntrName, list unsignedFlag = []> { + foreach predIntr = [IRInt] in { +def _m_n: Intrinsic; +def _x_n: Intrinsic; + } +} + +let params = T.Int in { + def vshlq_n: Intrinsic; + defm vshlq: PredicatedImmediateVectorShift; + + let pnt = PNT_NType in { +def vshrq_n: Intrinsic; +defm vshrq: PredicatedImmediateVectorShift; + } +} + // Base class for the scalar shift intrinsics. 
class ScalarShift: Intrinsic { diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index 1d72cc45796c..6bc9b35f0fc4 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -66,6 +66,10 @@ def xor: IRBuilder<"CreateXor">; def sub: IRBuilder<"CreateSub">; def shl: IRBuilder<"CreateShl">; def lshr: IRBuilder<"CreateLShr">; +def immshr: CGHelperFn<"MVEImmediateShr"> { + let special_params = [IRBuilderIntParam<1, "unsigned">, +IRBuilderIntParam<2, "bool">]; +} def fadd: IRBuilder<"CreateFAdd">; def fmul: IRBuilder<"CreateFMul">; def fsub: IRBuilder<"CreateFSub">; @@ -318,8 +322,8 @@ def imm_simd_vmvn : Immediate { // // imm_0toNm1 is the same but with the range offset by 1, i.e. 0 to N-1 // inclusive. -def imm_1toN : Immediate>; -def imm_0toNm1 : Immediate>; +def imm_1toN : Immediate>; +def imm_0toNm1 : Immediate>; // imm_lane has to be the index of a vector lane in the main vector type, i.e // it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 68c956a98637..8a53739626e1 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6916,6 +6916,15 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } } +template +static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) { + llvm::APSInt IntVal; + bool IsConst = E->isIntegerConstantExpr(IntVal, Context); + assert(IsConst && "Sema should have checked this was a constant"); + (void)IsConst; + return IntVal.getExtValue(); +} + static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned) { // He
[clang] d290424 - [ARM][MVE] Factor out an IntrinsicMX multiclass.
Author: Simon Tatham Date: 2019-12-11T12:07:26Z New Revision: d290424731ede31fd5fd75b929df8fe0adb547c7 URL: https://github.com/llvm/llvm-project/commit/d290424731ede31fd5fd75b929df8fe0adb547c7 DIFF: https://github.com/llvm/llvm-project/commit/d290424731ede31fd5fd75b929df8fe0adb547c7.diff LOG: [ARM][MVE] Factor out an IntrinsicMX multiclass. Summary: The ACLE intrinsics for MVE contain a lot of pairs of functions with `_m` and `_x` in the name, wrapping a predicated MVE instruction which only partially overwrites its output register. They have the common pattern that the `_m` variant takes an initial argument called 'inactive', of the same type as the return value, supplying the input value of the output register, so that lanes disabled by the predication will be taken from that parameter; the `_x` variant omits that initial argument, and simply sets it to undef. That common pattern is simple enough to wrap into a multiclass, which should save a lot of effort in setting up all the rest of the `_x` variants. In this commit I introduce `multiclass IntrinsicMX` in `arm_mve_defs.td`, and convert existing generation of m/x pairs to use it. This allows me to remove the `PredicatedImmediateVectorShift` multiclass (from D71065) completely, because the new multiclass makes it so much simpler that it's not worth bothering to define it at all. 
Reviewers: MarkMurrayARM, miyuki Reviewed By: MarkMurrayARM, miyuki Subscribers: kristof.beyls, dmgreen, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D71335 Added: Modified: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 9b6053e57861..7ed3c04c58db 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -609,30 +609,21 @@ defm vstrhq: scatter_offset_both; defm vstrwq: scatter_offset_both; defm vstrdq: scatter_offset_both; -multiclass PredicatedImmediateVectorShift< -Immediate immtype, string predIntrName, list unsignedFlag = []> { - foreach predIntr = [IRInt] in { -def _m_n: Intrinsic; -def _x_n: Intrinsic; - } -} - let params = T.Int in { def vshlq_n: Intrinsic; - defm vshlq: PredicatedImmediateVectorShift; + defm vshlq: IntrinsicMX + $v, $sh, $pred, $inactive), "_n">; let pnt = PNT_NType in { def vshrq_n: Intrinsic; -defm vshrq: PredicatedImmediateVectorShift; +defm vshrq: IntrinsicMX + $v, $sh, (unsignedflag Scalar), $pred, $inactive), "_n">; } } @@ -713,25 +704,17 @@ def vadciq_m: Intrinsic { def "" : Intrinsic not_halving, angle, $a, $b)>; - def _m : Intrinsic not_halving, angle, $inactive, $a, $b, $pred)>; - def _x : Intrinsic - not_halving, angle, (undef Vector), $a, $b, $pred)>; } multiclass VectorComplexMulPred { def "" : Intrinsic angle, $a, $b)>; - def _m : Intrinsic angle, $inactive, $a, $b, $pred)>; - def _x : Intrinsic angle, (undef Vector), $a, - $b, $pred)>; } multiclass VectorComplexMLAPred { diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index 6bc9b35f0fc4..3e22e44607ca 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -432,6 +432,30 @@ class NameOverride { string basename = basename_; } +// A wrapper to define both _m and _x versions of a 
predicated +// intrinsic. +multiclass IntrinsicMX { + // The _m variant takes an initial parameter called $inactive, which + // provides the input value of the output register, i.e. all the + // inactive lanes in the predicated operation take their values from + // this. + def "_m" # nameSuffix: + Intrinsic; + + // The _x variant leaves off that parameter, and simply uses an + // undef value of the same type. + def "_x" # nameSuffix: + Intrinsic { +// Allow overriding of the polymorphic name type, because +// sometimes the _m and _x variants polymorph differently +// (typically because the type of the inactive parameter can be +// used as a disambiguator if it's present). +let pnt = pnt_x; + } +} + // - // Convenience lists of parameter types. 'T' is just a container record, so you // can define a typical intrinsic with 'let Params = T.Usual', or similar, ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 25305a9 - [ARM][MVE] Add intrinsics for more immediate shifts.
Author: Simon Tatham Date: 2019-12-13T13:07:39Z New Revision: 25305a9311d45bc602014b7ee7584e80675aaf59 URL: https://github.com/llvm/llvm-project/commit/25305a9311d45bc602014b7ee7584e80675aaf59 DIFF: https://github.com/llvm/llvm-project/commit/25305a9311d45bc602014b7ee7584e80675aaf59.diff LOG: [ARM][MVE] Add intrinsics for more immediate shifts. Summary: This fills in the remaining shift operations that take a single vector input and an immediate shift count: the `vqshl`, `vqshlu`, `vrshr` and `vshll[bt]` families. `vshll[bt]` (which shifts each input lane left into a double-width output lane) is the most interesting one. There are separate MC instruction ids for shifting by exactly the input lane width and shifting by less than that, because the instruction encoding is so completely different for the lane-width special case. So I had to write two sets of patterns to match based on the immediate shift count, which involved adding a ComplexPattern matcher to avoid the general-case pattern accidentally matching the special case too. For that family I've made sure to add an llc codegen test for both versions of each instruction. I'm experimenting with a new strategy for parametrising the isel patterns for all these instructions: adding extra fields to the relevant `Instruction` subclass itself, which are ignored by the Tablegen backends that generate the MC data, but can be retrieved from each instance of that instruction subclass when it's passed as a template parameter to the multiclass that generates its isel patterns. A nice effect of that is that I can fill in those informational fields using `let` blocks, rather than having to type them out once per instruction at `defm` time. (As a result, quite a lot of existing instruction `def`s are reindented by this patch, so it's clearer to read with whitespace changes ignored.) 
Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard Reviewed By: MarkMurrayARM Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71458 Added: Modified: clang/include/clang/Basic/arm_mve.td clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp llvm/lib/Target/ARM/ARMInstrInfo.td llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 8bb567c573a3..9d9c067ade1c 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -606,6 +606,47 @@ let params = T.Int in { } } +let params = T.Int in { + def vqshlq_n: Intrinsic $v, $sh, (unsignedflag Scalar))>; + def vqshlq_m_n: Intrinsic +$v, $sh, (unsignedflag Scalar), $pred, $inactive)>; + + let pnt = PNT_NType in { +def vrshrq_n: Intrinsic $v, $sh, (unsignedflag Scalar))>; +defm vrshrq: IntrinsicMX + $v, $sh, (unsignedflag Scalar), $pred, $inactive), "_n">; + } +} + +let params = T.Signed, pnt = PNT_NType in { + def vqshluq_n: Intrinsic $v, $sh)>; + def vqshluq_m_n: Intrinsic +$v, $sh, $pred, $inactive)>; +} + +multiclass vshll_imm { + let params = !listconcat(T.Int8, T.Int16), pnt = PNT_NType in { +def _n: Intrinsic +$v, $sh, (unsignedflag Scalar), top)>; +defm "": IntrinsicMX +$v, $sh, (unsignedflag Scalar), top, $pred, $inactive), "_n">; + } +} +defm vshllbq : vshll_imm<0>; +defm vshlltq : vshll_imm<1>; + // Base class for the scalar shift intrinsics. 
class ScalarShift: Intrinsic { diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c index 200273c03654..2128d0801c6a 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c @@ -720,3 +720,918 @@ uint32x4_t test_vshrq_x_n_u32(uint32x4_t a, mve_pred16_t p) return vshrq_x_n_u32(a, 6, p); #endif /* POLYMORPHIC */ } + +// CHECK-LABEL: @test_vqshlq_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8> [[A:%.*]], i32 3, i32 0) +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +int8x16_t test_vqshlq_n_s8(int8x16_t a) +{ +#ifdef POLYMORPHIC +return vqshlq_n(a, 3); +#else /* POLYMORPHIC */ +return vqshlq_n_s8(a, 3); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqshlq_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16> [[A:%.*]], i32 4, i32 0) +// CHECK-NEXT:ret <8 x i16> [[TMP0]] +// +int16x8_t test_vqshlq_n_s16(int16x8_t a) +{ +#ifdef POLYMORPHIC +return vqshlq_n(a, 4); +#else /* POLYMORPHIC */ +return vqshlq_n_s16(a,
[clang] d608fee - [ARM, MVE] Fix user-namespace violation in arm_mve.h.
Author: Simon Tatham Date: 2020-03-12T11:13:50Z New Revision: d608fee8399a9fa6f2819076131c6ac30cc16eef URL: https://github.com/llvm/llvm-project/commit/d608fee8399a9fa6f2819076131c6ac30cc16eef DIFF: https://github.com/llvm/llvm-project/commit/d608fee8399a9fa6f2819076131c6ac30cc16eef.diff LOG: [ARM,MVE] Fix user-namespace violation in arm_mve.h. Summary: We were generating the declarations of polymorphic intrinsics using `__attribute__((overloadable))`. But `overloadable` is a valid identifier for an end user to define as a macro in a C program, and if they do that before including `<arm_mve.h>`, then we shouldn't cause a compile error. Fixed to spell the attribute name `__overloadable__` instead. Reviewers: miyuki, MarkMurrayARM, ostannard Reviewed By: miyuki Subscribers: kristof.beyls, dmgreen, danielkiss, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D75997 Added: Modified: clang/utils/TableGen/MveEmitter.cpp Removed: diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp index 9a9fe00eed74..f75f5000f0f6 100644 --- a/clang/utils/TableGen/MveEmitter.cpp +++ b/clang/utils/TableGen/MveEmitter.cpp @@ -1874,7 +1874,7 @@ void MveEmitter::EmitHeader(raw_ostream &OS) { // match your call". OS << "static __inline__ __attribute__((" - << (Polymorphic ? "overloadable, " : "") + << (Polymorphic ? "__overloadable__, " : "") << "__clang_arm_builtin_alias(__builtin_arm_mve_" << Int.fullName() << ")))\n" << RetTypeName << FunctionName << "(" << ArgTypesString << ");\n"; @@ -2041,7 +2041,7 @@ void CdeEmitter::EmitHeader(raw_ostream &OS) { // Emit the actual declaration. See MveEmitter::EmitHeader for detailed // comments OS << "static __inline__ __attribute__((" - << (Polymorphic ? "overloadable, " : "") + << (Polymorphic ? 
"__overloadable__, " : "") << "__clang_arm_builtin_alias(__builtin_arm_" << Int.builtinExtension() << "_" << Int.fullName() << ")))\n" << RetTypeName << FunctionName << "(" << ArgTypesString << ");\n"; ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 3f8e714 - [ARM,MVE] Add intrinsics and isel for MVE fused multiply-add.
Author: Simon Tatham Date: 2020-03-12T11:13:50Z New Revision: 3f8e714e2f9f2dc3367d2f3fc569abfaf28f314c URL: https://github.com/llvm/llvm-project/commit/3f8e714e2f9f2dc3367d2f3fc569abfaf28f314c DIFF: https://github.com/llvm/llvm-project/commit/3f8e714e2f9f2dc3367d2f3fc569abfaf28f314c.diff LOG: [ARM,MVE] Add intrinsics and isel for MVE fused multiply-add. Summary: This adds the ACLE intrinsic family for the VFMA and VFMS instructions, which perform fused multiply-add on vectors of floats. I've represented the unpredicated versions in IR using the cross- platform `@llvm.fma` IR intrinsic. We already had isel rules to convert one of those into a vector VFMA in the simplest possible way; but we didn't have rules to detect a negated argument and turn it into VFMS, or rules to detect a splat argument and turn it into one of the two vector/scalar forms of the instruction. Now we have all of those. The predicated form uses a target-specific intrinsic as usual, but I've stuck to just one, for a predicated FMA. The subtraction and splat versions are code-generated by passing an fneg or a splat as one of its operands, the same way as the unpredicated version. In arm_mve_defs.td, I've had to introduce a tiny extra piece of infrastructure: a record `id` for use in codegen dags which implements the identity function. (Just because you can't declare a Tablegen value of type dag which is //only// a `$varname`: you have to wrap it in something. Now I can write `(id $varname)` to get the same effect.) 
Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard Reviewed By: dmgreen Subscribers: kristof.beyls, hiraditya, danielkiss, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D75998 Added: clang/test/CodeGen/arm-mve-intrinsics/ternary.c llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll Modified: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-fmas.ll Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index d9a2035e8a0e..d2203d650301 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -162,6 +162,46 @@ let pnt = PNT_NType in { } } +multiclass FMA { + // FMS instructions are defined in the ArmARM as if they negate the + // second multiply input. + defvar m2_cg = !if(add, (id $m2), (fneg $m2)); + + defvar unpred_cg = (IRIntBase<"fma", [Vector]> $m1, m2_cg, $addend); + defvar pred_cg = (IRInt<"fma_predicated", [Vector, Predicate]> + $m1, m2_cg, $addend, $pred); + + def q: Intrinsic; + + def q_m: Intrinsic; + + // Only FMA has the vector/scalar variants, not FMS + if add then let pnt = PNT_NType in { + +def q_n: Intrinsic:$m2_s), + (seq (splat $m2_s):$m2, unpred_cg)>; +def sq_n: Intrinsic:$addend_s), +(seq (splat $addend_s):$addend, unpred_cg)>; +def q_m_n: Intrinsic:$m2_s, + Predicate:$pred), + (seq (splat $m2_s):$m2, pred_cg)>; +def sq_m_n: Intrinsic:$addend_s, +Predicate:$pred), + (seq (splat $addend_s):$addend, pred_cg)>; + } +} + +let params = T.Float in { + defm vfma: FMA<1>; + defm vfms: FMA<0>; +} + let params = !listconcat(T.Int16, T.Int32) in { let pnt = PNT_None in { def vmvnq_n: Intrinsic { } def zip: CGHelperFn<"VectorZip">; +// Trivial 'codegen' function that just returns its argument. Useful +// for wrapping up a variable name like $foo into a thing you can pass +// around as type 'dag'. 
+def id: IRBuilderBase { + // All the other cases of IRBuilderBase use 'prefix' to specify a function + // call, including the open parenthesis. MveEmitter puts the closing paren on + // the end. So if we _just_ specify an open paren with no function name + // before it, then the generated C++ code will simply wrap the input value in + // parentheses, returning it unchanged. + let prefix = "("; +} + // Helper for making boolean flags in IR def i1: IRBuilderBase { let prefix = "llvm::ConstantInt::get(Builder.getInt1Ty(), "; diff --git a/clang/test/CodeGen/arm-mve-intrinsics/ternary.c b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c new file mode 100644 index ..ab1cb14c3aed --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c @@ -0,0 +1,261 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -sroa | FileCheck %s +// RUN: %clang_cc1 -triple th
[clang] 28c5d97 - [ARM, MVE] Add intrinsics and isel for MVE integer VMLA.
Author: Simon Tatham Date: 2020-03-18T10:55:04Z New Revision: 28c5d97beec7a2582869f992f54a178c805e2e51 URL: https://github.com/llvm/llvm-project/commit/28c5d97beec7a2582869f992f54a178c805e2e51 DIFF: https://github.com/llvm/llvm-project/commit/28c5d97beec7a2582869f992f54a178c805e2e51.diff LOG: [ARM,MVE] Add intrinsics and isel for MVE integer VMLA. Summary: These instructions compute multiply+add in integers, with one of the operands being a splat of a scalar. (VMLA and VMLAS differ in whether the splat operand is a multiplier or the addend.) I've represented these in IR using existing standard IR operations for the unpredicated forms. The predicated forms are done with target- specific intrinsics, as usual. When operating on n-bit vector lanes, only the bottom n bits of the i32 scalar operand are used. So we have to tell that to isel lowering, to allow it to remove a pointless sign- or zero-extension instruction on that input register. That's done in `PerformIntrinsicCombine`, but first I had to enable `PerformIntrinsicCombine` for MVE targets (previously all the intrinsics it handled were for NEON), and make it a method of `ARMTargetLowering` so that it can get at `SimplifyDemandedBits`. 
Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard Reviewed By: dmgreen Subscribers: kristof.beyls, hiraditya, danielkiss, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D76122 Added: Modified: clang/include/clang/Basic/arm_mve.td clang/test/CodeGen/arm-mve-intrinsics/ternary.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMISelLowering.cpp llvm/lib/Target/ARM/ARMISelLowering.h llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index d2203d650301..5498a144c9e2 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -202,6 +202,24 @@ let params = T.Float in { defm vfms: FMA<0>; } +let params = T.Int, pnt = PNT_NType in { + def vmlaq_n: Intrinsic< +Vector, (args Vector:$addend, Vector:$m1, unpromoted:$m2_s), +(add (mul $m1, (splat $m2_s)), $addend)>; + def vmlasq_n: Intrinsic< +Vector, (args Vector:$m1, Vector:$m2, unpromoted:$addend_s), +(add (mul $m1, $m2), (splat $addend_s))>; + + def vmlaq_m_n: Intrinsic< +Vector, (args Vector:$addend, Vector:$m1, Scalar:$m2_s, Predicate:$pred), +(IRInt<"vmla_n_predicated", [Vector, Predicate]> +$addend, $m1, $m2_s, $pred)>; + def vmlasq_m_n: Intrinsic< +Vector, (args Vector:$m1, Vector:$m2, Scalar:$addend_s, Predicate:$pred), +(IRInt<"vmlas_n_predicated", [Vector, Predicate]> +$m1, $m2, $addend_s, $pred)>; +} + let params = !listconcat(T.Int16, T.Int32) in { let pnt = PNT_None in { def vmvnq_n: Intrinsic undef, i8 [[C:%.*]], i32 0 +// CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT:[[TMP0:%.*]] = mul <16 x i8> [[B:%.*]], [[DOTSPLAT]] +// CHECK-NEXT:[[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[A:%.*]] +// CHECK-NEXT:ret <16 x i8> [[TMP1]] +// +int8x16_t test_vmlaq_n_s8(int8x16_t a, int8x16_t b, int8_t c) { +#ifdef POLYMORPHIC + return 
vmlaq(a, b, c); +#else /* POLYMORPHIC */ + return vmlaq_n_s8(a, b, c); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmlaq_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[C:%.*]], i32 0 +// CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT:[[TMP0:%.*]] = mul <8 x i16> [[B:%.*]], [[DOTSPLAT]] +// CHECK-NEXT:[[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[A:%.*]] +// CHECK-NEXT:ret <8 x i16> [[TMP1]] +// +int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { +#ifdef POLYMORPHIC + return vmlaq(a, b, c); +#else /* POLYMORPHIC */ + return vmlaq_n_s16(a, b, c); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmlaq_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[C:%.*]], i32 0 +// CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer +// CHECK-NEXT:[[TMP0:%.*]] = mul <4 x i32> [[B:%.*]], [[DOTSPLAT]] +// CHECK-NEXT:[[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[A:%.*]] +// CHECK-NEXT:ret <4 x i32> [[TMP1]] +// +int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { +#ifdef POLYMORPHIC + return vmlaq(a, b, c); +#else /* POLYMORPHIC */ + return vmlaq_n_s32(a, b, c); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmlaq_n_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <16 x
[clang] 928776d - [ARM,MVE] Add intrinsics for the VQDMLAH family.
Author: Simon Tatham Date: 2020-03-18T10:55:04Z New Revision: 928776de9233be1487c1b56f90c90ed25b25e355 URL: https://github.com/llvm/llvm-project/commit/928776de9233be1487c1b56f90c90ed25b25e355 DIFF: https://github.com/llvm/llvm-project/commit/928776de9233be1487c1b56f90c90ed25b25e355.diff LOG: [ARM,MVE] Add intrinsics for the VQDMLAH family. Summary: These are complicated integer multiply+add instructions with extra saturation, taking the high half of a double-width product, and optional rounding. There's no sensible way to represent that in standard IR, so I've converted the clang builtins directly to target-specific intrinsics. Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard Reviewed By: miyuki Subscribers: kristof.beyls, hiraditya, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D76123 Added: Modified: clang/include/clang/Basic/arm_mve.td clang/test/CodeGen/arm-mve-intrinsics/ternary.c llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMISelLowering.cpp llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 5498a144c9e2..ae6ce4837d76 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -220,6 +220,29 @@ let params = T.Int, pnt = PNT_NType in { $m1, $m2, $addend_s, $pred)>; } +multiclass VQDMLA { + def hq_n: Intrinsic< +Vector, (args Vector:$addend, Vector:$m1, Scalar:$m2_s), +(IRInt $addend, $m1, $m2_s)>; + def shq_n: Intrinsic< +Vector, (args Vector:$m1, Vector:$m2, Scalar:$addend_s), +(IRInt $m1, $m2, $addend_s)>; + + def hq_m_n: Intrinsic< +Vector, (args Vector:$addend, Vector:$m1, Scalar:$m2_s, Predicate:$pred), +(IRInt + $addend, $m1, $m2_s, $pred)>; + def shq_m_n: Intrinsic< +Vector, (args Vector:$m1, Vector:$m2, Scalar:$addend_s, Predicate:$pred), +(IRInt + $m1, $m2, $addend_s, $pred)>; +} + +let params = T.Signed, pnt = PNT_NType in { + defm 
vqdmla: VQDMLA; + defm vqrdmla: VQDMLA; +} + let params = !listconcat(T.Int16, T.Int32) in { let pnt = PNT_None in { def vmvnq_n: Intrinsic @llvm.arm.mve.vqdmlah.v16i8(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 [[TMP0]]) +// CHECK-NEXT:ret <16 x i8> [[TMP1]] +// +int8x16_t test_vqdmlahq_n_s8(int8x16_t a, int8x16_t b, int8_t c) { +#ifdef POLYMORPHIC + return vqdmlahq(a, b, c); +#else /* POLYMORPHIC */ + return vqdmlahq_n_s8(a, b, c); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqdmlahq_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[C:%.*]] to i32 +// CHECK-NEXT:[[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlah.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]]) +// CHECK-NEXT:ret <8 x i16> [[TMP1]] +// +int16x8_t test_vqdmlahq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { +#ifdef POLYMORPHIC + return vqdmlahq(a, b, c); +#else /* POLYMORPHIC */ + return vqdmlahq_n_s16(a, b, c); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqdmlahq_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlah.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]]) +// CHECK-NEXT:ret <4 x i32> [[TMP0]] +// +int32x4_t test_vqdmlahq_n_s32(int32x4_t a, int32x4_t b, int32_t c) { +#ifdef POLYMORPHIC + return vqdmlahq(a, b, c); +#else /* POLYMORPHIC */ + return vqdmlahq_n_s32(a, b, c); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqrdmlahq_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = zext i8 [[C:%.*]] to i32 +// CHECK-NEXT:[[TMP1:%.*]] = call <16 x i8> @llvm.arm.mve.vqrdmlah.v16i8(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 [[TMP0]]) +// CHECK-NEXT:ret <16 x i8> [[TMP1]] +// +int8x16_t test_vqrdmlahq_n_s8(int8x16_t a, int8x16_t b, int8_t c) { +#ifdef POLYMORPHIC + return vqrdmlahq(a, b, c); +#else /* POLYMORPHIC */ + return vqrdmlahq_n_s8(a, b, c); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqrdmlahq_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = 
zext i16 [[C:%.*]] to i32 +// CHECK-NEXT:[[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqrdmlah.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]]) +// CHECK-NEXT:ret <8 x i16> [[TMP1]] +// +int16x8_t test_vqrdmlahq_n_s16(int16x8_t a, int16x8_t b, int16_t c) { +#ifdef POLYMORPHIC + return vqrdmlahq(a, b, c); +#else /* POLYMORPHIC */ + return vqrdmlahq_n_s16(a, b, c); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqrdmlahq_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqrdmlah.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]]) +// CHECK-NEXT:ret <4 x i32> [[TMP0]] +// +int32x4_t test_vqrdmlahq_n_s32(int32x4_t a, int32x4_t b,
[clang] e13d153 - [ARM,MVE] Add intrinsics for the VQDMLAD family.
Author: Simon Tatham Date: 2020-03-18T17:11:22Z New Revision: e13d153c1b59a11185bf6a1aa8853c9e14d556a5 URL: https://github.com/llvm/llvm-project/commit/e13d153c1b59a11185bf6a1aa8853c9e14d556a5 DIFF: https://github.com/llvm/llvm-project/commit/e13d153c1b59a11185bf6a1aa8853c9e14d556a5.diff LOG: [ARM,MVE] Add intrinsics for the VQDMLAD family. Summary: This is another set of instructions too complicated to be sensibly expressed in IR by anything short of a target-specific intrinsic. Given input vectors a,b, the instruction generates intermediate values 2*(a[0]*b[0]+a[1]*b[1]), 2*(a[2]*b[2]+a[3]*b[3]), etc; takes the high half of each double-width value, and overwrites half the lanes in the output vector c, which you therefore have to provide the input value of. Optionally you can swap the elements of b so that the products are things like a[0]*b[1]+a[1]*b[0]; optionally you can round to nearest when taking the high half; and optionally you can take the difference rather than sum of the two products. Finally, saturation is applied when converting back to a single-width vector lane. 
Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard Reviewed By: miyuki Subscribers: kristof.beyls, hiraditya, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D76359 Added: clang/test/CodeGen/arm-mve-intrinsics/vqdmlad.c llvm/test/CodeGen/Thumb2/mve-intrinsics/vqdmlad.ll Modified: clang/include/clang/Basic/arm_mve.td llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index ae6ce4837d76..45e45899de5f 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -243,6 +243,26 @@ let params = T.Signed, pnt = PNT_NType in { defm vqrdmla: VQDMLA; } +multiclass VQDMLAD { + def "": Intrinsic $a, $b, $c, +(u32 exchange), (u32 round), (u32 subtract))>; + def _m: Intrinsic $a, $b, $c, +(u32 exchange), (u32 round), (u32 subtract), $pred)>; +} +let params = T.Signed in { + defm vqdmladhq: VQDMLAD<0, 0, 0>; + defm vqdmladhxq: VQDMLAD<1, 0, 0>; + defm vqdmlsdhq: VQDMLAD<0, 0, 1>; + defm vqdmlsdhxq: VQDMLAD<1, 0, 1>; + defm vqrdmladhq: VQDMLAD<0, 1, 0>; + defm vqrdmladhxq: VQDMLAD<1, 1, 0>; + defm vqrdmlsdhq: VQDMLAD<0, 1, 1>; + defm vqrdmlsdhxq: VQDMLAD<1, 1, 1>; +} + let params = !listconcat(T.Int16, T.Int32) in { let pnt = PNT_None in { def vmvnq_n: Intrinsic + +// CHECK-LABEL: @test_vqdmladhq_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> [[INACTIVE:%.*]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0, i32 0, i32 0) +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +int8x16_t test_vqdmladhq_s8(int8x16_t inactive, int8x16_t a, int8x16_t b) { +#ifdef POLYMORPHIC + return vqdmladhq(inactive, a, b); +#else /* POLYMORPHIC */ + return vqdmladhq_s8(inactive, a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqdmladhq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> 
[[INACTIVE:%.*]], <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, i32 0, i32 0) +// CHECK-NEXT:ret <8 x i16> [[TMP0]] +// +int16x8_t test_vqdmladhq_s16(int16x8_t inactive, int16x8_t a, int16x8_t b) { +#ifdef POLYMORPHIC + return vqdmladhq(inactive, a, b); +#else /* POLYMORPHIC */ + return vqdmladhq_s16(inactive, a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqdmladhq_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, i32 0, i32 0) +// CHECK-NEXT:ret <4 x i32> [[TMP0]] +// +int32x4_t test_vqdmladhq_s32(int32x4_t inactive, int32x4_t a, int32x4_t b) { +#ifdef POLYMORPHIC + return vqdmladhq(inactive, a, b); +#else /* POLYMORPHIC */ + return vqdmladhq_s32(inactive, a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqdmladhxq_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqdmlad.v16i8(<16 x i8> [[INACTIVE:%.*]], <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, i32 0, i32 0) +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +int8x16_t test_vqdmladhxq_s8(int8x16_t inactive, int8x16_t a, int8x16_t b) { +#ifdef POLYMORPHIC + return vqdmladhxq(inactive, a, b); +#else /* POLYMORPHIC */ + return vqdmladhxq_s8(inactive, a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqdmladhxq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 x i16> [[INACTIVE:%.*]], <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 0, i32 0) +// CHECK-NEXT:ret <8 x i16> [[TMP0]] +// +int16x8_t test_vqdmladhxq_s16(int16x8_t inactive, int16x8
[clang] cf7e98e - [ARM,MVE] Add intrinsics for vdupq.
Author: Simon Tatham Date: 2020-02-03T11:20:06Z New Revision: cf7e98e6f7805f4e2693a6dbbd12c10fe06fde70 URL: https://github.com/llvm/llvm-project/commit/cf7e98e6f7805f4e2693a6dbbd12c10fe06fde70 DIFF: https://github.com/llvm/llvm-project/commit/cf7e98e6f7805f4e2693a6dbbd12c10fe06fde70.diff LOG: [ARM,MVE] Add intrinsics for vdupq. Summary: The unpredicated case of this is trivial: the clang codegen just makes a vector splat of the input, and LLVM isel is already prepared to handle that. For the predicated version, I've generated a `select` between the same vector splat and the `inactive` input parameter, and added new Tablegen isel rules to match that pattern into a predicated `MVE_VDUP` instruction. Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard Reviewed By: dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D73356 Added: clang/test/CodeGen/arm-mve-intrinsics/dup.c llvm/test/CodeGen/Thumb2/mve-intrinsics/dup.ll Modified: clang/include/clang/Basic/arm_mve.td llvm/lib/Target/ARM/ARMInstrMVE.td Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index ee0ce25bf516..e9ad26a4e88e 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -138,6 +138,15 @@ let params = !listconcat(T.Int16, T.Int32) in { (select $pred, (or $v, (splat (Scalar $imm))), $v)>; } +let params = T.Usual in { + let pnt = PNT_None in +def vdupq_n: Intrinsic:$s), (splat $s)>; + + defm vdupq: IntrinsicMX< + Vector, (args unpromoted:$s, Predicate:$pred), + (select $pred, (splat $s), $inactive), 1, "_n", PNT_NType, PNT_None>; +} + // The bitcasting below is not overcomplicating the IR because while // Vector and UVector may be different vector types at the C level i.e. // vectors of same size signed/unsigned ints. 
Once they're lowered diff --git a/clang/test/CodeGen/arm-mve-intrinsics/dup.c b/clang/test/CodeGen/arm-mve-intrinsics/dup.c new file mode 100644 index ..3bcec9d2549e --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/dup.c @@ -0,0 +1,351 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s + +#include + +// CHECK-LABEL: @test_vdupq_n_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32 +// CHECK-NEXT:[[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 +// CHECK-NEXT:[[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half +// CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, half [[TMP1]], i32 0 +// CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <8 x half> [[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer +// CHECK-NEXT:ret <8 x half> [[DOTSPLAT]] +// +float16x8_t test_vdupq_n_f16(float16_t a) +{ +return vdupq_n_f16(a); +} + +// CHECK-LABEL: @test_vdupq_n_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0 +// CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer +// CHECK-NEXT:ret <4 x float> [[DOTSPLAT]] +// +float32x4_t test_vdupq_n_f32(float32_t a) +{ +return vdupq_n_f32(a); +} + +// CHECK-LABEL: @test_vdupq_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[A:%.*]], i32 0 +// 
CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer +// CHECK-NEXT:ret <16 x i8> [[DOTSPLAT]] +// +int8x16_t test_vdupq_n_s8(int8_t a) +{ +return vdupq_n_s8(a); +} + +// CHECK-LABEL: @test_vdupq_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[A:%.*]], i32 0 +// CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer +// CHECK-NEXT:ret <8 x i16> [[DOTSPLAT]] +// +int16x8_t test_vdupq_n_s16(int16_t a) +{ +return vdupq_n_s16(a); +} + +// CHECK-LABEL: @test_vdupq_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0 +// CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> u
[clang] 90dc78b - [ARM,MVE] Add intrinsics for abs, neg and not operations.
Author: Simon Tatham Date: 2020-02-18T09:34:50Z New Revision: 90dc78bc62784faaa55afb0320cf3c2187d80ac6 URL: https://github.com/llvm/llvm-project/commit/90dc78bc62784faaa55afb0320cf3c2187d80ac6 DIFF: https://github.com/llvm/llvm-project/commit/90dc78bc62784faaa55afb0320cf3c2187d80ac6.diff LOG: [ARM,MVE] Add intrinsics for abs, neg and not operations. Summary: This commit adds the unpredicated intrinsics for the unary operations vabsq (absolute value), vnegq (arithmetic negation), vmvnq (bitwise complement), vqabsq and vqnegq (saturating versions of abs and neg for signed integers, in the sense that they give INT_MAX if an input lane is INT_MIN). This is done entirely in clang: all of these operations have existing isel patterns and existing tests for them on the LLVM side, so I've just made clang emit the same IR that those patterns already match. Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard Reviewed By: MarkMurrayARM Subscribers: kristof.beyls, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D74331 Added: clang/test/CodeGen/arm-mve-intrinsics/absneg.c Modified: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td clang/lib/CodeGen/CGBuiltin.cpp Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 5cd88b07ebaa..dfc0ee87bb2f 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -234,6 +234,33 @@ let params = T.Unsigned in { defm vdwdup: vxdup_mc<(? u32:$limit, imm_1248:$step), (? 
$limit, $step)>; } +let params = T.Int in { + def vmvnq: Intrinsic; +} +let params = T.Signed in { + def vnegq: Intrinsic; + def vabsq: Intrinsic; + def vqnegq: Intrinsic; + def vqabsq: Intrinsic; +} +let params = T.Float in { + def vnegq_f: Intrinsic, + NameOverride<"vnegq">; + def vabsq_f: Intrinsic $a)>, NameOverride<"vabsq">; +} + // The bitcasting below is not overcomplicating the IR because while // Vector and UVector may be different vector types at the C level i.e. // vectors of same size signed/unsigned ints. Once they're lowered diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index d4e821589cfd..2d080f2653aa 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -98,6 +98,9 @@ def extend: CGHelperFn<"SignOrZeroExtend"> { let special_params = [IRBuilderIntParam<2, "bool">]; } def zeroinit: IRFunction<"llvm::Constant::getNullValue">; +def int_min: CGHelperFn<"ARMMVEConstantSplat<1,0>">; +def int_max: CGHelperFn<"ARMMVEConstantSplat<0,1>">; +def uint_max: CGHelperFn<"ARMMVEConstantSplat<1,1>">; def undef: IRFunction<"UndefValue::get">; def icmp_eq: IRBuilder<"CreateICmpEQ">; def icmp_ne: IRBuilder<"CreateICmpNE">; @@ -117,6 +120,7 @@ def fcmp_lt: IRBuilder<"CreateFCmpOLT">; def fcmp_le: IRBuilder<"CreateFCmpOLE">; def splat: CGHelperFn<"ARMMVEVectorSplat">; def select: IRBuilder<"CreateSelect">; +def fneg: IRBuilder<"CreateFNeg">; // A node that makes an Address out of a pointer-typed Value, by // providing an alignment as the second argument. 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 5e411bc7aa93..0081740f7280 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7056,6 +7056,19 @@ static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder, } } +template +static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) { + // MVE-specific helper function to make a vector splat of a constant such as + // UINT_MAX or INT_MIN, in which all bits below the highest one are equal. + llvm::Type *T = VT->getVectorElementType(); + unsigned LaneBits = T->getPrimitiveSizeInBits(); + uint32_t Value = HighBit << (LaneBits - 1); + if (OtherBits) +Value |= (1UL << (LaneBits - 1)) - 1; + llvm::Value *Lane = llvm::ConstantInt::get(T, Value); + return ARMMVEVectorSplat(Builder, Lane); +} + Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, diff --git a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c new file mode 100644 index ..db4253f3590b --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c @@ -0,0 +1,338 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp
[clang] b6236e9 - [ARM,MVE] Add the vrev16q, vrev32q, vrev64q family.
Author: Simon Tatham Date: 2020-02-18T09:34:50Z New Revision: b6236e94799e43fad1f024e84ed56a85d9a3623f URL: https://github.com/llvm/llvm-project/commit/b6236e94799e43fad1f024e84ed56a85d9a3623f DIFF: https://github.com/llvm/llvm-project/commit/b6236e94799e43fad1f024e84ed56a85d9a3623f.diff LOG: [ARM,MVE] Add the vrev16q, vrev32q, vrev64q family. Summary: These intrinsics just reorder the lanes of a vector, so the natural IR representation is as a shufflevector operation. Existing LLVM codegen already recognizes those particular shufflevectors and generates the MVE VREV instruction. This commit adds the unpredicated forms only. Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard Reviewed By: dmgreen Subscribers: kristof.beyls, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D74334 Added: clang/test/CodeGen/arm-mve-intrinsics/vrev.c Modified: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td clang/lib/CodeGen/CGBuiltin.cpp Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index a2bf7afad41e..126c2e2214ae 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -1180,6 +1180,13 @@ defm vrmlsldavh : MVEBinaryVectorHoriz64R; defm vrmlsldavh : MVEBinaryVectorHoriz64R; } +let params = T.All8 in +def vrev16q : Intrinsic; +let params = !listconcat(T.All8, T.All16) in +def vrev32q : Intrinsic; +let params = T.Usual in +def vrev64q : Intrinsic; + foreach desttype = T.All in { // We want a vreinterpretq between every pair of supported vector types // _except_ that there shouldn't be one from a type to itself. 
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index c2e4a4232c23..9f245d0436c4 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -125,6 +125,9 @@ def sitofp: IRBuilder<"CreateSIToFP">; def uitofp: IRBuilder<"CreateUIToFP">; def fptosi: IRBuilder<"CreateFPToSI">; def fptoui: IRBuilder<"CreateFPToUI">; +def vrev: CGHelperFn<"ARMMVEVectorElementReverse"> { + let special_params = [IRBuilderIntParam<1, "unsigned">]; +} // A node that makes an Address out of a pointer-typed Value, by // providing an alignment as the second argument. diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0081740f7280..788f14b37123 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7069,6 +7069,21 @@ static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) { return ARMMVEVectorSplat(Builder, Lane); } +static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder, + llvm::Value *V, + unsigned ReverseWidth) { + // MVE-specific helper function which reverses the elements of a + // vector within every (ReverseWidth)-bit collection of lanes. 
+ SmallVector Indices; + unsigned LaneSize = V->getType()->getScalarSizeInBits(); + unsigned Elements = 128 / LaneSize; + unsigned Mask = ReverseWidth / LaneSize - 1; + for (unsigned i = 0; i < Elements; i++) +Indices.push_back(i ^ Mask); + return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()), + Indices); +} + Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue, diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vrev.c b/clang/test/CodeGen/arm-mve-intrinsics/vrev.c new file mode 100644 index ..384d736d2a6d --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vrev.c @@ -0,0 +1,215 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s + +#include + +// CHECK-LABEL: @test_vrev16q_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <16 x i32> +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +int8x16_t test_vrev16q_s8(int8x16_t a) +{ +#ifdef POLYMORPHIC +return vrev16q(a); +#else /* POLYMORPHIC */ +return vrev16q_s8(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev16q_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = s
[clang] c8b3196 - [ARM,MVE] Add intrinsics for FP rounding operations.
Author: Simon Tatham Date: 2020-02-18T09:34:50Z New Revision: c8b3196e54308b0113d2a0888d13ccc92e3b7ccc URL: https://github.com/llvm/llvm-project/commit/c8b3196e54308b0113d2a0888d13ccc92e3b7ccc DIFF: https://github.com/llvm/llvm-project/commit/c8b3196e54308b0113d2a0888d13ccc92e3b7ccc.diff LOG: [ARM,MVE] Add intrinsics for FP rounding operations. Summary: This adds the unpredicated forms of six different MVE intrinsics which all round a vector of floating-point numbers to integer values, leaving them still in FP format, differing only in rounding mode and exception settings. Five of them map to existing target-independent intrinsics in LLVM IR, such as @llvm.trunc and @llvm.rint. The sixth, mapping to the `vrintn` instruction, is done by inventing a target-specific intrinsic. (`vrintn` behaves the same as `vrintx` in terms of the output value: the side effects on the FPSCR flags are the only difference between the two. But ACLE specifies separate user-callable intrinsics for the two, so the side effects matter enough to make sure we generate the right one of the two instructions in each case.) 
Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard Reviewed By: miyuki Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D74333 Added: clang/test/CodeGen/arm-mve-intrinsics/vrnd.c llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll Modified: clang/include/clang/Basic/arm_mve.td llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 5b20f23c75c7..a2bf7afad41e 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -417,6 +417,21 @@ defm : float_int_conversions; defm : float_int_conversions; defm : float_int_conversions; +let params = T.Float in { + def vrndq: Intrinsic $a)>; + def vrndmq: Intrinsic $a)>; + def vrndpq: Intrinsic $a)>; + def vrndaq: Intrinsic $a)>; + def vrndxq: Intrinsic $a)>; + def vrndnq: Intrinsic $a)>; +} + multiclass compare_with_pred { // Make the predicated and unpredicated versions of a single comparison. 
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c new file mode 100644 index ..a324c36ed838 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c @@ -0,0 +1,173 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s + +#include + +// CHECK-LABEL: @test_vrndaq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <8 x half> @llvm.round.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT:ret <8 x half> [[TMP0]] +// +float16x8_t test_vrndaq_f16(float16x8_t a) +{ +#ifdef POLYMORPHIC +return vrndaq(a); +#else /* POLYMORPHIC */ +return vrndaq_f16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndaq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT:ret <4 x float> [[TMP0]] +// +float32x4_t test_vrndaq_f32(float32x4_t a) +{ +#ifdef POLYMORPHIC +return vrndaq(a); +#else /* POLYMORPHIC */ +return vrndaq_f32(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndmq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <8 x half> @llvm.floor.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT:ret <8 x half> [[TMP0]] +// +float16x8_t test_vrndmq_f16(float16x8_t a) +{ +#ifdef POLYMORPHIC +return vrndmq(a); +#else /* POLYMORPHIC */ +return vrndmq_f16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndmq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[A:%.*]]) +// 
CHECK-NEXT:ret <4 x float> [[TMP0]] +// +float32x4_t test_vrndmq_f32(float32x4_t a) +{ +#ifdef POLYMORPHIC +return vrndmq(a); +#else /* POLYMORPHIC */ +return vrndmq_f32(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndpq_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <8 x half> @llvm.ceil.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT:ret <8 x half> [[TMP0]] +// +float16x8_t test_vrndpq_f16(float16x8_t a) +{ +#ifdef POLYMORPHIC +return vrndpq(a); +#else /* POLYMORPHIC */ +return vrndpq_f16(a); +#endif /* POLYMORPHIC */ +} + +// CH
[clang] df3ed6c - [ARM,MVE] Add intrinsics for int <-> float conversion.
Author: Simon Tatham Date: 2020-02-18T09:34:50Z New Revision: df3ed6c0fe31094941e4cd814cdf924b63993c4e URL: https://github.com/llvm/llvm-project/commit/df3ed6c0fe31094941e4cd814cdf924b63993c4e DIFF: https://github.com/llvm/llvm-project/commit/df3ed6c0fe31094941e4cd814cdf924b63993c4e.diff LOG: [ARM,MVE] Add intrinsics for int <-> float conversion. Summary: This adds the unpredicated versions of the family of vcvtq intrinsics that convert between a vector of floats and a vector of the same size of integer. These are represented in IR using the standard fptosi, fptoui, sitofp and uitofp operations, which existing LLVM codegen already handles. Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard Reviewed By: MarkMurrayARM Subscribers: kristof.beyls, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D74332 Added: Modified: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td clang/test/CodeGen/arm-mve-intrinsics/vcvt.c Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index dfc0ee87bb2f..5b20f23c75c7 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -400,6 +400,23 @@ foreach half = [ "b", "t" ] in { } // params = [f32], pnt = PNT_None } // loop over half = "b", "t" +multiclass float_int_conversions { + defvar FVector = VecOf; + defvar IVector = VecOf; + + let params = [IScalar], pnt = PNT_2Type in +def : Intrinsic, + NameOverride<"vcvtq_" # FScalar>; + let params = [FScalar], pnt = PNT_None in +def : Intrinsic, + NameOverride<"vcvtq_" # IScalar>; +} + +defm : float_int_conversions; +defm : float_int_conversions; +defm : float_int_conversions; +defm : float_int_conversions; + multiclass compare_with_pred { // Make the predicated and unpredicated versions of a single comparison. 
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index 2d080f2653aa..c2e4a4232c23 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -121,6 +121,10 @@ def fcmp_le: IRBuilder<"CreateFCmpOLE">; def splat: CGHelperFn<"ARMMVEVectorSplat">; def select: IRBuilder<"CreateSelect">; def fneg: IRBuilder<"CreateFNeg">; +def sitofp: IRBuilder<"CreateSIToFP">; +def uitofp: IRBuilder<"CreateUIToFP">; +def fptosi: IRBuilder<"CreateFPToSI">; +def fptoui: IRBuilder<"CreateFPToUI">; // A node that makes an Address out of a pointer-typed Value, by // providing an alignment as the second argument. diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c index a1c99de62ebb..3220100d7b89 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c @@ -4,6 +4,102 @@ #include +// CHECK-LABEL: @test_vcvtq_f16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = sitofp <8 x i16> [[A:%.*]] to <8 x half> +// CHECK-NEXT:ret <8 x half> [[TMP0]] +// +float16x8_t test_vcvtq_f16_s16(int16x8_t a) +{ +#ifdef POLYMORPHIC +return vcvtq(a); +#else /* POLYMORPHIC */ +return vcvtq_f16_s16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_f16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = uitofp <8 x i16> [[A:%.*]] to <8 x half> +// CHECK-NEXT:ret <8 x half> [[TMP0]] +// +float16x8_t test_vcvtq_f16_u16(uint16x8_t a) +{ +#ifdef POLYMORPHIC +return vcvtq(a); +#else /* POLYMORPHIC */ +return vcvtq_f16_u16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_f32_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-NEXT:ret <4 x float> [[TMP0]] +// +float32x4_t test_vcvtq_f32_s32(int32x4_t a) +{ +#ifdef POLYMORPHIC +return vcvtq(a); +#else /* POLYMORPHIC */ +return vcvtq_f32_s32(a); +#endif /* POLYMORPHIC */ +} + +// 
CHECK-LABEL: @test_vcvtq_f32_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float> +// CHECK-NEXT:ret <4 x float> [[TMP0]] +// +float32x4_t test_vcvtq_f32_u32(uint32x4_t a) +{ +#ifdef POLYMORPHIC +return vcvtq(a); +#else /* POLYMORPHIC */ +return vcvtq_f32_u32(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_s16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = fptosi <8 x half> [[A:%.*]] to <8 x i16> +// CHECK-NEXT:ret <8 x i16> [[TMP0]] +// +int16x8_t test_vcvtq_s16_f16(float16x8_t a) +{ +return vcvtq_s16_f16(a); +} + +// CHECK-LABEL: @test_vcvtq_s32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = fptosi <4 x float> [[A:%.*]] to <4 x i32> +// CHECK-NEXT:ret <4 x i32> [[TMP0]] +// +int32x4_t test_vcvtq_s32_f32(float32x4_t a) +{ +return vcvtq_s32_f32(a); +} + +// CHECK-LABEL: @test_vcvtq_
[clang] 68b49f7 - [ARM,MVE] Add intrinsics vclzq and vclsq.
Author: Simon Tatham Date: 2020-02-18T09:34:50Z New Revision: 68b49f7ef49eec068b7ddcf86c868e2a193e64e1 URL: https://github.com/llvm/llvm-project/commit/68b49f7ef49eec068b7ddcf86c868e2a193e64e1 DIFF: https://github.com/llvm/llvm-project/commit/68b49f7ef49eec068b7ddcf86c868e2a193e64e1.diff LOG: [ARM,MVE] Add intrinsics vclzq and vclsq. Summary: vclzq maps nicely to the existing target-independent @llvm.ctlz IR intrinsic. But vclsq ('count leading sign bits') has no corresponding target-independent intrinsic, so I've made up @llvm.arm.mve.vcls. This commit adds the unpredicated forms only. Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard Reviewed By: miyuki Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D74335 Added: clang/test/CodeGen/arm-mve-intrinsics/vclz.c llvm/test/CodeGen/Thumb2/mve-intrinsics/vcls.ll Modified: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 126c2e2214ae..21801b4d448e 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -237,8 +237,11 @@ let params = T.Unsigned in { let params = T.Int in { def vmvnq: Intrinsic; + def vclzq: Intrinsic $a, (i1 0))>; } let params = T.Signed in { + def vclsq: Intrinsic $a)>; def vnegq: Intrinsic; def vabsq: Intrinsic { let special_params = [IRBuilderIntParam<1, "unsigned">]; } +// Helper for making boolean flags in IR +def i1: IRBuilderBase { + let prefix = "llvm::ConstantInt::get(Builder.getInt1Ty(), "; + let special_params = [IRBuilderIntParam<0, "bool">]; +} + // A node that makes an Address out of a pointer-typed Value, by // providing an alignment as the second argument. 
def address; diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vclz.c b/clang/test/CodeGen/arm-mve-intrinsics/vclz.c new file mode 100644 index ..7a2ebe0a627a --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vclz.c @@ -0,0 +1,132 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vclzq_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[A:%.*]], i1 false) +// CHECK-NEXT:ret <16 x i8> [[TMP0]] +// +int8x16_t test_vclzq_s8(int8x16_t a) +{ +#ifdef POLYMORPHIC +return vclzq(a); +#else /* POLYMORPHIC */ +return vclzq_s8(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[A:%.*]], i1 false) +// CHECK-NEXT:ret <8 x i16> [[TMP0]] +// +int16x8_t test_vclzq_s16(int16x8_t a) +{ +#ifdef POLYMORPHIC +return vclzq(a); +#else /* POLYMORPHIC */ +return vclzq_s16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[A:%.*]], i1 false) +// CHECK-NEXT:ret <4 x i32> [[TMP0]] +// +int32x4_t test_vclzq_s32(int32x4_t a) +{ +#ifdef POLYMORPHIC +return vclzq(a); +#else /* POLYMORPHIC */ +return vclzq_s32(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> [[A:%.*]], i1 false) +// CHECK-NEXT:ret <16 x i8> 
[[TMP0]] +// +uint8x16_t test_vclzq_u8(uint8x16_t a) +{ +#ifdef POLYMORPHIC +return vclzq(a); +#else /* POLYMORPHIC */ +return vclzq_u8(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> [[A:%.*]], i1 false) +// CHECK-NEXT:ret <8 x i16> [[TMP0]] +// +uint16x8_t test_vclzq_u16(uint16x8_t a) +{ +#ifdef POLYMORPHIC +return vclzq(a); +#else /* POLYMORPHIC */ +return vclzq_u16(a); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[A:%.*]], i1 false) +// CHECK-NEXT:ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vclzq_u32(uint32x4_t a) +{ +#ifdef POLYMORPHIC +return vclzq(a); +#else /* POLYMORPHIC */ +ret
[clang] 5e97940 - [ARM, MVE] Add the vmovlbq, vmovltq intrinsic family.
Author: Simon Tatham Date: 2020-02-18T09:34:50Z New Revision: 5e97940cd27961a0b872ff551fc98135507288b3 URL: https://github.com/llvm/llvm-project/commit/5e97940cd27961a0b872ff551fc98135507288b3 DIFF: https://github.com/llvm/llvm-project/commit/5e97940cd27961a0b872ff551fc98135507288b3.diff LOG: [ARM,MVE] Add the vmovlbq,vmovltq intrinsic family. Summary: These intrinsics take a vector of 2n elements, and return a vector of n wider elements obtained by sign- or zero-extending every other element of the input vector. They're represented in IR as a shufflevector that extracts the odd or even elements of the input, followed by a sext or zext. Existing LLVM codegen already matches this pattern and generates the VMOVLB instruction (which widens the even-index input lanes). But no existing isel rule was generating VMOVLT, so I've added some. However, the new rules currently only work in little-endian MVE, because the pattern they expect from isel lowering includes a bitconvert which doesn't have the right semantics in big-endian. The output of one existing codegen test is improved by those new rules. This commit adds the unpredicated forms only. 
Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard Reviewed By: dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D74336 Added: clang/test/CodeGen/arm-mve-intrinsics/vmovl.c llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll Modified: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td clang/lib/CodeGen/CGBuiltin.cpp llvm/lib/Target/ARM/ARMInstrMVE.td llvm/test/CodeGen/Thumb2/mve-shuffleext.ll Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 21801b4d448e..55ddfc22aa3d 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -420,6 +420,13 @@ defm : float_int_conversions; defm : float_int_conversions; defm : float_int_conversions; +let params = [s8, u8, s16, u16] in { + def vmovlbq: Intrinsic; + def vmovltq: Intrinsic; +} + let params = T.Float in { def vrndq: Intrinsic $a)>; diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index 9e5b7b32c511..f6816cdf45c9 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -128,6 +128,9 @@ def fptoui: IRBuilder<"CreateFPToUI">; def vrev: CGHelperFn<"ARMMVEVectorElementReverse"> { let special_params = [IRBuilderIntParam<1, "unsigned">]; } +def unzip: CGHelperFn<"VectorUnzip"> { + let special_params = [IRBuilderIntParam<1, "bool">]; +} // Helper for making boolean flags in IR def i1: IRBuilderBase { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 788f14b37123..b30a79a0bf10 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7056,6 +7056,17 @@ static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy &Builder, } } +static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) { + // Make a shufflevector that extracts every other element 
of a vector (evens + // or odds, as desired). + SmallVector Indices; + unsigned InputElements = V->getType()->getVectorNumElements(); + for (unsigned i = 0; i < InputElements; i += 2) +Indices.push_back(i + Odd); + return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()), + Indices); +} + template static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) { // MVE-specific helper function to make a vector splat of a constant such as diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c b/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c new file mode 100644 index ..0b8ef596faed --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c @@ -0,0 +1,126 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vmovlbq_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> undef, <8 x i32> +// CHECK-NEXT:[[TMP1:%.*]] = sext <8 x i8> [[TMP0]] to <8 x i16> +// CHECK-NEXT:ret <8 x i16> [[TMP1]] +// +int16x8_t test_vmovlbq_s8(int8x16_t a) +{ +#ifdef POLYMORPHIC +return vmovlbq(a); +#else /* P
[clang] c32af44 - [ARM, MVE] Add the vmovnbq, vmovntq intrinsic family.
Author: Simon Tatham Date: 2020-02-18T09:34:50Z New Revision: c32af4447f79f5e7f246917fe1c3f58b2f6fc2a6 URL: https://github.com/llvm/llvm-project/commit/c32af4447f79f5e7f246917fe1c3f58b2f6fc2a6 DIFF: https://github.com/llvm/llvm-project/commit/c32af4447f79f5e7f246917fe1c3f58b2f6fc2a6.diff LOG: [ARM,MVE] Add the vmovnbq,vmovntq intrinsic family. Summary: These are in some sense the inverse of vmovl[bt]q: they take a vector of n wide elements and truncate each to half its width. So they only write half a vector's worth of output data, and therefore they also take an 'inactive' parameter to provide the other half of the data in the output vector. So vmovnb overwrites the even lanes of 'inactive' with the narrowed values from the main input, and vmovnt overwrites the odd lanes. LLVM had existing codegen which generates these MVE instructions in response to IR that takes two vectors of wide elements, or two vectors of narrow ones. But in this case, we have one vector of each. So my clang codegen strategy is to narrow the input vector of wide elements by simply reinterpreting it as the output type, and then we have two narrow vectors and can represent the operation as a vector shuffle that interleaves lanes from both of them. Even so, not all the cases I needed ended up being selected as a single MVE instruction, so I've added a couple more patterns that spot combinations of the 'MVEvmovn' and 'ARMvrev32' SDNodes which can be generated as a VMOVN instruction with operands swapped. This commit adds the unpredicated forms only. 
Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard Reviewed By: dmgreen Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D74337 Added: clang/test/CodeGen/arm-mve-intrinsics/vmovn.c llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovn.ll Modified: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td clang/lib/CodeGen/CGBuiltin.cpp clang/utils/TableGen/MveEmitter.cpp llvm/lib/Target/ARM/ARMInstrMVE.td Removed: diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 55ddfc22aa3d..3a6b63199e39 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -427,6 +427,14 @@ let params = [s8, u8, s16, u16] in { (extend (unzip $a, 1), DblVector, (unsignedflag Scalar))>; } +let params = [s16, u16, s32, u32] in { + def vmovnbq: Intrinsic; + def vmovntq: Intrinsic; +} + let params = T.Float in { def vrndq: Intrinsic $a)>; diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index f6816cdf45c9..7f8f717e8163 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -131,6 +131,7 @@ def vrev: CGHelperFn<"ARMMVEVectorElementReverse"> { def unzip: CGHelperFn<"VectorUnzip"> { let special_params = [IRBuilderIntParam<1, "bool">]; } +def zip: CGHelperFn<"VectorZip">; // Helper for making boolean flags in IR def i1: IRBuilderBase { @@ -187,6 +188,10 @@ def seq; // and 0 for a signed (or floating) one. def unsignedflag; +// 'bitsize' also takes a scalar type, and expands into an integer +// constant giving its size in bits. +def bitsize; + // If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it // indicates that the IR generation for that intrinsic is done by handwritten // C++ and not autogenerated at all. 
The effect in the MVE builtin codegen diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index b30a79a0bf10..401c4d8e0539 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7067,6 +7067,19 @@ static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool Odd) Indices); } +static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0, + llvm::Value *V1) { + // Make a shufflevector that interleaves two vectors element by element. + assert(V0->getType() == V1->getType() && "Can't zip different vector types"); + SmallVector Indices; + unsigned InputElements = V0->getType()->getVectorNumElements(); + for (unsigned i = 0; i < InputElements; i++) { +Indices.push_back(i); +Indices.push_back(i + InputElements); + } + return Builder.CreateShuffleVector(V0, V1, Indices); +} + template static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) { // MVE-specific helper function to make a vector splat of a constant such as diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c b/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c new file mode 100644 index ..5d157de0feb8 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c @@ -0,0 +1,199 @@ +// NOTE: Assert
[clang] 98ea4b3 - [ARM,MVE] Make the MVE intrinsics work in C++!
Author: Simon Tatham Date: 2020-01-23T14:10:27Z New Revision: 98ea4b30c2c4e122defce039e29f7023aa2663e7 URL: https://github.com/llvm/llvm-project/commit/98ea4b30c2c4e122defce039e29f7023aa2663e7 DIFF: https://github.com/llvm/llvm-project/commit/98ea4b30c2c4e122defce039e29f7023aa2663e7.diff LOG: [ARM,MVE] Make the MVE intrinsics work in C++! Summary: Apparently nobody has tried this in months of development. It turns out that `FunctionDecl::getBuiltinID` will never consider a function to be a builtin if it is in C++ and not extern "C". So none of the function declarations in `<arm_mve.h>` are recognized as builtins when clang is compiling in C++ mode: it just emits calls to them as ordinary functions, which then turn out not to exist at link time. The trivial fix is to wrap most of arm_mve.h in an extern "C". Added a test in clang/test/CodeGen/arm-mve-intrinsics which checks basic functioning of the MVE header file in C++ mode. I've filled it with copies of existing test functions from other files in that directory, including a few moderately tricky cases of overloading (in particular one that relies on the strict-polymorphism attribute added in D72518). (I considered making //every// test in that directory compile in both C and C++ mode and check the code generation was identical. But I think that would increase testing time by more than the value it adds, and also update_cc_test_checks gets confused when the output function name varies between RUN lines.) 
Reviewers: LukeGeeson, MarkMurrayARM, miyuki, dmgreen Reviewed By: MarkMurrayARM Subscribers: kristof.beyls, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D73268 Added: clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp Modified: clang/utils/TableGen/MveEmitter.cpp Removed: diff --git a/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp b/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp new file mode 100644 index ..47df53839d15 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp @@ -0,0 +1,160 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @_Z16test_vbicq_n_s1617__simd128_int16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = and <8 x i16> [[A:%.*]], +// CHECK-NEXT:ret <8 x i16> [[TMP0]] +// +int16x8_t test_vbicq_n_s16(int16x8_t a) +{ +#ifdef POLYMORPHIC +return vbicq(a, 0xd500); +#else /* POLYMORPHIC */ +return vbicq_n_s16(a, 0xd500); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @_Z16test_vbicq_n_u3218__simd128_uint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = and <4 x i32> [[A:%.*]], +// CHECK-NEXT:ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vbicq_n_u32(uint32x4_t a) +{ +#ifdef POLYMORPHIC +return vbicq(a, 0x2000); +#else /* POLYMORPHIC */ +return vbicq_n_u32(a, 0x2000); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @_Z16test_vorrq_n_s3217__simd128_int32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = or <4 x i32> [[A:%.*]], +// CHECK-NEXT:ret <4 x i32> [[TMP0]] +// +int32x4_t 
test_vorrq_n_s32(int32x4_t a) +{ +#ifdef POLYMORPHIC +return vorrq(a, 0x1); +#else /* POLYMORPHIC */ +return vorrq_n_s32(a, 0x1); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @_Z16test_vorrq_n_u1618__simd128_uint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = or <8 x i16> [[A:%.*]], +// CHECK-NEXT:ret <8 x i16> [[TMP0]] +// +uint16x8_t test_vorrq_n_u16(uint16x8_t a) +{ +#ifdef POLYMORPHIC +return vorrq(a, 0xf000); +#else /* POLYMORPHIC */ +return vorrq_n_u16(a, 0xf000); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @_Z16test_vcmpeqq_f1619__simd128_float16_tS_( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[B:%.*]] +// CHECK-NEXT:[[TMP1:%.*]] = tail call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 x i1> [[TMP0]]), !range !3 +// CHECK-NEXT:[[TMP2:%.*]] = trunc i32 [[TMP1]] to i16 +// CHECK-NEXT:ret i16 [[TMP2]] +// +mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b) +{ +#ifdef POLYMORPHIC +return vcmpeqq(a, b); +#else /* POLYMORPHIC */ +return vcmpeqq_f16(a, b); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @_Z18test_vcmpeqq_n_f1619__simd128_float16_tDh( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32 +// CHECK-NEXT:[[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 +
[clang] fe0d1b6 - [Clang] Warn about 'z' printf modifier in old MSVC.
Author: Simon Tatham Date: 2020-01-28T09:04:45Z New Revision: fe0d1b6a8ac5048b8007e5e7cc2aeb4e3291bda0 URL: https://github.com/llvm/llvm-project/commit/fe0d1b6a8ac5048b8007e5e7cc2aeb4e3291bda0 DIFF: https://github.com/llvm/llvm-project/commit/fe0d1b6a8ac5048b8007e5e7cc2aeb4e3291bda0.diff LOG: [Clang] Warn about 'z' printf modifier in old MSVC. Summary: The 'z' length modifier, signalling that an integer format specifier takes a `size_t` sized integer, is only supported by the C library of MSVC 2015 and later. Earlier versions don't recognize the 'z' at all, and respond to `printf("%zu", x)` by just printing "zu". So, if the MS compatibility version is set to a value earlier than MSVC2015, it's useful to warn about 'z' modifiers in printf format strings we check. Reviewers: aaron.ballman, lebedev.ri, rnk, majnemer, zturner Reviewed By: aaron.ballman Subscribers: amccarth, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D73457 Added: Modified: clang/lib/AST/FormatString.cpp clang/test/Sema/format-strings-ms.c Removed: diff --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp index fcc0b3b11e25..2ca8fee67bf0 100644 --- a/clang/lib/AST/FormatString.cpp +++ b/clang/lib/AST/FormatString.cpp @@ -748,6 +748,15 @@ bool FormatSpecifier::hasValidLengthModifier(const TargetInfo &Target, case LengthModifier::AsIntMax: case LengthModifier::AsSizeT: case LengthModifier::AsPtrDiff: + if (LM.getKind() == LengthModifier::AsSizeT && + Target.getTriple().isOSMSVCRT() && + !LO.isCompatibleWithMSVC(LangOptions::MSVC2015)) { +// The standard libraries before MSVC2015 didn't support the 'z' length +// modifier for size_t. So if the MS compatibility version is less than +// that, reject. 
+return false; + } + switch (CS.getKind()) { case ConversionSpecifier::dArg: case ConversionSpecifier::DArg: diff --git a/clang/test/Sema/format-strings-ms.c b/clang/test/Sema/format-strings-ms.c index 56a349051d42..c4d3e5664db0 100644 --- a/clang/test/Sema/format-strings-ms.c +++ b/clang/test/Sema/format-strings-ms.c @@ -1,4 +1,6 @@ // RUN: %clang_cc1 -fsyntax-only -verify -fms-compatibility -triple=i386-pc-win32 %s +// RUN: %clang_cc1 -fsyntax-only -verify -fms-compatibility -triple=i386-pc-win32 -fms-compatibility-version=18 %s +// RUN: %clang_cc1 -fsyntax-only -verify -fms-compatibility -triple=i386-pc-win32 -fms-compatibility-version=19 -DSIZE_T_OK %s // RUN: %clang_cc1 -fsyntax-only -verify -fms-compatibility -triple=i386-pc-win32 -Wformat-non-iso -DNON_ISO_WARNING %s int printf(const char *format, ...) __attribute__((format(printf, 1, 2))); @@ -85,4 +87,11 @@ void z_test(void *p) { scanf("%Z", p); // expected-warning{{invalid conversion specifier 'Z'}} } +void size_t_test(size_t s) { + printf("%zu", s); +#ifndef SIZE_T_OK + // expected-warning@-2 {{length modifier 'z' results in undefined behavior or no effect with 'u' conversion specifier}} +#endif +} + #endif ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 5fba4c4 - [AArch64] Don't #define __ARM_FP when there's no FPU.
Author: Simon Tatham Date: 2023-03-13T16:43:25Z New Revision: 5fba4c4d08bdb38d0df2fd43afa4bec4f3809b66 URL: https://github.com/llvm/llvm-project/commit/5fba4c4d08bdb38d0df2fd43afa4bec4f3809b66 DIFF: https://github.com/llvm/llvm-project/commit/5fba4c4d08bdb38d0df2fd43afa4bec4f3809b66.diff LOG: [AArch64] Don't #define __ARM_FP when there's no FPU. On some R-profile CPUs, leaving out the FPU is an option. Clang will accept `-march=armv8-r+nofp`, but it's currently not possible to find out via the preprocessor whether it's in that mode (e.g. to change or disable inline asm statements in your code). The __ARM_FP macro, which has a bit set for each size of floating point number supported by the hardware, is the natural thing to test. But Clang was defining it unconditionally on AArch64. Now it checks for FP support before defining it at all. Reviewed By: tmatheson, DavidSpickett Differential Revision: https://reviews.llvm.org/D145781 Added: Modified: clang/lib/Basic/Targets/AArch64.cpp clang/lib/Basic/Targets/AArch64.h clang/test/Preprocessor/aarch64-target-features.c Removed: diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 7f331004348f1..b274dd2672268 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -373,7 +373,8 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts, Builder.defineMacro("__ARM_ALIGN_MAX_STACK_PWR", "4"); // 0xe implies support for half, single and double precision operations. - Builder.defineMacro("__ARM_FP", "0xE"); + if (FPU & FPUMode) +Builder.defineMacro("__ARM_FP", "0xE"); // PCS specifies this for SysV variants, which is all we support. Other ABIs // may choose __ARM_FP16_FORMAT_ALTERNATIVE. 
@@ -709,6 +710,8 @@ void AArch64TargetInfo::setFeatureEnabled(llvm::StringMap &Features, bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, DiagnosticsEngine &Diags) { for (const auto &Feature : Features) { +if (Feature == "-fp-armv8") + HasNoFP = true; if (Feature == "-neon") HasNoNeon = true; if (Feature == "-sve") @@ -937,6 +940,11 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, setDataLayout(); setArchFeatures(); + if (HasNoFP) { +FPU &= ~FPUMode; +FPU &= ~NeonMode; +FPU &= ~SveMode; + } if (HasNoNeon) { FPU &= ~NeonMode; FPU &= ~SveMode; diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index ee2c179d7c3df..f6e12176a77ec 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -26,7 +26,11 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { static const TargetInfo::GCCRegAlias GCCRegAliases[]; static const char *const GCCRegNames[]; - enum FPUModeEnum { FPUMode, NeonMode = (1 << 0), SveMode = (1 << 1) }; + enum FPUModeEnum { +FPUMode = (1 << 0), +NeonMode = (1 << 1), +SveMode = (1 << 2), + }; unsigned FPU = FPUMode; bool HasCRC = false; @@ -73,6 +77,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { bool HasWFxT = false; bool HasJSCVT = false; bool HasFCMA = false; + bool HasNoFP = false; bool HasNoNeon = false; bool HasNoSVE = false; bool HasFMV = true; diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c index 2a2f7efe34130..09f464466a56c 100644 --- a/clang/test/Preprocessor/aarch64-target-features.c +++ b/clang/test/Preprocessor/aarch64-target-features.c @@ -341,6 +341,10 @@ // CHECK-MARCH-2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "-fp-armv8" "-target-feature" "-neon" "-target-feature" "-crc" "-target-feature" "-crypto" // CHECK-MARCH-3: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "-neon" +// While we're checking 
+nofp, also make sure it stops defining __ARM_FP +// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-r+nofp -x c -E -dM %s -o - | FileCheck -check-prefix=CHECK-NOFP %s +// CHECK-NOFP-NOT: #define __ARM_FP{{ }} + // Check +sm4: // // RUN: %clang -target aarch64 -march=armv8.2a+sm4 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-SM4 %s ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] 20d6dee - -fsanitize=function: fix alignment fault on Arm targets.
Author: Simon Tatham Date: 2023-05-25T09:22:45+01:00 New Revision: 20d6dee40d507d467d3312d5e7dfdf088f106d31 URL: https://github.com/llvm/llvm-project/commit/20d6dee40d507d467d3312d5e7dfdf088f106d31 DIFF: https://github.com/llvm/llvm-project/commit/20d6dee40d507d467d3312d5e7dfdf088f106d31.diff LOG: -fsanitize=function: fix alignment fault on Arm targets. Function pointers are checked by loading a prefix structure from just before the function's entry point. However, on Arm, the function pointer is not always exactly equal to the address of the entry point, because Thumb function pointers have the low bit set to tell the BX instruction to enter them in Thumb state. So the generated code loads from an odd address and suffers an alignment fault. Fixed by clearing the low bit of the function pointer before subtracting 8. Differential Revision: https://reviews.llvm.org/D151308 Added: Modified: clang/lib/CodeGen/CGExpr.cpp clang/test/CodeGen/ubsan-function.cpp Removed: diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 2c219d6e8411..c074732df2a7 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5364,8 +5364,30 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, const CGCallee &OrigCallee llvm::Value *CalleePtr = Callee.getFunctionPointer(); + // On 32-bit Arm, the low bit of a function pointer indicates whether + // it's using the Arm or Thumb instruction set. The actual first + // instruction lives at the same address either way, so we must clear + // that low bit before using the function address to find the prefix + // structure. + // + // This applies to both Arm and Thumb target triples, because + // either one could be used in an interworking context where it + // might be passed function pointers of both types. 
+ llvm::Value *AlignedCalleePtr; + if (CGM.getTriple().isARM() || CGM.getTriple().isThumb()) { +llvm::Value *CalleeAddress = +Builder.CreatePtrToInt(CalleePtr, IntPtrTy); +llvm::Value *Mask = llvm::ConstantInt::get(IntPtrTy, ~1); +llvm::Value *AlignedCalleeAddress = +Builder.CreateAnd(CalleeAddress, Mask); +AlignedCalleePtr = +Builder.CreateIntToPtr(AlignedCalleeAddress, CalleePtr->getType()); + } else { +AlignedCalleePtr = CalleePtr; + } + llvm::Value *CalleePrefixStruct = Builder.CreateBitCast( - CalleePtr, llvm::PointerType::getUnqual(PrefixStructTy)); + AlignedCalleePtr, llvm::PointerType::getUnqual(PrefixStructTy)); llvm::Value *CalleeSigPtr = Builder.CreateConstGEP2_32(PrefixStructTy, CalleePrefixStruct, -1, 0); llvm::Value *CalleeSig = diff --git a/clang/test/CodeGen/ubsan-function.cpp b/clang/test/CodeGen/ubsan-function.cpp index fc9f60f5b205..ba55ee021cc9 100644 --- a/clang/test/CodeGen/ubsan-function.cpp +++ b/clang/test/CodeGen/ubsan-function.cpp @@ -1,11 +1,15 @@ -// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all | FileCheck %s -// RUN: %clang_cc1 -triple aarch64_be-linux-gnu -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all | FileCheck %s --check-prefixes=CHECK,64 +// RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all | FileCheck %s --check-prefixes=CHECK,64 +// RUN: %clang_cc1 -triple aarch64_be-linux-gnu -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all | FileCheck %s --check-prefixes=CHECK,64 +// RUN: %clang_cc1 -triple arm-none-eabi -emit-llvm -o - %s -fsanitize=function -fno-sanitize-recover=all | FileCheck %s --check-prefixes=CHECK,ARM,32 // CHECK: 
define{{.*}} void @_Z3funv() #0 !func_sanitize ![[FUNCSAN:.*]] { void fun() {} // CHECK-LABEL: define{{.*}} void @_Z6callerPFvvE(ptr noundef %f) +// ARM: ptrtoint ptr {{.*}} to i32, !nosanitize !5 +// ARM: and i32 {{.*}}, -2, !nosanitize !5 +// ARM: inttoptr i32 {{.*}} to ptr, !nosanitize !5 // CHECK: getelementptr <{ i32, i32 }>, ptr {{.*}}, i32 -1, i32 0, !nosanitize // CHECK: load i32, ptr {{.*}}, align {{.*}}, !nosanitize // CHECK: icmp eq i32 {{.*}}, -1056584962, !nosanitize @@ -16,7 +20,8 @@ void fun() {} // CHECK: icmp eq i32 {{.*}}, -1522505972, !nosanitize // CHECK: br i1 {{.*}}, label %[[LABEL3:.*]], label %[[LABEL2:[^,]*]], {{.*}}!nosanitize // CHECK: [[LABEL2]]: -// CHECK: call void @__ubsan_handle_function_type_mismatch_abort(ptr @[[#]], i64 %[[#]]) #[[#]], !nosanitize +// 64:call void @__ubsan_handle_function_type_misma
[clang] 10e4228 - [ARM,AArch64] Add a full set of -mtp= options.
Author: Simon Tatham Date: 2023-06-15T09:27:41+01:00 New Revision: 10e42281144ecca019764b554f3f0f709bba0f71 URL: https://github.com/llvm/llvm-project/commit/10e42281144ecca019764b554f3f0f709bba0f71 DIFF: https://github.com/llvm/llvm-project/commit/10e42281144ecca019764b554f3f0f709bba0f71.diff LOG: [ARM,AArch64] Add a full set of -mtp= options. AArch64 has five system registers intended to be useful as thread pointers: one for each exception level which is RW at that level and inaccessible to lower ones, and the special TPIDRRO_EL0 which is readable but not writable at EL0. AArch32 has three, corresponding to the AArch64 ones that aren't specific to EL2 or EL3. Currently clang supports only a subset of these registers, and not even a consistent subset between AArch64 and AArch32: - For AArch64, clang permits you to choose between the four TPIDR_ELn thread registers, but not the fifth one, TPIDRRO_EL0. - In AArch32, on the other hand, the //only// thread register you can choose (apart from 'none, use a function call') is TPIDRURO, which corresponds to (the bottom 32 bits of) AArch64's TPIDRRO_EL0. So there is no thread register that you can currently use in both targets! For custom and bare-metal purposes, users might very reasonably want to use any of these thread registers. There's no reason they shouldn't all be supported as options, even if the default choices follow existing practice on typical operating systems. This commit extends the range of values acceptable to the `-mtp=` clang option, so that you can specify any of these registers by (the lower-case version of) their official names in the ArmARM: - For AArch64: tpidr_el0, tpidrro_el0, tpidr_el1, tpidr_el2, tpidr_el3 - For AArch32: tpidrurw, tpidruro, tpidrprw All existing values of the option are still supported and behave the same as before. Defaults are also unchanged. No command line that worked already should change behaviour as a result of this. 
The new values for the `-mtp=` option have been agreed with Arm's gcc developers (although I don't know whether they plan to implement them in the near future). Reviewed By: nickdesaulniers Differential Revision: https://reviews.llvm.org/D152433 Added: clang/test/Driver/aarch64-thread-pointer.c clang/test/Driver/arm-thread-pointer.c Modified: clang/include/clang/Driver/Options.td clang/lib/Driver/ToolChains/Arch/AArch64.cpp clang/lib/Driver/ToolChains/Arch/ARM.cpp clang/lib/Driver/ToolChains/Arch/ARM.h clang/lib/Driver/ToolChains/Clang.cpp clang/test/Driver/clang-translation.c llvm/lib/Target/AArch64/AArch64.td llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp llvm/lib/Target/ARM/ARM.td llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp llvm/lib/Target/ARM/ARMInstrInfo.td llvm/lib/Target/ARM/ARMInstrThumb2.td llvm/lib/Target/ARM/ARMPredicates.td llvm/lib/Target/ARM/ARMSubtarget.h llvm/test/CodeGen/AArch64/arm64-builtins-linux.ll llvm/test/CodeGen/ARM/readtp.ll llvm/test/CodeGen/ARM/stack-guard-tls.ll llvm/test/CodeGen/ARM/thread_pointer.ll Removed: diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 750b6ab343852..06f02a05b7f13 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3595,8 +3595,10 @@ def mexecute_only : Flag<["-"], "mexecute-only">, Group, def mno_execute_only : Flag<["-"], "mno-execute-only">, Group, HelpText<"Allow generation of data access to code sections (ARM only)">; let Flags = [TargetSpecific] in { -def mtp_mode_EQ : Joined<["-"], "mtp=">, Group, Values<"soft,cp15,el0,el1,el2,el3">, - HelpText<"Thread pointer access method (AArch32/AArch64 only)">; +def mtp_mode_EQ : Joined<["-"], "mtp=">, Group, Values<"soft,cp15,tpidrurw,tpidruro,tpidrprw,el0,el1,el2,el3,tpidr_el0,tpidr_el1,tpidr_el2,tpidr_el3,tpidrro_el0">, + HelpText<"Thread pointer access method. 
" + "For AArch32: 'soft' uses a function call, or 'tpidrurw', 'tpidruro' or 'tpidrprw' use the three CP15 registers. 'cp15' is an alias for 'tpidruro'. " + "For AArch64: 'tpidr_el0', 'tpidr_el1', 'tpidr_el2', 'tpidr_el3' or 'tpidrro_el0' use the five system registers. 'elN' is an alias for 'tpidr_elN'.">; def mpure_code : Flag<["-"], "mpure-code">, Alias; // Alias for GCC compatibility def mno_pure_code : Flag<["-"], "mno-pure-code">, Alias; def mtvos_version_min_EQ : Joined<["-"], "mtvos-version-min=">, Group; diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index f3bc00188c784..3547031635795 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -291,13 +291,15 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, if (Arg *A = Args.getLastArg(options::OPT_mtp_mode_EQ)) { String
[clang] [Clang][Driver] Skip empty strings in getAArch64MultilibFlags (PR #97827)
https://github.com/statham-arm created https://github.com/llvm/llvm-project/pull/97827 In a multilib setting, if you compile with a command line such as `clang --target=aarch64-none-elf -march=armv8.9-a+rcpc3`, `getAArch64MultilibFlags` returns an ill-formed string containing two consecutive `+` signs, of the form `...+rcpc++rcpc3+...`, causing later stages of multilib selection to get confused. The `++` arises from the entry in `AArch64::Extensions` for the SubtargetFeature `rcpc-immo`, which is a dependency of the `rcpc3` SubtargetFeature, but doesn't have an _extension_ name for the purposes of the `-march=foo+bar` option. So its `UserVisibleName` field is the empty string. To fix this, I've excluded extensions from consideration in `getAArch64MultilibFlags` if they have an empty `UserVisibleName`. Since the input to this function is not derived from a completely general set of SubtargetFeatures, but from a set that has only just been converted _from_ a clang driver command line, the only extensions skipped by this check should be cases like this one, where the anonymous extension was only included because it was a dependency of one mentioned explicitly. I've also made the analogous change in `getARMMultilibFlags`. I don't think it's necessary right now, because the architecture extensions for ARM (defined in `ARMTargetParser.def` rather than Tablegen) don't include any anonymous ones. But it seems sensible to add the check anyway, in case future refactoring introduces anonymous array elements in the same way that AArch64 did, and also in case someone writes a function for another platform by using either of these as example code. 
>From 8b39cbbdbe3646062dd1cdb60eab18339f9ca490 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Fri, 5 Jul 2024 11:57:19 +0100 Subject: [PATCH] [Clang][Driver] Skip empty strings in getAArch64MultilibFlags In a multilib setting, if you compile with a command line such as `clang --target=aarch64-none-elf -march=armv8.9-a+rcpc3`, `getAArch64MultilibFlags` returns an ill-formed string containing two consecutive `+` signs, of the form `...+rcpc++rcpc3+...`, causing later stages of multilib selection to get confused. The `++` arises from the entry in `AArch64::Extensions` for the SubtargetFeature `rcpc-immo`, which is a dependency of the `rcpc3` SubtargetFeature, but doesn't have an _extension_ name for the purposes of the `-march=foo+bar` option. So its `UserVisibleName` field is the empty string. To fix this, I've excluded extensions from consideration in `getAArch64MultilibFlags` if they have an empty `UserVisibleName`. Since the input to this function is not derived from a completely general set of SubtargetFeatures, but from a set that has only just been converted _from_ a clang driver command line, the only extensions skipped by this check should be cases like this one, where the anonymous extension was only included because it was a dependency of one mentioned explicitly. I've also made the analogous change in `getARMMultilibFlags`. I don't think it's necessary right now, because the architecture extensions for ARM (defined in `ARMTargetParser.def` rather than Tablegen) don't include any anonymous ones. But it seems sensible to add the check anyway, in case future refactoring introduces anonymous array elements in the same way that AArch64 did, and also in case someone writes a function for another platform by using either of these as example code. 
--- clang/lib/Driver/ToolChain.cpp | 20 clang/test/Driver/aarch64-multilib-rcpc3.c | 4 2 files changed, 16 insertions(+), 8 deletions(-) create mode 100644 clang/test/Driver/aarch64-multilib-rcpc3.c diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 977e08390800d..9ac428caab3b9 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -195,11 +195,13 @@ static void getAArch64MultilibFlags(const Driver &D, UnifiedFeatures.end()); std::vector MArch; for (const auto &Ext : AArch64::Extensions) -if (FeatureSet.contains(Ext.PosTargetFeature)) - MArch.push_back(Ext.UserVisibleName.str()); +if (!Ext.UserVisibleName.empty()) + if (FeatureSet.contains(Ext.PosTargetFeature)) +MArch.push_back(Ext.UserVisibleName.str()); for (const auto &Ext : AArch64::Extensions) -if (FeatureSet.contains(Ext.NegTargetFeature)) - MArch.push_back(("no" + Ext.UserVisibleName).str()); +if (!Ext.UserVisibleName.empty()) + if (FeatureSet.contains(Ext.NegTargetFeature)) +MArch.push_back(("no" + Ext.UserVisibleName).str()); StringRef ArchName; for (const auto &ArchInfo : AArch64::ArchInfos) if (FeatureSet.contains(ArchInfo->ArchFeature)) @@ -221,11 +223,13 @@ static void getARMMultilibFlags(const Driver &D, UnifiedFeatures.end()); std::vector MArch; for (const auto &Ext : ARM::AR
[clang] [Clang][Driver] Skip empty strings in getAArch64MultilibFlags (PR #97827)
https://github.com/statham-arm updated https://github.com/llvm/llvm-project/pull/97827 >From 81d77bf87dd47684683492ab70cc45ab6eb4364e Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Fri, 5 Jul 2024 11:57:19 +0100 Subject: [PATCH] [Clang][Driver] Skip empty strings in getAArch64MultilibFlags In a multilib setting, if you compile with a command line such as `clang --target=aarch64-none-elf -march=armv8.9-a+rcpc3`, `getAArch64MultilibFlags` returns an ill-formed string containing two consecutive `+` signs, of the form `...+rcpc++rcpc3+...`, causing later stages of multilib selection to get confused. The `++` arises from the entry in `AArch64::Extensions` for the SubtargetFeature `rcpc-immo`, which is a dependency of the `rcpc3` SubtargetFeature, but doesn't have an _extension_ name for the purposes of the `-march=foo+bar` option. So its `UserVisibleName` field is the empty string. To fix this, I've excluded extensions from consideration in `getAArch64MultilibFlags` if they have an empty `UserVisibleName`. Since the input to this function is not derived from a completely general set of SubtargetFeatures, but from a set that has only just been converted _from_ a clang driver command line, the only extensions skipped by this check should be cases like this one, where the anonymous extension was only included because it was a dependency of one mentioned explicitly. I've also made the analogous change in `getARMMultilibFlags`. I don't think it's necessary right now, because the architecture extensions for ARM (defined in `ARMTargetParser.def` rather than Tablegen) don't include any anonymous ones. But it seems sensible to add the check anyway, in case future refactoring introduces anonymous array elements in the same way that AArch64 did, and also in case someone writes a function for another platform by using either of these as example code. 
--- clang/lib/Driver/ToolChain.cpp | 20 clang/test/Driver/aarch64-multilib-rcpc3.c | 4 2 files changed, 16 insertions(+), 8 deletions(-) create mode 100644 clang/test/Driver/aarch64-multilib-rcpc3.c diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 977e08390800d7..85ae4d2a26fee2 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -195,11 +195,13 @@ static void getAArch64MultilibFlags(const Driver &D, UnifiedFeatures.end()); std::vector MArch; for (const auto &Ext : AArch64::Extensions) -if (FeatureSet.contains(Ext.PosTargetFeature)) - MArch.push_back(Ext.UserVisibleName.str()); +if (!Ext.UserVisibleName.empty()) + if (FeatureSet.contains(Ext.PosTargetFeature)) +MArch.push_back(Ext.UserVisibleName.str()); for (const auto &Ext : AArch64::Extensions) -if (FeatureSet.contains(Ext.NegTargetFeature)) - MArch.push_back(("no" + Ext.UserVisibleName).str()); +if (!Ext.UserVisibleName.empty()) + if (FeatureSet.contains(Ext.NegTargetFeature)) +MArch.push_back(("no" + Ext.UserVisibleName).str()); StringRef ArchName; for (const auto &ArchInfo : AArch64::ArchInfos) if (FeatureSet.contains(ArchInfo->ArchFeature)) @@ -221,11 +223,13 @@ static void getARMMultilibFlags(const Driver &D, UnifiedFeatures.end()); std::vector MArch; for (const auto &Ext : ARM::ARCHExtNames) -if (FeatureSet.contains(Ext.Feature)) - MArch.push_back(Ext.Name.str()); +if (!Ext.Name.empty()) + if (FeatureSet.contains(Ext.Feature)) +MArch.push_back(Ext.Name.str()); for (const auto &Ext : ARM::ARCHExtNames) -if (FeatureSet.contains(Ext.NegFeature)) - MArch.push_back(("no" + Ext.Name).str()); +if (!Ext.Name.empty()) + if (FeatureSet.contains(Ext.NegFeature)) +MArch.push_back(("no" + Ext.Name).str()); MArch.insert(MArch.begin(), ("-march=" + Triple.getArchName()).str()); Result.push_back(llvm::join(MArch, "+")); diff --git a/clang/test/Driver/aarch64-multilib-rcpc3.c b/clang/test/Driver/aarch64-multilib-rcpc3.c new file mode 100644 index 
00..b839079e0442d6 --- /dev/null +++ b/clang/test/Driver/aarch64-multilib-rcpc3.c @@ -0,0 +1,4 @@ +// RUN: %clang --target=aarch64-none-elf -march=armv8.9-a+rcpc3 -print-multi-flags-experimental -c %s 2>&1 | FileCheck %s + +// CHECK: -march=armv8.9-a +// CHECK-SAME: +rcpc+rcpc3+ ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][Driver] Skip empty strings in getAArch64MultilibFlags (PR #97827)
https://github.com/statham-arm updated https://github.com/llvm/llvm-project/pull/97827 >From 81d77bf87dd47684683492ab70cc45ab6eb4364e Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Fri, 5 Jul 2024 11:57:19 +0100 Subject: [PATCH 1/2] [Clang][Driver] Skip empty strings in getAArch64MultilibFlags In a multilib setting, if you compile with a command line such as `clang --target=aarch64-none-elf -march=armv8.9-a+rcpc3`, `getAArch64MultilibFlags` returns an ill-formed string containing two consecutive `+` signs, of the form `...+rcpc++rcpc3+...`, causing later stages of multilib selection to get confused. The `++` arises from the entry in `AArch64::Extensions` for the SubtargetFeature `rcpc-immo`, which is a dependency of the `rcpc3` SubtargetFeature, but doesn't have an _extension_ name for the purposes of the `-march=foo+bar` option. So its `UserVisibleName` field is the empty string. To fix this, I've excluded extensions from consideration in `getAArch64MultilibFlags` if they have an empty `UserVisibleName`. Since the input to this function is not derived from a completely general set of SubtargetFeatures, but from a set that has only just been converted _from_ a clang driver command line, the only extensions skipped by this check should be cases like this one, where the anonymous extension was only included because it was a dependency of one mentioned explicitly. I've also made the analogous change in `getARMMultilibFlags`. I don't think it's necessary right now, because the architecture extensions for ARM (defined in `ARMTargetParser.def` rather than Tablegen) don't include any anonymous ones. But it seems sensible to add the check anyway, in case future refactoring introduces anonymous array elements in the same way that AArch64 did, and also in case someone writes a function for another platform by using either of these as example code. 
--- clang/lib/Driver/ToolChain.cpp | 20 clang/test/Driver/aarch64-multilib-rcpc3.c | 4 2 files changed, 16 insertions(+), 8 deletions(-) create mode 100644 clang/test/Driver/aarch64-multilib-rcpc3.c diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 977e08390800d..85ae4d2a26fee 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -195,11 +195,13 @@ static void getAArch64MultilibFlags(const Driver &D, UnifiedFeatures.end()); std::vector MArch; for (const auto &Ext : AArch64::Extensions) -if (FeatureSet.contains(Ext.PosTargetFeature)) - MArch.push_back(Ext.UserVisibleName.str()); +if (!Ext.UserVisibleName.empty()) + if (FeatureSet.contains(Ext.PosTargetFeature)) +MArch.push_back(Ext.UserVisibleName.str()); for (const auto &Ext : AArch64::Extensions) -if (FeatureSet.contains(Ext.NegTargetFeature)) - MArch.push_back(("no" + Ext.UserVisibleName).str()); +if (!Ext.UserVisibleName.empty()) + if (FeatureSet.contains(Ext.NegTargetFeature)) +MArch.push_back(("no" + Ext.UserVisibleName).str()); StringRef ArchName; for (const auto &ArchInfo : AArch64::ArchInfos) if (FeatureSet.contains(ArchInfo->ArchFeature)) @@ -221,11 +223,13 @@ static void getARMMultilibFlags(const Driver &D, UnifiedFeatures.end()); std::vector MArch; for (const auto &Ext : ARM::ARCHExtNames) -if (FeatureSet.contains(Ext.Feature)) - MArch.push_back(Ext.Name.str()); +if (!Ext.Name.empty()) + if (FeatureSet.contains(Ext.Feature)) +MArch.push_back(Ext.Name.str()); for (const auto &Ext : ARM::ARCHExtNames) -if (FeatureSet.contains(Ext.NegFeature)) - MArch.push_back(("no" + Ext.Name).str()); +if (!Ext.Name.empty()) + if (FeatureSet.contains(Ext.NegFeature)) +MArch.push_back(("no" + Ext.Name).str()); MArch.insert(MArch.begin(), ("-march=" + Triple.getArchName()).str()); Result.push_back(llvm::join(MArch, "+")); diff --git a/clang/test/Driver/aarch64-multilib-rcpc3.c b/clang/test/Driver/aarch64-multilib-rcpc3.c new file mode 100644 index 
0..b839079e0442d --- /dev/null +++ b/clang/test/Driver/aarch64-multilib-rcpc3.c @@ -0,0 +1,4 @@ +// RUN: %clang --target=aarch64-none-elf -march=armv8.9-a+rcpc3 -print-multi-flags-experimental -c %s 2>&1 | FileCheck %s + +// CHECK: -march=armv8.9-a +// CHECK-SAME: +rcpc+rcpc3+ >From 7179fc771dcf15c4ab5be24b985fd7be56960cc9 Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Mon, 8 Jul 2024 17:30:56 +0100 Subject: [PATCH 2/2] fixup! [Clang][Driver] Skip empty strings in getAArch64MultilibFlags --- clang/test/Driver/aarch64-multilib-rcpc3.c | 13 + 1 file changed, 13 insertions(+) diff --git a/clang/test/Driver/aarch64-multilib-rcpc3.c b/clang/test/Driver/aarch64-multilib-rcpc3.c index b839079e0442d..88b23de5a6510 100644 --- a/clang/test/Driver/aarch64-multilib-rcpc3.c +++ b/clang/test/Driver/aarch64-multilib-rcpc3.c @@ -1,4 +
[clang] [Clang][Driver] Skip empty strings in getAArch64MultilibFlags (PR #97827)
@@ -0,0 +1,4 @@ +// RUN: %clang --target=aarch64-none-elf -march=armv8.9-a+rcpc3 -print-multi-flags-experimental -c %s 2>&1 | FileCheck %s + +// CHECK: -march=armv8.9-a +// CHECK-SAME: +rcpc+rcpc3+ statham-arm wrote: Done. I wasn't sure whether it would need to come before or after the `rcpc+rcpc3` check. The answer turns out to be both. https://github.com/llvm/llvm-project/pull/97827 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][Driver] Add a custom error option in multilib.yaml. (PR #105684)
statham-arm wrote: To be clear, are you asking _me_ to make a followup PR to change that identifier in this already-landed patch, or are you going to do it? (Just to avoid the situation where both of us do it, or both of us think the other is going to) https://github.com/llvm/llvm-project/pull/105684 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Fix silent truncation of inline ASM `srcloc` cookie when going through a `DiagnosticInfoSrcMgr` (PR #84559)
https://github.com/statham-arm approved this pull request. LGTM, thanks! https://github.com/llvm/llvm-project/pull/84559 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Fix silent truncation of inline ASM `srcloc` cookie when going through a `DiagnosticInfoSrcMgr` (PR #84559)
https://github.com/statham-arm closed https://github.com/llvm/llvm-project/pull/84559 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][Driver] Skip empty strings in getAArch64MultilibFlags (PR #97827)
statham-arm wrote: @asmok-g , I'm confused. This commit doesn't have anything to do with the processing of `-W` options on the clang command line. Are you sure you've commented on the right PR? If you have, can you provide a full example command line? What was the behaviour before, and what is it afterwards? Why do you say that _this_ commit has caused the change? https://github.com/llvm/llvm-project/pull/97827 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang][Driver] Add a custom error option in multilib.yaml. (PR #105684)
https://github.com/statham-arm updated https://github.com/llvm/llvm-project/pull/105684 >From 806ac0bee0478fda32ec0bf5bfb9e28e1bef618d Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Wed, 21 Aug 2024 15:50:32 +0100 Subject: [PATCH 1/3] [clang][Driver] Add a custom error option in multilib.yaml. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sometimes a collection of multilibs has a gap in it, where a set of driver command-line options can't work with any of the available libraries. For example, the Arm MVE extension requires special startup code (you need to initialize FPSCR.LTPSIZE), and also benefits greatly from -mfloat-abi=hard. So a multilib provider might build a library for systems without MVE, and another for MVE with -mfloat-abi=hard, anticipating that that's what most MVE users would want. But then if a user compiles for MVE _without_ -mfloat-abi=hard, they can't use either of those libraries – one has an ABI mismatch, and the other will fail to set up LTPSIZE. In that situation, it's useful to include a multilib.yaml entry for the unworkable intermediate situation, and have it map to a fatal error message rather than a set of actual libraries. Then the user gets a build failure with a sensible explanation, instead of selecting an unworkable library and silently generating bad output. The new regression test demonstrates this case. This patch introduces extra syntax into multilib.yaml, so that a record in the `Variants` list can omit the `Dir` key, and in its place, provide a `FatalError` key. Then, if that variant is selected, the error message is emitted as a clang diagnostic, and multilib selection fails. In order to emit the error message in `MultilibSet::select`, I had to pass a `Driver &` to that function, which involved plumbing one through to every call site, and in the unit tests, constructing one specially. 
--- clang/docs/Multilib.rst | 6 ++ .../clang/Basic/DiagnosticDriverKinds.td | 2 + clang/include/clang/Driver/Multilib.h | 17 +++- clang/lib/Driver/Driver.cpp | 3 +- clang/lib/Driver/Multilib.cpp | 47 +++--- clang/lib/Driver/ToolChains/BareMetal.cpp | 9 +- clang/lib/Driver/ToolChains/Fuchsia.cpp | 2 +- clang/lib/Driver/ToolChains/Gnu.cpp | 57 ++-- clang/lib/Driver/ToolChains/OHOS.cpp | 7 +- .../baremetal-multilib-custom-error.yaml | 63 ++ .../unittests/Driver/MultilibBuilderTest.cpp | 9 +- clang/unittests/Driver/MultilibTest.cpp | 87 +++ .../Driver/SimpleDiagnosticConsumer.h | 16 13 files changed, 236 insertions(+), 89 deletions(-) create mode 100644 clang/test/Driver/baremetal-multilib-custom-error.yaml diff --git a/clang/docs/Multilib.rst b/clang/docs/Multilib.rst index 063fe9a336f2fe..6d77fda3623b20 100644 --- a/clang/docs/Multilib.rst +++ b/clang/docs/Multilib.rst @@ -200,6 +200,12 @@ For a more comprehensive example see # to be a match. Flags: [--target=thumbv7m-none-eabi, -mfpu=fpv4-sp-d16] + # If there is no multilib available for a particular set of flags, and the + # other multilibs are not adequate fallbacks, then you can define a variant + # record with a FatalError key in place of the Dir key. + - FatalError: this multilib collection has no hard-float ABI support +Flags: [--target=thumbv7m-none-eabi, -mfloat-abi=hard] + # The second section of the file is a list of regular expressions that are # used to map from flags generated from command line options to custom flags. 
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index ba90742fbdaabc..97573fcf20c1fb 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -810,6 +810,8 @@ def warn_drv_missing_multilib : Warning< InGroup>; def note_drv_available_multilibs : Note< "available multilibs are:%0">; +def err_drv_multilib_custom_error : Error< + "multilib configuration error: %0">; def err_drv_experimental_crel : Error< "-Wa,--allow-experimental-crel must be specified to use -Wa,--crel. " diff --git a/clang/include/clang/Driver/Multilib.h b/clang/include/clang/Driver/Multilib.h index 9a2cc9bb1ba134..2b6a64187f7783 100644 --- a/clang/include/clang/Driver/Multilib.h +++ b/clang/include/clang/Driver/Multilib.h @@ -18,6 +18,7 @@ #include "llvm/Support/SourceMgr.h" #include #include +#include #include #include #include @@ -25,6 +26,8 @@ namespace clang { namespace driver { +class Driver; + /// This corresponds to a single GCC Multilib, or a segment of one controlled /// by a command line flag. /// See also MultilibBuilder for building a multilib by mutating it @@ -48,13 +51,19 @@ class Multilib { // directory is not mutually exclusive with anything else. std::string ExclusiveGroup; + // So