r340390 - [clang-tblgen] Add -print-records and -dump-json modes.

2018-08-22 Thread Simon Tatham via cfe-commits
Author: statham
Date: Wed Aug 22 02:20:39 2018
New Revision: 340390

URL: http://llvm.org/viewvc/llvm-project?rev=340390&view=rev
Log:
[clang-tblgen] Add -print-records and -dump-json modes.

Currently, if clang-tblgen is run without a mode option, it defaults
to the first mode in its 'enum Action', which happens to be
-gen-clang-attr-classes. I think it makes more sense for it to behave
the same way as llvm-tblgen, i.e. print a diagnostic dump if it's not
given any more specific instructions.

I've also added the same -dump-json that llvm-tblgen supports. This
means any tblgen command line (whether llvm- or clang-) can be
mechanically turned into one that processes the same input into JSON.

Reviewers: nhaehnle

Reviewed By: nhaehnle

Subscribers: cfe-commits

Differential Revision: https://reviews.llvm.org/D50771

Modified:
cfe/trunk/utils/TableGen/TableGen.cpp

Modified: cfe/trunk/utils/TableGen/TableGen.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/TableGen.cpp?rev=340390&r1=340389&r2=340390&view=diff
==
--- cfe/trunk/utils/TableGen/TableGen.cpp (original)
+++ cfe/trunk/utils/TableGen/TableGen.cpp Wed Aug 22 02:20:39 2018
@@ -23,6 +23,8 @@ using namespace llvm;
 using namespace clang;
 
 enum ActionType {
+  PrintRecords,
+  DumpJSON,
   GenClangAttrClasses,
   GenClangAttrParserStringSwitches,
   GenClangAttrSubjectMatchRulesParserStringSwitches,
@@ -66,6 +68,10 @@ namespace {
 cl::opt<ActionType> Action(
 cl::desc("Action to perform:"),
 cl::values(
+clEnumValN(PrintRecords, "print-records",
+   "Print all records to stdout (default)"),
+clEnumValN(DumpJSON, "dump-json",
+   "Dump all records as machine-readable JSON"),
 clEnumValN(GenClangAttrClasses, "gen-clang-attr-classes",
"Generate clang attribute clases"),
 clEnumValN(GenClangAttrParserStringSwitches,
@@ -164,6 +170,12 @@ ClangComponent("clang-component",
 
 bool ClangTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
   switch (Action) {
+  case PrintRecords:
+OS << Records;   // No argument, dump all contents
+break;
+  case DumpJSON:
+EmitJSON(Records, OS);
+break;
   case GenClangAttrClasses:
 EmitClangAttrClass(Records, OS);
 break;


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r333513 - Support __iso_volatile_load8 etc on aarch64-win32.

2018-05-30 Thread Simon Tatham via cfe-commits
Author: statham
Date: Wed May 30 00:54:05 2018
New Revision: 333513

URL: http://llvm.org/viewvc/llvm-project?rev=333513&view=rev
Log:
Support __iso_volatile_load8 etc on aarch64-win32.

These intrinsics are used by MSVC's header files on AArch64 Windows as
well as AArch32, so we should support them for both targets. I've
factored them out of CodeGenFunction::EmitARMBuiltinExpr into separate
functions that EmitAArch64BuiltinExpr can call as well.

Reviewers: javed.absar, mstorsjo

Reviewed By: mstorsjo

Subscribers: kristof.beyls, cfe-commits

Differential Revision: https://reviews.llvm.org/D47476

Added:
cfe/trunk/test/CodeGen/ms-volatile-aarch64.c   (with props)
Modified:
cfe/trunk/include/clang/Basic/BuiltinsAArch64.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/CodeGen/CodeGenFunction.h

Modified: cfe/trunk/include/clang/Basic/BuiltinsAArch64.def
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsAArch64.def?rev=333513&r1=333512&r2=333513&view=diff
==
--- cfe/trunk/include/clang/Basic/BuiltinsAArch64.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsAArch64.def Wed May 30 00:54:05 2018
@@ -69,5 +69,15 @@ LANGBUILTIN(__dmb, "vUi", "nc", ALL_MS_L
 LANGBUILTIN(__dsb, "vUi", "nc", ALL_MS_LANGUAGES)
 LANGBUILTIN(__isb, "vUi", "nc", ALL_MS_LANGUAGES)
 
+// MSVC intrinsics for volatile but non-acquire/release loads and stores
+LANGBUILTIN(__iso_volatile_load8,   "ccCD*", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(__iso_volatile_load16,  "ssCD*", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(__iso_volatile_load32,  "iiCD*", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(__iso_volatile_load64,  "LLiLLiCD*", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(__iso_volatile_store8,  "vcD*c", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(__iso_volatile_store16, "vsD*s", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(__iso_volatile_store32, "viD*i", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(__iso_volatile_store64, "vLLiD*LLi", "n", ALL_MS_LANGUAGES)
+
 #undef BUILTIN
 #undef LANGBUILTIN

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=333513&r1=333512&r2=333513&view=diff
==
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed May 30 00:54:05 2018
@@ -5179,6 +5179,34 @@ static bool HasExtraNeonArgument(unsigne
   return true;
 }
 
+Value *CodeGenFunction::EmitISOVolatileLoad(const CallExpr *E) {
+  Value *Ptr = EmitScalarExpr(E->getArg(0));
+  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
+  CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
+  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
+   LoadSize.getQuantity() * 8);
+  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
+  llvm::LoadInst *Load =
+Builder.CreateAlignedLoad(Ptr, LoadSize);
+  Load->setVolatile(true);
+  return Load;
+}
+
+Value *CodeGenFunction::EmitISOVolatileStore(const CallExpr *E) {
+  Value *Ptr = EmitScalarExpr(E->getArg(0));
+  Value *Value = EmitScalarExpr(E->getArg(1));
+  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
+  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
+  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
+   StoreSize.getQuantity() * 8);
+  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
+  llvm::StoreInst *Store =
+Builder.CreateAlignedStore(Value, Ptr,
+   StoreSize);
+  Store->setVolatile(true);
+  return Store;
+}
+
 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
const CallExpr *E,
llvm::Triple::ArchType Arch) {
@@ -5421,35 +5449,13 @@ Value *CodeGenFunction::EmitARMBuiltinEx
   case ARM::BI__iso_volatile_load8:
   case ARM::BI__iso_volatile_load16:
   case ARM::BI__iso_volatile_load32:
-  case ARM::BI__iso_volatile_load64: {
-Value *Ptr = EmitScalarExpr(E->getArg(0));
-QualType ElTy = E->getArg(0)->getType()->getPointeeType();
-CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
-llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
- LoadSize.getQuantity() * 8);
-Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
-llvm::LoadInst *Load =
-  Builder.CreateAlignedLoad(Ptr, LoadSize);
-Load->setVolatile(true);
-return Load;
-  }
+  case ARM::BI__iso_volatile_load64:
+return EmitISOVolatileLoad(E);
   case ARM::BI__iso_volatile_store8:
   case ARM::BI__iso_volatile_store16:
   case ARM::BI__iso_volatile_store32:
-  case ARM::BI__iso_volatile_store64: {
-Value *Ptr = EmitScalarExpr(E->getArg(0));
-Value *Value = EmitScalarExpr(E->getArg(1

r362380 - [ARM] Fix recent breakage of -mfpu=none.

2019-06-03 Thread Simon Tatham via cfe-commits
Author: statham
Date: Mon Jun  3 04:02:53 2019
New Revision: 362380

URL: http://llvm.org/viewvc/llvm-project?rev=362380&view=rev
Log:
[ARM] Fix recent breakage of -mfpu=none.

The recent change D60691 introduced a bug in clang when handling
option combinations such as `-mcpu=cortex-m4 -mfpu=none`. Those
options together should select Cortex-M4 but disable all use of
hardware FP, but in fact, now hardware FP instructions can still be
generated in that mode.

The reason is that the handling of FPUVersion::NONE disables all
the same feature names it used to, of which the base one is `vfp2`.
But now there are further features below that, like `vfp2d16fp` and
(following D60694) `fpregs`, which also need to be turned off to
disable hardware FP completely.

Added a tiny test which double-checks that compiling a simple FP
function doesn't access the FP registers.

Reviewers: SjoerdMeijer, dmgreen

Reviewed By: dmgreen

Subscribers: lebedev.ri, javed.absar, kristof.beyls, hiraditya, cfe-commits, 
llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D62729

Added:
cfe/trunk/test/CodeGen/arm-mfpu-none.c   (with props)
Modified:
cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp
cfe/trunk/test/Driver/arm-mfpu.c

Modified: cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp?rev=362380&r1=362379&r2=362380&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp Mon Jun  3 04:02:53 2019
@@ -430,8 +430,8 @@ fp16_fml_fallthrough:
 llvm::ARM::getFPUFeatures(llvm::ARM::FK_NONE, Features);
 
 // Disable hardware FP features which have been enabled.
-// FIXME: Disabling vfp2 and neon should be enough as all the other
-//features are dependent on these 2 features in LLVM. However
+// FIXME: Disabling fpregs should be enough all by itself, since all
+//the other FP features are dependent on it. However
 //there is currently no easy way to test this in clang, so for
 //now just be explicit and disable all known dependent features
 //as well.
@@ -439,6 +439,11 @@ fp16_fml_fallthrough:
 "neon", "crypto", "dotprod", "fp16fml"})
   if (std::find(std::begin(Features), std::end(Features), "+" + Feature) 
!= std::end(Features))
 Features.push_back(Args.MakeArgString("-" + Feature));
+
+// Disable the base feature unconditionally, even if it was not
+// explicitly in the features list (e.g. if we had +vfp3, which
+// implies it).
+Features.push_back("-fpregs");
   }
 
   // En/disable crc code generation.

Added: cfe/trunk/test/CodeGen/arm-mfpu-none.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-mfpu-none.c?rev=362380&view=auto
==
--- cfe/trunk/test/CodeGen/arm-mfpu-none.c (added)
+++ cfe/trunk/test/CodeGen/arm-mfpu-none.c Mon Jun  3 04:02:53 2019
@@ -0,0 +1,8 @@
+// REQUIRES: arm-registered-target
+// RUN: %clang -target arm-none-eabi -mcpu=cortex-m4 -mfpu=none -S -o - %s | 
FileCheck %s
+
+// CHECK-LABEL: compute
+// CHECK-NOT: {{s[0-9]}}
+float compute(float a, float b) {
+  return (a+b) * (a-b);
+}

Propchange: cfe/trunk/test/CodeGen/arm-mfpu-none.c
--
svn:eol-style = native

Propchange: cfe/trunk/test/CodeGen/arm-mfpu-none.c
--
svn:keywords = Rev Date Author URL Id

Modified: cfe/trunk/test/Driver/arm-mfpu.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/arm-mfpu.c?rev=362380&r1=362379&r2=362380&view=diff
==
--- cfe/trunk/test/Driver/arm-mfpu.c (original)
+++ cfe/trunk/test/Driver/arm-mfpu.c Mon Jun  3 04:02:53 2019
@@ -318,6 +318,7 @@
 // RUN:   | FileCheck --check-prefix=CHECK-NO-FP %s
 // CHECK-NO-FP-NOT: "-target-feature" "+soft-float"
 // CHECK-NO-FP: "-target-feature" "+soft-float-abi"
+// CHECK-NO-FP: "-target-feature" "-fpregs"
 // CHECK-NO-FP: "-target-feature" "-vfp2"
 // CHECK-NO-FP: "-target-feature" "-vfp3"
 // CHECK-NO-FP: "-target-feature" "-vfp4"
@@ -363,6 +364,7 @@
 // CHECK-SOFT-ABI-FP: "-target-feature" "-fp-armv8"
 // CHECK-SOFT-ABI-FP: "-target-feature" "-neon"
 // CHECK-SOFT-ABI-FP: "-target-feature" "-crypto"
+// CHECK-SOFT-ABI-FP: "-target-feature" "-fpregs"
 
 // RUN: %clang -target arm-linux-androideabi21 %s -### -c 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-ARM5-ANDROID-FP-DEFAULT %s


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r362791 - [ARM] Fix bugs introduced by the fp64/d32 rework.

2019-06-07 Thread Simon Tatham via cfe-commits
Author: statham
Date: Fri Jun  7 05:42:54 2019
New Revision: 362791

URL: http://llvm.org/viewvc/llvm-project?rev=362791&view=rev
Log:
[ARM] Fix bugs introduced by the fp64/d32 rework.

Change D60691 caused some knock-on failures that weren't caught by the
existing tests. Firstly, selecting a CPU that should have had a
restricted FPU (e.g. `-mcpu=cortex-m4`, which should have 16 d-regs
and no double precision) could give the unrestricted version, because
`ARM::getFPUFeatures` returned a list of features including subtracted
ones (here `-fp64`,`-d32`), but `ARMTargetInfo::initFeatureMap` threw
away all the ones that didn't start with `+`. Secondly, the
preprocessor macros didn't reliably match the actual compilation
settings: for example, `-mfpu=softvfp` could still set `__ARM_FP` as
if hardware FP was available, because the list of features on the cc1
command line would include things like `+vfp4`,`-vfp4d16` and clang
didn't realise that one of those cancelled out the other.

I've fixed both of these issues by rewriting `ARM::getFPUFeatures` so
that it returns a list that enables every FP-related feature
compatible with the selected FPU and disables every feature not
compatible, which is more verbose but means clang doesn't have to
understand the dependency relationships between the backend features.
Meanwhile, `ARMTargetInfo::handleTargetFeatures` is testing for all
the various forms of the FP feature names, so that it won't miss cases
where it should have set `HW_FP` to feed into feature test macros.

That in turn caused an ordering problem when handling `-mcpu=foo+bar`
together with `-mfpu=something_that_turns_off_bar`. To fix that, I've
arranged that the `+bar` suffixes on the end of `-mcpu` and `-march`
cause feature names to be put into a separate vector which is
concatenated after the output of `getFPUFeatures`.

Another side effect of all this is to fix a bug where `clang -target
armv8-eabi` by itself would fail to set `__ARM_FEATURE_FMA`, even
though `armv8` (aka Arm v8-A) implies FP-Armv8 which has FMA. That was
because `HW_FP` was being set to a value including only the `FPARMV8`
bit, but that feature test macro was testing only the `VFP4FPU` bit.
Now `HW_FP` ends up with all the bits set, so it gives the right
answer.

Changes to tests included in this patch:

* `arm-target-features.c`: I had to change basically all the expected
  results. (The Cortex-M4 test in there should function as a
  regression test for the accidental double-precision bug.)
* `arm-mfpu.c`, `armv8.1m.main.c`: switched to using `CHECK-DAG`
  everywhere so that those tests are no longer sensitive to the order
  of cc1 feature options on the command line.
* `arm-acle-6.5.c`: updated to expect the right answer to that
  FMA test.
* `Preprocessor/arm-target-features.c`: added a regression test for
  the `mfpu=softvfp` issue.

Reviewers: SjoerdMeijer, dmgreen, ostannard, samparker, JamesNagurne

Reviewed By: ostannard

Subscribers: srhines, javed.absar, kristof.beyls, hiraditya, cfe-commits, 
llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D62998

Modified:
cfe/trunk/lib/Basic/Targets/ARM.cpp
cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp
cfe/trunk/test/CodeGen/arm-target-features.c
cfe/trunk/test/Driver/arm-mfpu.c
cfe/trunk/test/Driver/armv8.1m.main.c
cfe/trunk/test/Preprocessor/arm-acle-6.5.c
cfe/trunk/test/Preprocessor/arm-target-features.c

Modified: cfe/trunk/lib/Basic/Targets/ARM.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/ARM.cpp?rev=362791&r1=362790&r2=362791&view=diff
==
--- cfe/trunk/lib/Basic/Targets/ARM.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/ARM.cpp Fri Jun  7 05:42:54 2019
@@ -408,18 +408,30 @@ bool ARMTargetInfo::handleTargetFeatures
   SoftFloat = true;
 } else if (Feature == "+soft-float-abi") {
   SoftFloatABI = true;
-} else if (Feature == "+vfp2") {
+} else if (Feature == "+vfp2sp" || Feature == "+vfp2d16sp" ||
+   Feature == "+vfp2" || Feature == "+vfp2d16") {
   FPU |= VFP2FPU;
   HW_FP |= HW_FP_SP;
-} else if (Feature == "+vfp3") {
+  if (Feature == "+vfp2" || Feature == "+vfp2d16")
+  HW_FP |= HW_FP_DP;
+} else if (Feature == "+vfp3sp" || Feature == "+vfp3d16sp" ||
+   Feature == "+vfp3" || Feature == "+vfp3d16") {
   FPU |= VFP3FPU;
   HW_FP |= HW_FP_SP;
-} else if (Feature == "+vfp4") {
+  if (Feature == "+vfp3" || Feature == "+vfp3d16")
+  HW_FP |= HW_FP_DP;
+} else if (Feature == "+vfp4sp" || Feature == "+vfp4d16sp" ||
+   Feature == "+vfp4" || Feature == "+vfp4d16") {
   FPU |= VFP4FPU;
   HW_FP |= HW_FP_SP | HW_FP_HP;
-} else if (Feature == "+fp-armv8") {
+  if (Feature == "+vfp4" || Feature == "+vfp4d16")
+  HW_FP |= HW_FP_DP;
+} else if (Feature == "+fp-armv8sp" || Feature == "+fp-ar

r364331 - [ARM] Support inline assembler constraints for MVE.

2019-06-25 Thread Simon Tatham via cfe-commits
Author: statham
Date: Tue Jun 25 09:49:32 2019
New Revision: 364331

URL: http://llvm.org/viewvc/llvm-project?rev=364331&view=rev
Log:
[ARM] Support inline assembler constraints for MVE.

"To" selects an odd-numbered GPR, and "Te" an even one. There are some
8.1-M instructions that have one too few bits in their register fields
and require registers of particular parity, without necessarily using
a consecutive even/odd pair.

Also, the constraint letter "t" should select an MVE q-register, when
MVE is present. This didn't need any source changes, but some extra
tests have been added.

Reviewers: dmgreen, samparker, SjoerdMeijer

Subscribers: javed.absar, eraman, kristof.beyls, hiraditya, cfe-commits, 
llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D60709

Modified:
cfe/trunk/lib/Basic/Targets/ARM.cpp
cfe/trunk/test/CodeGen/arm-asm.c

Modified: cfe/trunk/lib/Basic/Targets/ARM.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/ARM.cpp?rev=364331&r1=364330&r2=364331&view=diff
==
--- cfe/trunk/lib/Basic/Targets/ARM.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/ARM.cpp Tue Jun 25 09:49:32 2019
@@ -900,6 +900,16 @@ bool ARMTargetInfo::validateAsmConstrain
   case 'Q': // A memory address that is a single base register.
 Info.setAllowsMemory();
 return true;
+  case 'T':
+switch (Name[1]) {
+default:
+  break;
+case 'e': // Even general-purpose register
+case 'o': // Odd general-purpose register
+  Info.setAllowsRegister();
+  Name++;
+  return true;
+}
   case 'U': // a memory reference...
 switch (Name[1]) {
 case 'q': // ...ARMV4 ldrsb
@@ -923,6 +933,7 @@ std::string ARMTargetInfo::convertConstr
   std::string R;
   switch (*Constraint) {
   case 'U': // Two-character constraint; add "^" hint for later parsing.
+  case 'T':
 R = std::string("^") + std::string(Constraint, 2);
 Constraint++;
 break;

Modified: cfe/trunk/test/CodeGen/arm-asm.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-asm.c?rev=364331&r1=364330&r2=364331&view=diff
==
--- cfe/trunk/test/CodeGen/arm-asm.c (original)
+++ cfe/trunk/test/CodeGen/arm-asm.c Tue Jun 25 09:49:32 2019
@@ -6,3 +6,21 @@ int t1() {
 __asm__ volatile ("flds s15, %[k] \n" :: [k] "Uv" (k) : "s15");
 return 0;
 }
+
+// CHECK-LABEL: @even_reg_constraint_Te
+int even_reg_constraint_Te(void) {
+  int acc = 0;
+  // CHECK: vaddv{{.*\^Te}}
+  asm("vaddv.s8 %0, Q0"
+  : "+Te" (acc));
+  return acc;
+}
+
+// CHECK-LABEL: @odd_reg_constraint_To
+int odd_reg_constraint_To(void) {
+  int eacc = 0, oacc = 0;
+  // CHECK: vaddlv{{.*\^To}}
+  asm("vaddlv.s8 %0, %1, Q0"
+  : "+Te" (eacc), "+To" (oacc));
+  return oacc;
+}


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r361845 - [ARM] Replace fp-only-sp and d16 with fp64 and d32.

2019-05-28 Thread Simon Tatham via cfe-commits
Author: statham
Date: Tue May 28 09:13:20 2019
New Revision: 361845

URL: http://llvm.org/viewvc/llvm-project?rev=361845&view=rev
Log:
[ARM] Replace fp-only-sp and d16 with fp64 and d32.

Those two subtarget features were awkward because their semantics are
reversed: each one indicates the _lack_ of support for something in
the architecture, rather than the presence. As a consequence, you
don't get the behavior you want if you combine two sets of feature
bits.

Each SubtargetFeature for an FP architecture version now comes in four
versions, one for each combination of those options. So you can still
say (for example) '+vfp2' in a feature string and it will mean what
it's always meant, but there's a new string '+vfp2d16sp' meaning the
version without those extra options.

A lot of this change is just mechanically replacing positive checks
for the old features with negative checks for the new ones. But one
more interesting change is that I've rearranged getFPUFeatures() so
that the main FPU feature is appended to the output list *before*
rather than after the features derived from the Restriction field, so
that -fp64 and -d32 can override defaults added by the main feature.

Reviewers: dmgreen, samparker, SjoerdMeijer

Subscribers: srhines, javed.absar, eraman, kristof.beyls, hiraditya, zzheng, 
Petar.Avramovic, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D60691


Modified:
cfe/trunk/lib/Basic/Targets/ARM.cpp
cfe/trunk/test/CodeGen/arm-target-features.c
cfe/trunk/test/Driver/arm-mfpu.c

Modified: cfe/trunk/lib/Basic/Targets/ARM.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Basic/Targets/ARM.cpp?rev=361845&r1=361844&r2=361845&view=diff
==
--- cfe/trunk/lib/Basic/Targets/ARM.cpp (original)
+++ cfe/trunk/lib/Basic/Targets/ARM.cpp Tue May 28 09:13:20 2019
@@ -400,8 +400,7 @@ bool ARMTargetInfo::handleTargetFeatures
   HasFloat16 = true;
 
   // This does not diagnose illegal cases like having both
-  // "+vfpv2" and "+vfpv3" or having "+neon" and "+fp-only-sp".
-  uint32_t HW_FP_remove = 0;
+  // "+vfpv2" and "+vfpv3" or having "+neon" and "-fp64".
   for (const auto &Feature : Features) {
 if (Feature == "+soft-float") {
   SoftFloat = true;
@@ -409,19 +408,19 @@ bool ARMTargetInfo::handleTargetFeatures
   SoftFloatABI = true;
 } else if (Feature == "+vfp2") {
   FPU |= VFP2FPU;
-  HW_FP |= HW_FP_SP | HW_FP_DP;
+  HW_FP |= HW_FP_SP;
 } else if (Feature == "+vfp3") {
   FPU |= VFP3FPU;
-  HW_FP |= HW_FP_SP | HW_FP_DP;
+  HW_FP |= HW_FP_SP;
 } else if (Feature == "+vfp4") {
   FPU |= VFP4FPU;
-  HW_FP |= HW_FP_SP | HW_FP_DP | HW_FP_HP;
+  HW_FP |= HW_FP_SP | HW_FP_HP;
 } else if (Feature == "+fp-armv8") {
   FPU |= FPARMV8;
-  HW_FP |= HW_FP_SP | HW_FP_DP | HW_FP_HP;
+  HW_FP |= HW_FP_SP | HW_FP_HP;
 } else if (Feature == "+neon") {
   FPU |= NeonFPU;
-  HW_FP |= HW_FP_SP | HW_FP_DP;
+  HW_FP |= HW_FP_SP;
 } else if (Feature == "+hwdiv") {
   HWDiv |= HWDivThumb;
 } else if (Feature == "+hwdiv-arm") {
@@ -432,8 +431,8 @@ bool ARMTargetInfo::handleTargetFeatures
   Crypto = 1;
 } else if (Feature == "+dsp") {
   DSP = 1;
-} else if (Feature == "+fp-only-sp") {
-  HW_FP_remove |= HW_FP_DP;
+} else if (Feature == "+fp64") {
+  HW_FP |= HW_FP_DP;
 } else if (Feature == "+8msecext") {
   if (CPUProfile != "M" || ArchVersion != 8) {
 Diags.Report(diag::err_target_unsupported_mcmse) << CPU;
@@ -449,7 +448,6 @@ bool ARMTargetInfo::handleTargetFeatures
   DotProd = true;
 }
   }
-  HW_FP &= ~HW_FP_remove;
 
   switch (ArchVersion) {
   case 6:

Modified: cfe/trunk/test/CodeGen/arm-target-features.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-target-features.c?rev=361845&r1=361844&r2=361845&view=diff
==
--- cfe/trunk/test/CodeGen/arm-target-features.c (original)
+++ cfe/trunk/test/CodeGen/arm-target-features.c Tue May 28 09:13:20 2019
@@ -1,23 +1,23 @@
 // REQUIRES: arm-registered-target
 
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a8 
-emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP3
-// CHECK-VFP3: "target-features"="+armv7-a,+dsp,+neon,+thumb-mode,+vfp3"
+// CHECK-VFP3: 
"target-features"="+armv7-a,+d32,+dsp,+fp64,+neon,+thumb-mode,+vfp3"
 
 
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a5 
-emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4
-// CHECK-VFP4: "target-features"="+armv7-a,+dsp,+neon,+thumb-mode,+vfp4"
+// CHECK-VFP4: 
"target-features"="+armv7-a,+d32,+dsp,+fp64,+neon,+thumb-mode,+vfp4"
 
 
 // RUN: %clang_cc1 -triple thumbv7-linux-gnueabihf -target-cpu cortex-a7 
-emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-VFP4-DIV
 // RUN

r373744 - [clang] Prevent false positives in arm-mfpu-none codegen test.

2019-10-04 Thread Simon Tatham via cfe-commits
Author: statham
Date: Fri Oct  4 06:01:41 2019
New Revision: 373744

URL: http://llvm.org/viewvc/llvm-project?rev=373744&view=rev
Log:
[clang] Prevent false positives in arm-mfpu-none codegen test.

A user pointed out to me in private email that this test will fail if
it sees the letter 's' followed by a digit in any part of clang's
assembly output after the function label. That includes the .ident at
the end, which can include a full pathname or hostname or both from
the system clang was built on. So if that path or hostname includes
any text like 's5' then it will cause the test to fail.

Fixed by adding a check for `.fnend`, to limit the scope of the
`CHECK-NOT` to only the actual generated code for the test function.

(Committed without prior review on the basis that it's a simple and
obvious pure test-suite fix and also in a test I contributed myself.)

Modified:
cfe/trunk/test/CodeGen/arm-mfpu-none.c

Modified: cfe/trunk/test/CodeGen/arm-mfpu-none.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-mfpu-none.c?rev=373744&r1=373743&r2=373744&view=diff
==
--- cfe/trunk/test/CodeGen/arm-mfpu-none.c (original)
+++ cfe/trunk/test/CodeGen/arm-mfpu-none.c Fri Oct  4 06:01:41 2019
@@ -3,6 +3,7 @@
 
 // CHECK-LABEL: compute
 // CHECK-NOT: {{s[0-9]}}
+// CHECK: .fnend
 float compute(float a, float b) {
   return (a+b) * (a-b);
 }


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


r375001 - [Driver,ARM] Make -mfloat-abi=soft turn off MVE.

2019-10-16 Thread Simon Tatham via cfe-commits
Author: statham
Date: Wed Oct 16 06:23:39 2019
New Revision: 375001

URL: http://llvm.org/viewvc/llvm-project?rev=375001&view=rev
Log:
[Driver,ARM] Make -mfloat-abi=soft turn off MVE.

Since `-mfloat-abi=soft` is taken to mean turning off all uses of the
FP registers, it should turn off the MVE vector instructions as well
as NEON and scalar FP. But it wasn't doing so.

So the options `-march=armv8.1-m.main+mve.fp+fp.dp -mfloat-abi=soft`
would cause the underlying LLVM to //not// support MVE (because it
knows the real target feature relationships and turned off MVE when
the `fpregs` feature was removed), but the clang layer still thought
it //was// supported, and would misleadingly define the feature macro
`__ARM_FEATURE_MVE`.

The ARM driver code already has a long list of feature names to turn
off when `-mfloat-abi=soft` is selected. The fix is to add the missing
entries `mve` and `mve.fp` to that list.

Reviewers: dmgreen

Subscribers: kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D69025

Modified:
cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp
cfe/trunk/test/Driver/arm-mfpu.c

Modified: cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp?rev=375001&r1=375000&r2=375001&view=diff
==
--- cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp (original)
+++ cfe/trunk/lib/Driver/ToolChains/Arch/ARM.cpp Wed Oct 16 06:23:39 2019
@@ -465,6 +465,7 @@ fp16_fml_fallthrough:
 "vfp4", "vfp4sp", "vfp4d16", "vfp4d16sp",
 "fp-armv8", "fp-armv8sp", "fp-armv8d16", "fp-armv8d16sp",
 "fullfp16", "neon", "crypto", "dotprod", "fp16fml",
+"mve", "mve.fp",
 "fp64", "d32", "fpregs"})
   Features.push_back(Args.MakeArgString("-" + Feature));
   }

Modified: cfe/trunk/test/Driver/arm-mfpu.c
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Driver/arm-mfpu.c?rev=375001&r1=375000&r2=375001&view=diff
==
--- cfe/trunk/test/Driver/arm-mfpu.c (original)
+++ cfe/trunk/test/Driver/arm-mfpu.c Wed Oct 16 06:23:39 2019
@@ -397,3 +397,9 @@
 // CHECK-ARM7-ANDROID-FP-D16-NOT: "-target-feature" "+fp-armv8"
 // CHECK-ARM7-ANDROID-FP-D16-NOT: "-target-feature" "+neon"
 // CHECK-ARM7-ANDROID-FP-D16-NOT: "-target-feature" "+crypto"
+
+// RUN: %clang -target arm-none-none-eabi %s 
-march=armv8.1-m.main+mve.fp+fp.dp -mfloat-abi=soft -### -c 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-SOFTFLOATABI-INHIBITS-MVE %s
+// CHECK-SOFTFLOATABI-INHIBITS-MVE-NOT: "-target-feature" "+mve"
+// CHECK-SOFTFLOATABI-INHIBITS-MVE-DAG: "-target-feature" "-mve"
+// CHECK-SOFTFLOATABI-INHIBITS-MVE-DAG: "-target-feature" "-mve.fp"


___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 24ef631 - Fix file-ordering nit in D67161.

2019-10-25 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2019-10-25T09:22:07+01:00
New Revision: 24ef631f4333120abd6b66c1e8466a582b60779f

URL: 
https://github.com/llvm/llvm-project/commit/24ef631f4333120abd6b66c1e8466a582b60779f
DIFF: 
https://github.com/llvm/llvm-project/commit/24ef631f4333120abd6b66c1e8466a582b60779f.diff

LOG: Fix file-ordering nit in D67161.

Re-sorted the module names in clang/utils/TableGen/CMakeLists.txt back
into alphabetical order.

Added: 


Modified: 
clang/utils/TableGen/CMakeLists.txt

Removed: 




diff  --git a/clang/utils/TableGen/CMakeLists.txt 
b/clang/utils/TableGen/CMakeLists.txt
index 407cf8a57f9a..c685a2c0c076 100644
--- a/clang/utils/TableGen/CMakeLists.txt
+++ b/clang/utils/TableGen/CMakeLists.txt
@@ -13,8 +13,8 @@ add_tablegen(clang-tblgen CLANG
   ClangOptionDocEmitter.cpp
   ClangSACheckersEmitter.cpp
   ClangTypeNodesEmitter.cpp
-  NeonEmitter.cpp
   MveEmitter.cpp
+  NeonEmitter.cpp
   TableGen.cpp
   )
 set_target_properties(clang-tblgen PROPERTIES FOLDER "Clang tablegenning")



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 11ce19d - [clang] Switch arm-mve-intrinsics tests to use %clang_cc1.

2019-10-25 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2019-10-25T12:00:38+01:00
New Revision: 11ce19d2119e0870b2bf53eb23d215aa83cd5540

URL: 
https://github.com/llvm/llvm-project/commit/11ce19d2119e0870b2bf53eb23d215aa83cd5540
DIFF: 
https://github.com/llvm/llvm-project/commit/11ce19d2119e0870b2bf53eb23d215aa83cd5540.diff

LOG: [clang] Switch arm-mve-intrinsics tests to use %clang_cc1.

It isn't really necessary for them to run the clang driver, and it's
more efficient not to (and also more stable against driver changes).
Now they invoke cc1 directly, more like the analogous NEON tests.

Reviewers: dmgreen

Subscribers: kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D69426

Added: 


Modified: 
clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
clang/test/CodeGen/arm-mve-intrinsics/vadc.c
clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
clang/test/CodeGen/arm-mve-intrinsics/vld24.c
clang/test/CodeGen/arm-mve-intrinsics/vldr.c
clang/test/CodeGen/arm-mve-intrinsics/vminvq.c

Removed: 




diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c 
b/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
index ec9a47f18eb9..0eead7a973f0 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -fno-discard-value-names -S 
-emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S 
-mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vadc.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vadc.c
index 6b77eac9ca54..58a47fc42bcb 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vadc.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vadc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -fno-discard-value-names -S 
-emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
-// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -DPOLYMORPHIC -O0 -Xclang -disable-O0-optnone 
-fno-discard-value-names -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S 
-mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o 
- %s | opt -S -mem2reg | FileCheck %s
 
 #include 
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
index 970ac53cefc6..1f18d5b57880 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -fno-discard-value-names -S 
-emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
-// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -DPOLYMORPHIC -O0 -Xclang -disable-O0-optnone 
-fno-discard-value-names -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S 
-mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o 
- %s | opt -S -mem2reg | FileCheck %s
 
 #include 
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
index 1aae36619dfa..ed3ecd3ee62e 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang --target=arm-arm-none-eabi -march=armv8.1m.main+mve.fp 
-mfloat-abi=hard -O0 -Xclang -disable-O0-optnone -fno-discard-value-names -S 
-emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S 
-mem2reg | FileCheck %s
 
 #inc

[clang] 9e6f19f - Fix missing build dependency on omp_gen.

2020-07-02 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-07-02T09:16:15+01:00
New Revision: 9e6f19fd8390d39a0351941da1582f888d18c369

URL: 
https://github.com/llvm/llvm-project/commit/9e6f19fd8390d39a0351941da1582f888d18c369
DIFF: 
https://github.com/llvm/llvm-project/commit/9e6f19fd8390d39a0351941da1582f888d18c369.diff

LOG: Fix missing build dependency on omp_gen.

Summary:
`include/llvm/Frontend/OpenMP/CMakeLists.txt` creates a new target
called `omp_gen` which builds the generated include file `OMP.h.inc`.
This target must therefore be a dependency of every compilation step
whose transitive #include dependencies contain `OMP.h.inc`, or else
it's possible for builds to fail if Ninja (or make or whatever)
schedules that compilation step before building `OMP.h.inc` at all.

A few of those dependencies are currently missing, which leads to
intermittent build failures, depending on the order that Ninja (or
whatever) happens to schedule its commands. As far as I can see,
compiles in `clang/lib/CodeGen`, `clang/lib/Frontend`, and
`clang/examples` all depend transitively on `OMP.h.inc` (usually via
`clang/AST/AST.h`), but don't have the formal dependency in the ninja
graph.

Adding `omp_gen` to the dependencies of `clang-tablegen-targets` seems
to be the way to get the missing dependency into the `clang/examples`
subdirectory. This also fixes the other two clang subdirectories, as
far as I can see.

Reviewers: clementval, thakis, chandlerc, jdoerfert

Reviewed By: clementval

Subscribers: cfe-commits, jdenny, mgorny, sstefan1, llvm-commits

Tags: #llvm, #clang

Differential Revision: https://reviews.llvm.org/D82659

Added: 


Modified: 
clang/CMakeLists.txt

Removed: 




diff  --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index 5a5e34aacbeb..83c30528499c 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -517,7 +517,10 @@ add_subdirectory(include)
 
 # All targets below may depend on all tablegen'd files.
 get_property(CLANG_TABLEGEN_TARGETS GLOBAL PROPERTY CLANG_TABLEGEN_TARGETS)
-add_custom_target(clang-tablegen-targets DEPENDS ${CLANG_TABLEGEN_TARGETS})
+add_custom_target(clang-tablegen-targets
+  DEPENDS
+  omp_gen
+  ${CLANG_TABLEGEN_TARGETS})
 set_target_properties(clang-tablegen-targets PROPERTIES FOLDER "Misc")
 list(APPEND LLVM_COMMON_DEPENDS clang-tablegen-targets)
 



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] ed0e4c7 - [clang][ARM] Add name-mangling test for direct __fp16 arguments.

2020-08-03 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-08-03T13:30:50+01:00
New Revision: ed0e4c70c99d3afd87fb202ab03bda40512677e7

URL: 
https://github.com/llvm/llvm-project/commit/ed0e4c70c99d3afd87fb202ab03bda40512677e7
DIFF: 
https://github.com/llvm/llvm-project/commit/ed0e4c70c99d3afd87fb202ab03bda40512677e7.diff

LOG: [clang][ARM] Add name-mangling test for direct __fp16 arguments.

`clang/test/CodeGenCXX/fp16-mangle.cpp` tests pointers to __fp16, but
if you give the `-fallow-half-arguments-and-returns` option, then
clang can also leave an __fp16 unmodified as a function argument or
return type. This regression test checks the name-mangling of that.

Reviewed By: miyuki

Differential Revision: https://reviews.llvm.org/D85010

Added: 
clang/test/CodeGenCXX/fp16-mangle-arg-return.cpp

Modified: 


Removed: 




diff  --git a/clang/test/CodeGenCXX/fp16-mangle-arg-return.cpp 
b/clang/test/CodeGenCXX/fp16-mangle-arg-return.cpp
new file mode 100644
index ..15214e13ad8a
--- /dev/null
+++ b/clang/test/CodeGenCXX/fp16-mangle-arg-return.cpp
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -emit-llvm -o - -triple arm-arm-none-eabi 
-fallow-half-arguments-and-returns %s | FileCheck %s
+// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-arm-none-eabi 
-fallow-half-arguments-and-returns %s | FileCheck %s
+
+// Test name-mangling of __fp16 passed directly as a function argument
+// (when that is permitted).
+
+// CHECK: define {{.*}}void @_Z13fp16_argumentDh(half %{{.*}})
+void fp16_argument(__fp16 arg) {}
+
+// Test name-mangling of __fp16 as a return type. The return type of
+// fp16_return itself isn't mentioned in the mangled name, so to test
+// this, we have to pass it a function pointer and make __fp16 the
+// return type of that.
+
+// CHECK: define {{.*}}void @_Z11fp16_returnPFDhvE(half ()* %{{.*}})
+void fp16_return(__fp16 (*func)(void)) {}



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 1d78294 - [Sema][BFloat] Forbid arithmetic on vectors of bfloat.

2020-08-07 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-08-07T11:25:19+01:00
New Revision: 1d782942500b2cbc9765ccf16264bb498850cefb

URL: 
https://github.com/llvm/llvm-project/commit/1d782942500b2cbc9765ccf16264bb498850cefb
DIFF: 
https://github.com/llvm/llvm-project/commit/1d782942500b2cbc9765ccf16264bb498850cefb.diff

LOG: [Sema][BFloat] Forbid arithmetic on vectors of bfloat.

Vectors of bfloat are a storage format only; you're supposed to
explicitly convert them to a wider type to do arithmetic on them.
But currently, if you write something like

  bfloat16x4_t test(bfloat16x4_t a, bfloat16x4_t b) { return a + b; }

then the clang frontend accepts it without error, and (ARM or AArch64)
isel fails to generate code for it.

Added a rule in Sema that forbids the attempt from even being made,
and tests that check it. In particular, we also outlaw arithmetic
between vectors of bfloat and any other vector type.

Patch by Luke Cheeseman.

Reviewed By: LukeGeeson

Differential Revision: https://reviews.llvm.org/D85009

Added: 


Modified: 
clang/lib/Sema/SemaExpr.cpp
clang/test/Sema/arm-bfloat.cpp

Removed: 




diff  --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 4931cf46cffd..b681c930b2a7 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -9789,6 +9789,10 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, 
ExprResult &RHS,
   const VectorType *RHSVecType = RHSType->getAs();
   assert(LHSVecType || RHSVecType);
 
+  if ((LHSVecType && LHSVecType->getElementType()->isBFloat16Type()) ||
+  (RHSVecType && RHSVecType->getElementType()->isBFloat16Type()))
+return InvalidOperands(Loc, LHS, RHS);
+
   // AltiVec-style "vector bool op vector bool" combinations are allowed
   // for some operators but not others.
   if (!AllowBothBool &&

diff  --git a/clang/test/Sema/arm-bfloat.cpp b/clang/test/Sema/arm-bfloat.cpp
index f7ee3c596eb8..ce3fc44baa39 100644
--- a/clang/test/Sema/arm-bfloat.cpp
+++ b/clang/test/Sema/arm-bfloat.cpp
@@ -27,3 +27,21 @@ void test(bool b) {
   fp16 = bf16; // expected-error {{assigning to '__fp16' from incompatible 
type '__bf16'}}
   bf16 + (b ? fp16 : bf16); // expected-error {{incompatible operand types 
('__fp16' and '__bf16')}}
 }
+
+#include 
+
+void test_vector(bfloat16x4_t a, bfloat16x4_t b, float16x4_t c) {
+  a + b; // expected-error {{invalid operands to binary expression 
('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'bfloat16x4_t')}}
+  a - b; // expected-error {{invalid operands to binary expression 
('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'bfloat16x4_t')}}
+  a * b; // expected-error {{invalid operands to binary expression 
('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'bfloat16x4_t')}}
+  a / b; // expected-error {{invalid operands to binary expression 
('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'bfloat16x4_t')}}
+
+  a + c; // expected-error {{invalid operands to binary expression 
('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'float16x4_t' (vector of 
4 'float16_t' values))}}
+  a - c; // expected-error {{invalid operands to binary expression 
('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'float16x4_t' (vector of 
4 'float16_t' values))}}
+  a * c; // expected-error {{invalid operands to binary expression 
('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'float16x4_t' (vector of 
4 'float16_t' values))}}
+  a / c; // expected-error {{invalid operands to binary expression 
('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'float16x4_t' (vector of 
4 'float16_t' values))}}
+  c + b; // expected-error {{invalid operands to binary expression 
('float16x4_t' (vector of 4 'float16_t' values) and 'bfloat16x4_t' (vector of 4 
'bfloat16_t' values))}}
+  c - b; // expected-error {{invalid operands to binary expression 
('float16x4_t' (vector of 4 'float16_t' values) and 'bfloat16x4_t' (vector of 4 
'bfloat16_t' values))}}
+  c * b; // expected-error {{invalid operands to binary expression 
('float16x4_t' (vector of 4 'float16_t' values) and 'bfloat16x4_t' (vector of 4 
'bfloat16_t' values))}}
+  c / b; // expected-error {{invalid operands to binary expression 
('float16x4_t' (vector of 4 'float16_t' values) and 'bfloat16x4_t' (vector of 4 
'bfloat16_t' values))}}
+}



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-11-06 Thread Simon Tatham via cfe-commits

statham-arm wrote:

@petrhosek, do you have any further comments? I'll merge this change based on 
@MaskRay's approval if I haven't heard back in another week.

https://github.com/llvm/llvm-project/pull/69447
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-11-14 Thread Simon Tatham via cfe-commits

statham-arm wrote:

> my only concern is to make sure we don't unintentionally make it harder to 
> integrate potential future extensions such as the mutually dependent groups.

Hmmm. So if you had both ME (mutually exclusive) and MD (mutually dependent) 
groups, you might also need a _group_ to be able to be a member of another 
group? That way you could specify hierarchies such as "must have all of: A, B, 
and exactly one of C,D" (an MD group one of whose members is an ME group), or 
"must have at most one of: (all of A,B,C) or (all of U,V,W)" (an ME group 
containing MD groups).

I suppose that makes sense, and the only change it needs to your structure is 
that maybe later a group record might also need to have a `Group:` or `Parent:` 
header. But there's no need to put that part in now, only to make sure there's 
room to add it in future if needed.

Would you accept `Type: Exclusive` instead of `Exclusive: True`? It seems more 
plausible to me that there might be three kinds of group that _can't_ go 
together than three group-type flags that you can have in any combination.

> although that may not necessarily be a bad thing since you could also warn if 
> someone accidentally tries to use a group that wasn't previously defined 
> (e.g. when making a typo).

That is true.

https://github.com/llvm/llvm-project/pull/69447
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-11-16 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm updated 
https://github.com/llvm/llvm-project/pull/69447

>From 2a65ae75e8c8e62e7275a439849837919599e896 Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Thu, 14 Sep 2023 14:51:17 +0100
Subject: [PATCH 1/4] [Driver] Add ExclusiveGroup feature to multilib.yaml.

This allows a YAML-based multilib configuration to specify explicitly
that a subset of its library directories are alternatives to each
other, i.e. at most one of that subset should be selected.

So if you have multiple sysroots each including a full set of headers
and libraries, you can mark them as members of the same
ExclusiveGroup, and then you'll be sure that only one of them is
selected, even if two or more are compatible with the compile options.

This is particularly important in multilib setups including the libc++
headers, where selecting the include directories from two different
sysroots can cause an actual build failure. This occurs when including
<stdio.h>, for example: libc++'s stdio.h is included first, and will
try to use `#include_next` to fetch the underlying libc's version. But
if there are two include directories from separate multilibs, then
both of their C++ include directories will end up on the include path
first, followed by both the C directories. So the `#include_next` from
the first libc++ stdio.h will include the second libc++ stdio.h, which
will do nothing because it has the same include guard macro, and the
libc header won't ever be included at all.

If more than one of the options in an ExclusiveGroup matches the given
flags, the last one wins.
---
 clang/include/clang/Driver/Multilib.h | 16 -
 clang/lib/Driver/Multilib.cpp | 49 ++---
 .../baremetal-multilib-exclusive-group.yaml   | 69 +++
 3 files changed, 122 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml

diff --git a/clang/include/clang/Driver/Multilib.h 
b/clang/include/clang/Driver/Multilib.h
index 1416559414f894b..6a9533e6dd831f1 100644
--- a/clang/include/clang/Driver/Multilib.h
+++ b/clang/include/clang/Driver/Multilib.h
@@ -39,13 +39,22 @@ class Multilib {
   std::string IncludeSuffix;
   flags_list Flags;
 
+  // Optionally, a multilib can be assigned a string tag indicating that it's
+  // part of a group of mutually exclusive possibilities. If two or more
+  // multilibs have the same non-empty value of ExclusiveGroup, then only the
+  // last matching one of them will be selected.
+  //
+  // Setting this to the empty string is a special case, indicating that the
+  // directory is not mutually exclusive with anything else.
+  std::string ExclusiveGroup;
+
 public:
   /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the
   /// sysroot string so they must either be empty or begin with a '/' 
character.
   /// This is enforced with an assert in the constructor.
   Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {},
-   StringRef IncludeSuffix = {},
-   const flags_list &Flags = flags_list());
+   StringRef IncludeSuffix = {}, const flags_list &Flags = 
flags_list(),
+   StringRef ExclusiveGroup = {});
 
   /// Get the detected GCC installation path suffix for the multi-arch
   /// target variant. Always starts with a '/', unless empty
@@ -63,6 +72,9 @@ class Multilib {
   /// All elements begin with either '-' or '!'
   const flags_list &flags() const { return Flags; }
 
+  /// Get the exclusive group label.
+  const std::string &exclusiveGroup() const { return ExclusiveGroup; }
+
   LLVM_DUMP_METHOD void dump() const;
   /// print summary of the Multilib
   void print(raw_ostream &OS) const;
diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp
index 48a494d9fa38db5..085ccee7b25752e 100644
--- a/clang/lib/Driver/Multilib.cpp
+++ b/clang/lib/Driver/Multilib.cpp
@@ -29,9 +29,10 @@ using namespace driver;
 using namespace llvm::sys;
 
 Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix,
-   StringRef IncludeSuffix, const flags_list &Flags)
+   StringRef IncludeSuffix, const flags_list &Flags,
+   StringRef ExclusiveGroup)
 : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix),
-  Flags(Flags) {
+  Flags(Flags), ExclusiveGroup(ExclusiveGroup) {
   assert(GCCSuffix.empty() ||
  (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1));
   assert(OSSuffix.empty() ||
@@ -96,13 +97,39 @@ bool MultilibSet::select(const Multilib::flags_list &Flags,
  llvm::SmallVector &Selected) const {
   llvm::StringSet<> FlagSet(expandFlags(Flags));
   Selected.clear();
-  llvm::copy_if(Multilibs, std::back_inserter(Selected),
-[&FlagSet](const Multilib &M) {
-  for (const std::string &F : M.flags())
-if (!FlagSet.contains(F))
-  return false;
-  return 

[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-11-16 Thread Simon Tatham via cfe-commits

statham-arm wrote:

OK, here's a version with the syntax that way. I've added another test to 
demonstrate the new error checks.

The implementation of exclusion is still done by having an `ExclusiveGroup` 
field in the actual `Multilib` class. Implementing mutually-dependent groups or 
nested groups is enough extra effort that I'd rather leave it until we actually 
need it! But now the user-facing syntax in `multilib.yaml` is futureproof 
against wanting to add those features later, so _only_ the implementation 
should need to change.

https://github.com/llvm/llvm-project/pull/69447
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-11-16 Thread Simon Tatham via cfe-commits

statham-arm wrote:

(btw, that `squash!` commit contains the revised commit message I plan to put 
on the final version, so I need to not forget to do the squash by hand to get 
that right)

https://github.com/llvm/llvm-project/pull/69447
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-10-30 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm updated 
https://github.com/llvm/llvm-project/pull/69447

>From 2a65ae75e8c8e62e7275a439849837919599e896 Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Thu, 14 Sep 2023 14:51:17 +0100
Subject: [PATCH 1/3] [Driver] Add ExclusiveGroup feature to multilib.yaml.

This allows a YAML-based multilib configuration to specify explicitly
that a subset of its library directories are alternatives to each
other, i.e. at most one of that subset should be selected.

So if you have multiple sysroots each including a full set of headers
and libraries, you can mark them as members of the same
ExclusiveGroup, and then you'll be sure that only one of them is
selected, even if two or more are compatible with the compile options.

This is particularly important in multilib setups including the libc++
headers, where selecting the include directories from two different
sysroots can cause an actual build failure. This occurs when including
<stdio.h>, for example: libc++'s stdio.h is included first, and will
try to use `#include_next` to fetch the underlying libc's version. But
if there are two include directories from separate multilibs, then
both of their C++ include directories will end up on the include path
first, followed by both the C directories. So the `#include_next` from
the first libc++ stdio.h will include the second libc++ stdio.h, which
will do nothing because it has the same include guard macro, and the
libc header won't ever be included at all.

If more than one of the options in an ExclusiveGroup matches the given
flags, the last one wins.
---
 clang/include/clang/Driver/Multilib.h | 16 -
 clang/lib/Driver/Multilib.cpp | 49 ++---
 .../baremetal-multilib-exclusive-group.yaml   | 69 +++
 3 files changed, 122 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml

diff --git a/clang/include/clang/Driver/Multilib.h 
b/clang/include/clang/Driver/Multilib.h
index 1416559414f894b..6a9533e6dd831f1 100644
--- a/clang/include/clang/Driver/Multilib.h
+++ b/clang/include/clang/Driver/Multilib.h
@@ -39,13 +39,22 @@ class Multilib {
   std::string IncludeSuffix;
   flags_list Flags;
 
+  // Optionally, a multilib can be assigned a string tag indicating that it's
+  // part of a group of mutually exclusive possibilities. If two or more
+  // multilibs have the same non-empty value of ExclusiveGroup, then only the
+  // last matching one of them will be selected.
+  //
+  // Setting this to the empty string is a special case, indicating that the
+  // directory is not mutually exclusive with anything else.
+  std::string ExclusiveGroup;
+
 public:
   /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the
   /// sysroot string so they must either be empty or begin with a '/' 
character.
   /// This is enforced with an assert in the constructor.
   Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {},
-   StringRef IncludeSuffix = {},
-   const flags_list &Flags = flags_list());
+   StringRef IncludeSuffix = {}, const flags_list &Flags = 
flags_list(),
+   StringRef ExclusiveGroup = {});
 
   /// Get the detected GCC installation path suffix for the multi-arch
   /// target variant. Always starts with a '/', unless empty
@@ -63,6 +72,9 @@ class Multilib {
   /// All elements begin with either '-' or '!'
   const flags_list &flags() const { return Flags; }
 
+  /// Get the exclusive group label.
+  const std::string &exclusiveGroup() const { return ExclusiveGroup; }
+
   LLVM_DUMP_METHOD void dump() const;
   /// print summary of the Multilib
   void print(raw_ostream &OS) const;
diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp
index 48a494d9fa38db5..085ccee7b25752e 100644
--- a/clang/lib/Driver/Multilib.cpp
+++ b/clang/lib/Driver/Multilib.cpp
@@ -29,9 +29,10 @@ using namespace driver;
 using namespace llvm::sys;
 
 Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix,
-   StringRef IncludeSuffix, const flags_list &Flags)
+   StringRef IncludeSuffix, const flags_list &Flags,
+   StringRef ExclusiveGroup)
 : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix),
-  Flags(Flags) {
+  Flags(Flags), ExclusiveGroup(ExclusiveGroup) {
   assert(GCCSuffix.empty() ||
  (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1));
   assert(OSSuffix.empty() ||
@@ -96,13 +97,39 @@ bool MultilibSet::select(const Multilib::flags_list &Flags,
  llvm::SmallVector &Selected) const {
   llvm::StringSet<> FlagSet(expandFlags(Flags));
   Selected.clear();
-  llvm::copy_if(Multilibs, std::back_inserter(Selected),
-[&FlagSet](const Multilib &M) {
-  for (const std::string &F : M.flags())
-if (!FlagSet.contains(F))
-  return false;
-  return 

[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-10-30 Thread Simon Tatham via cfe-commits


@@ -96,13 +97,39 @@ bool MultilibSet::select(const Multilib::flags_list &Flags,
  llvm::SmallVector &Selected) const {
   llvm::StringSet<> FlagSet(expandFlags(Flags));
   Selected.clear();
-  llvm::copy_if(Multilibs, std::back_inserter(Selected),
-[&FlagSet](const Multilib &M) {
-  for (const std::string &F : M.flags())
-if (!FlagSet.contains(F))
-  return false;
-  return true;
-});
+
+  // Decide which multilibs we're going to select at all
+  std::vector IsSelected(Multilibs.size(), false);
+  std::map ExclusiveGroupMembers;
+  for (size_t i = 0, e = Multilibs.size(); i < e; ++i) {
+const Multilib &M = Multilibs[i];
+
+// If this multilib doesn't match all our flags, don't select it
+if (!llvm::all_of(M.flags(), [&FlagSet](const std::string &F) {
+  return FlagSet.contains(F);
+}))
+  continue;
+
+// If this multilib has the same ExclusiveGroup as one we've already
+// selected, de-select the previous one
+const std::string &group = M.exclusiveGroup();
+if (!group.empty()) {

statham-arm wrote:

`insert`, actually – `try_emplace` appears in `DenseMap` but not `DenseSet`. 
But otherwise, done.

https://github.com/llvm/llvm-project/pull/69447
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-10-30 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm edited 
https://github.com/llvm/llvm-project/pull/69447
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-12-01 Thread Simon Tatham via cfe-commits


@@ -138,10 +164,34 @@ static const VersionTuple MultilibVersionCurrent(1, 0);
 struct MultilibSerialization {
   std::string Dir;
   std::vector Flags;
+  std::string Group;
+};
+
+struct MultilibGroupSerialization {
+  /*
+   * Future directions:
+   *
+   * If it's needed in future, we could introduce additional group types by
+   * permitting Type to contain strings other than "Exclusive". Another
+   * possibility is a group of library directories that are mutually
+   * _dependent_ rather than mutually exclusive: if you include one you must
+   * include them all.
+   *
+   * It might also be useful to allow groups to be members of other groups, so
+   * that a mutually exclusive group could contain a mutually dependent set of
+   * library directories, or vice versa.
+   *
+   * These additional features would need changes in the implementation, but
+   * the YAML schema is set up so they can be added without requiring changes
+   * in existing users' multilib.yaml files.
+   */
+  std::string Name;
+  std::string Type;

statham-arm wrote:

Yes, apparently we can. I hadn't found that part of the `llvm::yaml` API yet, 
but defining a `ScalarEnumerationTraits` for the enum type seems to be the way 
to make it Just Work during decoding. Thanks.

https://github.com/llvm/llvm-project/pull/69447
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-12-01 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm updated 
https://github.com/llvm/llvm-project/pull/69447

>From 1140903195e555643ee1a6b9f671b47b0c307f9e Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Thu, 14 Sep 2023 14:51:17 +0100
Subject: [PATCH] [Driver] Add ExclusiveGroup feature to multilib.yaml.

This allows a YAML-based multilib configuration to specify explicitly
that a subset of its library directories are alternatives to each
other, i.e. at most one of that subset should be selected.

So if you have multiple sysroots each including a full set of headers
and libraries, you can mark them as members of the same mutually
exclusive group, and then you'll be sure that only one of them is
selected, even if two or more are compatible with the compile options.

This is particularly important in multilib setups including the libc++
headers, where selecting the include directories from two different
sysroots can cause an actual build failure. This occurs when including
<stdio.h>, for example: libc++'s stdio.h is included first, and will
try to use `#include_next` to fetch the underlying libc's version. But
if there are two include directories from separate multilibs, then
both of their C++ include directories will end up on the include path
first, followed by both the C directories. So the `#include_next` from
the first libc++ stdio.h will include the second libc++ stdio.h, which
will do nothing because it has the same include guard macro, and the
libc header won't ever be included at all.

If more than one of the options in an exclusive group matches the
given flags, the last one wins.

The syntax for specifying this in multilib.yaml is to define a Groups
section in which you specify your group names, and for each one,
declare it to have Type: Exclusive. (This reserves space in the syntax
for maybe adding other group types later, such as a group of mutually
_dependent_ things that you must have all or none of.) Then each
Variant record that's a member of a group has a Group: property giving
that group's name.
---
 clang/include/clang/Driver/Multilib.h |  16 ++-
 clang/lib/Driver/Multilib.cpp | 108 --
 .../baremetal-multilib-exclusive-group.yaml   |  79 +
 .../baremetal-multilib-group-error.yaml   |  27 +
 4 files changed, 218 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml
 create mode 100644 clang/test/Driver/baremetal-multilib-group-error.yaml

diff --git a/clang/include/clang/Driver/Multilib.h 
b/clang/include/clang/Driver/Multilib.h
index 1416559414f894b..6a9533e6dd831f1 100644
--- a/clang/include/clang/Driver/Multilib.h
+++ b/clang/include/clang/Driver/Multilib.h
@@ -39,13 +39,22 @@ class Multilib {
   std::string IncludeSuffix;
   flags_list Flags;
 
+  // Optionally, a multilib can be assigned a string tag indicating that it's
+  // part of a group of mutually exclusive possibilities. If two or more
+  // multilibs have the same non-empty value of ExclusiveGroup, then only the
+  // last matching one of them will be selected.
+  //
+  // Setting this to the empty string is a special case, indicating that the
+  // directory is not mutually exclusive with anything else.
+  std::string ExclusiveGroup;
+
 public:
   /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the
   /// sysroot string so they must either be empty or begin with a '/' 
character.
   /// This is enforced with an assert in the constructor.
   Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {},
-   StringRef IncludeSuffix = {},
-   const flags_list &Flags = flags_list());
+   StringRef IncludeSuffix = {}, const flags_list &Flags = 
flags_list(),
+   StringRef ExclusiveGroup = {});
 
   /// Get the detected GCC installation path suffix for the multi-arch
   /// target variant. Always starts with a '/', unless empty
@@ -63,6 +72,9 @@ class Multilib {
   /// All elements begin with either '-' or '!'
   const flags_list &flags() const { return Flags; }
 
+  /// Get the exclusive group label.
+  const std::string &exclusiveGroup() const { return ExclusiveGroup; }
+
   LLVM_DUMP_METHOD void dump() const;
   /// print summary of the Multilib
   void print(raw_ostream &OS) const;
diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp
index 48a494d9fa38db5..7681c1a3ce6756f 100644
--- a/clang/lib/Driver/Multilib.cpp
+++ b/clang/lib/Driver/Multilib.cpp
@@ -9,6 +9,7 @@
 #include "clang/Driver/Multilib.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/Version.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Compiler.h"
@@ -29,9 +30,10 @@ using namespace driver;
 using namespace llvm::sys;
 
 Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix,
-   StringRef IncludeSuffix, const flags_list &Flags)
+   StringRef IncludeSuffix, const flags_list &Flags,

[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-12-01 Thread Simon Tatham via cfe-commits

statham-arm wrote:

(This final force-push is the squashed version of the previous stack, rebased 
to the current head of `main`, so that the builder can run a last test. Thanks 
both for the approvals; I'll merge it once the tests have finished.)

https://github.com/llvm/llvm-project/pull/69447
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-12-01 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm closed 
https://github.com/llvm/llvm-project/pull/69447
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[libunwind] 43c84e4 - [libunwind, EHABI, ARM] Fix get/set of RA_AUTH_CODE.

2022-06-27 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2022-06-27T09:36:21+01:00
New Revision: 43c84e463426ca35fe9fc2d38063d75fed944f23

URL: 
https://github.com/llvm/llvm-project/commit/43c84e463426ca35fe9fc2d38063d75fed944f23
DIFF: 
https://github.com/llvm/llvm-project/commit/43c84e463426ca35fe9fc2d38063d75fed944f23.diff

LOG: [libunwind,EHABI,ARM] Fix get/set of RA_AUTH_CODE.

According to EHABI32 §8.5.2, the PAC for the return address of a
function described in an exception table is supposed to be addressed
in the _Unwind_VRS_{Get,Set} API by setting regclass=_UVRSC_PSEUDO and
regno=0. (The space of 'regno' values is independent for each
regclass, and for _UVRSC_PSEUDO, there is only one valid regno so far.)

That is indeed what libunwind's _Unwind_VRS_{Get,Set} functions expect
to receive. But at two call sites, the wrong values are passed in:
regno is being set to UNW_ARM_RA_AUTH_CODE (0x8F) instead of 0, and in
one case, regclass is _UVRSC_CORE instead of _UVRSC_PSEUDO.

As a result, those calls to _Unwind_VRS_{Get,Set} return
_UVRSR_FAILED, which their callers ignore. So if you compile in the
AUTG instruction that actually validates the PAC, it will try to
validate what's effectively an uninitialised register as an
authentication code, and trigger a CPU fault even on correct exception
unwinding.

Reviewed By: danielkiss

Differential Revision: https://reviews.llvm.org/D128522

Added: 


Modified: 
libunwind/src/Unwind-EHABI.cpp

Removed: 




diff  --git a/libunwind/src/Unwind-EHABI.cpp b/libunwind/src/Unwind-EHABI.cpp
index 6ac09adfb8fe..f203887567b6 100644
--- a/libunwind/src/Unwind-EHABI.cpp
+++ b/libunwind/src/Unwind-EHABI.cpp
@@ -432,8 +432,7 @@ _Unwind_VRS_Interpret(_Unwind_Context *context, const 
uint32_t *data,
   uint32_t sp;
   uint32_t pac;
   _Unwind_VRS_Get(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp);
-  _Unwind_VRS_Get(context, _UVRSC_PSEUDO, UNW_ARM_RA_AUTH_CODE,
-  _UVRSD_UINT32, &pac);
+  _Unwind_VRS_Get(context, _UVRSC_PSEUDO, 0, _UVRSD_UINT32, &pac);
   __asm__ __volatile__("autg %0, %1, %2" : : "r"(pac), "r"(lr), "r"(sp) :);
 }
 #else
@@ -1138,8 +1137,7 @@ _Unwind_VRS_Pop(_Unwind_Context *context, 
_Unwind_VRS_RegClass regclass,
   }
   uint32_t pac = *sp++;
   _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_SP, _UVRSD_UINT32, &sp);
-  return _Unwind_VRS_Set(context, _UVRSC_CORE, UNW_ARM_RA_AUTH_CODE,
- _UVRSD_UINT32, &pac);
+  return _Unwind_VRS_Set(context, _UVRSC_PSEUDO, 0, _UVRSD_UINT32, &pac);
 }
   }
   _LIBUNWIND_ABORT("unsupported register class");



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] cef56d5 - [clang] Change set type used for SourceLocation.

2021-07-19 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2021-07-19T13:36:36+01:00
New Revision: cef56d58dbbb3bc993531c14af5e3edd2841029d

URL: 
https://github.com/llvm/llvm-project/commit/cef56d58dbbb3bc993531c14af5e3edd2841029d
DIFF: 
https://github.com/llvm/llvm-project/commit/cef56d58dbbb3bc993531c14af5e3edd2841029d.diff

LOG: [clang] Change set type used for SourceLocation.

This is part of a patch series working towards the ability to make
SourceLocation into a 64-bit type to handle larger translation units.

If clang is built for a 32-bit platform and SourceLocation is 64 bits
wide, then a SourceLocation will be larger than a pointer, so it won't
be possible to keep them in a SmallPtrSet any more. Switch to
SmallDenseSet instead.

Patch originally by Mikhail Maltsev.

Differential Revision: https://reviews.llvm.org/D105493

Added: 


Modified: 
clang/include/clang/Basic/SourceLocation.h
clang/include/clang/Lex/Preprocessor.h

Removed: 




diff  --git a/clang/include/clang/Basic/SourceLocation.h 
b/clang/include/clang/Basic/SourceLocation.h
index fc722b1d563db..0ba0f9bd3ddf2 100644
--- a/clang/include/clang/Basic/SourceLocation.h
+++ b/clang/include/clang/Basic/SourceLocation.h
@@ -16,7 +16,6 @@
 
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/PointerLikeTypeTraits.h"
 #include 
 #include 
 #include 
@@ -510,20 +509,6 @@ namespace llvm {
 static void Profile(const clang::SourceLocation &X, FoldingSetNodeID &ID);
   };
 
-  // Teach SmallPtrSet how to handle SourceLocation.
-  template<>
-  struct PointerLikeTypeTraits<clang::SourceLocation> {
-static constexpr int NumLowBitsAvailable = 0;
-
-static void *getAsVoidPointer(clang::SourceLocation L) {
-  return L.getPtrEncoding();
-}
-
-static clang::SourceLocation getFromVoidPointer(void *P) {
-  return clang::SourceLocation::getFromRawEncoding((unsigned)(uintptr_t)P);
-}
-  };
-
 } // namespace llvm
 
 #endif // LLVM_CLANG_BASIC_SOURCELOCATION_H

diff  --git a/clang/include/clang/Lex/Preprocessor.h 
b/clang/include/clang/Lex/Preprocessor.h
index be345d4f5b4ea..7ab13640ce2c0 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -783,8 +783,7 @@ class Preprocessor {
   /// deserializing from PCH, we don't need to deserialize identifier & macros
   /// just so that we can report that they are unused, we just warn using
   /// the SourceLocations of this set (that will be filled by the ASTReader).
-  /// We are using SmallPtrSet instead of a vector for faster removal.
-  using WarnUnusedMacroLocsTy = llvm::SmallPtrSet;
+  using WarnUnusedMacroLocsTy = llvm::SmallDenseSet;
   WarnUnusedMacroLocsTy WarnUnusedMacroLocs;
 
   /// A "freelist" of MacroArg objects that can be



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 21401a7 - [clang] Introduce SourceLocation::[U]IntTy typedefs.

2021-07-21 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2021-07-21T10:45:46+01:00
New Revision: 21401a72629cc591bab7ec6816f03e6c550f3fb3

URL: 
https://github.com/llvm/llvm-project/commit/21401a72629cc591bab7ec6816f03e6c550f3fb3
DIFF: 
https://github.com/llvm/llvm-project/commit/21401a72629cc591bab7ec6816f03e6c550f3fb3.diff

LOG: [clang] Introduce SourceLocation::[U]IntTy typedefs.

This is part of a patch series working towards the ability to make
SourceLocation into a 64-bit type to handle larger translation units.

NFC: this patch introduces typedefs for the integer type used by
SourceLocation and makes all the boring changes to use the typedefs
everywhere, but for the moment, they are unconditionally defined to
uint32_t.

Patch originally by Mikhail Maltsev.

Reviewed By: tmatheson

Differential Revision: https://reviews.llvm.org/D105492

Added: 


Modified: 
clang/include/clang/AST/DeclarationName.h
clang/include/clang/Basic/SourceLocation.h
clang/include/clang/Basic/SourceManager.h
clang/include/clang/Lex/Token.h
clang/include/clang/Serialization/ASTBitCodes.h
clang/include/clang/Serialization/ASTReader.h
clang/include/clang/Serialization/ASTWriter.h
clang/include/clang/Serialization/ModuleFile.h
clang/lib/ARCMigrate/TransEmptyStatementsAndDealloc.cpp
clang/lib/AST/NestedNameSpecifier.cpp
clang/lib/Basic/SourceLocation.cpp
clang/lib/Basic/SourceManager.cpp
clang/lib/CodeGen/CGOpenMPRuntime.cpp
clang/lib/Lex/Lexer.cpp
clang/lib/Lex/ModuleMap.cpp
clang/lib/Lex/PPCaching.cpp
clang/lib/Lex/TokenLexer.cpp
clang/lib/Serialization/ASTReader.cpp
clang/lib/Serialization/ASTWriter.cpp
clang/tools/libclang/CIndex.cpp

Removed: 




diff  --git a/clang/include/clang/AST/DeclarationName.h 
b/clang/include/clang/AST/DeclarationName.h
index acf7e243da46b..38da6fc727fbd 100644
--- a/clang/include/clang/AST/DeclarationName.h
+++ b/clang/include/clang/AST/DeclarationName.h
@@ -660,13 +660,13 @@ class DeclarationNameLoc {
 
   // The location (if any) of the operator keyword is stored elsewhere.
   struct CXXOpName {
-unsigned BeginOpNameLoc;
-unsigned EndOpNameLoc;
+SourceLocation::UIntTy BeginOpNameLoc;
+SourceLocation::UIntTy EndOpNameLoc;
   };
 
   // The location (if any) of the operator keyword is stored elsewhere.
   struct CXXLitOpName {
-unsigned OpNameLoc;
+SourceLocation::UIntTy OpNameLoc;
   };
 
   // struct {} CXXUsingDirective;

diff  --git a/clang/include/clang/Basic/SourceLocation.h 
b/clang/include/clang/Basic/SourceLocation.h
index 0ba0f9bd3ddf2..540de23b9f55e 100644
--- a/clang/include/clang/Basic/SourceLocation.h
+++ b/clang/include/clang/Basic/SourceLocation.h
@@ -91,11 +91,14 @@ class SourceLocation {
   friend class SourceManager;
   friend struct llvm::FoldingSetTrait;
 
-  unsigned ID = 0;
+public:
+  using UIntTy = uint32_t;
+  using IntTy = int32_t;
 
-  enum : unsigned {
-MacroIDBit = 1U << 31
-  };
+private:
+  UIntTy ID = 0;
+
+  enum : UIntTy { MacroIDBit = 1ULL << (8 * sizeof(UIntTy) - 1) };
 
 public:
   bool isFileID() const  { return (ID & MacroIDBit) == 0; }
@@ -111,18 +114,16 @@ class SourceLocation {
 
 private:
   /// Return the offset into the manager's global input view.
-  unsigned getOffset() const {
-return ID & ~MacroIDBit;
-  }
+  UIntTy getOffset() const { return ID & ~MacroIDBit; }
 
-  static SourceLocation getFileLoc(unsigned ID) {
+  static SourceLocation getFileLoc(UIntTy ID) {
 assert((ID & MacroIDBit) == 0 && "Ran out of source locations!");
 SourceLocation L;
 L.ID = ID;
 return L;
   }
 
-  static SourceLocation getMacroLoc(unsigned ID) {
+  static SourceLocation getMacroLoc(UIntTy ID) {
 assert((ID & MacroIDBit) == 0 && "Ran out of source locations!");
 SourceLocation L;
 L.ID = MacroIDBit | ID;
@@ -132,7 +133,7 @@ class SourceLocation {
 public:
   /// Return a source location with the specified offset from this
   /// SourceLocation.
-  SourceLocation getLocWithOffset(int Offset) const {
+  SourceLocation getLocWithOffset(IntTy Offset) const {
 assert(((getOffset()+Offset) & MacroIDBit) == 0 && "offset overflow");
 SourceLocation L;
 L.ID = ID+Offset;
@@ -144,13 +145,13 @@ class SourceLocation {
   ///
   /// This should only be passed to SourceLocation::getFromRawEncoding, it
   /// should not be inspected directly.
-  unsigned getRawEncoding() const { return ID; }
+  UIntTy getRawEncoding() const { return ID; }
 
   /// Turn a raw encoding of a SourceLocation object into
   /// a real SourceLocation.
   ///
   /// \see getRawEncoding.
-  static SourceLocation getFromRawEncoding(unsigned Encoding) {
+  static SourceLocation getFromRawEncoding(UIntTy Encoding) {
 SourceLocation X;
 X.ID = Encoding;
 return X;
@@ -170,7 +171,7 @@ class SourceLocation {
   /// Turn a pointer encoding of a SourceLocation object back
   /// into a real SourceL

[clang] bd41136 - [clang] Use i64 for the !srcloc metadata on asm IR nodes.

2021-07-22 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2021-07-22T10:24:52+01:00
New Revision: bd41136746a0b47882914cee5a8d1ac6714288d1

URL: 
https://github.com/llvm/llvm-project/commit/bd41136746a0b47882914cee5a8d1ac6714288d1
DIFF: 
https://github.com/llvm/llvm-project/commit/bd41136746a0b47882914cee5a8d1ac6714288d1.diff

LOG: [clang] Use i64 for the !srcloc metadata on asm IR nodes.

This is part of a patch series working towards the ability to make
SourceLocation into a 64-bit type to handle larger translation units.

!srcloc is generated in clang codegen, and pulled back out by llvm
functions like AsmPrinter::emitInlineAsm that need to report errors in
the inline asm. From there it goes to LLVMContext::emitError, is
stored in DiagnosticInfoInlineAsm, and ends up back in clang, at
BackendConsumer::InlineAsmDiagHandler(), which reconstitutes a true
clang::SourceLocation from the integer cookie.

Throughout this code path, it's now 64-bit rather than 32, which means
that if SourceLocation is expanded to a 64-bit type, this error report
won't lose half of the data.

The compiler will tolerate both of i32 and i64 !srcloc metadata in
input IR without faulting. Test added in llvm/MC. (The semantic
accuracy of the metadata is another matter, but I don't know of any
situation where that matters: if you're reading an IR file written by
a previous run of clang, you don't have the SourceManager that can
relate those source locations back to the original source files.)

Original version of the patch by Mikhail Maltsev.

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D105491

Added: 


Modified: 
clang/lib/CodeGen/CGStmt.cpp
llvm/include/llvm/IR/DiagnosticInfo.h
llvm/include/llvm/IR/LLVMContext.h
llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
llvm/lib/CodeGen/MachineInstr.cpp
llvm/lib/IR/LLVMContext.cpp
llvm/test/MC/ARM/inline-asm-srcloc.ll

Removed: 




diff  --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index 6f6dcfa58a7f1..aeb319ca15819 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -2158,7 +2158,7 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral 
*Str,
   SmallVector Locs;
   // Add the location of the first line to the MDNode.
   Locs.push_back(llvm::ConstantAsMetadata::get(llvm::ConstantInt::get(
-  CGF.Int32Ty, Str->getBeginLoc().getRawEncoding())));
+  CGF.Int64Ty, Str->getBeginLoc().getRawEncoding())));
   StringRef StrVal = Str->getString();
   if (!StrVal.empty()) {
 const SourceManager &SM = CGF.CGM.getContext().getSourceManager();
@@ -2173,7 +2173,7 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral 
*Str,
   SourceLocation LineLoc = Str->getLocationOfByte(
   i + 1, SM, LangOpts, CGF.getTarget(), &StartToken, &ByteOffset);
   Locs.push_back(llvm::ConstantAsMetadata::get(
-  llvm::ConstantInt::get(CGF.Int32Ty, LineLoc.getRawEncoding())));
+  llvm::ConstantInt::get(CGF.Int64Ty, LineLoc.getRawEncoding())));
 }
   }
 
@@ -2210,8 +2210,8 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, 
bool HasSideEffect,
getAsmSrcLocInfo(gccAsmStmt->getAsmString(), CGF));
   else {
 // At least put the line number on MS inline asm blobs.
-llvm::Constant *Loc = llvm::ConstantInt::get(CGF.Int32Ty,
-S.getAsmLoc().getRawEncoding());
+llvm::Constant *Loc =
+llvm::ConstantInt::get(CGF.Int64Ty, S.getAsmLoc().getRawEncoding());
 Result.setMetadata("srcloc",
llvm::MDNode::get(CGF.getLLVMContext(),
  llvm::ConstantAsMetadata::get(Loc)));

diff  --git a/llvm/include/llvm/IR/DiagnosticInfo.h 
b/llvm/include/llvm/IR/DiagnosticInfo.h
index 9134ca12600b2..5064f4f4edf77 100644
--- a/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/llvm/include/llvm/IR/DiagnosticInfo.h
@@ -131,7 +131,7 @@ using DiagnosticHandlerFunction = std::function;
 class DiagnosticInfoInlineAsm : public DiagnosticInfo {
 private:
   /// Optional line information. 0 if not set.
-  unsigned LocCookie = 0;
+  uint64_t LocCookie = 0;
   /// Message to be reported.
   const Twine &MsgStr;
   /// Optional origin of the problem.
@@ -149,7 +149,7 @@ class DiagnosticInfoInlineAsm : public DiagnosticInfo {
   /// \p MsgStr gives the message.
   /// This class does not copy \p MsgStr, therefore the reference must be valid
   /// for the whole life time of the Diagnostic.
-  DiagnosticInfoInlineAsm(unsigned LocCookie, const Twine &MsgStr,
+  DiagnosticInfoInlineAsm(uint64_t LocCookie, const Twine &MsgStr,
   DiagnosticSeverity Severity = DS_Error)
   : DiagnosticInfo(DK_InlineAsm, Severity), LocCookie(LocCookie),
 MsgStr(MsgStr) {}
@@ -162,7 +162,7 @@ class DiagnosticInfoInlineAsm : public DiagnosticInfo {
   DiagnosticInfoInlineAsm(const Instructi

[clang] [Modules] No transitive source location change (PR #86912)

2024-04-02 Thread Simon Tatham via cfe-commits

statham-arm wrote:

> Let's see if @statham-arm (who introduced the `SourceLocation::[U]IntTy` 
> typedefs) wants to weigh in here.

I'm afraid my knowledge of C++ modules is very close to zero. They were 
mentioned in a training course I did last year, but not in much detail.

On 64-bit SourceLocation in general: our patch series to implement those as an 
option in clang was never fully landed, because the second half of it stalled 
in review. I'd still like to see it finished off, though I'm sure it would need 
some vigorous rebasing and retesting by now. The 32-bit SourceLocation limit is 
a problem for at least some of our users, apparently because there's a library 
of header files that break the limit all by themselves. (I'm not sure how; I 
haven't seen them. Maybe by including each other multiple times with different 
`#define`s?)

But I have no idea whether the same considerations would apply to modules, 
because I don't really know enough about modules, sorry!

https://github.com/llvm/llvm-project/pull/86912
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-10-18 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm created 
https://github.com/llvm/llvm-project/pull/69447

This allows a YAML-based multilib configuration to specify explicitly that a 
subset of its library directories are alternatives to each other, i.e. at most 
one of that subset should be selected.

So if you have multiple sysroots each including a full set of headers and 
libraries, you can mark them as members of the same ExclusiveGroup, and then 
you'll be sure that only one of them is selected, even if two or more are 
compatible with the compile options.

This is particularly important in multilib setups including the libc++ headers, 
where selecting the include directories from two different sysroots can cause 
an actual build failure. This occurs when including <stdio.h>, for example: 
libc++'s stdio.h is included first, and will try to use `#include_next` to 
fetch the underlying libc's version. But if there are two include directories 
from separate multilibs, then both of their C++ include directories will end up 
on the include path first, followed by both the C directories. So the 
`#include_next` from the first libc++ stdio.h will include the second libc++ 
stdio.h, which will do nothing because it has the same include guard macro, and 
the libc header won't ever be included at all.

If more than one of the options in an ExclusiveGroup matches the given flags, 
the last one wins.

>From 5b3289a7ad40850cbe1c438345a181b01c500639 Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Thu, 14 Sep 2023 14:51:17 +0100
Subject: [PATCH] [Driver] Add ExclusiveGroup feature to multilib.yaml.

This allows a YAML-based multilib configuration to specify explicitly
that a subset of its library directories are alternatives to each
other, i.e. at most one of that subset should be selected.

So if you have multiple sysroots each including a full set of headers
and libraries, you can mark them as members of the same
ExclusiveGroup, and then you'll be sure that only one of them is
selected, even if two or more are compatible with the compile options.

This is particularly important in multilib setups including the libc++
headers, where selecting the include directories from two different
sysroots can cause an actual build failure. This occurs when including
<stdio.h>, for example: libc++'s stdio.h is included first, and will
try to use `#include_next` to fetch the underlying libc's version. But
if there are two include directories from separate multilibs, then
both of their C++ include directories will end up on the include path
first, followed by both the C directories. So the `#include_next` from
the first libc++ stdio.h will include the second libc++ stdio.h, which
will do nothing because it has the same include guard macro, and the
libc header won't ever be included at all.

If more than one of the options in an ExclusiveGroup matches the given
flags, the last one wins.
---
 clang/include/clang/Driver/Multilib.h | 15 +++-
 clang/lib/Driver/Multilib.cpp | 49 ++---
 .../baremetal-multilib-exclusive-group.yaml   | 69 +++
 3 files changed, 122 insertions(+), 11 deletions(-)
 create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml

diff --git a/clang/include/clang/Driver/Multilib.h 
b/clang/include/clang/Driver/Multilib.h
index 1416559414f894b..46f23a2ff5fabac 100644
--- a/clang/include/clang/Driver/Multilib.h
+++ b/clang/include/clang/Driver/Multilib.h
@@ -39,13 +39,23 @@ class Multilib {
   std::string IncludeSuffix;
   flags_list Flags;
 
+  // Optionally, a multilib can be assigned a string tag indicating that it's
+  // part of a group of mutually exclusive possibilities. If two or more
+  // multilibs have the same non-empty value of ExclusiveGroup, then only the
+  // last matching one of them will be selected.
+  //
+  // Setting this to the empty string is a special case, indicating that the
+  // directory is not mutually exclusive with anything else.
+  std::string ExclusiveGroup;
+
 public:
   /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the
   /// sysroot string so they must either be empty or begin with a '/' 
character.
   /// This is enforced with an assert in the constructor.
   Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {},
StringRef IncludeSuffix = {},
-   const flags_list &Flags = flags_list());
+   const flags_list &Flags = flags_list(),
+   StringRef ExclusiveGroup = {});
 
   /// Get the detected GCC installation path suffix for the multi-arch
   /// target variant. Always starts with a '/', unless empty
@@ -63,6 +73,9 @@ class Multilib {
   /// All elements begin with either '-' or '!'
   const flags_list &flags() const { return Flags; }
 
+  /// Get the exclusive group label.
+  const std::string &exclusiveGroup() const { return ExclusiveGroup; }
+
   LLVM_DUMP_METHOD void dump() const;
   /// print summary of the Multilib
   void print(raw_ostream &OS) const;
diff --git a/clang/lib/

[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-10-18 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm updated 
https://github.com/llvm/llvm-project/pull/69447

>From 3a0481134343339ce8132419fde875ac9977b734 Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Thu, 14 Sep 2023 14:51:17 +0100
Subject: [PATCH] [Driver] Add ExclusiveGroup feature to multilib.yaml.

This allows a YAML-based multilib configuration to specify explicitly
that a subset of its library directories are alternatives to each
other, i.e. at most one of that subset should be selected.

So if you have multiple sysroots each including a full set of headers
and libraries, you can mark them as members of the same
ExclusiveGroup, and then you'll be sure that only one of them is
selected, even if two or more are compatible with the compile options.

This is particularly important in multilib setups including the libc++
headers, where selecting the include directories from two different
sysroots can cause an actual build failure. This occurs when including
<stdio.h>, for example: libc++'s stdio.h is included first, and will
try to use `#include_next` to fetch the underlying libc's version. But
if there are two include directories from separate multilibs, then
both of their C++ include directories will end up on the include path
first, followed by both the C directories. So the `#include_next` from
the first libc++ stdio.h will include the second libc++ stdio.h, which
will do nothing because it has the same include guard macro, and the
libc header won't ever be included at all.

If more than one of the options in an ExclusiveGroup matches the given
flags, the last one wins.
---
 clang/include/clang/Driver/Multilib.h | 16 -
 clang/lib/Driver/Multilib.cpp | 49 ++---
 .../baremetal-multilib-exclusive-group.yaml   | 69 +++
 3 files changed, 122 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml

diff --git a/clang/include/clang/Driver/Multilib.h 
b/clang/include/clang/Driver/Multilib.h
index 1416559414f894b..6a9533e6dd831f1 100644
--- a/clang/include/clang/Driver/Multilib.h
+++ b/clang/include/clang/Driver/Multilib.h
@@ -39,13 +39,22 @@ class Multilib {
   std::string IncludeSuffix;
   flags_list Flags;
 
+  // Optionally, a multilib can be assigned a string tag indicating that it's
+  // part of a group of mutually exclusive possibilities. If two or more
+  // multilibs have the same non-empty value of ExclusiveGroup, then only the
+  // last matching one of them will be selected.
+  //
+  // Setting this to the empty string is a special case, indicating that the
+  // directory is not mutually exclusive with anything else.
+  std::string ExclusiveGroup;
+
 public:
   /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the
   /// sysroot string so they must either be empty or begin with a '/' 
character.
   /// This is enforced with an assert in the constructor.
   Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {},
-   StringRef IncludeSuffix = {},
-   const flags_list &Flags = flags_list());
+   StringRef IncludeSuffix = {}, const flags_list &Flags = 
flags_list(),
+   StringRef ExclusiveGroup = {});
 
   /// Get the detected GCC installation path suffix for the multi-arch
   /// target variant. Always starts with a '/', unless empty
@@ -63,6 +72,9 @@ class Multilib {
   /// All elements begin with either '-' or '!'
   const flags_list &flags() const { return Flags; }
 
+  /// Get the exclusive group label.
+  const std::string &exclusiveGroup() const { return ExclusiveGroup; }
+
   LLVM_DUMP_METHOD void dump() const;
   /// print summary of the Multilib
   void print(raw_ostream &OS) const;
diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp
index ba466af39e2dcaf..a8eff30f1416852 100644
--- a/clang/lib/Driver/Multilib.cpp
+++ b/clang/lib/Driver/Multilib.cpp
@@ -29,9 +29,10 @@ using namespace driver;
 using namespace llvm::sys;
 
 Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix,
-   StringRef IncludeSuffix, const flags_list &Flags)
+   StringRef IncludeSuffix, const flags_list &Flags,
+   StringRef ExclusiveGroup)
 : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix),
-  Flags(Flags) {
+  Flags(Flags), ExclusiveGroup(ExclusiveGroup) {
   assert(GCCSuffix.empty() ||
  (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1));
   assert(OSSuffix.empty() ||
@@ -96,13 +97,39 @@ bool MultilibSet::select(const Multilib::flags_list &Flags,
  llvm::SmallVector &Selected) const {
   llvm::StringSet<> FlagSet(expandFlags(Flags));
   Selected.clear();
-  llvm::copy_if(Multilibs, std::back_inserter(Selected),
-[&FlagSet](const Multilib &M) {
-  for (const std::string &F : M.flags())
-if (!FlagSet.contains(F))
-  return false;
-  return true

[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-10-24 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm updated 
https://github.com/llvm/llvm-project/pull/69447

>From e4d860c2968e4bf2e0ca198bdfe00dad4e985d40 Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Thu, 14 Sep 2023 14:51:17 +0100
Subject: [PATCH] [Driver] Add ExclusiveGroup feature to multilib.yaml.

This allows a YAML-based multilib configuration to specify explicitly
that a subset of its library directories are alternatives to each
other, i.e. at most one of that subset should be selected.

So if you have multiple sysroots each including a full set of headers
and libraries, you can mark them as members of the same
ExclusiveGroup, and then you'll be sure that only one of them is
selected, even if two or more are compatible with the compile options.

This is particularly important in multilib setups including the libc++
headers, where selecting the include directories from two different
sysroots can cause an actual build failure. This occurs when including
<stdio.h>, for example: libc++'s stdio.h is included first, and will
try to use `#include_next` to fetch the underlying libc's version. But
if there are two include directories from separate multilibs, then
both of their C++ include directories will end up on the include path
first, followed by both the C directories. So the `#include_next` from
the first libc++ stdio.h will include the second libc++ stdio.h, which
will do nothing because it has the same include guard macro, and the
libc header won't ever be included at all.

If more than one of the options in an ExclusiveGroup matches the given
flags, the last one wins.
---
 clang/include/clang/Driver/Multilib.h | 16 -
 clang/lib/Driver/Multilib.cpp | 49 ++---
 .../baremetal-multilib-exclusive-group.yaml   | 69 +++
 3 files changed, 122 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml

diff --git a/clang/include/clang/Driver/Multilib.h 
b/clang/include/clang/Driver/Multilib.h
index 1416559414f894b..6a9533e6dd831f1 100644
--- a/clang/include/clang/Driver/Multilib.h
+++ b/clang/include/clang/Driver/Multilib.h
@@ -39,13 +39,22 @@ class Multilib {
   std::string IncludeSuffix;
   flags_list Flags;
 
+  // Optionally, a multilib can be assigned a string tag indicating that it's
+  // part of a group of mutually exclusive possibilities. If two or more
+  // multilibs have the same non-empty value of ExclusiveGroup, then only the
+  // last matching one of them will be selected.
+  //
+  // Setting this to the empty string is a special case, indicating that the
+  // directory is not mutually exclusive with anything else.
+  std::string ExclusiveGroup;
+
 public:
   /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the
   /// sysroot string so they must either be empty or begin with a '/' 
character.
   /// This is enforced with an assert in the constructor.
   Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {},
-   StringRef IncludeSuffix = {},
-   const flags_list &Flags = flags_list());
+   StringRef IncludeSuffix = {}, const flags_list &Flags = 
flags_list(),
+   StringRef ExclusiveGroup = {});
 
   /// Get the detected GCC installation path suffix for the multi-arch
   /// target variant. Always starts with a '/', unless empty
@@ -63,6 +72,9 @@ class Multilib {
   /// All elements begin with either '-' or '!'
   const flags_list &flags() const { return Flags; }
 
+  /// Get the exclusive group label.
+  const std::string &exclusiveGroup() const { return ExclusiveGroup; }
+
   LLVM_DUMP_METHOD void dump() const;
   /// print summary of the Multilib
   void print(raw_ostream &OS) const;
diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp
index 48a494d9fa38db5..085ccee7b25752e 100644
--- a/clang/lib/Driver/Multilib.cpp
+++ b/clang/lib/Driver/Multilib.cpp
@@ -29,9 +29,10 @@ using namespace driver;
 using namespace llvm::sys;
 
 Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix,
-   StringRef IncludeSuffix, const flags_list &Flags)
+   StringRef IncludeSuffix, const flags_list &Flags,
+   StringRef ExclusiveGroup)
 : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix),
-  Flags(Flags) {
+  Flags(Flags), ExclusiveGroup(ExclusiveGroup) {
   assert(GCCSuffix.empty() ||
  (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1));
   assert(OSSuffix.empty() ||
@@ -96,13 +97,39 @@ bool MultilibSet::select(const Multilib::flags_list &Flags,
  llvm::SmallVector &Selected) const {
   llvm::StringSet<> FlagSet(expandFlags(Flags));
   Selected.clear();
-  llvm::copy_if(Multilibs, std::back_inserter(Selected),
-[&FlagSet](const Multilib &M) {
-  for (const std::string &F : M.flags())
-if (!FlagSet.contains(F))
-  return false;
-  return true

[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-10-24 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm updated 
https://github.com/llvm/llvm-project/pull/69447

>From 2a65ae75e8c8e62e7275a439849837919599e896 Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Thu, 14 Sep 2023 14:51:17 +0100
Subject: [PATCH] [Driver] Add ExclusiveGroup feature to multilib.yaml.

This allows a YAML-based multilib configuration to specify explicitly
that a subset of its library directories are alternatives to each
other, i.e. at most one of that subset should be selected.

So if you have multiple sysroots each including a full set of headers
and libraries, you can mark them as members of the same
ExclusiveGroup, and then you'll be sure that only one of them is
selected, even if two or more are compatible with the compile options.

This is particularly important in multilib setups including the libc++
headers, where selecting the include directories from two different
sysroots can cause an actual build failure. This occurs when including
<stdio.h>, for example: libc++'s stdio.h is included first, and will
try to use `#include_next` to fetch the underlying libc's version. But
if there are two include directories from separate multilibs, then
both of their C++ include directories will end up on the include path
first, followed by both the C directories. So the `#include_next` from
the first libc++ stdio.h will include the second libc++ stdio.h, which
will do nothing because it has the same include guard macro, and the
libc header won't ever be included at all.

If more than one of the options in an ExclusiveGroup matches the given
flags, the last one wins.
---
 clang/include/clang/Driver/Multilib.h | 16 -
 clang/lib/Driver/Multilib.cpp | 49 ++---
 .../baremetal-multilib-exclusive-group.yaml   | 69 +++
 3 files changed, 122 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml

diff --git a/clang/include/clang/Driver/Multilib.h 
b/clang/include/clang/Driver/Multilib.h
index 1416559414f894b..6a9533e6dd831f1 100644
--- a/clang/include/clang/Driver/Multilib.h
+++ b/clang/include/clang/Driver/Multilib.h
@@ -39,13 +39,22 @@ class Multilib {
   std::string IncludeSuffix;
   flags_list Flags;
 
+  // Optionally, a multilib can be assigned a string tag indicating that it's
+  // part of a group of mutually exclusive possibilities. If two or more
+  // multilibs have the same non-empty value of ExclusiveGroup, then only the
+  // last matching one of them will be selected.
+  //
+  // Setting this to the empty string is a special case, indicating that the
+  // directory is not mutually exclusive with anything else.
+  std::string ExclusiveGroup;
+
 public:
   /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the
   /// sysroot string so they must either be empty or begin with a '/' 
character.
   /// This is enforced with an assert in the constructor.
   Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {},
-   StringRef IncludeSuffix = {},
-   const flags_list &Flags = flags_list());
+   StringRef IncludeSuffix = {}, const flags_list &Flags = 
flags_list(),
+   StringRef ExclusiveGroup = {});
 
   /// Get the detected GCC installation path suffix for the multi-arch
   /// target variant. Always starts with a '/', unless empty
@@ -63,6 +72,9 @@ class Multilib {
   /// All elements begin with either '-' or '!'
   const flags_list &flags() const { return Flags; }
 
+  /// Get the exclusive group label.
+  const std::string &exclusiveGroup() const { return ExclusiveGroup; }
+
   LLVM_DUMP_METHOD void dump() const;
   /// print summary of the Multilib
   void print(raw_ostream &OS) const;
diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp
index 48a494d9fa38db5..085ccee7b25752e 100644
--- a/clang/lib/Driver/Multilib.cpp
+++ b/clang/lib/Driver/Multilib.cpp
@@ -29,9 +29,10 @@ using namespace driver;
 using namespace llvm::sys;
 
 Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix,
-   StringRef IncludeSuffix, const flags_list &Flags)
+   StringRef IncludeSuffix, const flags_list &Flags,
+   StringRef ExclusiveGroup)
 : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix),
-  Flags(Flags) {
+  Flags(Flags), ExclusiveGroup(ExclusiveGroup) {
   assert(GCCSuffix.empty() ||
  (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1));
   assert(OSSuffix.empty() ||
@@ -96,13 +97,39 @@ bool MultilibSet::select(const Multilib::flags_list &Flags,
  llvm::SmallVector &Selected) const {
   llvm::StringSet<> FlagSet(expandFlags(Flags));
   Selected.clear();
-  llvm::copy_if(Multilibs, std::back_inserter(Selected),
-[&FlagSet](const Multilib &M) {
-  for (const std::string &F : M.flags())
-if (!FlagSet.contains(F))
-  return false;
-  return true

[clang] [Driver] Add `--` to some test clang-cl command lines. (PR #70055)

2023-10-24 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm created 
https://github.com/llvm/llvm-project/pull/70055

If clang/test/Driver/cl-offload.cu is run on Unix in a directory whose absolute 
pathname starts with `/w`, such as the `/workspace` used by at least some 
Jenkins CI setups, then the file name on the clang command line is 
misinterpreted as some kind of MSVC warning-control option, and ignored by the 
catch-all `_SLASH_w` option in Options.td.

Other clang-cl tests take care to put a `--` before the input file name, to 
force clang to treat it as a filename even if it starts with a / and 
accidentally looks like a cl option. Do the same here.

>From e5b90488cdb8b2d8865c3ce434bd1adba16e0992 Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Tue, 24 Oct 2023 15:52:38 +0100
Subject: [PATCH] [Driver] Add `--` to some test clang-cl command lines.

If clang/test/Driver/cl-offload.cu is run on Unix in a directory whose
absolute pathname starts with `/w`, such as the `/workspace` used by
at least some Jenkins CI setups, then the file name on the clang
command line is misinterpreted as some kind of MSVC warning-control
option, and ignored by the catch-all `_SLASH_w` option in Options.td.

Other clang-cl tests take care to put a `--` before the input file
name, to force clang to treat it as a filename even if it starts with
a / and accidentally looks like a cl option. Do the same here.
---
 clang/test/Driver/cl-offload.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Driver/cl-offload.cu b/clang/test/Driver/cl-offload.cu
index 650c13da15b5b58..eaa4b58afa8878b 100644
--- a/clang/test/Driver/cl-offload.cu
+++ b/clang/test/Driver/cl-offload.cu
@@ -5,11 +5,11 @@
 
 // RUN: %clang_cl -### -target x86_64-pc-windows-msvc --offload-arch=sm_35 
-fgpu-rdc \
 // RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
-// RUN:   /Wall -x cuda %s 2>&1 \
+// RUN:   /Wall -x cuda -- %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=CUDA
 
 // RUN: %clang_cl -### -target x86_64-pc-windows-msvc --offload-arch=gfx1010 
-fgpu-rdc --hip-link \
-// RUN:   --rocm-path=%S/Inputs/rocm /Wall -x hip %s 2>&1 \
+// RUN:   --rocm-path=%S/Inputs/rocm /Wall -x hip -- %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=HIP
 
 // CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" 
"x86_64-pc-windows-msvc"

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Let clang-cl support CUDA/HIP (PR #68921)

2023-10-24 Thread Simon Tatham via cfe-commits

statham-arm wrote:

@yxsamliu I've just raised https://github.com/llvm/llvm-project/pull/70055 
which fixes an issue with the new test here. Perhaps it might also allow you to 
remove the exclusion for `system-darwin`?

https://github.com/llvm/llvm-project/pull/68921
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Add `--` to some test clang-cl command lines. (PR #70055)

2023-10-24 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm updated 
https://github.com/llvm/llvm-project/pull/70055

>From 029eecc71b94130bb6d058c9f9d0779e32cd45f1 Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Tue, 24 Oct 2023 15:52:38 +0100
Subject: [PATCH] [Driver] Add `--` to some test clang-cl command lines.

If clang/test/Driver/cl-offload.cu is run on Unix in a directory whose
absolute pathname starts with `/w`, such as the `/workspace` used by
at least some Jenkins CI setups, then the file name on the clang
command line is misinterpreted as some kind of MSVC warning-control
option, and ignored by the catch-all `_SLASH_w` option in Options.td.

Other clang-cl tests take care to put a `--` before the input file
name, to force clang to treat it as a filename even if it starts with
a / and accidentally looks like a cl option. Do the same here.

This also allows the exclusion for `system-darwin` to be removed,
because that was trying to avoid a similar filename/option clash
involving `/Users`.
---
 clang/test/Driver/cl-offload.cu | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/clang/test/Driver/cl-offload.cu b/clang/test/Driver/cl-offload.cu
index 650c13da15b5b58..b05bf3b97b7eb71 100644
--- a/clang/test/Driver/cl-offload.cu
+++ b/clang/test/Driver/cl-offload.cu
@@ -1,15 +1,14 @@
-// REQUIRES: !system-darwin
 // REQUIRES: !system-solaris
 
 // The test cannot be run on Darwin because /Users will be treated as a MSVC 
option.
 
 // RUN: %clang_cl -### -target x86_64-pc-windows-msvc --offload-arch=sm_35 
-fgpu-rdc \
 // RUN:   --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
-// RUN:   /Wall -x cuda %s 2>&1 \
+// RUN:   /Wall -x cuda -- %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=CUDA
 
 // RUN: %clang_cl -### -target x86_64-pc-windows-msvc --offload-arch=gfx1010 
-fgpu-rdc --hip-link \
-// RUN:   --rocm-path=%S/Inputs/rocm /Wall -x hip %s 2>&1 \
+// RUN:   --rocm-path=%S/Inputs/rocm /Wall -x hip -- %s 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=HIP
 
 // CUDA: "-cc1" "-triple" "nvptx64-nvidia-cuda" "-aux-triple" 
"x86_64-pc-windows-msvc"

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Add `--` to some test clang-cl command lines. (PR #70055)

2023-10-25 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm closed 
https://github.com/llvm/llvm-project/pull/70055
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-10-25 Thread Simon Tatham via cfe-commits


@@ -152,6 +180,7 @@ template <> struct 
llvm::yaml::MappingTraits {
   static void mapping(llvm::yaml::IO &io, MultilibSerialization &V) {
 io.mapRequired("Dir", V.Dir);
 io.mapRequired("Flags", V.Flags);
+io.mapOptional("ExclusiveGroup", V.ExclusiveGroup);

statham-arm wrote:

I'll rename it if you like, but I worry that that might be ambiguous, or at 
least unclear. Within the general context of linking and libraries, "group" 
need not mean a _mutually exclusive_ group; it could mean a grouping of 
libraries for other purposes too, like a mutually _dependent_ group (you must 
select all of these or none).

https://github.com/llvm/llvm-project/pull/69447
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-10-27 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm updated 
https://github.com/llvm/llvm-project/pull/69447

>From 2a65ae75e8c8e62e7275a439849837919599e896 Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Thu, 14 Sep 2023 14:51:17 +0100
Subject: [PATCH 1/2] [Driver] Add ExclusiveGroup feature to multilib.yaml.

This allows a YAML-based multilib configuration to specify explicitly
that a subset of its library directories are alternatives to each
other, i.e. at most one of that subset should be selected.

So if you have multiple sysroots each including a full set of headers
and libraries, you can mark them as members of the same
ExclusiveGroup, and then you'll be sure that only one of them is
selected, even if two or more are compatible with the compile options.

This is particularly important in multilib setups including the libc++
headers, where selecting the include directories from two different
sysroots can cause an actual build failure. This occurs when including
<stdio.h>, for example: libc++'s stdio.h is included first, and will
try to use `#include_next` to fetch the underlying libc's version. But
if there are two include directories from separate multilibs, then
both of their C++ include directories will end up on the include path
first, followed by both the C directories. So the `#include_next` from
the first libc++ stdio.h will include the second libc++ stdio.h, which
will do nothing because it has the same include guard macro, and the
libc header won't ever be included at all.

If more than one of the options in an ExclusiveGroup matches the given
flags, the last one wins.
---
 clang/include/clang/Driver/Multilib.h | 16 -
 clang/lib/Driver/Multilib.cpp | 49 ++---
 .../baremetal-multilib-exclusive-group.yaml   | 69 +++
 3 files changed, 122 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml

diff --git a/clang/include/clang/Driver/Multilib.h 
b/clang/include/clang/Driver/Multilib.h
index 1416559414f894b..6a9533e6dd831f1 100644
--- a/clang/include/clang/Driver/Multilib.h
+++ b/clang/include/clang/Driver/Multilib.h
@@ -39,13 +39,22 @@ class Multilib {
   std::string IncludeSuffix;
   flags_list Flags;
 
+  // Optionally, a multilib can be assigned a string tag indicating that it's
+  // part of a group of mutually exclusive possibilities. If two or more
+  // multilibs have the same non-empty value of ExclusiveGroup, then only the
+  // last matching one of them will be selected.
+  //
+  // Setting this to the empty string is a special case, indicating that the
+  // directory is not mutually exclusive with anything else.
+  std::string ExclusiveGroup;
+
 public:
   /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the
   /// sysroot string so they must either be empty or begin with a '/' 
character.
   /// This is enforced with an assert in the constructor.
   Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {},
-   StringRef IncludeSuffix = {},
-   const flags_list &Flags = flags_list());
+   StringRef IncludeSuffix = {}, const flags_list &Flags = 
flags_list(),
+   StringRef ExclusiveGroup = {});
 
   /// Get the detected GCC installation path suffix for the multi-arch
   /// target variant. Always starts with a '/', unless empty
@@ -63,6 +72,9 @@ class Multilib {
   /// All elements begin with either '-' or '!'
   const flags_list &flags() const { return Flags; }
 
+  /// Get the exclusive group label.
+  const std::string &exclusiveGroup() const { return ExclusiveGroup; }
+
   LLVM_DUMP_METHOD void dump() const;
   /// print summary of the Multilib
   void print(raw_ostream &OS) const;
diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp
index 48a494d9fa38db5..085ccee7b25752e 100644
--- a/clang/lib/Driver/Multilib.cpp
+++ b/clang/lib/Driver/Multilib.cpp
@@ -29,9 +29,10 @@ using namespace driver;
 using namespace llvm::sys;
 
 Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix,
-   StringRef IncludeSuffix, const flags_list &Flags)
+   StringRef IncludeSuffix, const flags_list &Flags,
+   StringRef ExclusiveGroup)
 : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix),
-  Flags(Flags) {
+  Flags(Flags), ExclusiveGroup(ExclusiveGroup) {
   assert(GCCSuffix.empty() ||
  (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1));
   assert(OSSuffix.empty() ||
@@ -96,13 +97,39 @@ bool MultilibSet::select(const Multilib::flags_list &Flags,
  llvm::SmallVector &Selected) const {
   llvm::StringSet<> FlagSet(expandFlags(Flags));
   Selected.clear();
-  llvm::copy_if(Multilibs, std::back_inserter(Selected),
-[&FlagSet](const Multilib &M) {
-  for (const std::string &F : M.flags())
-if (!FlagSet.contains(F))
-  return false;
-  return 

[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-10-27 Thread Simon Tatham via cfe-commits


@@ -0,0 +1,69 @@
+# REQUIRES: shell
+# UNSUPPORTED: system-windows
+
+# RUN: rm -rf %t
+
+# RUN: mkdir -p %t/baremetal_multilib/bin
+# RUN: ln -s %clang %t/baremetal_multilib/bin/clang
+
+# RUN: mkdir -p %t/baremetal_multilib/lib/clang-runtimes
+# RUN: ln -s %s %t/baremetal_multilib/lib/clang-runtimes/multilib.yaml
+
+# RUN: %t/baremetal_multilib/bin/clang -no-canonical-prefixes -x c++ %s -### 
-o %t.out --target=thumbv7em-none-unknown-eabi --sysroot= 2>%t.err
+
+# RUN: FileCheck -DSYSROOT=%t/baremetal_multilib %s < %t.err 
--check-prefix=TESTDIR1_NON_EXCLUSIVE
+# RUN: FileCheck -DSYSROOT=%t/baremetal_multilib %s < %t.err 
--check-prefix=TESTDIR2_NON_EXCLUSIVE
+# RUN: FileCheck -DSYSROOT=%t/baremetal_multilib %s < %t.err 
--check-prefix=TESTDIR1_EXCLUSIVE
+# RUN: FileCheck -DSYSROOT=%t/baremetal_multilib %s < %t.err 
--check-prefix=TESTDIR2_EXCLUSIVE
+# RUN: FileCheck -DSYSROOT=%t/baremetal_multilib %s < %t.err 
--check-prefix=TESTDIR1_OWN_GROUP
+# RUN: FileCheck -DSYSROOT=%t/baremetal_multilib %s < %t.err 
--check-prefix=TESTDIR2_OWN_GROUP
+
+# Expected results:
+#
+# Due to the Mappings section, all six of these library directories should
+# match the command-line flag --target=thumbv7em-none-unknown-eabi.
+#
+# The two "non_exclusive" directories, which don't have an ExclusiveGroup at
+# all, should both be selected. So should the two "own_group", each of which
+# specifies a different value of ExclusiveGroup. But the two "exclusive", which
+# have the _same_ ExclusiveGroup value, should not: the second one wins. So we
+# expect five of these six directories to show up in the clang-cc1 command
+# line, but not testdir1_exclusive.
+
+# TESTDIR1_NON_EXCLUSIVE: "-internal-isystem" 
"[[SYSROOT]]/bin/../lib/clang-runtimes/testdir1_non_exclusive/include/c++/v1"
+# TESTDIR2_NON_EXCLUSIVE: "-internal-isystem" 
"[[SYSROOT]]/bin/../lib/clang-runtimes/testdir2_non_exclusive/include/c++/v1"
+# TESTDIR2_EXCLUSIVE: "-internal-isystem" 
"[[SYSROOT]]/bin/../lib/clang-runtimes/testdir2_exclusive/include/c++/v1"
+# TESTDIR1_OWN_GROUP: "-internal-isystem" 
"[[SYSROOT]]/bin/../lib/clang-runtimes/testdir1_own_group/include/c++/v1"
+# TESTDIR2_OWN_GROUP: "-internal-isystem" 
"[[SYSROOT]]/bin/../lib/clang-runtimes/testdir2_own_group/include/c++/v1"

statham-arm wrote:

Thanks. Yes, I agree it would be nice to have a convenient way to share headers 
in cases where they don't have to vary. But for the cases where they do have 
to, this feature is still vital. (And for the cases where we just haven't got 
round to it yet it's still _useful_ :-)

https://github.com/llvm/llvm-project/pull/69447
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-10-27 Thread Simon Tatham via cfe-commits


@@ -152,6 +180,7 @@ template <> struct 
llvm::yaml::MappingTraits {
   static void mapping(llvm::yaml::IO &io, MultilibSerialization &V) {
 io.mapRequired("Dir", V.Dir);
 io.mapRequired("Flags", V.Flags);
+io.mapOptional("ExclusiveGroup", V.ExclusiveGroup);

statham-arm wrote:

Thanks, @MaskRay. I've left it as `ExclusiveGroup` for now.

https://github.com/llvm/llvm-project/pull/69447
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Driver] Add ExclusiveGroup feature to multilib.yaml. (PR #69447)

2023-10-27 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm updated 
https://github.com/llvm/llvm-project/pull/69447

>From 2a65ae75e8c8e62e7275a439849837919599e896 Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Thu, 14 Sep 2023 14:51:17 +0100
Subject: [PATCH 1/2] [Driver] Add ExclusiveGroup feature to multilib.yaml.

This allows a YAML-based multilib configuration to specify explicitly
that a subset of its library directories are alternatives to each
other, i.e. at most one of that subset should be selected.

So if you have multiple sysroots each including a full set of headers
and libraries, you can mark them as members of the same
ExclusiveGroup, and then you'll be sure that only one of them is
selected, even if two or more are compatible with the compile options.

This is particularly important in multilib setups including the libc++
headers, where selecting the include directories from two different
sysroots can cause an actual build failure. This occurs when including
<stdio.h>, for example: libc++'s stdio.h is included first, and will
try to use `#include_next` to fetch the underlying libc's version. But
if there are two include directories from separate multilibs, then
both of their C++ include directories will end up on the include path
first, followed by both the C directories. So the `#include_next` from
the first libc++ stdio.h will include the second libc++ stdio.h, which
will do nothing because it has the same include guard macro, and the
libc header won't ever be included at all.

If more than one of the options in an ExclusiveGroup matches the given
flags, the last one wins.
---
 clang/include/clang/Driver/Multilib.h | 16 -
 clang/lib/Driver/Multilib.cpp | 49 ++---
 .../baremetal-multilib-exclusive-group.yaml   | 69 +++
 3 files changed, 122 insertions(+), 12 deletions(-)
 create mode 100644 clang/test/Driver/baremetal-multilib-exclusive-group.yaml

diff --git a/clang/include/clang/Driver/Multilib.h 
b/clang/include/clang/Driver/Multilib.h
index 1416559414f894b..6a9533e6dd831f1 100644
--- a/clang/include/clang/Driver/Multilib.h
+++ b/clang/include/clang/Driver/Multilib.h
@@ -39,13 +39,22 @@ class Multilib {
   std::string IncludeSuffix;
   flags_list Flags;
 
+  // Optionally, a multilib can be assigned a string tag indicating that it's
+  // part of a group of mutually exclusive possibilities. If two or more
+  // multilibs have the same non-empty value of ExclusiveGroup, then only the
+  // last matching one of them will be selected.
+  //
+  // Setting this to the empty string is a special case, indicating that the
+  // directory is not mutually exclusive with anything else.
+  std::string ExclusiveGroup;
+
 public:
   /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the
   /// sysroot string so they must either be empty or begin with a '/' 
character.
   /// This is enforced with an assert in the constructor.
   Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {},
-   StringRef IncludeSuffix = {},
-   const flags_list &Flags = flags_list());
+   StringRef IncludeSuffix = {}, const flags_list &Flags = 
flags_list(),
+   StringRef ExclusiveGroup = {});
 
   /// Get the detected GCC installation path suffix for the multi-arch
   /// target variant. Always starts with a '/', unless empty
@@ -63,6 +72,9 @@ class Multilib {
   /// All elements begin with either '-' or '!'
   const flags_list &flags() const { return Flags; }
 
+  /// Get the exclusive group label.
+  const std::string &exclusiveGroup() const { return ExclusiveGroup; }
+
   LLVM_DUMP_METHOD void dump() const;
   /// print summary of the Multilib
   void print(raw_ostream &OS) const;
diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp
index 48a494d9fa38db5..085ccee7b25752e 100644
--- a/clang/lib/Driver/Multilib.cpp
+++ b/clang/lib/Driver/Multilib.cpp
@@ -29,9 +29,10 @@ using namespace driver;
 using namespace llvm::sys;
 
 Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix,
-   StringRef IncludeSuffix, const flags_list &Flags)
+   StringRef IncludeSuffix, const flags_list &Flags,
+   StringRef ExclusiveGroup)
 : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix),
-  Flags(Flags) {
+  Flags(Flags), ExclusiveGroup(ExclusiveGroup) {
   assert(GCCSuffix.empty() ||
  (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1));
   assert(OSSuffix.empty() ||
@@ -96,13 +97,39 @@ bool MultilibSet::select(const Multilib::flags_list &Flags,
  llvm::SmallVector &Selected) const {
   llvm::StringSet<> FlagSet(expandFlags(Flags));
   Selected.clear();
-  llvm::copy_if(Multilibs, std::back_inserter(Selected),
-[&FlagSet](const Multilib &M) {
-  for (const std::string &F : M.flags())
-if (!FlagSet.contains(F))
-  return false;
-  return 

[clang] 4978296 - [ARM, MVE] Support -ve offsets in gather-load intrinsics.

2020-01-06 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-01-06T16:33:07Z
New Revision: 4978296cd8e4d10724cfa41f0308d256c0fd490c

URL: 
https://github.com/llvm/llvm-project/commit/4978296cd8e4d10724cfa41f0308d256c0fd490c
DIFF: 
https://github.com/llvm/llvm-project/commit/4978296cd8e4d10724cfa41f0308d256c0fd490c.diff

LOG: [ARM,MVE] Support -ve offsets in gather-load intrinsics.

Summary:
The ACLE intrinsics with `gather_base` or `scatter_base` in the name
are wrappers on the MVE load/store instructions that take a vector of
base addresses and an immediate offset. The immediate offset can be up
to 127 times the alignment unit, and it can be positive or negative.

At the MC layer, we got that right. But in the Sema error checking for
the wrapping intrinsics, the offset was erroneously constrained to be
positive.

To fix this I've adjusted the `imm_mem7bit` class in the Tablegen that
defines the intrinsics. But that causes integer literals like
`0xfffffffffffffe04` to appear in the autogenerated calls to
`SemaBuiltinConstantArgRange`, which provokes a compiler warning
because that's out of the non-overflowing range of an `int64_t`. So
I've also tweaked `MveEmitter` to emit that as `-0x1fc` instead.

Updated the tests of the Sema checks themselves, and also adjusted a
random sample of the CodeGen tests to actually use negative offsets
and prove they get all the way through code generation without causing
a crash.

Reviewers: dmgreen, miyuki, MarkMurrayARM

Reviewed By: dmgreen

Subscribers: kristof.beyls, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D72268

Added: 


Modified: 
clang/include/clang/Basic/arm_mve_defs.td
clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c
clang/test/Sema/arm-mve-immediates.c
clang/utils/TableGen/MveEmitter.cpp
llvm/test/CodeGen/Thumb2/mve-intrinsics/scatter-gather.ll

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve_defs.td 
b/clang/include/clang/Basic/arm_mve_defs.td
index 939d5eb0cd6b..6fba88df34bf 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -345,9 +345,10 @@ def imm_1248 : Immediate> {
 
 // imm_mem7bit is a valid immediate offset for a load/store intrinsic whose
 // memory access size is n bytes (e.g. 1 for vldrb_[whatever], 2 for vldrh,
-// ...). The set of valid immediates for these is {0*n, 1*n, ..., 127*n}.
+// ...). The set of valid immediates for these is {-127*n, ..., -1*n, 0*n, 1*n,
+// ..., 127*n}.
 class imm_mem7bit
-  : Immediate> {
+  : Immediate> {
   let extra = !if(!eq(membytes, 1), ?, "Multiple");
   let extraarg = !cast(membytes);
 }

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c 
b/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c
index 8bf2111a9e63..564965acc04d 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c
@@ -196,12 +196,12 @@ int64x2_t test_vldrdq_gather_base_s64(uint64x2_t addr)
 
 // CHECK-LABEL: @test_vldrdq_gather_base_u64(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:[[TMP0:%.*]] = call <2 x i64> 
@llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> [[ADDR:%.*]], i32 336)
+// CHECK-NEXT:[[TMP0:%.*]] = call <2 x i64> 
@llvm.arm.mve.vldr.gather.base.v2i64.v2i64(<2 x i64> [[ADDR:%.*]], i32 -336)
 // CHECK-NEXT:ret <2 x i64> [[TMP0]]
 //
 uint64x2_t test_vldrdq_gather_base_u64(uint64x2_t addr)
 {
-return vldrdq_gather_base_u64(addr, 0x150);
+return vldrdq_gather_base_u64(addr, -0x150);
 }
 
 // CHECK-LABEL: @test_vldrdq_gather_base_wb_s64(
@@ -221,7 +221,7 @@ int64x2_t test_vldrdq_gather_base_wb_s64(uint64x2_t *addr)
 // CHECK-LABEL: @test_vldrdq_gather_base_wb_u64(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[TMP0:%.*]] = load <2 x i64>, <2 x i64>* [[ADDR:%.*]], 
align 8
-// CHECK-NEXT:[[TMP1:%.*]] = call { <2 x i64>, <2 x i64> } 
@llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> [[TMP0]], i32 328)
+// CHECK-NEXT:[[TMP1:%.*]] = call { <2 x i64>, <2 x i64> } 
@llvm.arm.mve.vldr.gather.base.wb.v2i64.v2i64(<2 x i64> [[TMP0]], i32 -328)
 // CHECK-NEXT:[[TMP2:%.*]] = extractvalue { <2 x i64>, <2 x i64> } 
[[TMP1]], 1
 // CHECK-NEXT:store <2 x i64> [[TMP2]], <2 x i64>* [[ADDR]], align 8
 // CHECK-NEXT:[[TMP3:%.*]] = extractvalue { <2 x i64>, <2 x i64> } 
[[TMP1]], 0
@@ -229,7 +229,7 @@ int64x2_t test_vldrdq_gather_base_wb_s64(uint64x2_t *addr)
 //
 uint64x2_t test_vldrdq_gather_base_wb_u64(uint64x2_t *addr)
 {
-return vldrdq_gather_base_wb_u64(addr, 0x148);
+return vldrdq_gather_base_wb_u64(addr, -0x148);
 }
 
 // CHECK-LABEL: @test_vldrdq_gather_base_wb_z_s64(
@@ -280,12 +280,12 @@ int64x2_t test_vldrdq_gather_base_z_s64(uint64x2_t addr, 
mve_pred16_t p)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:[[TMP1:%.*]] = call <4 x i1> @llvm.ar

[clang] d857e11 - [ARM,MVE] Fix valid immediate range for vsliq_n.

2020-01-09 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-01-09T15:04:47Z
New Revision: d857e114b5e04f5143485a5aea7ad9b283768692

URL: 
https://github.com/llvm/llvm-project/commit/d857e114b5e04f5143485a5aea7ad9b283768692
DIFF: 
https://github.com/llvm/llvm-project/commit/d857e114b5e04f5143485a5aea7ad9b283768692.diff

LOG: [ARM,MVE] Fix valid immediate range for vsliq_n.

In common with most MVE immediate shift instructions, the left shift
takes an immediate in the range [0,n-1], while the right shift takes
one in the range [1,n]. I had absent-mindedly made them both the
latter.

While I'm here, I've added a set of regression tests checking both
ends of the immediate range for a representative sample of the
immediate shifts.

Added: 


Modified: 
clang/include/clang/Basic/arm_mve.td
clang/test/Sema/arm-mve-immediates.c

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index 87091a325071..86a04e33ce76 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -684,7 +684,7 @@ let params = [s16, s32], pnt = PNT_NType in {
   defm vqrshrun : VSHRN;
 }
 let params = T.Int, pnt = PNT_NType in {
-  defm vsli : DyadicImmShift;
+  defm vsli : DyadicImmShift;
   defm vsri : DyadicImmShift;
 }
 

diff  --git a/clang/test/Sema/arm-mve-immediates.c 
b/clang/test/Sema/arm-mve-immediates.c
index 54cdb96efcd3..b8106fbb7028 100644
--- a/clang/test/Sema/arm-mve-immediates.c
+++ b/clang/test/Sema/arm-mve-immediates.c
@@ -110,3 +110,96 @@ void test_lane_indices(uint8x16_t v16, uint16x8_t v8,
   vsetq_lane_u64(23, v2, 1);
   vsetq_lane_u64(23, v2, 2); // expected-error {{argument value 2 is outside 
the valid range [0, 1]}}
 }
+
+void test_immediate_shifts(uint8x16_t vb, uint16x8_t vh, uint32x4_t vw)
+{
+  vshlq_n(vb, 0);
+  vshlq_n(vb, 7);
+  vshlq_n(vh, 0);
+  vshlq_n(vh, 15);
+  vshlq_n(vw, 0);
+  vshlq_n(vw, 31);
+
+  vshlq_n(vb, -1); // expected-error {{argument value -1 is outside the valid 
range [0, 7]}}
+  vshlq_n(vb, 8); // expected-error {{argument value 8 is outside the valid 
range [0, 7]}}
+  vshlq_n(vh, -1); // expected-error {{argument value -1 is outside the valid 
range [0, 15]}}
+  vshlq_n(vh, 16); // expected-error {{argument value 16 is outside the valid 
range [0, 15]}}
+  vshlq_n(vw, -1); // expected-error {{argument value -1 is outside the valid 
range [0, 31]}}
+  vshlq_n(vw, 32); // expected-error {{argument value 32 is outside the valid 
range [0, 31]}}
+
+  vqshlq_n(vb, 0);
+  vqshlq_n(vb, 7);
+  vqshlq_n(vh, 0);
+  vqshlq_n(vh, 15);
+  vqshlq_n(vw, 0);
+  vqshlq_n(vw, 31);
+
+  vqshlq_n(vb, -1); // expected-error {{argument value -1 is outside the valid 
range [0, 7]}}
+  vqshlq_n(vb, 8); // expected-error {{argument value 8 is outside the valid 
range [0, 7]}}
+  vqshlq_n(vh, -1); // expected-error {{argument value -1 is outside the valid 
range [0, 15]}}
+  vqshlq_n(vh, 16); // expected-error {{argument value 16 is outside the valid 
range [0, 15]}}
+  vqshlq_n(vw, -1); // expected-error {{argument value -1 is outside the valid 
range [0, 31]}}
+  vqshlq_n(vw, 32); // expected-error {{argument value 32 is outside the valid 
range [0, 31]}}
+
+  vsliq(vb, vb, 0);
+  vsliq(vb, vb, 7);
+  vsliq(vh, vh, 0);
+  vsliq(vh, vh, 15);
+  vsliq(vw, vw, 0);
+  vsliq(vw, vw, 31);
+
+  vsliq(vb, vb, -1); // expected-error {{argument value -1 is outside the 
valid range [0, 7]}}
+  vsliq(vb, vb, 8); // expected-error {{argument value 8 is outside the valid 
range [0, 7]}}
+  vsliq(vh, vh, -1); // expected-error {{argument value -1 is outside the 
valid range [0, 15]}}
+  vsliq(vh, vh, 16); // expected-error {{argument value 16 is outside the 
valid range [0, 15]}}
+  vsliq(vw, vw, -1); // expected-error {{argument value -1 is outside the 
valid range [0, 31]}}
+  vsliq(vw, vw, 32); // expected-error {{argument value 32 is outside the 
valid range [0, 31]}}
+
+  vshllbq(vb, 1);
+  vshllbq(vb, 8);
+  vshllbq(vh, 1);
+  vshllbq(vh, 16);
+
+  vshllbq(vb, 0); // expected-error {{argument value 0 is outside the valid 
range [1, 8]}}
+  vshllbq(vb, 9); // expected-error {{argument value 9 is outside the valid 
range [1, 8]}}
+  vshllbq(vh, 0); // expected-error {{argument value 0 is outside the valid 
range [1, 16]}}
+  vshllbq(vh, 17); // expected-error {{argument value 17 is outside the valid 
range [1, 16]}}
+
+  vshrq(vb, 1);
+  vshrq(vb, 8);
+  vshrq(vh, 1);
+  vshrq(vh, 16);
+  vshrq(vw, 1);
+  vshrq(vw, 32);
+
+  vshrq(vb, 0); // expected-error {{argument value 0 is outside the valid 
range [1, 8]}}
+  vshrq(vb, 9); // expected-error {{argument value 9 is outside the valid 
range [1, 8]}}
+  vshrq(vh, 0); // expected-error {{argument value 0 is outside the valid 
range [1, 16]}}
+  vshrq(vh, 17); // expected-error {{argument value 17 is outside the valid 
range [1, 16]}}
+  vshrq(vw, 0); // expected-error {{argument value 0 is outside the valid 
rang

[clang] 06d07ec - [Clang] Handle target-specific builtins returning aggregates.

2020-01-09 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-01-09T17:28:37Z
New Revision: 06d07ec4a372b55e6fb77bf0b97964bde16a3184

URL: 
https://github.com/llvm/llvm-project/commit/06d07ec4a372b55e6fb77bf0b97964bde16a3184
DIFF: 
https://github.com/llvm/llvm-project/commit/06d07ec4a372b55e6fb77bf0b97964bde16a3184.diff

LOG: [Clang] Handle target-specific builtins returning aggregates.

Summary:
A few of the ARM MVE builtins directly return a structure type. This
causes an assertion failure at code-gen time if you try to assign the
result of the builtin to a variable, because the `RValue` created in
`EmitBuiltinExpr` from the `llvm::Value` produced by codegen is always
made by `RValue::get()`, which creates a non-aggregate `RValue` that
will fail an assertion when `AggExprEmitter::withReturnValueSlot` calls
`Src.getAggregatePointer()`. A similar failure occurs if you try to use
the struct return value directly to extract one field, e.g.
`vld2q(address).val[0]`.

The existing code-gen tests for those MVE builtins pass the returned
structure type directly to the C `return` statement, which apparently
managed to avoid that particular code path, so we didn't notice the
crash.

Now `EmitBuiltinExpr` checks the evaluation kind of the builtin's return
value, and does the necessary handling for aggregate returns. I've added
two extra test cases, both of which crashed before this change.

Reviewers: dmgreen, rjmccall

Reviewed By: rjmccall

Subscribers: kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D72271

Added: 


Modified: 
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/arm-mve-intrinsics/vld24.c

Removed: 




diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 3fadf09c460d..2842fe826636 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4332,9 +4332,29 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
 return RValue::get(V);
   }
 
-  // See if we have a target specific builtin that needs to be lowered.
-  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue))
-return RValue::get(V);
+  // Some target-specific builtins can have aggregate return values, e.g.
+  // __builtin_arm_mve_vld2q_u32. So if the result is an aggregate, force
+  // ReturnValue to be non-null, so that the target-specific emission code can
+  // always just emit into it.
+  TypeEvaluationKind EvalKind = getEvaluationKind(E->getType());
+  if (EvalKind == TEK_Aggregate && ReturnValue.isNull()) {
+Address DestPtr = CreateMemTemp(E->getType(), "agg.tmp");
+ReturnValue = ReturnValueSlot(DestPtr, false);
+  }
+
+  // Now see if we can emit a target-specific builtin.
+  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E, ReturnValue)) {
+switch (EvalKind) {
+case TEK_Scalar:
+  return RValue::get(V);
+case TEK_Aggregate:
+  return RValue::getAggregate(ReturnValue.getValue(),
+  ReturnValue.isVolatile());
+case TEK_Complex:
+  llvm_unreachable("No current target builtin returns complex");
+}
+llvm_unreachable("Bad evaluation kind in EmitBuiltinExpr");
+  }
 
   ErrorUnsupported(E, "builtin function");
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vld24.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vld24.c
index 984d5989217e..a0f37fe65d3d 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vld24.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vld24.c
@@ -98,3 +98,45 @@ void test_vst2q_f16(float16_t *addr, float16x8x2_t value)
 vst2q_f16(addr, value);
 #endif /* POLYMORPHIC */
 }
+
+// CHECK-LABEL: @load_into_variable(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call { <8 x i16>, <8 x i16> } 
@llvm.arm.mve.vld2q.v8i16.p0i16(i16* [[ADDR:%.*]])
+// CHECK-NEXT:[[TMP1:%.*]] = extractvalue { <8 x i16>, <8 x i16> } 
[[TMP0]], 0
+// CHECK-NEXT:[[TMP2:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T:%.*]] 
undef, <8 x i16> [[TMP1]], 0, 0
+// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { <8 x i16>, <8 x i16> } 
[[TMP0]], 1
+// CHECK-NEXT:[[TMP4:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] [[TMP2]], 
<8 x i16> [[TMP3]], 0, 1
+// CHECK-NEXT:store <8 x i16> [[TMP1]], <8 x i16>* [[VALUES:%.*]], align 8
+// CHECK-NEXT:[[ARRAYIDX4:%.*]] = getelementptr inbounds <8 x i16>, <8 x 
i16>* [[VALUES]], i32 1
+// CHECK-NEXT:store <8 x i16> [[TMP3]], <8 x i16>* [[ARRAYIDX4]], align 8
+// CHECK-NEXT:ret void
+//
+void load_into_variable(const uint16_t *addr, uint16x8_t *values)
+{
+uint16x8x2_t v;
+#ifdef POLYMORPHIC
+v = vld2q(addr);
+#else /* POLYMORPHIC */
+v = vld2q_u16(addr);
+#endif /* POLYMORPHIC */
+values[0] = v.val[0];
+values[1] = v.val[1];
+}
+
+// CHECK-LABEL: @extract_one_vector(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call { <4 x i32>, <4 x i32> } 
@llvm.arm.mve.vld2q.v4i32.

[clang] 1ccee0e - [ARM, MVE] Make `vqrshrun` generate the right instruction.

2020-01-10 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-01-10T11:25:05Z
New Revision: 1ccee0e86386762bd742fd067391b6c4be089806

URL: 
https://github.com/llvm/llvm-project/commit/1ccee0e86386762bd742fd067391b6c4be089806
DIFF: 
https://github.com/llvm/llvm-project/commit/1ccee0e86386762bd742fd067391b6c4be089806.diff

LOG: [ARM,MVE] Make `vqrshrun` generate the right instruction.

Summary:
A copy-paste error in `arm_mve.td` meant that the MVE `vqrshrun`
intrinsic family was generating the `vqshrun` machine instruction,
because in the IR intrinsic call, the rounding flag argument was set
to 0 rather than 1.

Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard

Reviewed By: dmgreen

Subscribers: kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D72496

Added: 


Modified: 
clang/include/clang/Basic/arm_mve.td
clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm-dyadic.c

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index 86a04e33ce76..6d0bb96cba6f 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -681,7 +681,7 @@ let params = [s16, s32, u16, u32], pnt = PNT_NType in {
 }
 let params = [s16, s32], pnt = PNT_NType in {
   defm vqshrun  : VSHRN;
-  defm vqrshrun : VSHRN;
+  defm vqrshrun : VSHRN;
 }
 let params = T.Int, pnt = PNT_NType in {
   defm vsli : DyadicImmShift;

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm-dyadic.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm-dyadic.c
index 3d4f77b99d74..c5591392e373 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm-dyadic.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm-dyadic.c
@@ -1086,7 +1086,7 @@ uint16x8_t test_vqrshrntq_m_n_u32(uint16x8_t a, 
uint32x4_t b, mve_pred16_t p)
 
 // CHECK-LABEL: @test_vqrshrunbq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> 
@llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 
7, i32 1, i32 0, i32 1, i32 0, i32 0)
+// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> 
@llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 
7, i32 1, i32 1, i32 1, i32 0, i32 0)
 // CHECK-NEXT:ret <16 x i8> [[TMP0]]
 //
 uint8x16_t test_vqrshrunbq_n_s16(uint8x16_t a, int16x8_t b)
@@ -1100,7 +1100,7 @@ uint8x16_t test_vqrshrunbq_n_s16(uint8x16_t a, int16x8_t 
b)
 
 // CHECK-LABEL: @test_vqrshrunbq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> 
@llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 
1, i32 1, i32 0, i32 1, i32 0, i32 0)
+// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> 
@llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 
1, i32 1, i32 1, i32 1, i32 0, i32 0)
 // CHECK-NEXT:ret <8 x i16> [[TMP0]]
 //
 uint16x8_t test_vqrshrunbq_n_s32(uint16x8_t a, int32x4_t b)
@@ -1114,7 +1114,7 @@ uint16x8_t test_vqrshrunbq_n_s32(uint16x8_t a, int32x4_t 
b)
 
 // CHECK-LABEL: @test_vqrshruntq_n_s16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> 
@llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 
1, i32 1, i32 0, i32 1, i32 0, i32 1)
+// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> 
@llvm.arm.mve.vshrn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 
1, i32 1, i32 1, i32 1, i32 0, i32 1)
 // CHECK-NEXT:ret <16 x i8> [[TMP0]]
 //
 uint8x16_t test_vqrshruntq_n_s16(uint8x16_t a, int16x8_t b)
@@ -1128,7 +1128,7 @@ uint8x16_t test_vqrshruntq_n_s16(uint8x16_t a, int16x8_t 
b)
 
 // CHECK-LABEL: @test_vqrshruntq_n_s32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> 
@llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 
3, i32 1, i32 0, i32 1, i32 0, i32 1)
+// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> 
@llvm.arm.mve.vshrn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 
3, i32 1, i32 1, i32 1, i32 0, i32 1)
 // CHECK-NEXT:ret <8 x i16> [[TMP0]]
 //
 uint16x8_t test_vqrshruntq_n_s32(uint16x8_t a, int32x4_t b)
@@ -1144,7 +1144,7 @@ uint16x8_t test_vqrshruntq_n_s32(uint16x8_t a, int32x4_t 
b)
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
 // CHECK-NEXT:[[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP0]])
-// CHECK-NEXT:[[TMP2:%.*]] = call <16 x i8> 
@llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 4, i32 1, i32 0, i32 1, i32 0, i32 0, <8 x i1> [[TMP1]])
+// CHECK-NEXT:[[TMP2:%.*]] = call <16 x i8> 
@llvm.arm.mve.vshrn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> 
[[B:%.*]], i32 4, i32 1, i32 1, i32 1, i32 0, i32 0, <8 x i1> [[TMP1]])
 // CHECK-NEXT:ret <16 x i8> [[TMP2]]
 //
 uint8x16_t test_vqrshrunbq_m_n_s16(uint8x16_t a, int16x8_t b, mve_pred16_t p

[clang] 71d5454 - [ARM, MVE] Use the new Tablegen `defvar` and `if` statements.

2020-01-14 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-01-14T12:08:03Z
New Revision: 71d5454b377239213874a0d762860e6a3e60bf54

URL: 
https://github.com/llvm/llvm-project/commit/71d5454b377239213874a0d762860e6a3e60bf54
DIFF: 
https://github.com/llvm/llvm-project/commit/71d5454b377239213874a0d762860e6a3e60bf54.diff

LOG: [ARM,MVE] Use the new Tablegen `defvar` and `if` statements.

Summary:
This cleans up a lot of ugly `foreach` bodges that I've been using to
work around the lack of those two language features. Now they both
exist, I can make them all into something more legible!

In particular, in the common pattern in `ARMInstrMVE.td` where a
multiclass defines an `Instruction` instance plus one or more `Pat` that
select it, I've used a `defvar` to wrap `!cast<Instruction>(NAME)` so
that the patterns themselves become a little more legible.

Replacing a `foreach` with a `defvar` removes a level of block
structure, so several pieces of code have their indentation changed by
this patch. Best viewed with whitespace ignored.

NFC: the output of `llvm-tblgen -print-records` on the two affected
Tablegen sources is exactly identical before and after this change, so
there should be no effect at all on any of the other generated files.

Reviewers: MarkMurrayARM, miyuki

Reviewed By: MarkMurrayARM

Subscribers: kristof.beyls, hiraditya, dmgreen, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D72690

Added: 


Modified: 
clang/include/clang/Basic/arm_mve.td
llvm/lib/Target/ARM/ARMInstrMVE.td

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index 6d0bb96cba6f..0e023b85459c 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -212,20 +212,17 @@ def vmaxvq: Intrinsic $prev, $vec))>;
 }
 
-foreach half = [ "b", "t" ] in
-foreach halfconst = [ !if(!eq(half, "b"), 0, 1) ] in {
-
-let params = [f32], pnt = PNT_None in {
-
-def vcvt#half#q_f16: Intrinsic<
-VecOf, (args VecOf:$inactive, Vector:$a),
-(IRInt<"vcvt_narrow"> $inactive, $a, halfconst)>;
-def vcvt#half#q_m_f16: Intrinsic<
-VecOf, (args VecOf:$inactive, Vector:$a, PredOf:$pred),
-(IRInt<"vcvt_narrow_predicated"> $inactive, $a, halfconst, $pred)>;
-
-} // params = [f32], pnt = PNT_None
-
+foreach half = [ "b", "t" ] in {
+  defvar halfconst = !if(!eq(half, "b"), 0, 1);
+
+  let params = [f32], pnt = PNT_None in {
+def vcvt#half#q_f16: Intrinsic<
+  VecOf, (args VecOf:$inactive, Vector:$a),
+  (IRInt<"vcvt_narrow"> $inactive, $a, halfconst)>;
+def vcvt#half#q_m_f16: Intrinsic<
+  VecOf, (args VecOf:$inactive, Vector:$a, PredOf:$pred),
+  (IRInt<"vcvt_narrow_predicated"> $inactive, $a, halfconst, $pred)>;
+  } // params = [f32], pnt = PNT_None
 } // loop over half = "b", "t"
 
 multiclass compare_with_pred;
 
 multiclass DyadicImmShift {
-  foreach intparams = [!if(!eq(!cast(outtype), !cast(Vector)),
-   [Vector], [outtype, Vector])] in {
-def q_n: Intrinsic<
-outtype, (args outtype:$a, Vector:$b, imm:$sh),
-!con((IRInt $a, $b, $sh), extraargs)>;
-
-def q_m_n: Intrinsic<
-outtype, (args outtype:$a, Vector:$b, imm:$sh, Predicate:$pred),
-!con((IRInt
- $a, $b, $sh), extraargs, (? $pred))>;
-  }
+  defvar intparams = !if(!eq(!cast(outtype), !cast(Vector)),
+ [Vector], [outtype, Vector]);
+
+  def q_n: Intrinsic<
+  outtype, (args outtype:$a, Vector:$b, imm:$sh),
+  !con((IRInt $a, $b, $sh), extraargs)>;
+
+  def q_m_n: Intrinsic<
+  outtype, (args outtype:$a, Vector:$b, imm:$sh, Predicate:$pred),
+  !con((IRInt
+   $a, $b, $sh), extraargs, (? $pred))>;
 }
 
 multiclass VSHRN {
@@ -672,12 +669,11 @@ multiclass VSHRN {
 }
 
 let params = [s16, s32, u16, u32], pnt = PNT_NType in {
-  foreach U = [(unsignedflag Scalar)] in {
-defm vshrn   : VSHRN;
-defm vqshrn  : VSHRN;
-defm vrshrn  : VSHRN;
-defm vqrshrn : VSHRN;
-  }
+  defvar U = (unsignedflag Scalar);
+  defm vshrn   : VSHRN;
+  defm vqshrn  : VSHRN;
+  defm vrshrn  : VSHRN;
+  defm vqrshrn : VSHRN;
 }
 let params = [s16, s32], pnt = PNT_NType in {
   defm vqshrun  : VSHRN;

diff  --git a/llvm/lib/Target/ARM/ARMInstrMVE.td 
b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 325c9153491d..604291be822c 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -594,25 +594,24 @@ class MVE_VABAV size>
 
 multiclass MVE_VABAV_m {
   def "" : MVE_VABAV;
+  defvar Inst = !cast(NAME);
 
   let Predicates = [HasMVEInt] in {
 def : Pat<(i32 (int_arm_mve_vabav
-(i32 VTI.Unsigned),
-(i32 rGPR:$Rda_src),
-(VTI.Vec MQPR:$Qn), (VTI.Vec MQPR:$Qm))),
-  (i32 (!cast(NAME)
-(i32 rGPR:$Rda_src),
-  

[clang] ada01d1 - [clang] New __attribute__((__clang_arm_mve_strict_polymorphism)).

2020-01-15 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-01-15T15:04:10Z
New Revision: ada01d1b869763f7d5d3438dcfce02066b06ab0a

URL: 
https://github.com/llvm/llvm-project/commit/ada01d1b869763f7d5d3438dcfce02066b06ab0a
DIFF: 
https://github.com/llvm/llvm-project/commit/ada01d1b869763f7d5d3438dcfce02066b06ab0a.diff

LOG: [clang] New __attribute__((__clang_arm_mve_strict_polymorphism)).

This is applied to the vector types defined in <arm_mve.h> for use
with the intrinsics for the ARM MVE vector architecture.

Its purpose is to inhibit lax vector conversions, but only in the
context of overload resolution of the MVE polymorphic intrinsic
functions. This solves an ambiguity problem with polymorphic MVE
intrinsics that take a vector and a scalar argument: the scalar
argument can often have the wrong integer type due to default integer
promotions or unsuffixed literals, and therefore, the type of the
vector argument should be considered trustworthy when resolving MVE
polymorphism.

As part of the same change, I've added the new attribute to the
declarations generated by the MveEmitter Tablegen backend (and
corrected a namespace issue with the other attribute while I was
there).

Reviewers: aaron.ballman, dmgreen

Reviewed By: aaron.ballman

Subscribers: kristof.beyls, JDevlieghere, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D72518

Added: 
clang/test/Sema/overload-arm-mve.c

Modified: 
clang/include/clang/Basic/Attr.td
clang/include/clang/Basic/AttrDocs.td
clang/include/clang/Basic/DiagnosticSemaKinds.td
clang/lib/AST/TypePrinter.cpp
clang/lib/Sema/SemaOverload.cpp
clang/lib/Sema/SemaType.cpp
clang/utils/TableGen/MveEmitter.cpp

Removed: 




diff  --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index 16556b5f0745..10db2a868dce 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1479,6 +1479,11 @@ def NeonVectorType : TypeAttr {
   let ASTNode = 0;
 }
 
+def ArmMveStrictPolymorphism : TypeAttr, TargetSpecificAttr {
+  let Spellings = [Clang<"__clang_arm_mve_strict_polymorphism">];
+  let Documentation = [ArmMveStrictPolymorphismDocs];
+}
+
 def NoUniqueAddress : InheritableAttr, TargetSpecificAttr 
{
   let Spellings = [CXX11<"", "no_unique_address", 201803>];
   let Subjects = SubjectList<[NonBitField], ErrorDiag>;

diff  --git a/clang/include/clang/Basic/AttrDocs.td 
b/clang/include/clang/Basic/AttrDocs.td
index 03d36ae7ab32..456edd1daafc 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -4789,3 +4789,45 @@ close the handle. It is also assumed to require an open 
handle to work with.
   zx_status_t zx_handle_close(zx_handle_t handle [[clang::release_handle]]);
   }];
 }
+
+def ArmMveStrictPolymorphismDocs : Documentation {
+let Category = DocCatType;
+let Content = [{
+This attribute is used in the implementation of the ACLE intrinsics for the Arm
+MVE instruction set. It is used to define the vector types used by the MVE
+intrinsics.
+
+Its effect is to modify the behavior of a vector type with respect to function
+overloading. If a candidate function for overload resolution has a parameter
+type with this attribute, then the selection of that candidate function will be
+disallowed if the actual argument can only be converted via a lax vector
+conversion. The aim is to prevent spurious ambiguity in ARM MVE polymorphic
+intrinsics.
+
+.. code-block:: c++
+
+  void overloaded(uint16x8_t vector, uint16_t scalar);
+  void overloaded(int32x4_t vector, int32_t scalar);
+  uint16x8_t myVector;
+  uint16_t myScalar;
+
+  // myScalar is promoted to int32_t as a side effect of the addition,
+  // so if lax vector conversions are considered for myVector, then
+  // the two overloads are equally good (one argument conversion
+  // each). But if the vector has the __clang_arm_mve_strict_polymorphism
+  // attribute, only the uint16x8_t,uint16_t overload will match.
+  overloaded(myVector, myScalar + 1);
+
+However, this attribute does not prohibit lax vector conversions in contexts
+other than overloading.
+
+.. code-block:: c++
+
+  uint16x8_t function();
+
+  // This is still permitted with lax vector conversion enabled, even
+  // if the vector types have __clang_arm_mve_strict_polymorphism
+  int32x4_t result = function();
+
+}];
+}

diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 7d8231d140e4..ffa326932a1c 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -6593,6 +6593,8 @@ def 
note_objc_unsafe_perform_selector_method_declared_here :  Note<
   "method %0 that returns %1 declared here">;
 def err_attribute_arm_mve_alias : Error<
   "'__clang_arm_mve_alias' attribute can only be applied to an ARM MVE 
builtin">;
+def err_attribute_arm_mv

[clang] fd569a1 - [libclang] Fix error handler in translateSourceLocation.

2021-06-18 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2021-06-18T13:43:14+01:00
New Revision: fd569a11b585d13cdceac2d890c2beda0fa5f0eb

URL: 
https://github.com/llvm/llvm-project/commit/fd569a11b585d13cdceac2d890c2beda0fa5f0eb
DIFF: 
https://github.com/llvm/llvm-project/commit/fd569a11b585d13cdceac2d890c2beda0fa5f0eb.diff

LOG: [libclang] Fix error handler in translateSourceLocation.

Given an invalid SourceLocation, translateSourceLocation will call
clang_getNullLocation, and then do nothing with the result. But
clang_getNullLocation has no side effects: it just constructs and
returns a null CXSourceLocation value.

Surely the intention was to //return// that null CXSourceLocation to
the caller, instead of throwing it away and pressing on anyway.

Reviewed By: miyuki

Differential Revision: https://reviews.llvm.org/D104442

Added: 


Modified: 
clang/tools/libclang/CXSourceLocation.h

Removed: 




diff  --git a/clang/tools/libclang/CXSourceLocation.h 
b/clang/tools/libclang/CXSourceLocation.h
index ce3d09e1c9eb8..c86f6850375bb 100644
--- a/clang/tools/libclang/CXSourceLocation.h
+++ b/clang/tools/libclang/CXSourceLocation.h
@@ -29,7 +29,7 @@ static inline CXSourceLocation
 translateSourceLocation(const SourceManager &SM, const LangOptions &LangOpts,
 SourceLocation Loc) {
   if (Loc.isInvalid())
-clang_getNullLocation();
+return clang_getNullLocation();
 
   CXSourceLocation Result = { { &SM, &LangOpts, },
   Loc.getRawEncoding() };



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] e49985b - Remove unused parameter from parseMSInlineAsm.

2021-07-12 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2021-07-12T15:07:03+01:00
New Revision: e49985bb6065d4f5ea69fe578e326ec6d43a6b24

URL: 
https://github.com/llvm/llvm-project/commit/e49985bb6065d4f5ea69fe578e326ec6d43a6b24
DIFF: 
https://github.com/llvm/llvm-project/commit/e49985bb6065d4f5ea69fe578e326ec6d43a6b24.diff

LOG: Remove unused parameter from parseMSInlineAsm.

No implementation uses the `LocCookie` parameter at all. Errors are
reported from inside that function by `llvm::SourceMgr`, and the
instance of that at the clang call site arranges to pass the error
messages back to a `ClangAsmParserCallback`, which is where the clang
SourceLocation for the error is computed.

(This is part of a patch series working towards the ability to make
SourceLocation into a 64-bit type to handle larger translation units.
But this particular change seems beneficial in its own right.)

Reviewed By: miyuki

Differential Revision: https://reviews.llvm.org/D105490

Added: 


Modified: 
clang/lib/Parse/ParseStmtAsm.cpp
llvm/include/llvm/MC/MCParser/MCAsmParser.h
llvm/lib/MC/MCParser/AsmParser.cpp
llvm/lib/MC/MCParser/MasmParser.cpp

Removed: 




diff  --git a/clang/lib/Parse/ParseStmtAsm.cpp 
b/clang/lib/Parse/ParseStmtAsm.cpp
index 9037895a3bbfc..e520151dcad76 100644
--- a/clang/lib/Parse/ParseStmtAsm.cpp
+++ b/clang/lib/Parse/ParseStmtAsm.cpp
@@ -633,9 +633,9 @@ StmtResult 
Parser::ParseMicrosoftAsmStatement(SourceLocation AsmLoc) {
   SmallVector, 4> OpExprs;
   SmallVector Constraints;
   SmallVector Clobbers;
-  if (Parser->parseMSInlineAsm(AsmLoc.getPtrEncoding(), AsmStringIR, 
NumOutputs,
-   NumInputs, OpExprs, Constraints, Clobbers,
-   MII.get(), IP.get(), Callback))
+  if (Parser->parseMSInlineAsm(AsmStringIR, NumOutputs, NumInputs, OpExprs,
+   Constraints, Clobbers, MII.get(), IP.get(),
+   Callback))
 return StmtError();
 
   // Filter out "fpsw" and "mxcsr". They aren't valid GCC asm clobber

diff  --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h 
b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index 56188b7ebaec7..c9b3ab3256da8 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -202,8 +202,8 @@ class MCAsmParser {
 
   /// Parse MS-style inline assembly.
   virtual bool parseMSInlineAsm(
-  void *AsmLoc, std::string &AsmString, unsigned &NumOutputs,
-  unsigned &NumInputs, SmallVectorImpl> &OpDecls,
+  std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs,
+  SmallVectorImpl> &OpDecls,
   SmallVectorImpl &Constraints,
   SmallVectorImpl &Clobbers, const MCInstrInfo *MII,
   const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) = 0;

diff  --git a/llvm/lib/MC/MCParser/AsmParser.cpp 
b/llvm/lib/MC/MCParser/AsmParser.cpp
index 3bc668e699cbc..45e6dfee4ca4b 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -258,9 +258,9 @@ class AsmParser : public MCAsmParser {
 return LTODiscardSymbols.contains(Name);
   }
 
-  bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
-unsigned &NumOutputs, unsigned &NumInputs,
-SmallVectorImpl> &OpDecls,
+  bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs,
+unsigned &NumInputs,
+SmallVectorImpl> &OpDecls,
 SmallVectorImpl &Constraints,
 SmallVectorImpl &Clobbers,
 const MCInstrInfo *MII, const MCInstPrinter *IP,
@@ -5927,8 +5927,8 @@ static int rewritesSort(const AsmRewrite *AsmRewriteA,
 }
 
 bool AsmParser::parseMSInlineAsm(
-void *AsmLoc, std::string &AsmString, unsigned &NumOutputs,
-unsigned &NumInputs, SmallVectorImpl> &OpDecls,
+std::string &AsmString, unsigned &NumOutputs, unsigned &NumInputs,
+SmallVectorImpl> &OpDecls,
 SmallVectorImpl &Constraints,
 SmallVectorImpl &Clobbers, const MCInstrInfo *MII,
 const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) {

diff  --git a/llvm/lib/MC/MCParser/MasmParser.cpp 
b/llvm/lib/MC/MCParser/MasmParser.cpp
index a91623770116a..a5a48d4b1e9d8 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -514,9 +514,9 @@ class MasmParser : public MCAsmParser {
 
   bool lookUpType(StringRef Name, AsmTypeInfo &Info) const override;
 
-  bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
-unsigned &NumOutputs, unsigned &NumInputs,
-SmallVectorImpl> &OpDecls,
+  bool parseMSInlineAsm(std::string &AsmString, unsigned &NumOutputs,
+unsigned &NumInputs,
+SmallVectorImpl> &OpDecls,
 SmallVectorImpl &Constraints,
   

[clang] 60ea6f3 - [ARM] Allow selecting hard-float ABI in integer-only MVE.

2023-02-01 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2023-02-01T09:05:12Z
New Revision: 60ea6f35a270d11c91770a2fc366888e7d3859f4

URL: 
https://github.com/llvm/llvm-project/commit/60ea6f35a270d11c91770a2fc366888e7d3859f4
DIFF: 
https://github.com/llvm/llvm-project/commit/60ea6f35a270d11c91770a2fc366888e7d3859f4.diff

LOG: [ARM] Allow selecting hard-float ABI in integer-only MVE.

Armv8.1-M can be configured to support the integer subset of the MVE
vector instructions, and no floating point. In that situation, the FP
and vector registers still exist, and so do the load, store and move
instructions that transfer data in and out of them. So there's no
reason the hard floating point ABI can't be supported, and you might
reasonably want to use it, for the sake of intrinsics-based code
passing explicit MVE vector types between functions.

But the selection of the hard float ABI in the backend was gated on
Subtarget->hasVFP2Base(), which is false in the case of integer MVE
and no FP.

As a result, you'd silently get the soft float ABI even if you
deliberately tried to select the hard float one, e.g. with clang options such as
--target=arm-none-eabi -mfloat-abi=hard -march=armv8.1m.main+nofp+mve

The hard float ABI should have been gated on the weaker condition
Subtarget->hasFPRegs(), because the only requirement for being able to
pass arguments in the FP registers is that the registers themselves
should exist.

I haven't added a new test, because changing the existing
CodeGen/Thumb2/float-ops.ll test seemed sufficient. But I've added a
comment explaining why the results are expected to be what they are.

Reviewed By: lenary

Differential Revision: https://reviews.llvm.org/D142703

Added: 


Modified: 
clang/docs/ReleaseNotes.rst
llvm/docs/ReleaseNotes.rst
llvm/lib/Target/ARM/ARMFastISel.cpp
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/test/CodeGen/Thumb2/float-ops.ll

Removed: 




diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index c6139252e0c34..2b2ca8b2987f0 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -163,6 +163,13 @@ DWARF Support in Clang
 Arm and AArch64 Support in Clang
 
 
+* The hard-float ABI is now available in Armv8.1-M configurations that
+  have integer MVE instructions (and therefore have FP registers) but
+  no scalar or vector floating point computation. Previously, trying
+  to select the hard-float ABI on such a target (via
+  ``-mfloat-abi=hard`` or a triple ending in ``hf``) would silently
+  use the soft-float ABI instead.
+
 Floating Point Support in Clang
 ---
 

diff  --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index d628257a76904..a3f02992048a4 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -71,6 +71,10 @@ Changes to the AMDGPU Backend
 Changes to the ARM Backend
 --
 
+- The hard-float ABI is now available in Armv8.1-M configurations that
+  have integer MVE instructions (and therefore have FP registers) but
+  no scalar or vector floating point computation.
+
 Changes to the AVR Backend
 --
 

diff  --git a/llvm/lib/Target/ARM/ARMFastISel.cpp 
b/llvm/lib/Target/ARM/ARMFastISel.cpp
index 62a090f4bca81..60a6e9ade9234 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -1842,7 +1842,7 @@ CCAssignFn 
*ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
   case CallingConv::CXX_FAST_TLS:
 // Use target triple & subtarget features to do actual dispatch.
 if (Subtarget->isAAPCS_ABI()) {
-  if (Subtarget->hasVFP2Base() &&
+  if (Subtarget->hasFPRegs() &&
   TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
 return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
   else

diff  --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp 
b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 8a28e6b4e4fd2..07fa829731563 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2081,7 +2081,7 @@ 
ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
   case CallingConv::Tail:
 if (!Subtarget->isAAPCS_ABI())
   return CallingConv::ARM_APCS;
-else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
+else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
  getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
  !isVarArg)
   return CallingConv::ARM_AAPCS_VFP;

diff  --git a/llvm/test/CodeGen/Thumb2/float-ops.ll 
b/llvm/test/CodeGen/Thumb2/float-ops.ll
index 51f18afaf0a46..d2b1dd6f05a3f 100644
--- a/llvm/test/CodeGen/Thumb2/float-ops.ll
+++ b/llvm/test/CodeGen/Thumb2/float-ops.ll
@@ -83,7 +83,7 @@ entry:
 define float @rem_f(float %a, float %b) {
 entry:
 ; CHECK-LABEL: rem_f:
-; NONE: bl fmodf
+; NONE: {{b|bl}} fmodf
 ; HARD: b fmod

[clang] ceb21fa - [ARM] Fix how size-0 bitfields affect homogeneous aggregates.

2022-06-10 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2022-06-10T11:27:24+01:00
New Revision: ceb21fa4e49ddc8478371b41250f206082c5c67e

URL: 
https://github.com/llvm/llvm-project/commit/ceb21fa4e49ddc8478371b41250f206082c5c67e
DIFF: 
https://github.com/llvm/llvm-project/commit/ceb21fa4e49ddc8478371b41250f206082c5c67e.diff

LOG: [ARM] Fix how size-0 bitfields affect homogeneous aggregates.

By both AAPCS32 and AAPCS64, the test for whether an aggregate
qualifies as homogeneous (either HFA or HVA) is based on the data
layout alone. So any logical member of the structure that does not
affect the data layout also should not affect homogeneity. In
particular, an empty bitfield ('int : 0') should make no difference.

In fact, clang considered it to make a difference in C but not in C++,
and justified that policy as compatible with gcc. But that's
considered a bug in gcc as well (at least for Arm targets), and it's
fixed in gcc 12.1.

This fix mimics gcc's: zero-sized bitfields are now ignored in all
languages for the Arm (32- and 64-bit) ABIs. But I've left the
previous behaviour unchanged in other ABIs, by means of adding an
ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate query
method which the Arm subclasses override.

Reviewed By: lenary

Differential Revision: https://reviews.llvm.org/D127197

Added: 
clang/test/CodeGen/homogeneous-aggregates.c

Modified: 
clang/lib/CodeGen/ABIInfo.h
clang/lib/CodeGen/TargetInfo.cpp

Removed: 




diff  --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h
index 0d12183055e18..6214148adab93 100644
--- a/clang/lib/CodeGen/ABIInfo.h
+++ b/clang/lib/CodeGen/ABIInfo.h
@@ -100,6 +100,7 @@ namespace swiftcall {
 
 virtual bool isHomogeneousAggregateSmallEnough(const Type *Base,
uint64_t Members) const;
+virtual bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const;
 
 bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
 uint64_t &Members) const;

diff  --git a/clang/lib/CodeGen/TargetInfo.cpp 
b/clang/lib/CodeGen/TargetInfo.cpp
index 5e97a946782ca..d481d1c2857bc 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -240,6 +240,11 @@ bool ABIInfo::isHomogeneousAggregateSmallEnough(const Type 
*Base,
   return false;
 }
 
+bool ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() const {
+  // For compatibility with GCC, ignore empty bitfields in C++ mode.
+  return getContext().getLangOpts().CPlusPlus;
+}
+
 LLVM_DUMP_METHOD void ABIArgInfo::dump() const {
   raw_ostream &OS = llvm::errs();
   OS << "(ABIArgInfo Kind=";
@@ -5213,8 +5218,7 @@ bool ABIInfo::isHomogeneousAggregate(QualType Ty, const 
Type *&Base,
   if (isEmptyRecord(getContext(), FT, true))
 continue;
 
-  // For compatibility with GCC, ignore empty bitfields in C++ mode.
-  if (getContext().getLangOpts().CPlusPlus &&
+  if (isZeroLengthBitfieldPermittedInHomogeneousAggregate() &&
   FD->isZeroLengthBitField(getContext()))
 continue;
 
@@ -5511,6 +5515,7 @@ class AArch64ABIInfo : public SwiftABIInfo {
   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
  uint64_t Members) const override;
+  bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override;
 
   bool isIllegalVectorType(QualType Ty) const;
 
@@ -5970,6 +5975,16 @@ bool 
AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
   return Members <= 4;
 }
 
+bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
+const {
+  // AAPCS64 says that the rule for whether something is a homogeneous
+  // aggregate is applied to the output of the data layout decision. So
+  // anything that doesn't affect the data layout also does not affect
+  // homogeneity. In particular, zero-length bitfields don't stop a struct
+  // being homogeneous.
+  return true;
+}
+
 Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
CodeGenFunction &CGF) const {
   ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true,
@@ -6339,6 +6354,7 @@ class ARMABIInfo : public SwiftABIInfo {
   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
  uint64_t Members) const override;
+  bool isZeroLengthBitfieldPermittedInHomogeneousAggregate() const override;
 
   bool isEffectivelyAAPCS_VFP(unsigned callConvention, bool acceptHalf) const;
 
@@ -7002,6 +7018,15 @@ bool ARMABIInfo::isHomogeneousAggregateSmallEnough(const 
Type *Base,
   return Members <= 4;
 }
 
+bool ARMABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate() const {
+  // AAPCS32 says that the rule for w

[clang] 9073b53 - [Clang,ARM] Add release note for D127197.

2022-06-10 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2022-06-10T15:19:33+01:00
New Revision: 9073b53e5d7f0bdc603a5c816300ac27644bc6a8

URL: 
https://github.com/llvm/llvm-project/commit/9073b53e5d7f0bdc603a5c816300ac27644bc6a8
DIFF: 
https://github.com/llvm/llvm-project/commit/9073b53e5d7f0bdc603a5c816300ac27644bc6a8.diff

LOG: [Clang,ARM] Add release note for D127197.

I should have put that in the original commit, but @lenary only just
reminded me that it needed to be there.

Added: 


Modified: 
clang/docs/ReleaseNotes.rst

Removed: 




diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 1ead55633a09..5905fa2e917e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -458,6 +458,10 @@ ABI Changes in Clang
   such packing. Clang now matches the gcc behavior (except on Darwin and PS4).
   You can switch back to the old ABI behavior with the flag:
   ``-fclang-abi-compat=14.0``.
+- When compiling C for ARM or AArch64, a zero-length bitfield in a ``struct``
+  (e.g. ``int : 0``) no longer prevents the structure from being considered a
+  homogeneous floating-point or vector aggregate. The new behavior agrees with
+  the AAPCS specification, and matches the similar bug fix in GCC 12.1.
 
 OpenMP Support in Clang
 ---



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 45a9945 - [ARM, MVE] Add ACLE intrinsics for the vminv/vmaxv family.

2020-03-20 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-03-20T15:42:33Z
New Revision: 45a9945b9ea95bd065d3c4e08d9089a309b24a23

URL: 
https://github.com/llvm/llvm-project/commit/45a9945b9ea95bd065d3c4e08d9089a309b24a23
DIFF: 
https://github.com/llvm/llvm-project/commit/45a9945b9ea95bd065d3c4e08d9089a309b24a23.diff

LOG: [ARM,MVE] Add ACLE intrinsics for the vminv/vmaxv family.

Summary:
I've implemented these as target-specific IR intrinsics, because
they're not //quite// enough like @llvm.experimental.vector.reduce.min
(which doesn't take the extra scalar parameter). Also this keeps the
predicated and unpredicated versions looking similar, and the
floating-point minnm/maxnm versions fold into the same schema.

We had a couple of min/max reductions already implemented, from the
initial pathfinding exercise in D67158. Those were done by having
separate IR intrinsic names for the signed and unsigned integer
versions; as part of this commit, I've changed them to use a flag
parameter indicating signedness, which is how we ended up deciding
that the rest of the MVE intrinsics family ought to work. So now
hopefully the whole lot is consistent.

In the new llc test, the output code from the `v8f16` test functions
looks quite unpleasant, but most of it is PCS lowering (you can't pass
a `half` directly in or out of a function). In other circumstances,
where you do something else with your `half` in the same function, it
doesn't look nearly as nasty.

Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard

Reviewed By: MarkMurrayARM

Subscribers: kristof.beyls, hiraditya, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D76490

Added: 


Modified: 
clang/include/clang/Basic/arm_mve.td
clang/test/CodeGen/arm-mve-intrinsics/vminvq.c
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMInstrMVE.td
llvm/test/CodeGen/Thumb2/mve-intrinsics/vminvq.ll

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index 45e45899de5f..d32f7fd92f2c 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -536,11 +536,42 @@ let params = T.Float in {
 (IRInt<"vmaxnma_predicated", 
[Vector,Predicate]> $a, $b, $pred)>;
 }
 
+multiclass Reduction basetypes,
+ bit needSign = 0,
+ dag postCG = (seq (id $ret)),
+ dag accArg = (args Accumulator:$prev),
+ dag preCG = (seq)> {
+  defvar intArgsBase   = (? $prev, $vec);
+  defvar intArgsUnpred = !con(intArgsBase,
+  !if(needSign, (? (unsignedflag Scalar)), (?)));
+  defvar intArgsPred   = !con(intArgsUnpred, (? $pred));
+  defvar intUnpred = !setop(intArgsUnpred, IRInt);
+  defvar intPred   = !setop(intArgsPred, IRInt<
+basename#"_predicated", !listconcat(basetypes, [Predicate])>);
+
+  def "": Intrinsic<
+Accumulator, !con(accArg, (args Vector:$vec)),
+!con(preCG, (seq intUnpred:$ret), postCG)>;
+  def _p: Intrinsic<
+Accumulator, !con(accArg, (args Vector:$vec, Predicate:$pred)),
+!con(preCG, (seq intPred:$ret), postCG)>;
+}
+
 let params = T.Int in {
-def vminvq: Intrinsic $prev, $vec))>;
-def vmaxvq: Intrinsic $prev, $vec))>;
+defm vminvq: Reduction;
+defm vmaxvq: Reduction;
+}
+
+let params = T.Signed in {
+defm vminavq: Reduction;
+defm vmaxavq: Reduction;
+}
+
+let params = T.Float in {
+defm vminnmvq: Reduction;
+defm vmaxnmvq: Reduction;
+defm vminnmavq: Reduction;
+defm vmaxnmavq: Reduction;
 }
 
 foreach half = [ "b", "t" ] in {

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c
index 1cf4d0ee198e..0d484bf98f7a 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vminvq.c
@@ -1,97 +1,853 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | 
FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa 
| FileCheck %s

[clang] 1adfa4c - [ARM, MVE] Add ACLE intrinsics for the vaddv/vaddlv family.

2020-03-20 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-03-20T15:42:33Z
New Revision: 1adfa4c99169733dedb67b4f7ab03d2fbb196162

URL: 
https://github.com/llvm/llvm-project/commit/1adfa4c99169733dedb67b4f7ab03d2fbb196162
DIFF: 
https://github.com/llvm/llvm-project/commit/1adfa4c99169733dedb67b4f7ab03d2fbb196162.diff

LOG: [ARM,MVE] Add ACLE intrinsics for the vaddv/vaddlv family.

Summary:
I've implemented them as target-specific IR intrinsics rather than
using `@llvm.experimental.vector.reduce.add`, on the grounds that the
'experimental' intrinsic doesn't currently have much code generation
benefit, and my replacements encapsulate the sign- or zero-extension
so that you don't expose the illegal MVE vector type (`<4 x i64>`) in
IR.

The machine instructions come in two versions: with and without an
input accumulator. My new IR intrinsics, like the 'experimental' one,
don't take an accumulator parameter: we represent that by just adding
on the input value using an ordinary i32 or i64 add. So if you write
the `vaddvaq` C-language intrinsic with an input accumulator of zero,
it can be optimised to VADDV, and conversely, if you write something
like `x += vaddvq(y)` then that can be combined into VADDVA.

Most of this is achieved in isel lowering, by converting these IR
intrinsics into the existing `ARMISD::VADDV` family of custom SDNode
types. For the difficult case (64-bit accumulators), isel lowering
already implements the optimization of folding an addition into a
VADDLV to make a VADDLVA; so once we've made a VADDLV, our job is
already done, except that I had to introduce a parallel set of ARMISD
nodes for the //predicated// forms of VADDLV.

For the simpler VADDV, we handle the predicated form by just leaving
the IR intrinsic alone and matching it in an ordinary dag pattern.

Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, danielkiss, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D76491

Added: 
clang/test/CodeGen/arm-mve-intrinsics/vaddv.c
llvm/test/CodeGen/Thumb2/mve-intrinsics/vaddv.ll

Modified: 
clang/include/clang/Basic/arm_mve.td
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMISelLowering.h
llvm/lib/Target/ARM/ARMInstrMVE.td

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index d32f7fd92f2c..25daae2a0a25 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -1445,6 +1445,33 @@ multiclass MVEBinaryVectorHoriz64R {
"vrmlldavha">;
 }
 
+multiclass VADDV {
+  defvar accArg = !if(acc, (args Scalar:$acc), (args));
+  defvar predArg = !if(pred, (args Predicate:$pred), (args));
+  defvar intrinsic = !if(pred,
+  IRInt,
+  IRInt);
+  defvar intCG = !con((intrinsic $v, (unsignedflag Scalar)),
+  !if(pred, (? $pred), (?)));
+  defvar accCG = !if(acc, (add intCG, $acc), intCG);
+
+  def "": Intrinsic;
+}
+
+let params = T.Int in {
+defm vaddvq: VADDV<0, 0, "addv", Scalar32>;
+defm vaddvaq   : VADDV<1, 0, "addv", Scalar32>;
+defm vaddvq_p  : VADDV<0, 1, "addv", Scalar32>;
+defm vaddvaq_p : VADDV<1, 1, "addv", Scalar32>;
+}
+
+let params = [s32, u32] in {
+defm vaddlvq: VADDV<0, 0, "addlv", Scalar64>;
+defm vaddlvaq   : VADDV<1, 0, "addlv", Scalar64>;
+defm vaddlvq_p  : VADDV<0, 1, "addlv", Scalar64>;
+defm vaddlvaq_p : VADDV<1, 1, "addlv", Scalar64>;
+}
+
 let params = T.Int in {
 def vabavq : Intrinsic (unsignedflag Scalar), $a, $b, $c)>;

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vaddv.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vaddv.c
new file mode 100644
index ..6bacc2775881
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vaddv.c
@@ -0,0 +1,470 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+ // RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve 
-mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S 
-mem2reg | FileCheck %s
+ // RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve 
-mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | 
opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vaddvq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call i32 @llvm.arm.mve.addv.v16i8(<16 x i8> 
[[A:%.*]], i32 0)
+// CHECK-NEXT:ret i32 [[TMP0]]
+//
+int32_t test_vaddvq_s8(int8x16_t a) {
+#ifdef POLYMORPHIC
+  return vaddvq(a);
+#else  /* POLYMORPHIC */
+  return vaddvq_s8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vaddvq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call i32 @llvm.arm.mve.addv.v8i16(<8 x i16> 
[[A:%.*]], i32 0)
+// CHECK-NEXT:ret i32 [[TMP0]]
+//
+int32_t test_vaddvq_s16(in

[clang] f282b6a - [ReleaseNotes, ARM] MVE intrinsics are all implemented!

2020-03-24 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-03-24T11:42:25Z
New Revision: f282b6ab23a0f6ede0f1c8b6ccb5ad3c17a5ed2f

URL: 
https://github.com/llvm/llvm-project/commit/f282b6ab23a0f6ede0f1c8b6ccb5ad3c17a5ed2f
DIFF: 
https://github.com/llvm/llvm-project/commit/f282b6ab23a0f6ede0f1c8b6ccb5ad3c17a5ed2f.diff

LOG: [ReleaseNotes,ARM] MVE intrinsics are all implemented!

Summary:
The next release of LLVM will support the full ACLE spec for MVE intrinsics,
so it's worth saying so in the release notes.

Reviewers: kristof.beyls

Reviewed By: kristof.beyls

Subscribers: cfe-commits, hans, dmgreen, llvm-commits

Tags: #llvm, #clang

Differential Revision: https://reviews.llvm.org/D76513

Added: 


Modified: 
clang/docs/ReleaseNotes.rst
llvm/docs/ReleaseNotes.rst

Removed: 




diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 9f20c271b50e..ad13fb1b3e95 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -57,6 +57,10 @@ Improvements to Clang's diagnostics
 Non-comprehensive list of changes in this release
 -
 
+- For the ARM target, C-language intrinsics are now provided for the full Arm
+  v8.1-M MVE instruction set. ``<arm_mve.h>`` supports the complete API defined
+  in the Arm C Language Extensions.
+
 
 New Compiler Flags
 --

diff  --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index bbfcc6076c01..4f6e759bbeb3 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -72,6 +72,9 @@ Changes to the ARM Backend
 
 During this release ...
 
+* Implemented C-language intrinsics for the full Arm v8.1-M MVE instruction
+  set. ``<arm_mve.h>`` now supports the complete API defined in the Arm C
+  Language Extensions.
 
 Changes to the MIPS Target
 --



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 8f1651c - [ARM, MVE] Add missing tests for vqdmlash intrinsics.

2020-03-25 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-03-25T09:46:16Z
New Revision: 8f1651ccead149fbd2e6fe692fb8a7f787a222bd

URL: 
https://github.com/llvm/llvm-project/commit/8f1651ccead149fbd2e6fe692fb8a7f787a222bd
DIFF: 
https://github.com/llvm/llvm-project/commit/8f1651ccead149fbd2e6fe692fb8a7f787a222bd.diff

LOG: [ARM,MVE] Add missing tests for vqdmlash intrinsics.

Summary:
These were accidentally left out of D76123. I added tests for the
other three instructions in this small cross-product family (vqdmlah,
vqrdmlah, vqrdmlash) but missed this one.

Reviewers: miyuki

Reviewed By: miyuki

Subscribers: kristof.beyls, dmgreen, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D76714

Added: 


Modified: 
clang/test/CodeGen/arm-mve-intrinsics/ternary.c
llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll

Removed: 




diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/ternary.c 
b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c
index 90e258715d26..77eb8d41fe58 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/ternary.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c
@@ -357,6 +357,47 @@ int32x4_t test_vqdmlahq_n_s32(int32x4_t a, int32x4_t b, 
int32_t c) {
 #endif /* POLYMORPHIC */
 }
 
+// CHECK-LABEL: @test_vqdmlashq_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = zext i8 [[ADD:%.*]] to i32
+// CHECK-NEXT:[[TMP1:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqdmlash.v16i8(<16 x i8> [[M1:%.*]], <16 x i8> [[M2:%.*]], i32 
[[TMP0]])
+// CHECK-NEXT:ret <16 x i8> [[TMP1]]
+//
+int8x16_t test_vqdmlashq_n_s8(int8x16_t m1, int8x16_t m2, int8_t add) {
+#ifdef POLYMORPHIC
+  return vqdmlashq(m1, m2, add);
+#else  /* POLYMORPHIC */
+  return vqdmlashq_n_s8(m1, m2, add);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqdmlashq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[ADD:%.*]] to i32
+// CHECK-NEXT:[[TMP1:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqdmlash.v8i16(<8 x i16> [[M1:%.*]], <8 x i16> [[M2:%.*]], i32 
[[TMP0]])
+// CHECK-NEXT:ret <8 x i16> [[TMP1]]
+//
+int16x8_t test_vqdmlashq_n_s16(int16x8_t m1, int16x8_t m2, int16_t add) {
+#ifdef POLYMORPHIC
+  return vqdmlashq(m1, m2, add);
+#else  /* POLYMORPHIC */
+  return vqdmlashq_n_s16(m1, m2, add);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqdmlashq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqdmlash.v4i32(<4 x i32> [[M1:%.*]], <4 x i32> [[M2:%.*]], i32 
[[ADD:%.*]])
+// CHECK-NEXT:ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vqdmlashq_n_s32(int32x4_t m1, int32x4_t m2, int32_t add) {
+#ifdef POLYMORPHIC
+  return vqdmlashq(m1, m2, add);
+#else  /* POLYMORPHIC */
+  return vqdmlashq_n_s32(m1, m2, add);
+#endif /* POLYMORPHIC */
+}
+
 // CHECK-LABEL: @test_vqrdmlahq_n_s8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
@@ -810,6 +851,53 @@ int32x4_t test_vqdmlahq_m_n_s32(int32x4_t a, int32x4_t b, 
int32_t c, mve_pred16_
 #endif /* POLYMORPHIC */
 }
 
+// CHECK-LABEL: @test_vqdmlashq_m_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = zext i8 [[ADD:%.*]] to i32
+// CHECK-NEXT:[[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:[[TMP2:%.*]] = call <16 x i1> 
@llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
+// CHECK-NEXT:[[TMP3:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqdmlash.predicated.v16i8.v16i1(<16 x i8> [[M1:%.*]], <16 x i8> 
[[M2:%.*]], i32 [[TMP0]], <16 x i1> [[TMP2]])
+// CHECK-NEXT:ret <16 x i8> [[TMP3]]
+//
+int8x16_t test_vqdmlashq_m_n_s8(int8x16_t m1, int8x16_t m2, int8_t add, 
mve_pred16_t p) {
+#ifdef POLYMORPHIC
+  return vqdmlashq_m(m1, m2, add, p);
+#else  /* POLYMORPHIC */
+  return vqdmlashq_m_n_s8(m1, m2, add, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqdmlashq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[ADD:%.*]] to i32
+// CHECK-NEXT:[[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:[[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 
[[TMP1]])
+// CHECK-NEXT:[[TMP3:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqdmlash.predicated.v8i16.v8i1(<8 x i16> [[M1:%.*]], <8 x i16> 
[[M2:%.*]], i32 [[TMP0]], <8 x i1> [[TMP2]])
+// CHECK-NEXT:ret <8 x i16> [[TMP3]]
+//
+int16x8_t test_vqdmlashq_m_n_s16(int16x8_t m1, int16x8_t m2, int16_t add, 
mve_pred16_t p) {
+#ifdef POLYMORPHIC
+  return vqdmlashq_m(m1, m2, add, p);
+#else  /* POLYMORPHIC */
+  return vqdmlashq_m_n_s16(m1, m2, add, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqdmlashq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:[[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
+// CHECK-NEXT:[[TMP2:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqdmlash.predicated.v4i32.v4i1(<4 x i32> [[M1:%.*]], <4 x i32> 
[[M2:%.*]], i32 [[ADD:%.*]], <4 x 

[clang] 8c26f42 - [clang, ARM, MVE] Remove redundant #includes in test file.

2020-02-27 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-02-27T09:39:35Z
New Revision: 8c26f42fe90e3f8612d2f57a3c9c5e7fcff5e91e

URL: 
https://github.com/llvm/llvm-project/commit/8c26f42fe90e3f8612d2f57a3c9c5e7fcff5e91e
DIFF: 
https://github.com/llvm/llvm-project/commit/8c26f42fe90e3f8612d2f57a3c9c5e7fcff5e91e.diff

LOG: [clang,ARM,MVE] Remove redundant #includes in test file.

I made that file by pasting together several pieces, and forgot to
take out the #include <arm_mve.h> from the tops of the later ones, so
the test was pointlessly including the same header five times. NFC.

Added: 


Modified: 
clang/test/CodeGen/arm-mve-intrinsics/absneg.c

Removed: 




diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c 
b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c
index 94339c834809..4f888093d8b8 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c
@@ -527,7 +527,6 @@ int32x4_t test_vqnegq_s32(int32x4_t a)
 return vqnegq_s32(a);
 #endif /* POLYMORPHIC */
 }
-#include <arm_mve.h>
 
 // CHECK-LABEL: @test_vnegq_m_f16(
 // CHECK-NEXT:  entry:
@@ -689,8 +688,6 @@ int32x4_t test_vnegq_x_s32(int32x4_t a, mve_pred16_t p)
 #endif /* POLYMORPHIC */
 }
 
-#include <arm_mve.h>
-
 // CHECK-LABEL: @test_vabsq_m_f16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
@@ -851,8 +848,6 @@ int32x4_t test_vabsq_x_s32(int32x4_t a, mve_pred16_t p)
 #endif /* POLYMORPHIC */
 }
 
-#include <arm_mve.h>
-
 // CHECK-LABEL: @test_vqnegq_m_s8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
@@ -901,8 +896,6 @@ int32x4_t test_vqnegq_m_s32(int32x4_t inactive, int32x4_t 
a, mve_pred16_t p)
 #endif /* POLYMORPHIC */
 }
 
-#include <arm_mve.h>
-
 // CHECK-LABEL: @test_vqabsq_m_s8(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] a41ecf0 - [ARM, MVE] Add ACLE intrinsics for VQMOV[U]N family.

2020-03-02 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-03-02T10:33:30Z
New Revision: a41ecf0eb05190c8597f98b8d41d7a6e678aec0b

URL: 
https://github.com/llvm/llvm-project/commit/a41ecf0eb05190c8597f98b8d41d7a6e678aec0b
DIFF: 
https://github.com/llvm/llvm-project/commit/a41ecf0eb05190c8597f98b8d41d7a6e678aec0b.diff

LOG: [ARM,MVE] Add ACLE intrinsics for VQMOV[U]N family.

Summary:
These instructions work like VMOVN (narrowing a vector of wide values
to half size, and overwriting every other lane of an output register
with the result), except that the narrowing conversion is saturating.
They come in three signedness flavours: signed to signed, unsigned to
unsigned, and signed to unsigned. All are represented in IR by a
target-specific intrinsic that takes two separate 'unsigned' flags.

Reviewers: MarkMurrayARM, dmgreen, miyuki, ostannard

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D75252

Added: 
clang/test/CodeGen/arm-mve-intrinsics/vqmovn.c
llvm/test/CodeGen/Thumb2/mve-intrinsics/vqmovn.ll

Modified: 
clang/include/clang/Basic/arm_mve.td
clang/include/clang/Basic/arm_mve_defs.td
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMInstrMVE.td

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index efc6be1158b8..c64a75ffeb8e 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -514,6 +514,33 @@ defm vmovntq: vmovn<1, (zip (vreinterpret $inactive, 
Vector), $a)>;
 defm vmovnbq: vmovn<0,
(zip $a, (vreinterpret (vrev $inactive, (bitsize Scalar)), Vector))>;
 
+multiclass vqmovn {
+  defvar RetVector = VecOf;
+
+  let params = [s16, u16, s32, u32] in {
+def : Intrinsic<
+  RetVector, (args RetVector:$inactive, Vector:$a),
+  (IRInt<"vqmovn", [RetVector, Vector]>
+  $inactive, $a, (unsignedflag RetScalar), (unsignedflag Scalar), 
top)>,
+  NameOverride;
+def: Intrinsic<
+  RetVector, (args RetVector:$inactive, Vector:$a, Predicate:$pred),
+  (IRInt<"vqmovn_predicated", [RetVector, Vector, Predicate]>
+  $inactive, $a, (unsignedflag RetScalar), (unsignedflag Scalar),
+  top, $pred)>,
+  NameOverride;
+  }
+}
+
+let params = [s16, s32, u16, u32] in {
+  defm vqmovntq: vqmovn<1, HalfScalar>;
+  defm vqmovnbq: vqmovn<0, HalfScalar>;
+}
+let params = [s16, s32] in {
+  defm vqmovuntq: vqmovn<1, UHalfScalar>;
+  defm vqmovunbq: vqmovn<0, UHalfScalar>;
+}
+
 multiclass vrnd {
   let params = T.Float in {
 def "": Intrinsic;

diff  --git a/clang/include/clang/Basic/arm_mve_defs.td 
b/clang/include/clang/Basic/arm_mve_defs.td
index dbcad78cce75..daf73871f052 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -323,8 +323,10 @@ def SVector: VecOf;
 // UHalfVector is a vector of half-sized _unsigned integers_.
 def DblVector: VecOf>;
 def DblPredicate: PredOf>;
-def HalfVector: VecOf>;
-def UHalfVector: VecOf>>;
+def HalfScalar: HalfSize;
+def HalfVector: VecOf;
+def UHalfScalar: Unsigned>;
+def UHalfVector: VecOf;
 
 // Expands to the 32-bit integer of the same signedness as Scalar.
 def Scalar32: CopyKind;

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vqmovn.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vqmovn.c
new file mode 100644
index ..24c3fd550bf4
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vqmovn.c
@@ -0,0 +1,366 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve 
-mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S 
-emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -DPOLYMORPHIC -triple thumbv8.1m.main-arm-none-eabi 
-target-feature +mve -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vqmovnbq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqmovn.v16i8.v8i16(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 
0, i32 0, i32 0)
+// CHECK-NEXT:ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vqmovnbq_s16(int8x16_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+return vqmovnbq(a, b);
+#else /* POLYMORPHIC */
+return vqmovnbq_s16(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqmovnbq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqmovn.v8i16.v4i32(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 
0, i32 0, i32 0)
+// CHECK-NEXT:ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vqmovnbq_s32(int16x8_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+return vqmovnbq(a, b);
+#else /* POLYMORPHIC */
+return 

[clang] 1a8cbfa - [ARM, MVE] Add ACLE intrinsics for VCVT[ANPM] family.

2020-03-02 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-03-02T10:33:30Z
New Revision: 1a8cbfa514ff83ac62c20deec0d9ea2c6606bbdf

URL: 
https://github.com/llvm/llvm-project/commit/1a8cbfa514ff83ac62c20deec0d9ea2c6606bbdf
DIFF: 
https://github.com/llvm/llvm-project/commit/1a8cbfa514ff83ac62c20deec0d9ea2c6606bbdf.diff

LOG: [ARM,MVE] Add ACLE intrinsics for VCVT[ANPM] family.

Summary:
These instructions convert a vector of floats to a vector of integers
of the same size, with assorted non-default rounding modes.
Implemented in IR as target-specific intrinsics, because as far as I
can see there are no matches for that functionality in the standard IR
intrinsics list.

Reviewers: MarkMurrayARM, dmgreen, miyuki, ostannard

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D75255

Added: 
clang/test/CodeGen/arm-mve-intrinsics/vcvt_anpm.c
llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt_anpm.ll

Modified: 
clang/include/clang/Basic/arm_mve.td
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMInstrMVE.td

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index dfdb101d587f..c1cc10b09dc6 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -482,11 +482,25 @@ multiclass float_int_conversions,
 NameOverride<"vcvtq_" # IScalar>;
+
+  foreach suffix = ["a","n","p","m"] in
+def : Intrinsic
+(unsignedflag IScalar), $a)>,
+  NameOverride<"vcvt"#suffix#"q_" # IScalar>;
 }
 defm vcvtq: IntrinsicMX
 $a, (unsignedflag IScalar), $pred, $inactive),
 1, "_" # IScalar, PNT_2Type, PNT_None>;
+
+foreach suffix = ["a","n","p","m"] in {
+  defm "vcvt"#suffix#"q" : IntrinsicMX<
+  IVector, (args FVector:$a, Predicate:$pred),
+  (IRInt<"vcvt"#suffix#"_predicated", [IVector, FVector, Predicate]>
+  (unsignedflag IScalar), $inactive, $a, $pred),
+  1, "_" # IScalar, PNT_2Type, PNT_None>;
+}
   }
 }
 

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt_anpm.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vcvt_anpm.c
new file mode 100644
index ..e5dbd4c8f68b
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt_anpm.c
@@ -0,0 +1,614 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -DPOLYMORPHIC -triple thumbv8.1m.main-arm-none-eabi 
-target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vcvtaq_s16_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> 
@llvm.arm.mve.vcvta.v8i16.v8f16(i32 0, <8 x half> [[A:%.*]])
+// CHECK-NEXT:ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vcvtaq_s16_f16(float16x8_t a)
+{
+return vcvtaq_s16_f16(a);
+}
+
+// CHECK-LABEL: @test_vcvtaq_s32_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> 
@llvm.arm.mve.vcvta.v4i32.v4f32(i32 0, <4 x float> [[A:%.*]])
+// CHECK-NEXT:ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vcvtaq_s32_f32(float32x4_t a)
+{
+return vcvtaq_s32_f32(a);
+}
+
+// CHECK-LABEL: @test_vcvtaq_u16_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> 
@llvm.arm.mve.vcvta.v8i16.v8f16(i32 1, <8 x half> [[A:%.*]])
+// CHECK-NEXT:ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vcvtaq_u16_f16(float16x8_t a)
+{
+return vcvtaq_u16_f16(a);
+}
+
+// CHECK-LABEL: @test_vcvtaq_u32_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> 
@llvm.arm.mve.vcvta.v4i32.v4f32(i32 1, <4 x float> [[A:%.*]])
+// CHECK-NEXT:ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vcvtaq_u32_f32(float32x4_t a)
+{
+return vcvtaq_u32_f32(a);
+}
+
+// CHECK-LABEL: @test_vcvtmq_s16_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> 
@llvm.arm.mve.vcvtm.v8i16.v8f16(i32 0, <8 x half> [[A:%.*]])
+// CHECK-NEXT:ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vcvtmq_s16_f16(float16x8_t a)
+{
+return vcvtmq_s16_f16(a);
+}
+
+// CHECK-LABEL: @test_vcvtmq_s32_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> 
@llvm.arm.mve.vcvtm.v4i32.v4f32(i32 0, <4 x float> [[A:%.*]])
+// CHECK-NEXT:ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vcvtmq_s32_f32(float32x4_t a)
+{
+return vcvtmq_s32_f32(a);
+}
+
+// CHECK-LABEL: @test_vcvtmq_u16_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> 
@llvm.arm.mve.vcvtm.v

[clang] b08d2dd - [ARM, MVE] Add ACLE intrinsics for VCVT.F32.F16 family.

2020-03-02 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-03-02T10:33:30Z
New Revision: b08d2ddd69b4a2209930b31fe456b4d7c1ce148f

URL: 
https://github.com/llvm/llvm-project/commit/b08d2ddd69b4a2209930b31fe456b4d7c1ce148f
DIFF: 
https://github.com/llvm/llvm-project/commit/b08d2ddd69b4a2209930b31fe456b4d7c1ce148f.diff

LOG: [ARM,MVE] Add ACLE intrinsics for VCVT.F32.F16 family.

Summary:
These instructions make a vector of `<4 x float>` by widening every
other lane of a vector of `<8 x half>`.

I wondered about representing these using standard IR, along the lines
of a shufflevector to extract elements of the input into a `<4 x half>`
followed by an `fpext` to turn that into `<4 x float>`. But it looks as
if that would take a lot of work in isel lowering to make it match any
pattern I could sensibly write in Tablegen, and also I haven't been
able to think of any other case where that pattern might be generated
in IR, so there wouldn't be any extra code generation win from doing
it that way.

Therefore, I've just used another target-specific intrinsic. We can
always change it to the other way later if anyone thinks of a good
reason.

(In order to put the intrinsic definition near similar things in
`IntrinsicsARM.td`, I've also lifted the definition of the
`MVEMXPredicated` multiclass higher up the file, without changing it.)

Reviewers: MarkMurrayARM, dmgreen, miyuki, ostannard

Reviewed By: miyuki

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D75254

Added: 


Modified: 
clang/include/clang/Basic/arm_mve.td
clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMInstrMVE.td
llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt.ll

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index c64a75ffeb8e..dfdb101d587f 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -453,6 +453,15 @@ foreach half = [ "b", "t" ] in {
   VecOf, (args VecOf:$inactive, Vector:$a, PredOf:$pred),
   (IRInt<"vcvt_narrow_predicated"> $inactive, $a, halfconst, $pred)>;
   } // params = [f32], pnt = PNT_None
+
+  let params = [f16], pnt = PNT_None in {
+def vcvt#half#q_f32: Intrinsic, (args Vector:$a),
+  (IRInt<"vcvt_widen"> $a, halfconst)>;
+defm vcvt#half#q: IntrinsicMX<
+  VecOf, (args Vector:$a, PredOf:$pred),
+  (IRInt<"vcvt_widen_predicated"> $inactive, $a, halfconst, $pred),
+  1, "_f32">;
+  } // params = [f16], pnt = PNT_None
 } // loop over half = "b", "t"
 
 multiclass float_int_conversions {

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
index 0391b77e365f..d03ac31a8024 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
@@ -697,3 +697,71 @@ uint32x4_t test_vcvtq_x_n_u32_f32(float32x4_t a, 
mve_pred16_t p)
 {
 return vcvtq_x_n_u32_f32(a, 32, p);
 }
+
+// CHECK-LABEL: @test_vcvtbq_f32_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen(<8 
x half> [[A:%.*]], i32 0)
+// CHECK-NEXT:ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vcvtbq_f32_f16(float16x8_t a)
+{
+return vcvtbq_f32_f16(a);
+}
+
+// CHECK-LABEL: @test_vcvttq_f32_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.widen(<8 
x half> [[A:%.*]], i32 1)
+// CHECK-NEXT:ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vcvttq_f32_f16(float16x8_t a)
+{
+return vcvttq_f32_f16(a);
+}
+
+// CHECK-LABEL: @test_vcvtbq_m_f32_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:[[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
+// CHECK-NEXT:[[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.vcvt.widen.predicated(<4 x float> [[INACTIVE:%.*]], <8 x half> 
[[A:%.*]], i32 0, <4 x i1> [[TMP1]])
+// CHECK-NEXT:ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvtbq_m_f32_f16(float32x4_t inactive, float16x8_t a, 
mve_pred16_t p)
+{
+return vcvtbq_m_f32_f16(inactive, a, p);
+}
+
+// CHECK-LABEL: @test_vcvttq_m_f32_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:[[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 
[[TMP0]])
+// CHECK-NEXT:[[TMP2:%.*]] = call <4 x float> 
@llvm.arm.mve.vcvt.widen.predicated(<4 x float> [[INACTIVE:%.*]], <8 x half> 
[[A:%.*]], i32 1, <4 x i1> [[TMP1]])
+// CHECK-NEXT:ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcvttq_m_f32_f16(float32x4_t inactive, float16x8_t a, 
mve_pred16_t p)
+{
+return vcvttq_m_f32_f16(inactive, a, p);
+}
+
+// CHECK-LABEL: @test_vcvtbq_x_f32_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]]

[clang] 810127f - [ARM,MVE] Add the `vsbciq` intrinsics.

2020-03-04 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-03-04T08:49:27Z
New Revision: 810127f6ab5d5d7e7d6b8c3ae0b96f2027437ca8

URL: 
https://github.com/llvm/llvm-project/commit/810127f6ab5d5d7e7d6b8c3ae0b96f2027437ca8
DIFF: 
https://github.com/llvm/llvm-project/commit/810127f6ab5d5d7e7d6b8c3ae0b96f2027437ca8.diff

LOG: [ARM,MVE] Add the `vsbciq` intrinsics.

Summary:
These are exactly parallel to the existing `vadciq` intrinsics, which
we implemented last year as part of the original MVE intrinsics
framework setup.

Just like VADC/VADCI, the MVE VSBC/VSBCI instructions deliver two
outputs, both of which the intrinsic exposes: a modified vector
register and a carry flag. So they have to be instruction-selected in
C++ rather than Tablegen. However, in this case, that's trivial: the
same C++ isel routine we already have for VADC works unchanged, and
all we have to do is to pass it a different instruction id.

Reviewers: MarkMurrayARM, dmgreen, miyuki, ostannard

Reviewed By: miyuki

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D75444

Added: 


Modified: 
clang/include/clang/Basic/arm_mve.td
clang/test/CodeGen/arm-mve-intrinsics/vadc.c
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
llvm/test/CodeGen/Thumb2/mve-intrinsics/vadc.ll

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index c1cc10b09dc6..fd04dfde95c2 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -1139,27 +1139,31 @@ defm sqrshr: ScalarSaturatingShiftReg;
 def lsll: LongScalarShift $lo, $hi, $sh)>;
 def asrl: LongScalarShift $lo, $hi, $sh)>;
 
+multiclass vadcsbc {
+  def q: Intrinsic:$carry),
+  (seq (IRInt $a, $b, (shl (load $carry), 29)):$pair,
+   (store (and 1, (lshr (xval $pair, 1), 29)), $carry),
+   (xval $pair, 0))>;
+  def iq: Intrinsic:$carry),
+  (seq (IRInt $a, $b, 0):$pair,
+   (store (and 1, (lshr (xval $pair, 1), 29)), $carry),
+   (xval $pair, 0))>;
+  def q_m: Intrinsic:$carry, Predicate:$pred),
+  (seq (IRInt $inactive, $a, $b,
+   (shl (load $carry), 29), $pred):$pair,
+   (store (and 1, (lshr (xval $pair, 1), 29)), $carry),
+   (xval $pair, 0))>;
+  def iq_m: Intrinsic:$carry, Predicate:$pred),
+  (seq (IRInt $inactive, $a, $b,
+   0, $pred):$pair,
+   (store (and 1, (lshr (xval $pair, 1), 29)), $carry),
+   (xval $pair, 0))>;
+}
 let params = T.Int32 in {
-def vadcq: Intrinsic:$carry),
-(seq (IRInt<"vadc", [Vector]> $a, $b, (shl (load $carry), 29)):$pair,
- (store (and 1, (lshr (xval $pair, 1), 29)), $carry),
- (xval $pair, 0))>;
-def vadciq: Intrinsic:$carry),
-(seq (IRInt<"vadc", [Vector]> $a, $b, 0):$pair,
- (store (and 1, (lshr (xval $pair, 1), 29)), $carry),
- (xval $pair, 0))>;
-def vadcq_m: Intrinsic:$carry, Predicate:$pred),
-(seq (IRInt<"vadc_predicated", [Vector, Predicate]> $inactive, $a, $b,
- (shl (load $carry), 29), $pred):$pair,
- (store (and 1, (lshr (xval $pair, 1), 29)), $carry),
- (xval $pair, 0))>;
-def vadciq_m: Intrinsic:$carry, Predicate:$pred),
-(seq (IRInt<"vadc_predicated", [Vector, Predicate]> $inactive, $a, $b,
- 0, $pred):$pair,
- (store (and 1, (lshr (xval $pair, 1), 29)), $carry),
- (xval $pair, 0))>;
+  defm vadc: vadcsbc;
+  defm vsbc: vadcsbc;
 }
 
 multiclass VectorComplexAddPred {

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vadc.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vadc.c
index 94fa1d1b00f2..f5e6c7d33983 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vadc.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vadc.c
@@ -87,3 +87,163 @@ int32x4_t test_vadcq_m_s32(int32x4_t inactive, int32x4_t a, 
int32x4_t b, unsigne
 return vadcq_m_s32(inactive, a, b, carry, p);
 #endif /* POLYMORPHIC */
 }
+
+// CHECK-LABEL: @test_vsbciq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call { <4 x i32>, i32 } 
@llvm.arm.mve.vsbc.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0)
+// CHECK-NEXT:[[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 1
+// CHECK-NEXT:[[TMP2:%.*]] = lshr i32 [[TMP1]], 29
+// CHECK-NEXT:[[TMP3:%.*]] = and i32 1, [[TMP2]]
+// CHECK-NEXT:store i32 [[TMP3]], i32* [[CARRY_OUT:%.*]], align 4
+// CHECK-NEXT:[[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
+// CHECK-NEXT:ret <4 x i32> [[TMP4]]
+//
+int32x4_t test_vsbciq_s32(int32x4_t a, int32x4_t b, unsigned *carry_out) {
+#ifdef POLYMORPHIC
+  return vsbciq(a, b, carry_out);
+#else  /* POLYMORPHIC */
+  return vsbciq_s32(a, b, carry_out);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsbciq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] 

[clang] 068b2f3 - [ARM,MVE] Add the `vshlcq` intrinsics.

2020-03-04 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-03-04T08:49:27Z
New Revision: 068b2f313c7d27d9f6445df12d4d45d2d8c00898

URL: 
https://github.com/llvm/llvm-project/commit/068b2f313c7d27d9f6445df12d4d45d2d8c00898
DIFF: 
https://github.com/llvm/llvm-project/commit/068b2f313c7d27d9f6445df12d4d45d2d8c00898.diff

LOG: [ARM,MVE] Add the `vshlcq` intrinsics.

Summary:
The VSHLC instruction performs a left shift of a whole vector register
by an immediate shift count up to 32, shifting in new bits at the low
end from a GPR and delivering the shifted-out bits from the high end
back into the same GPR.

Since the instruction produces two outputs (the shifted vector
register and the output GPR of shifted-out bits), it has to be
instruction-selected in C++ rather than Tablegen.

Reviewers: MarkMurrayARM, dmgreen, miyuki, ostannard

Reviewed By: miyuki

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D75445

Added: 
clang/test/CodeGen/arm-mve-intrinsics/vshlc.c
llvm/test/CodeGen/Thumb2/mve-intrinsics/vshlc.ll

Modified: 
clang/include/clang/Basic/arm_mve.td
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index fd04dfde95c2..d9a2035e8a0e 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -1166,6 +1166,22 @@ let params = T.Int32 in {
   defm vsbc: vadcsbc;
 }
 
+let params = T.Int in {
+  def vshlcq: Intrinsic<
+Vector, (args Vector:$v, Ptr:$ps, imm_1to32:$imm),
+(seq (load $ps):$s,
+ (IRInt<"vshlc", [Vector]> $v, $s, $imm):$pair,
+ (store (xval $pair, 0), $ps),
+ (xval $pair, 1))>;
+  def vshlcq_m: Intrinsic<
+Vector, (args Vector:$v, Ptr:$ps, imm_1to32:$imm, Predicate:$pred),
+(seq (load $ps):$s,
+ (IRInt<"vshlc_predicated", [Vector, Predicate]>
+  $v, $s, $imm, $pred):$pair,
+ (store (xval $pair, 0), $ps),
+ (xval $pair, 1))>;
+}
+
 multiclass VectorComplexAddPred {
   def "" : Intrinsic not_halving, angle, $a, $b)>;

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vshlc.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vshlc.c
new file mode 100644
index ..1a53a90f26fa
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vshlc.c
@@ -0,0 +1,221 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve 
-mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S 
-emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve 
-mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone 
-DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vshlcq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
+// CHECK-NEXT:[[TMP1:%.*]] = call { i32, <16 x i8> } 
@llvm.arm.mve.vshlc.v16i8(<16 x i8> [[A:%.*]], i32 [[TMP0]], i32 18)
+// CHECK-NEXT:[[TMP2:%.*]] = extractvalue { i32, <16 x i8> } [[TMP1]], 0
+// CHECK-NEXT:store i32 [[TMP2]], i32* [[B]], align 4
+// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { i32, <16 x i8> } [[TMP1]], 1
+// CHECK-NEXT:ret <16 x i8> [[TMP3]]
+//
+int8x16_t test_vshlcq_s8(int8x16_t a, uint32_t *b) {
+#ifdef POLYMORPHIC
+  return vshlcq(a, b, 18);
+#else  /* POLYMORPHIC */
+  return vshlcq_s8(a, b, 18);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlcq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
+// CHECK-NEXT:[[TMP1:%.*]] = call { i32, <8 x i16> } 
@llvm.arm.mve.vshlc.v8i16(<8 x i16> [[A:%.*]], i32 [[TMP0]], i32 16)
+// CHECK-NEXT:[[TMP2:%.*]] = extractvalue { i32, <8 x i16> } [[TMP1]], 0
+// CHECK-NEXT:store i32 [[TMP2]], i32* [[B]], align 4
+// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { i32, <8 x i16> } [[TMP1]], 1
+// CHECK-NEXT:ret <8 x i16> [[TMP3]]
+//
+int16x8_t test_vshlcq_s16(int16x8_t a, uint32_t *b) {
+#ifdef POLYMORPHIC
+  return vshlcq(a, b, 16);
+#else  /* POLYMORPHIC */
+  return vshlcq_s16(a, b, 16);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlcq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = load i32, i32* [[B:%.*]], align 4
+// CHECK-NEXT:[[TMP1:%.*]] = call { i32, <4 x i32> } 
@llvm.arm.mve.vshlc.v4i32(<4 x i32> [[A:%.*]], i32 [[TMP0]], i32 4)
+// CHECK-NEXT:[[TMP2:%.*]] = extractvalue { i32, <4 x i32> } [[TMP1]], 0
+// CHECK-NEXT:store i32 [[TMP2]], i32* [[B]], align 4
+// CHECK-NEXT:[[TMP3:%.*]] = extractvalue { i32, <4 x i32> } [[TMP1]], 1
+// CHECK-NEXT:ret <4 x i32> [[TMP3]]
+//
+int32x4_t test_vshlcq_s

[clang] 26bc7cb - [clang, MveEmitter] Fix sign/zero extension in range limits.

2019-11-06 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2019-11-06T09:01:42Z
New Revision: 26bc7cb05edd6bea4b9a1593baf0fbe9e45f54e4

URL: 
https://github.com/llvm/llvm-project/commit/26bc7cb05edd6bea4b9a1593baf0fbe9e45f54e4
DIFF: 
https://github.com/llvm/llvm-project/commit/26bc7cb05edd6bea4b9a1593baf0fbe9e45f54e4.diff

LOG: [clang,MveEmitter] Fix sign/zero extension in range limits.

In the code that generates Sema range checks on constant arguments, I
had a piece of code that checks the bounds specified in the Tablegen
intrinsic description against the range of the integer type being
tested. If the bounds are large enough to permit any value of the
integer type, you can omit the compile-time range check. (This case is
expected to come up in some of the bitwise operation intrinsics.)

But somehow I got my signed/unsigned check backwards (asking for the
signed min/max of an unsigned type and vice versa), and also made a
sign extension error in which a signed negative value gets
zero-extended. Now rewritten more sensibly, and it should get its
first sensible test from the next batch of intrinsics I'm planning to
add in D69791.

Reviewers: dmgreen

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D69789

Added: 


Modified: 
clang/utils/TableGen/MveEmitter.cpp

Removed: 




diff  --git a/clang/utils/TableGen/MveEmitter.cpp 
b/clang/utils/TableGen/MveEmitter.cpp
index 9c3328e3bbfb..1f9752261fbf 100644
--- a/clang/utils/TableGen/MveEmitter.cpp
+++ b/clang/utils/TableGen/MveEmitter.cpp
@@ -782,15 +782,14 @@ class ACLEIntrinsic {
   }
 
   llvm::APInt typelo, typehi;
-  if (cast<ScalarType>(IA.ArgType)->kind() == ScalarTypeKind::UnsignedInt) 
{
-typelo = llvm::APInt::getSignedMinValue(IA.ArgType->sizeInBits());
-typehi = llvm::APInt::getSignedMaxValue(IA.ArgType->sizeInBits());
+  unsigned Bits = IA.ArgType->sizeInBits();
+  if (cast<ScalarType>(IA.ArgType)->kind() == ScalarTypeKind::SignedInt) {
+typelo = llvm::APInt::getSignedMinValue(Bits).sext(128);
+typehi = llvm::APInt::getSignedMaxValue(Bits).sext(128);
   } else {
-typelo = llvm::APInt::getMinValue(IA.ArgType->sizeInBits());
-typehi = llvm::APInt::getMaxValue(IA.ArgType->sizeInBits());
+typelo = llvm::APInt::getMinValue(Bits).zext(128);
+typehi = llvm::APInt::getMaxValue(Bits).zext(128);
   }
-  typelo = typelo.sext(128);
-  typehi = typehi.sext(128);
 
   std::string Index = utostr(kv.first);
 



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 38f0165 - [ARM MVE] Remove accidental 64-bit vst2/vld2 intrinsics.

2019-11-06 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2019-11-06T09:01:42Z
New Revision: 38f016520f6edbfa7d059b60ac54e80dd955ada5

URL: 
https://github.com/llvm/llvm-project/commit/38f016520f6edbfa7d059b60ac54e80dd955ada5
DIFF: 
https://github.com/llvm/llvm-project/commit/38f016520f6edbfa7d059b60ac54e80dd955ada5.diff

LOG: [ARM MVE] Remove accidental 64-bit vst2/vld2 intrinsics.

ACLE defines no such intrinsic as vst2q_u64, and the MVE instruction
set has no corresponding instruction. But I had accidentally added
them to the fledgling <arm_mve.h> anyway, and if you used them, you'd
get a compiler crash.

Reviewers: dmgreen

Subscribers: kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D69788

Added: 


Modified: 
clang/include/clang/Basic/arm_mve.td

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index a760fdd87b1a..30a76511a3cd 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -18,7 +18,7 @@
 
 include "arm_mve_defs.td"
 
-let params = T.All in
+let params = T.Usual in
 foreach n = [ 2, 4 ] in {
   def "vst"#n#"q": Intrinsic, MultiVector),
  (CustomCodegen<"VST24"> n:$NumVectors,



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 902e845 - [ARM, MVE] Add intrinsics for 'administrative' vector operations.

2019-11-15 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2019-11-15T09:53:43Z
New Revision: 902e84556a51c70d95088aaa059ab9c494ab3516

URL: 
https://github.com/llvm/llvm-project/commit/902e84556a51c70d95088aaa059ab9c494ab3516
DIFF: 
https://github.com/llvm/llvm-project/commit/902e84556a51c70d95088aaa059ab9c494ab3516.diff

LOG: [ARM,MVE] Add intrinsics for 'administrative' vector operations.

This batch of intrinsics includes lots of things that move vector data
around or change its type without really affecting its value very
much. It includes the `vreinterpretq` family (cast one vector type to
another); `vuninitializedq` (create a vector of a given type with
don't-care contents); and `vcreateq` (make a 128-bit vector out of two
`uint64_t` halves).

These are all implemented using completely standard IR that's already
tested in existing LLVM unit tests, so I've just written a clang test
to check the IR is correct, and left it at that.

I've also added some richer infrastructure to the MveEmitter Tablegen
backend, to make it specify the exact integer type of integer
arguments passed to IR construction functions, and wrap those
arguments in a `static_cast` in the autogenerated C++. That was
necessary to prevent an overloading ambiguity when passing the integer
literal `0` to `IRBuilder::CreateInsertElement`, because otherwise, it
could mean either a null pointer `llvm::Value *` or a zero `uint64_t`.

Reviewers: ostannard, MarkMurrayARM, dmgreen

Subscribers: kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D70133

Added: 
clang/test/CodeGen/arm-mve-intrinsics/admin.c

Modified: 
clang/include/clang/Basic/arm_mve.td
clang/include/clang/Basic/arm_mve_defs.td
clang/utils/TableGen/MveEmitter.cpp

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index d2f877dda28e..c8501813d264 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -373,3 +373,44 @@ def vadciq_m: Intrinsic;
 }
+
+foreach desttype = T.All in {
+  // We want a vreinterpretq between every pair of supported vector types
+  // _except_ that there shouldn't be one from a type to itself.
+  //
+  // So this foldl expression implements what you'd write in Python as
+  // [srctype for srctype in T.All if srctype != desttype]
+  let params = !foldl([], T.All, tlist, srctype, !listconcat(tlist,
+  !if(!eq(!cast(desttype),!cast(srctype)),[],[srctype])))
+  in {
+def "vreinterpretq_" # desttype: Intrinsic<
+VecOf, (args Vector:$x), (bitcast $x, VecOf)>;
+  }
+}
+
+let params = T.All in {
+  let pnt = PNT_None in {
+def vcreateq: Intrinsic), $a, 0),
+ $b, 1), Vector)>;
+def vuninitializedq: Intrinsic;
+  }
+
+  // This is the polymorphic form of vuninitializedq, which takes no type
+  // suffix, but takes an _unevaluated_ vector parameter and returns an
+  // uninitialized vector of the same vector type.
+  //
+  // This intrinsic has no _non_-polymorphic form exposed to the user. But each
+  // separately typed version of it still has to have its own clang builtin id,
+  // which can't be called vuninitializedq_u32 or similar because that would
+  // collide with the explicit nullary versions above. So I'm calling them
+  // vuninitializedq_polymorphic_u32 (and so on) for builtin id purposes; that
+  // full name never appears in the header file due to the polymorphicOnly
+  // flag, and the _polymorphic suffix is omitted from the shortened name by
+  // the custom PolymorphicNameType here.
+  let polymorphicOnly = 1, nonEvaluating = 1,
+  pnt = PolymorphicNameType<1, "polymorphic"> in {
+def vuninitializedq_polymorphic: Intrinsic<
+Vector, (args Vector), (undef Vector)>;
+  }
+}

diff  --git a/clang/include/clang/Basic/arm_mve_defs.td 
b/clang/include/clang/Basic/arm_mve_defs.td
index da6928fc137a..911c2c129db9 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -29,6 +29,11 @@ def args;
 // 
-
 // Family of nodes for use in the codegen dag for an intrinsic, corresponding
 // to function calls that return LLVM IR nodes.
+class IRBuilderParam { int index = index_; }
+class IRBuilderAddrParam : IRBuilderParam;
+class IRBuilderIntParam : IRBuilderParam {
+  string type = type_;
+}
 class IRBuilderBase {
   // The prefix of the function call, including an open parenthesis.
   string prefix;
@@ -36,8 +41,7 @@ class IRBuilderBase {
   // Any parameters that have types that have to be treated specially by the
   // Tablegen back end. Generally these will be types other than llvm::Value *,
   // although not all other types need special treatment (e.g. llvm::Type *).
-  list address_params = []; // indices of parameters with type Address
-  list int_constant_params = 

[clang] 9e37892 - [ARM,MVE] Add intrinsics for vector get/set lane.

2019-11-15 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2019-11-15T09:53:58Z
New Revision: 9e37892773c0954a15f84b011223da1e707ab3bf

URL: 
https://github.com/llvm/llvm-project/commit/9e37892773c0954a15f84b011223da1e707ab3bf
DIFF: 
https://github.com/llvm/llvm-project/commit/9e37892773c0954a15f84b011223da1e707ab3bf.diff

LOG: [ARM,MVE] Add intrinsics for vector get/set lane.

This adds the `vgetq_lane` and `vsetq_lane` families, to copy between
a scalar and a specified lane of a vector.

One of the new `vgetq_lane` intrinsics returns a `float16_t`, which
causes a compile error if `%clang_cc1` doesn't get the option
`-fallow-half-arguments-and-returns`. The driver passes that option to
cc1 already, but I've had to edit all the explicit cc1 command lines
in the existing MVE intrinsics tests.

A couple of fixes are included for the code I wrote up front in
MveEmitter to support lane-index immediates (and which nothing has
tested until now): the type was wrong (`uint32_t` instead of `int`)
and the range was off by one.

I've also added a method of bypassing the default promotion to `i32`
that is done by the MveEmitter code generation: it's sensible to
promote short scalars like `i16` to `i32` if they're going to be
passed to custom IR intrinsics representing a machine instruction
operating on GPRs, but not if they're going to be passed to standard
IR operations like `insertelement` which expect the exact type.

Reviewers: ostannard, MarkMurrayARM, dmgreen

Reviewed By: dmgreen

Subscribers: kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D70188

Added: 
clang/test/CodeGen/arm-mve-intrinsics/get-set-lane.c

Modified: 
clang/include/clang/Basic/arm_mve.td
clang/include/clang/Basic/arm_mve_defs.td
clang/test/CodeGen/arm-mve-intrinsics/load-store.c
clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c
clang/test/CodeGen/arm-mve-intrinsics/vadc.c
clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
clang/test/CodeGen/arm-mve-intrinsics/vld24.c
clang/test/CodeGen/arm-mve-intrinsics/vldr.c
clang/test/CodeGen/arm-mve-intrinsics/vminvq.c
clang/test/Sema/arm-mve-immediates.c
clang/utils/TableGen/MveEmitter.cpp

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index c8501813d264..b72a8303ba3e 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -413,4 +413,9 @@ let params = T.All in {
 def vuninitializedq_polymorphic: Intrinsic<
 Vector, (args Vector), (undef Vector)>;
   }
+
+  def vgetq_lane: Intrinsic;
+  def vsetq_lane: Intrinsic:$e, Vector:$v, 
imm_lane:$lane),
+(ielt_var $v, $e, $lane)>;
 }

diff  --git a/clang/include/clang/Basic/arm_mve_defs.td 
b/clang/include/clang/Basic/arm_mve_defs.td
index 911c2c129db9..4a1d6ed92664 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -77,6 +77,8 @@ def xval: IRBuilder<"CreateExtractValue"> {
 def ielt_const: IRBuilder<"CreateInsertElement"> {
   let special_params = [IRBuilderIntParam<2, "uint64_t">];
 }
+def ielt_var: IRBuilder<"CreateInsertElement">;
+def xelt_var: IRBuilder<"CreateExtractElement">;
 def trunc: IRBuilder<"CreateTrunc">;
 def bitcast: IRBuilder<"CreateBitCast">;
 def extend: CGHelperFn<"SignOrZeroExtend"> {
@@ -172,6 +174,10 @@ def CTO_CopyKind: ComplexTypeOp;
 // of _s32 / _f16 / _u8 suffix.
 def Void : Type;
 
+// A wrapper you can put on an intrinsic's argument type to prevent it from
+// being automatically promoted to i32 from a smaller integer type.
+class unpromoted<Type t> : Type { Type underlying_type = t; }
+
 // Primitive types: base class, and an instance for the set of scalar integer
 // and floating types that MVE uses.
 class PrimitiveType: Type {
@@ -285,7 +291,7 @@ def imm_0toNm1 : Immediate>;
 
 // imm_lane has to be the index of a vector lane in the main vector type, i.e
 // it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane)
-def imm_lane : Immediate;
+def imm_lane : Immediate;
 
 // imm_1to32 can be in the range 1 to 32, unconditionally. (e.g. scalar shift
 // intrinsics)

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/get-set-lane.c 
b/clang/test/CodeGen/arm-mve-intrinsics/get-set-lane.c
new file mode 100644
index ..6eaf0f8a71f5
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/get-set-lane.c
@@ -0,0 +1,291 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | 
FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-

[clang] 254b4f2 - [ARM,MVE] Add intrinsics for scalar shifts.

2019-11-19 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2019-11-19T14:47:29Z
New Revision: 254b4f250007ef9f2d2377eb912963beafa39754

URL: 
https://github.com/llvm/llvm-project/commit/254b4f250007ef9f2d2377eb912963beafa39754
DIFF: 
https://github.com/llvm/llvm-project/commit/254b4f250007ef9f2d2377eb912963beafa39754.diff

LOG: [ARM,MVE] Add intrinsics for scalar shifts.

This fills in the small family of MVE intrinsics that have nothing to
do with vectors: they implement bit-shift operations on 32- or 64-bit
values held in one or two general-purpose registers. Most of these
shift operations saturate if shifting left, and round to nearest if
shifting right, although LSLL and ASRL behave like ordinary shifts.

When these instructions take a variable shift count in a register,
they pay attention to its sign, so that (for example) LSLL or UQRSHLL
will shift left if given a positive number but right if given a
negative one. That makes even LSLL and ASRL different enough from
standard LLVM IR shift semantics that I couldn't see any better
alternative than to simply model the whole family as a set of
MVE-specific IR intrinsics.

(The //immediate// forms of LSLL and ASRL, on the other hand, do
behave exactly like a standard IR shift of a 64-bit value. In fact,
those forms don't have ACLE intrinsics defined at all, because you can
just write an ordinary C shift operation if you want one of those.)

The 64-bit shifts have to be instruction-selected in C++, because they
deliver two output values. But the 32-bit ones are simple enough that
I could write a DAG isel pattern directly into each Instruction
record.

Reviewers: ostannard, MarkMurrayARM, dmgreen

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D70319

Added: 


Modified: 
clang/include/clang/Basic/arm_mve.td
clang/include/clang/Basic/arm_mve_defs.td
clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
llvm/lib/Target/ARM/ARMInstrMVE.td
llvm/test/CodeGen/Thumb2/mve-intrinsics/scalar-shifts.ll

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index e227d95f9735..d8d199f464d9 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -388,13 +388,56 @@ defm vstrhq: scatter_offset_both;
 defm vstrwq: scatter_offset_both;
 defm vstrdq: scatter_offset_both;
 
-let params = [Void], pnt = PNT_None in
-def urshrl: Intrinsic $lo, $hi, $shift):$pair,
-   (or (shl (u64 (xval $pair, 1)), (u64 32)),
-   (u64 (xval $pair, 0>;
+// Base class for the scalar shift intrinsics.
+class ScalarShift:
+  Intrinsic 
{
+  let params = [Void];
+  let pnt = PNT_None;
+}
+
+// Subclass that includes the machinery to take a 64-bit input apart
+// into halves, retrieve the two halves of a shifted output as a pair,
+// and glue the pieces of the pair back into an i64 for output.
+class LongScalarShift:
+   ScalarShift;
+
+// The family of saturating/rounding scalar shifts that take an
+// immediate shift count. They come in matched 32- and 64-bit pairs.
+multiclass ScalarSaturatingShiftImm {
+  def "": ScalarShift $value, $sh)>;
+  def l:  LongScalarShift $lo, $hi, $sh)>;
+}
+defm uqshl: ScalarSaturatingShiftImm;
+defm urshr: ScalarSaturatingShiftImm;
+defm sqshl: ScalarSaturatingShiftImm;
+defm srshr: ScalarSaturatingShiftImm;
+
+// The family of saturating/rounding scalar shifts that take a
+// register shift count. They also have 32- and 64-bit forms, but the
+// 64-bit form also has a version that saturates to 48 bits, so the IR
+// intrinsic takes an extra saturation-type operand.
+multiclass ScalarSaturatingShiftReg {
+  def "":  ScalarShift $value, $sh)>;
+  def l:   LongScalarShift $lo, $hi, $sh, 64)>;
+  def l_sat48: LongScalarShift $lo, $hi, $sh, 48)>;
+}
+defm uqrshl: ScalarSaturatingShiftReg;
+defm sqrshr: ScalarSaturatingShiftReg;
+
+// The intrinsics for LSLL and ASRL come in 64-bit versions only, with
+// no saturation count.
+def lsll: LongScalarShift $lo, $hi, $sh)>;
+def asrl: LongScalarShift $lo, $hi, $sh)>;
 
 let params = T.Int32 in {
 def vadcq: Intrinsic:$carry),

diff  --git a/clang/include/clang/Basic/arm_mve_defs.td 
b/clang/include/clang/Basic/arm_mve_defs.td
index a4ba4ed87de3..27cdada02ec4 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -312,7 +312,7 @@ def imm_lane : Immediate;
 
 // imm_1to32 can be in the range 1 to 32, unconditionally. (e.g. scalar shift
 // intrinsics)
-def imm_1to32 : Immediate>;
+def imm_1to32 : Immediate>;
 
 // imm_1248 can be 1, 2, 4 or 8. (e.g. vidupq)
 def imm_1248 : Immediate> {

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c 

[clang] d97b3e3 - [ARM][MVE] Add intrinsics for immediate shifts.

2019-12-09 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2019-12-09T15:44:09Z
New Revision: d97b3e3e65cd77a81b39732af84a1a4229e95091

URL: 
https://github.com/llvm/llvm-project/commit/d97b3e3e65cd77a81b39732af84a1a4229e95091
DIFF: 
https://github.com/llvm/llvm-project/commit/d97b3e3e65cd77a81b39732af84a1a4229e95091.diff

LOG: [ARM][MVE] Add intrinsics for immediate shifts.

Summary:
This adds the family of `vshlq_n` and `vshrq_n` ACLE intrinsics, which
shift every lane of a vector left or right by a compile-time
immediate. They mostly work by expanding to the IR `shl`, `lshr` and
`ashr` operations, with their second operand being a vector splat of
the immediate.

There's a fiddly special case, though. ACLE specifies that the
immediate in `vshrq_n` can take values up to //and including// the bit
size of the vector lane. But LLVM IR thinks that shifting right by the
full size of the lane is UB, and feels free to replace the `lshr` with
an `undef` half way through the optimization pipeline. Hence, to keep
this legal in source code, I have to detect it at codegen time.
Logical (unsigned) right shifts by the element size are handled by
simply emitting the zero vector; arithmetic ones are converted into a
shift of one bit less, which will always give the same output.

In order to do that check, I also had to enhance the tablegen
MveEmitter so that it can cope with converting a builtin function's
operand into a bare integer to pass to a code-generating subfunction.
Previously the only bare integers it knew how to handle were flags
generated from within `arm_mve.td`.

Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard

Reviewed By: MarkMurrayARM

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D71065

Added: 
clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll

Modified: 
clang/include/clang/Basic/arm_mve.td
clang/include/clang/Basic/arm_mve_defs.td
clang/lib/CodeGen/CGBuiltin.cpp
clang/utils/TableGen/MveEmitter.cpp
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMInstrMVE.td

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index 19852702c1bc..cc4b6d9e8234 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -522,6 +522,33 @@ defm vstrhq: scatter_offset_both;
 defm vstrwq: scatter_offset_both;
 defm vstrdq: scatter_offset_both;
 
+multiclass PredicatedImmediateVectorShift<
+Immediate immtype, string predIntrName, list unsignedFlag = []> {
+  foreach predIntr = [IRInt] in {
+def _m_n: Intrinsic;
+def _x_n: Intrinsic;
+  }
+}
+
+let params = T.Int in {
+  def vshlq_n: Intrinsic;
+  defm vshlq: PredicatedImmediateVectorShift;
+
+  let pnt = PNT_NType in {
+def vshrq_n: Intrinsic;
+defm vshrq: PredicatedImmediateVectorShift;
+  }
+}
+
 // Base class for the scalar shift intrinsics.
 class ScalarShift:
   Intrinsic 
{

diff  --git a/clang/include/clang/Basic/arm_mve_defs.td 
b/clang/include/clang/Basic/arm_mve_defs.td
index d837a1d33d00..5aa10f250eda 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -66,6 +66,10 @@ def xor: IRBuilder<"CreateXor">;
 def sub: IRBuilder<"CreateSub">;
 def shl: IRBuilder<"CreateShl">;
 def lshr: IRBuilder<"CreateLShr">;
+def immshr: CGHelperFn<"MVEImmediateShr"> {
+  let special_params = [IRBuilderIntParam<1, "unsigned">,
+IRBuilderIntParam<2, "bool">];
+}
 def fadd: IRBuilder<"CreateFAdd">;
 def fmul: IRBuilder<"CreateFMul">;
 def fsub: IRBuilder<"CreateFSub">;
@@ -308,8 +312,8 @@ def imm_simd_vmvn : Immediate {
 //
 // imm_0toNm1 is the same but with the range offset by 1, i.e. 0 to N-1
 // inclusive.
-def imm_1toN : Immediate>;
-def imm_0toNm1 : Immediate>;
+def imm_1toN : Immediate>;
+def imm_0toNm1 : Immediate>;
 
 // imm_lane has to be the index of a vector lane in the main vector type, i.e
 // it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane)

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b5b0c3e61d47..94d10a1aedf2 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6801,6 +6801,14 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned 
BuiltinID,
   }
 }
 
+template<typename Integer>
+static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
+  llvm::APSInt IntVal;
+  bool IsConst = E->isIntegerConstantExpr(IntVal, Context);
+  assert(IsConst && "Sema should have checked this was a constant");
+  return IntVal.getExtValue();
+}
+
 static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
  llvm::Type *T, bool Unsigned) {
   // Helper function called by Tablegen-constructed ARM MVE bui

[clang] bd0f271 - [ARM][MVE] Add intrinsics for immediate shifts. (reland)

2019-12-11 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2019-12-11T10:10:09Z
New Revision: bd0f271c9e55ab69b45258e4922869099ed18307

URL: 
https://github.com/llvm/llvm-project/commit/bd0f271c9e55ab69b45258e4922869099ed18307
DIFF: 
https://github.com/llvm/llvm-project/commit/bd0f271c9e55ab69b45258e4922869099ed18307.diff

LOG: [ARM][MVE] Add intrinsics for immediate shifts. (reland)

This adds the family of `vshlq_n` and `vshrq_n` ACLE intrinsics, which
shift every lane of a vector left or right by a compile-time
immediate. They mostly work by expanding to the IR `shl`, `lshr` and
`ashr` operations, with their second operand being a vector splat of
the immediate.

There's a fiddly special case, though. ACLE specifies that the
immediate in `vshrq_n` can take values up to //and including// the bit
size of the vector lane. But LLVM IR thinks that shifting right by the
full size of the lane is UB, and feels free to replace the `lshr` with
an `undef` half way through the optimization pipeline. Hence, to keep
this legal in source code, I have to detect it at codegen time.
Logical (unsigned) right shifts by the element size are handled by
simply emitting the zero vector; arithmetic ones are converted into a
shift of one bit less, which will always give the same output.

In order to do that check, I also had to enhance the tablegen
MveEmitter so that it can cope with converting a builtin function's
operand into a bare integer to pass to a code-generating subfunction.
Previously the only bare integers it knew how to handle were flags
generated from within `arm_mve.td`.

Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard

Reviewed By: dmgreen, MarkMurrayARM

Subscribers: echristo, hokein, rdhindsa, kristof.beyls, hiraditya, cfe-commits, 
llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D71065

Added: 
clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll

Modified: 
clang/include/clang/Basic/arm_mve.td
clang/include/clang/Basic/arm_mve_defs.td
clang/lib/CodeGen/CGBuiltin.cpp
clang/utils/TableGen/MveEmitter.cpp
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMInstrMVE.td

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index 618a087d6275..9b6053e57861 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -609,6 +609,33 @@ defm vstrhq: scatter_offset_both;
 defm vstrwq: scatter_offset_both;
 defm vstrdq: scatter_offset_both;
 
+multiclass PredicatedImmediateVectorShift<
+Immediate immtype, string predIntrName, list unsignedFlag = []> {
+  foreach predIntr = [IRInt] in {
+def _m_n: Intrinsic;
+def _x_n: Intrinsic;
+  }
+}
+
+let params = T.Int in {
+  def vshlq_n: Intrinsic;
+  defm vshlq: PredicatedImmediateVectorShift;
+
+  let pnt = PNT_NType in {
+def vshrq_n: Intrinsic;
+defm vshrq: PredicatedImmediateVectorShift;
+  }
+}
+
 // Base class for the scalar shift intrinsics.
 class ScalarShift:
   Intrinsic 
{

diff  --git a/clang/include/clang/Basic/arm_mve_defs.td 
b/clang/include/clang/Basic/arm_mve_defs.td
index 1d72cc45796c..6bc9b35f0fc4 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -66,6 +66,10 @@ def xor: IRBuilder<"CreateXor">;
 def sub: IRBuilder<"CreateSub">;
 def shl: IRBuilder<"CreateShl">;
 def lshr: IRBuilder<"CreateLShr">;
+def immshr: CGHelperFn<"MVEImmediateShr"> {
+  let special_params = [IRBuilderIntParam<1, "unsigned">,
+IRBuilderIntParam<2, "bool">];
+}
 def fadd: IRBuilder<"CreateFAdd">;
 def fmul: IRBuilder<"CreateFMul">;
 def fsub: IRBuilder<"CreateFSub">;
@@ -318,8 +322,8 @@ def imm_simd_vmvn : Immediate {
 //
 // imm_0toNm1 is the same but with the range offset by 1, i.e. 0 to N-1
 // inclusive.
-def imm_1toN : Immediate>;
-def imm_0toNm1 : Immediate>;
+def imm_1toN : Immediate>;
+def imm_0toNm1 : Immediate>;
 
 // imm_lane has to be the index of a vector lane in the main vector type, i.e
 // it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane)

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 68c956a98637..8a53739626e1 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6916,6 +6916,15 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned 
BuiltinID,
   }
 }
 
+template<typename Integer>
+static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) {
+  llvm::APSInt IntVal;
+  bool IsConst = E->isIntegerConstantExpr(IntVal, Context);
+  assert(IsConst && "Sema should have checked this was a constant");
+  (void)IsConst;
+  return IntVal.getExtValue();
+}
+
 static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V,
  llvm::Type *T, bool Unsigned) {
   // He

[clang] d290424 - [ARM][MVE] Factor out an IntrinsicMX multiclass.

2019-12-11 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2019-12-11T12:07:26Z
New Revision: d290424731ede31fd5fd75b929df8fe0adb547c7

URL: 
https://github.com/llvm/llvm-project/commit/d290424731ede31fd5fd75b929df8fe0adb547c7
DIFF: 
https://github.com/llvm/llvm-project/commit/d290424731ede31fd5fd75b929df8fe0adb547c7.diff

LOG: [ARM][MVE] Factor out an IntrinsicMX multiclass.

Summary:
The ACLE intrinsics for MVE contain a lot of pairs of functions with
`_m` and `_x` in the name, wrapping a predicated MVE instruction which
only partially overwrites its output register. They have the common
pattern that the `_m` variant takes an initial argument called
'inactive', of the same type as the return value, supplying the input
value of the output register, so that lanes disabled by the
predication will be taken from that parameter; the `_x` variant omits
that initial argument, and simply sets it to undef.

That common pattern is simple enough to wrap into a multiclass, which
should save a lot of effort in setting up all the rest of the `_x`
variants. In this commit I introduce `multiclass IntrinsicMX` in
`arm_mve_defs.td`, and convert existing generation of m/x pairs to use
it.

This allows me to remove the `PredicatedImmediateVectorShift`
multiclass (from D71065) completely, because the new multiclass makes
it so much simpler that it's not worth bothering to define it at all.

Reviewers: MarkMurrayARM, miyuki

Reviewed By: MarkMurrayARM, miyuki

Subscribers: kristof.beyls, dmgreen, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D71335

Added: 


Modified: 
clang/include/clang/Basic/arm_mve.td
clang/include/clang/Basic/arm_mve_defs.td

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index 9b6053e57861..7ed3c04c58db 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -609,30 +609,21 @@ defm vstrhq: scatter_offset_both;
 defm vstrwq: scatter_offset_both;
 defm vstrdq: scatter_offset_both;
 
-multiclass PredicatedImmediateVectorShift<
-Immediate immtype, string predIntrName, list unsignedFlag = []> {
-  foreach predIntr = [IRInt] in {
-def _m_n: Intrinsic;
-def _x_n: Intrinsic;
-  }
-}
-
 let params = T.Int in {
   def vshlq_n: Intrinsic;
-  defm vshlq: PredicatedImmediateVectorShift;
+  defm vshlq: IntrinsicMX
+   $v, $sh, $pred, $inactive), "_n">;
 
   let pnt = PNT_NType in {
 def vshrq_n: Intrinsic;
-defm vshrq: PredicatedImmediateVectorShift;
+defm vshrq: IntrinsicMX
+ $v, $sh, (unsignedflag Scalar), $pred, $inactive), "_n">;
   }
 }
 
@@ -713,25 +704,17 @@ def vadciq_m: Intrinsic {
   def "" : Intrinsic not_halving, angle, $a, $b)>;
-  def _m : Intrinsic
not_halving, angle, $inactive, $a, $b, $pred)>;
-  def _x : Intrinsic
-   not_halving, angle, (undef Vector), $a, $b, $pred)>;
 }
 
 multiclass VectorComplexMulPred {
   def "" : Intrinsic angle, $a, $b)>;
-  def _m : Intrinsic angle, $inactive, $a, $b,
   $pred)>;
-  def _x : Intrinsic angle, (undef Vector), $a,
-  $b, $pred)>;
 }
 
 multiclass VectorComplexMLAPred {

diff  --git a/clang/include/clang/Basic/arm_mve_defs.td 
b/clang/include/clang/Basic/arm_mve_defs.td
index 6bc9b35f0fc4..3e22e44607ca 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -432,6 +432,30 @@ class NameOverride {
   string basename = basename_;
 }
 
+// A wrapper to define both _m and _x versions of a predicated
+// intrinsic.
+multiclass IntrinsicMX {
+  // The _m variant takes an initial parameter called $inactive, which
+  // provides the input value of the output register, i.e. all the
+  // inactive lanes in the predicated operation take their values from
+  // this.
+  def "_m" # nameSuffix:
+ Intrinsic;
+
+  // The _x variant leaves off that parameter, and simply uses an
+  // undef value of the same type.
+  def "_x" # nameSuffix:
+ Intrinsic {
+// Allow overriding of the polymorphic name type, because
+// sometimes the _m and _x variants polymorph differently
+// (typically because the type of the inactive parameter can be
+// used as a disambiguator if it's present).
+let pnt = pnt_x;
+  }
+}
+
 // 
-
 // Convenience lists of parameter types. 'T' is just a container record, so you
 // can define a typical intrinsic with 'let Params = T.Usual', or similar,



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 25305a9 - [ARM][MVE] Add intrinsics for more immediate shifts.

2019-12-13 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2019-12-13T13:07:39Z
New Revision: 25305a9311d45bc602014b7ee7584e80675aaf59

URL: 
https://github.com/llvm/llvm-project/commit/25305a9311d45bc602014b7ee7584e80675aaf59
DIFF: 
https://github.com/llvm/llvm-project/commit/25305a9311d45bc602014b7ee7584e80675aaf59.diff

LOG: [ARM][MVE] Add intrinsics for more immediate shifts.

Summary:
This fills in the remaining shift operations that take a single vector
input and an immediate shift count: the `vqshl`, `vqshlu`, `vrshr` and
`vshll[bt]` families.

`vshll[bt]` (which shifts each input lane left into a double-width
output lane) is the most interesting one. There are separate MC
instruction ids for shifting by exactly the input lane width and
shifting by less than that, because the instruction encoding is so
completely different for the lane-width special case. So I had to
write two sets of patterns to match based on the immediate shift
count, which involved adding a ComplexPattern matcher to avoid the
general-case pattern accidentally matching the special case too. For
that family I've made sure to add an llc codegen test for both
versions of each instruction.

I'm experimenting with a new strategy for parametrising the isel
patterns for all these instructions: adding extra fields to the
relevant `Instruction` subclass itself, which are ignored by the
Tablegen backends that generate the MC data, but can be retrieved from
each instance of that instruction subclass when it's passed as a
template parameter to the multiclass that generates its isel patterns.
A nice effect of that is that I can fill in those informational fields
using `let` blocks, rather than having to type them out once per
instruction at `defm` time.

(As a result, quite a lot of existing instruction `def`s are
reindented by this patch, so it's clearer to read with whitespace
changes ignored.)

Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard

Reviewed By: MarkMurrayARM

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D71458

Added: 


Modified: 
clang/include/clang/Basic/arm_mve.td
clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
llvm/lib/Target/ARM/ARMInstrInfo.td
llvm/lib/Target/ARM/ARMInstrMVE.td
llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index 8bb567c573a3..9d9c067ade1c 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -606,6 +606,47 @@ let params = T.Int in {
   }
 }
 
+let params = T.Int in {
+  def vqshlq_n: Intrinsic $v, $sh, (unsignedflag Scalar))>;
+  def vqshlq_m_n: Intrinsic
+$v, $sh, (unsignedflag Scalar), $pred, $inactive)>;
+
+  let pnt = PNT_NType in {
+def vrshrq_n: Intrinsic $v, $sh, (unsignedflag Scalar))>;
+defm vrshrq: IntrinsicMX
+  $v, $sh, (unsignedflag Scalar), $pred, $inactive), "_n">;
+  }
+}
+
+let params = T.Signed, pnt = PNT_NType in {
+  def vqshluq_n: Intrinsic $v, $sh)>;
+  def vqshluq_m_n: Intrinsic
+$v, $sh, $pred, $inactive)>;
+}
+
+multiclass vshll_imm {
+  let params = !listconcat(T.Int8, T.Int16), pnt = PNT_NType in {
+def _n: Intrinsic
+$v, $sh, (unsignedflag Scalar), top)>;
+defm "": IntrinsicMX
+$v, $sh, (unsignedflag Scalar), top, $pred, $inactive), "_n">;
+  }
+}
+defm vshllbq : vshll_imm<0>;
+defm vshlltq : vshll_imm<1>;
+
 // Base class for the scalar shift intrinsics.
 class ScalarShift:
   Intrinsic 
{

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
index 200273c03654..2128d0801c6a 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
@@ -720,3 +720,918 @@ uint32x4_t test_vshrq_x_n_u32(uint32x4_t a, mve_pred16_t 
p)
 return vshrq_x_n_u32(a, 6, p);
 #endif /* POLYMORPHIC */
 }
+
+// CHECK-LABEL: @test_vqshlq_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqshl.imm.v16i8(<16 x i8> [[A:%.*]], i32 3, i32 0)
+// CHECK-NEXT:ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vqshlq_n_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+return vqshlq_n(a, 3);
+#else /* POLYMORPHIC */
+return vqshlq_n_s8(a, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqshl.imm.v8i16(<8 x i16> [[A:%.*]], i32 4, i32 0)
+// CHECK-NEXT:ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vqshlq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+return vqshlq_n(a, 4);
+#else /* POLYMORPHIC */
+return vqshlq_n_s16(a,

[clang] d608fee - [ARM, MVE] Fix user-namespace violation in arm_mve.h.

2020-03-12 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-03-12T11:13:50Z
New Revision: d608fee8399a9fa6f2819076131c6ac30cc16eef

URL: 
https://github.com/llvm/llvm-project/commit/d608fee8399a9fa6f2819076131c6ac30cc16eef
DIFF: 
https://github.com/llvm/llvm-project/commit/d608fee8399a9fa6f2819076131c6ac30cc16eef.diff

LOG: [ARM,MVE] Fix user-namespace violation in arm_mve.h.

Summary:
We were generating the declarations of polymorphic intrinsics using
`__attribute__((overloadable))`. But `overloadable` is a valid
identifier for an end user to define as a macro in a C program, and if
they do that before including `<arm_mve.h>`, then we shouldn't cause a
compile error.

Fixed to spell the attribute name `__overloadable__` instead.

Reviewers: miyuki, MarkMurrayARM, ostannard

Reviewed By: miyuki

Subscribers: kristof.beyls, dmgreen, danielkiss, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D75997

Added: 


Modified: 
clang/utils/TableGen/MveEmitter.cpp

Removed: 




diff  --git a/clang/utils/TableGen/MveEmitter.cpp 
b/clang/utils/TableGen/MveEmitter.cpp
index 9a9fe00eed74..f75f5000f0f6 100644
--- a/clang/utils/TableGen/MveEmitter.cpp
+++ b/clang/utils/TableGen/MveEmitter.cpp
@@ -1874,7 +1874,7 @@ void MveEmitter::EmitHeader(raw_ostream &OS) {
 // match your call".
 
 OS << "static __inline__ __attribute__(("
-   << (Polymorphic ? "overloadable, " : "")
+   << (Polymorphic ? "__overloadable__, " : "")
<< "__clang_arm_builtin_alias(__builtin_arm_mve_" << Int.fullName()
<< ")))\n"
<< RetTypeName << FunctionName << "(" << ArgTypesString << ");\n";
@@ -2041,7 +2041,7 @@ void CdeEmitter::EmitHeader(raw_ostream &OS) {
   // Emit the actual declaration. See MveEmitter::EmitHeader for detailed
   // comments
   OS << "static __inline__ __attribute__(("
- << (Polymorphic ? "overloadable, " : "")
+ << (Polymorphic ? "__overloadable__, " : "")
  << "__clang_arm_builtin_alias(__builtin_arm_" << 
Int.builtinExtension()
  << "_" << Int.fullName() << ")))\n"
  << RetTypeName << FunctionName << "(" << ArgTypesString << ");\n";



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 3f8e714 - [ARM,MVE] Add intrinsics and isel for MVE fused multiply-add.

2020-03-12 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-03-12T11:13:50Z
New Revision: 3f8e714e2f9f2dc3367d2f3fc569abfaf28f314c

URL: 
https://github.com/llvm/llvm-project/commit/3f8e714e2f9f2dc3367d2f3fc569abfaf28f314c
DIFF: 
https://github.com/llvm/llvm-project/commit/3f8e714e2f9f2dc3367d2f3fc569abfaf28f314c.diff

LOG: [ARM,MVE] Add intrinsics and isel for MVE fused multiply-add.

Summary:
This adds the ACLE intrinsic family for the VFMA and VFMS
instructions, which perform fused multiply-add on vectors of floats.

I've represented the unpredicated versions in IR using the cross-
platform `@llvm.fma` IR intrinsic. We already had isel rules to
convert one of those into a vector VFMA in the simplest possible way;
but we didn't have rules to detect a negated argument and turn it into
VFMS, or rules to detect a splat argument and turn it into one of the
two vector/scalar forms of the instruction. Now we have all of those.

The predicated form uses a target-specific intrinsic as usual, but
I've stuck to just one, for a predicated FMA. The subtraction and
splat versions are code-generated by passing an fneg or a splat as one
of its operands, the same way as the unpredicated version.

In arm_mve_defs.td, I've had to introduce a tiny extra piece of
infrastructure: a record `id` for use in codegen dags which implements
the identity function. (Just because you can't declare a Tablegen
value of type dag which is //only// a `$varname`: you have to wrap it
in something. Now I can write `(id $varname)` to get the same effect.)

Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, danielkiss, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D75998

Added: 
clang/test/CodeGen/arm-mve-intrinsics/ternary.c
llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll

Modified: 
clang/include/clang/Basic/arm_mve.td
clang/include/clang/Basic/arm_mve_defs.td
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMInstrMVE.td
llvm/test/CodeGen/Thumb2/mve-fmas.ll

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index d9a2035e8a0e..d2203d650301 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -162,6 +162,46 @@ let pnt = PNT_NType in {
 }
 }
 
+multiclass FMA {
+  // FMS instructions are defined in the ArmARM as if they negate the
+  // second multiply input.
+  defvar m2_cg = !if(add, (id $m2), (fneg $m2));
+
+  defvar unpred_cg = (IRIntBase<"fma", [Vector]> $m1, m2_cg, $addend);
+  defvar pred_cg   = (IRInt<"fma_predicated", [Vector, Predicate]>
+  $m1, m2_cg, $addend, $pred);
+
+  def q: Intrinsic;
+
+  def q_m: Intrinsic;
+
+  // Only FMA has the vector/scalar variants, not FMS
+  if add then let pnt = PNT_NType in {
+
+def q_n: Intrinsic:$m2_s),
+ (seq (splat $m2_s):$m2, unpred_cg)>;
+def sq_n: Intrinsic:$addend_s),
+(seq (splat $addend_s):$addend, unpred_cg)>;
+def q_m_n: Intrinsic:$m2_s,
+   Predicate:$pred),
+ (seq (splat $m2_s):$m2, pred_cg)>;
+def sq_m_n: Intrinsic:$addend_s,
+Predicate:$pred),
+  (seq (splat $addend_s):$addend, pred_cg)>;
+  }
+}
+
+let params = T.Float in {
+  defm vfma: FMA<1>;
+  defm vfms: FMA<0>;
+}
+
 let params = !listconcat(T.Int16, T.Int32) in {
   let pnt = PNT_None in {
 def vmvnq_n: Intrinsic {
 }
 def zip: CGHelperFn<"VectorZip">;
 
+// Trivial 'codegen' function that just returns its argument. Useful
+// for wrapping up a variable name like $foo into a thing you can pass
+// around as type 'dag'.
+def id: IRBuilderBase {
+  // All the other cases of IRBuilderBase use 'prefix' to specify a function
+  // call, including the open parenthesis. MveEmitter puts the closing paren on
+  // the end. So if we _just_ specify an open paren with no function name
+  // before it, then the generated C++ code will simply wrap the input value in
+  // parentheses, returning it unchanged.
+  let prefix = "(";
+}
+
 // Helper for making boolean flags in IR
 def i1: IRBuilderBase {
   let prefix = "llvm::ConstantInt::get(Builder.getInt1Ty(), ";

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/ternary.c 
b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c
new file mode 100644
index ..ab1cb14c3aed
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/ternary.c
@@ -0,0 +1,261 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -S -emit-llvm -o - %s | opt -S -sroa | FileCheck %s
+// RUN: %clang_cc1 -triple th

[clang] 28c5d97 - [ARM, MVE] Add intrinsics and isel for MVE integer VMLA.

2020-03-18 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-03-18T10:55:04Z
New Revision: 28c5d97beec7a2582869f992f54a178c805e2e51

URL: 
https://github.com/llvm/llvm-project/commit/28c5d97beec7a2582869f992f54a178c805e2e51
DIFF: 
https://github.com/llvm/llvm-project/commit/28c5d97beec7a2582869f992f54a178c805e2e51.diff

LOG: [ARM,MVE] Add intrinsics and isel for MVE integer VMLA.

Summary:
These instructions compute multiply+add in integers, with one of the
operands being a splat of a scalar. (VMLA and VMLAS differ in whether
the splat operand is a multiplier or the addend.)

I've represented these in IR using existing standard IR operations for
the unpredicated forms. The predicated forms are done with target-
specific intrinsics, as usual.

When operating on n-bit vector lanes, only the bottom n bits of the
i32 scalar operand are used. So we have to tell that to isel lowering,
to allow it to remove a pointless sign- or zero-extension instruction
on that input register. That's done in `PerformIntrinsicCombine`, but
first I had to enable `PerformIntrinsicCombine` for MVE targets
(previously all the intrinsics it handled were for NEON), and make it
a method of `ARMTargetLowering` so that it can get at
`SimplifyDemandedBits`.

Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, danielkiss, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D76122

Added: 


Modified: 
clang/include/clang/Basic/arm_mve.td
clang/test/CodeGen/arm-mve-intrinsics/ternary.c
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMISelLowering.h
llvm/lib/Target/ARM/ARMInstrMVE.td
llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index d2203d650301..5498a144c9e2 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -202,6 +202,24 @@ let params = T.Float in {
   defm vfms: FMA<0>;
 }
 
+let params = T.Int, pnt = PNT_NType in {
+  def vmlaq_n: Intrinsic<
+Vector, (args Vector:$addend, Vector:$m1, unpromoted:$m2_s),
+(add (mul $m1, (splat $m2_s)), $addend)>;
+  def vmlasq_n: Intrinsic<
+Vector, (args Vector:$m1, Vector:$m2, unpromoted:$addend_s),
+(add (mul $m1, $m2), (splat $addend_s))>;
+
+  def vmlaq_m_n: Intrinsic<
+Vector, (args Vector:$addend, Vector:$m1, Scalar:$m2_s, Predicate:$pred),
+(IRInt<"vmla_n_predicated", [Vector, Predicate]>
+$addend, $m1, $m2_s, $pred)>;
+  def vmlasq_m_n: Intrinsic<
+Vector, (args Vector:$m1, Vector:$m2, Scalar:$addend_s, Predicate:$pred),
+(IRInt<"vmlas_n_predicated", [Vector, Predicate]>
+$m1, $m2, $addend_s, $pred)>;
+}
+
 let params = !listconcat(T.Int16, T.Int32) in {
   let pnt = PNT_None in {
 def vmvnq_n: Intrinsic undef, i8 
[[C:%.*]], i32 0
+// CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <16 x i8> 
[[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
+// CHECK-NEXT:[[TMP0:%.*]] = mul <16 x i8> [[B:%.*]], [[DOTSPLAT]]
+// CHECK-NEXT:[[TMP1:%.*]] = add <16 x i8> [[TMP0]], [[A:%.*]]
+// CHECK-NEXT:ret <16 x i8> [[TMP1]]
+//
+int8x16_t test_vmlaq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
+#ifdef POLYMORPHIC
+  return vmlaq(a, b, c);
+#else  /* POLYMORPHIC */
+  return vmlaq_n_s8(a, b, c);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmlaq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 
[[C:%.*]], i32 0
+// CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <8 x i16> 
[[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
+// CHECK-NEXT:[[TMP0:%.*]] = mul <8 x i16> [[B:%.*]], [[DOTSPLAT]]
+// CHECK-NEXT:[[TMP1:%.*]] = add <8 x i16> [[TMP0]], [[A:%.*]]
+// CHECK-NEXT:ret <8 x i16> [[TMP1]]
+//
+int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
+#ifdef POLYMORPHIC
+  return vmlaq(a, b, c);
+#else  /* POLYMORPHIC */
+  return vmlaq_n_s16(a, b, c);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmlaq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 
[[C:%.*]], i32 0
+// CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <4 x i32> 
[[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
+// CHECK-NEXT:[[TMP0:%.*]] = mul <4 x i32> [[B:%.*]], [[DOTSPLAT]]
+// CHECK-NEXT:[[TMP1:%.*]] = add <4 x i32> [[TMP0]], [[A:%.*]]
+// CHECK-NEXT:ret <4 x i32> [[TMP1]]
+//
+int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
+#ifdef POLYMORPHIC
+  return vmlaq(a, b, c);
+#else  /* POLYMORPHIC */
+  return vmlaq_n_s32(a, b, c);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmlaq_n_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <16 x

[clang] 928776d - [ARM,MVE] Add intrinsics for the VQDMLAH family.

2020-03-18 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-03-18T10:55:04Z
New Revision: 928776de9233be1487c1b56f90c90ed25b25e355

URL: 
https://github.com/llvm/llvm-project/commit/928776de9233be1487c1b56f90c90ed25b25e355
DIFF: 
https://github.com/llvm/llvm-project/commit/928776de9233be1487c1b56f90c90ed25b25e355.diff

LOG: [ARM,MVE] Add intrinsics for the VQDMLAH family.

Summary:
These are complicated integer multiply+add instructions with extra
saturation, taking the high half of a double-width product, and
optional rounding. There's no sensible way to represent that in
standard IR, so I've converted the clang builtins directly to
target-specific intrinsics.

Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard

Reviewed By: miyuki

Subscribers: kristof.beyls, hiraditya, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D76123

Added: 


Modified: 
clang/include/clang/Basic/arm_mve.td
clang/test/CodeGen/arm-mve-intrinsics/ternary.c
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMInstrMVE.td
llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index 5498a144c9e2..ae6ce4837d76 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -220,6 +220,29 @@ let params = T.Int, pnt = PNT_NType in {
 $m1, $m2, $addend_s, $pred)>;
 }
 
+multiclass VQDMLA {
+  def hq_n: Intrinsic<
+Vector, (args Vector:$addend, Vector:$m1, Scalar:$m2_s),
+(IRInt $addend, $m1, $m2_s)>;
+  def shq_n: Intrinsic<
+Vector, (args Vector:$m1, Vector:$m2, Scalar:$addend_s),
+(IRInt $m1, $m2, $addend_s)>;
+
+  def hq_m_n: Intrinsic<
+Vector, (args Vector:$addend, Vector:$m1, Scalar:$m2_s, Predicate:$pred),
+(IRInt
+ $addend, $m1, $m2_s, $pred)>;
+  def shq_m_n: Intrinsic<
+Vector, (args Vector:$m1, Vector:$m2, Scalar:$addend_s, Predicate:$pred),
+(IRInt
+ $m1, $m2, $addend_s, $pred)>;
+}
+
+let params = T.Signed, pnt = PNT_NType in {
+  defm vqdmla: VQDMLA;
+  defm vqrdmla: VQDMLA;
+}
+
 let params = !listconcat(T.Int16, T.Int32) in {
   let pnt = PNT_None in {
 def vmvnq_n: Intrinsic 
@llvm.arm.mve.vqdmlah.v16i8(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 
[[TMP0]])
+// CHECK-NEXT:ret <16 x i8> [[TMP1]]
+//
+int8x16_t test_vqdmlahq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
+#ifdef POLYMORPHIC
+  return vqdmlahq(a, b, c);
+#else  /* POLYMORPHIC */
+  return vqdmlahq_n_s8(a, b, c);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqdmlahq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-NEXT:[[TMP1:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlah.v8i16(<8 
x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 [[TMP0]])
+// CHECK-NEXT:ret <8 x i16> [[TMP1]]
+//
+int16x8_t test_vqdmlahq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
+#ifdef POLYMORPHIC
+  return vqdmlahq(a, b, c);
+#else  /* POLYMORPHIC */
+  return vqdmlahq_n_s16(a, b, c);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqdmlahq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlah.v4i32(<4 
x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 [[C:%.*]])
+// CHECK-NEXT:ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vqdmlahq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
+#ifdef POLYMORPHIC
+  return vqdmlahq(a, b, c);
+#else  /* POLYMORPHIC */
+  return vqdmlahq_n_s32(a, b, c);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrdmlahq_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = zext i8 [[C:%.*]] to i32
+// CHECK-NEXT:[[TMP1:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqrdmlah.v16i8(<16 x i8> [[B:%.*]], <16 x i8> [[A:%.*]], i32 
[[TMP0]])
+// CHECK-NEXT:ret <16 x i8> [[TMP1]]
+//
+int8x16_t test_vqrdmlahq_n_s8(int8x16_t a, int8x16_t b, int8_t c) {
+#ifdef POLYMORPHIC
+  return vqrdmlahq(a, b, c);
+#else  /* POLYMORPHIC */
+  return vqrdmlahq_n_s8(a, b, c);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrdmlahq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = zext i16 [[C:%.*]] to i32
+// CHECK-NEXT:[[TMP1:%.*]] = call <8 x i16> 
@llvm.arm.mve.vqrdmlah.v8i16(<8 x i16> [[B:%.*]], <8 x i16> [[A:%.*]], i32 
[[TMP0]])
+// CHECK-NEXT:ret <8 x i16> [[TMP1]]
+//
+int16x8_t test_vqrdmlahq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
+#ifdef POLYMORPHIC
+  return vqrdmlahq(a, b, c);
+#else  /* POLYMORPHIC */
+  return vqrdmlahq_n_s16(a, b, c);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqrdmlahq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> 
@llvm.arm.mve.vqrdmlah.v4i32(<4 x i32> [[B:%.*]], <4 x i32> [[A:%.*]], i32 
[[C:%.*]])
+// CHECK-NEXT:ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vqrdmlahq_n_s32(int32x4_t a, int32x4_t b, 

[clang] e13d153 - [ARM,MVE] Add intrinsics for the VQDMLAD family.

2020-03-18 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-03-18T17:11:22Z
New Revision: e13d153c1b59a11185bf6a1aa8853c9e14d556a5

URL: 
https://github.com/llvm/llvm-project/commit/e13d153c1b59a11185bf6a1aa8853c9e14d556a5
DIFF: 
https://github.com/llvm/llvm-project/commit/e13d153c1b59a11185bf6a1aa8853c9e14d556a5.diff

LOG: [ARM,MVE] Add intrinsics for the VQDMLAD family.

Summary:
This is another set of instructions too complicated to be sensibly
expressed in IR by anything short of a target-specific intrinsic.
Given input vectors a,b, the instruction generates intermediate values
2*(a[0]*b[0]+a[1]*b[1]), 2*(a[2]*b[2]+a[3]*b[3]), etc; takes the high
half of each double-width value, and overwrites half the lanes in the
output vector c, which you therefore have to provide the input value
of. Optionally you can swap the elements of b so that they are things
like a[0]*b[1]+a[1]*b[0]; optionally you can round to nearest when
taking the high half; and optionally you can take the difference
rather than sum of the two products. Finally, saturation is applied
when converting back to a single-width vector lane.

Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard

Reviewed By: miyuki

Subscribers: kristof.beyls, hiraditya, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D76359

Added: 
clang/test/CodeGen/arm-mve-intrinsics/vqdmlad.c
llvm/test/CodeGen/Thumb2/mve-intrinsics/vqdmlad.ll

Modified: 
clang/include/clang/Basic/arm_mve.td
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMInstrMVE.td

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index ae6ce4837d76..45e45899de5f 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -243,6 +243,26 @@ let params = T.Signed, pnt = PNT_NType in {
   defm vqrdmla: VQDMLA;
 }
 
+multiclass VQDMLAD {
+  def "": Intrinsic $a, $b, $c,
+(u32 exchange), (u32 round), (u32 subtract))>;
+  def _m: Intrinsic $a, $b, $c,
+(u32 exchange), (u32 round), (u32 subtract), $pred)>;
+}
+let params = T.Signed in {
+  defm vqdmladhq:   VQDMLAD<0, 0, 0>;
+  defm vqdmladhxq:  VQDMLAD<1, 0, 0>;
+  defm vqdmlsdhq:   VQDMLAD<0, 0, 1>;
+  defm vqdmlsdhxq:  VQDMLAD<1, 0, 1>;
+  defm vqrdmladhq:  VQDMLAD<0, 1, 0>;
+  defm vqrdmladhxq: VQDMLAD<1, 1, 0>;
+  defm vqrdmlsdhq:  VQDMLAD<0, 1, 1>;
+  defm vqrdmlsdhxq: VQDMLAD<1, 1, 1>;
+}
+
 let params = !listconcat(T.Int16, T.Int32) in {
   let pnt = PNT_None in {
 def vmvnq_n: Intrinsic
+
+// CHECK-LABEL: @test_vqdmladhq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqdmlad.v16i8(<16 x i8> [[INACTIVE:%.*]], <16 x i8> [[A:%.*]], 
<16 x i8> [[B:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT:ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vqdmladhq_s8(int8x16_t inactive, int8x16_t a, int8x16_t b) {
+#ifdef POLYMORPHIC
+  return vqdmladhq(inactive, a, b);
+#else  /* POLYMORPHIC */
+  return vqdmladhq_s8(inactive, a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqdmladhq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 
x i16> [[INACTIVE:%.*]], <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, i32 
0, i32 0)
+// CHECK-NEXT:ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vqdmladhq_s16(int16x8_t inactive, int16x8_t a, int16x8_t b) {
+#ifdef POLYMORPHIC
+  return vqdmladhq(inactive, a, b);
+#else  /* POLYMORPHIC */
+  return vqdmladhq_s16(inactive, a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqdmladhq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqdmlad.v4i32(<4 
x i32> [[INACTIVE:%.*]], <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, i32 
0, i32 0)
+// CHECK-NEXT:ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vqdmladhq_s32(int32x4_t inactive, int32x4_t a, int32x4_t b) {
+#ifdef POLYMORPHIC
+  return vqdmladhq(inactive, a, b);
+#else  /* POLYMORPHIC */
+  return vqdmladhq_s32(inactive, a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqdmladhxq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> 
@llvm.arm.mve.vqdmlad.v16i8(<16 x i8> [[INACTIVE:%.*]], <16 x i8> [[A:%.*]], 
<16 x i8> [[B:%.*]], i32 1, i32 0, i32 0)
+// CHECK-NEXT:ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vqdmladhxq_s8(int8x16_t inactive, int8x16_t a, int8x16_t b) {
+#ifdef POLYMORPHIC
+  return vqdmladhxq(inactive, a, b);
+#else  /* POLYMORPHIC */
+  return vqdmladhxq_s8(inactive, a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqdmladhxq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqdmlad.v8i16(<8 
x i16> [[INACTIVE:%.*]], <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, i32 
0, i32 0)
+// CHECK-NEXT:ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vqdmladhxq_s16(int16x8_t inactive, int16x8

[clang] cf7e98e - [ARM,MVE] Add intrinsics for vdupq.

2020-02-03 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-02-03T11:20:06Z
New Revision: cf7e98e6f7805f4e2693a6dbbd12c10fe06fde70

URL: 
https://github.com/llvm/llvm-project/commit/cf7e98e6f7805f4e2693a6dbbd12c10fe06fde70
DIFF: 
https://github.com/llvm/llvm-project/commit/cf7e98e6f7805f4e2693a6dbbd12c10fe06fde70.diff

LOG: [ARM,MVE] Add intrinsics for vdupq.

Summary:
The unpredicated case of this is trivial: the clang codegen just makes
a vector splat of the input, and LLVM isel is already prepared to
handle that. For the predicated version, I've generated a `select`
between the same vector splat and the `inactive` input parameter, and
added new Tablegen isel rules to match that pattern into a predicated
`MVE_VDUP` instruction.

Reviewers: dmgreen, MarkMurrayARM, miyuki, ostannard

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D73356

Added: 
clang/test/CodeGen/arm-mve-intrinsics/dup.c
llvm/test/CodeGen/Thumb2/mve-intrinsics/dup.ll

Modified: 
clang/include/clang/Basic/arm_mve.td
llvm/lib/Target/ARM/ARMInstrMVE.td

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index ee0ce25bf516..e9ad26a4e88e 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -138,6 +138,15 @@ let params = !listconcat(T.Int16, T.Int32) in {
 (select $pred, (or $v, (splat (Scalar $imm))), $v)>;
 }
 
+let params = T.Usual in {
+  let pnt = PNT_None in
+def vdupq_n: Intrinsic:$s), (splat $s)>;
+
+  defm vdupq: IntrinsicMX<
+  Vector, (args unpromoted:$s, Predicate:$pred),
+  (select $pred, (splat $s), $inactive), 1, "_n", PNT_NType, PNT_None>;
+}
+
 // The bitcasting below is not overcomplicating the IR because while
 // Vector and UVector may be different vector types at the C level i.e.
 // vectors of same size signed/unsigned ints. Once they're lowered

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/dup.c 
b/clang/test/CodeGen/arm-mve-intrinsics/dup.c
new file mode 100644
index ..3bcec9d2549e
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/dup.c
@@ -0,0 +1,351 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | 
FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa 
-early-cse | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vdupq_n_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
+// CHECK-NEXT:[[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
+// CHECK-NEXT:[[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
+// CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <8 x half> undef, 
half [[TMP1]], i32 0
+// CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <8 x half> 
[[DOTSPLATINSERT]], <8 x half> undef, <8 x i32> zeroinitializer
+// CHECK-NEXT:ret <8 x half> [[DOTSPLAT]]
+//
+float16x8_t test_vdupq_n_f16(float16_t a)
+{
+return vdupq_n_f16(a);
+}
+
+// CHECK-LABEL: @test_vdupq_n_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <4 x float> undef, 
float [[A:%.*]], i32 0
+// CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <4 x float> 
[[DOTSPLATINSERT]], <4 x float> undef, <4 x i32> zeroinitializer
+// CHECK-NEXT:ret <4 x float> [[DOTSPLAT]]
+//
+float32x4_t test_vdupq_n_f32(float32_t a)
+{
+return vdupq_n_f32(a);
+}
+
+// CHECK-LABEL: @test_vdupq_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 
[[A:%.*]], i32 0
+// CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <16 x i8> 
[[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
+// CHECK-NEXT:ret <16 x i8> [[DOTSPLAT]]
+//
+int8x16_t test_vdupq_n_s8(int8_t a)
+{
+return vdupq_n_s8(a);
+}
+
+// CHECK-LABEL: @test_vdupq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 
[[A:%.*]], i32 0
+// CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <8 x i16> 
[[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
+// CHECK-NEXT:ret <8 x i16> [[DOTSPLAT]]
+//
+int16x8_t test_vdupq_n_s16(int16_t a)
+{
+return vdupq_n_s16(a);
+}
+
+// CHECK-LABEL: @test_vdupq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 
[[A:%.*]], i32 0
+// CHECK-NEXT:[[DOTSPLAT:%.*]] = shufflevector <4 x i32> 
[[DOTSPLATINSERT]], <4 x i32> u

[clang] 90dc78b - [ARM, MVE] Add intrinsics for abs, neg and not operations.

2020-02-18 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-02-18T09:34:50Z
New Revision: 90dc78bc62784faaa55afb0320cf3c2187d80ac6

URL: 
https://github.com/llvm/llvm-project/commit/90dc78bc62784faaa55afb0320cf3c2187d80ac6
DIFF: 
https://github.com/llvm/llvm-project/commit/90dc78bc62784faaa55afb0320cf3c2187d80ac6.diff

LOG: [ARM,MVE] Add intrinsics for abs, neg and not operations.

Summary:
This commit adds the unpredicated intrinsics for the unary operations
vabsq (absolute value), vnegq (arithmetic negation), vmvnq (bitwise
complement), vqabsq and vqnegq (saturating versions of abs and neg for
signed integers, in the sense that they give INT_MAX if an input lane
is INT_MIN).

This is done entirely in clang: all of these operations have existing
isel patterns and existing tests for them on the LLVM side, so I've
just made clang emit the same IR that those patterns already match.

Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard

Reviewed By: MarkMurrayARM

Subscribers: kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D74331

Added: 
clang/test/CodeGen/arm-mve-intrinsics/absneg.c

Modified: 
clang/include/clang/Basic/arm_mve.td
clang/include/clang/Basic/arm_mve_defs.td
clang/lib/CodeGen/CGBuiltin.cpp

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index 5cd88b07ebaa..dfc0ee87bb2f 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -234,6 +234,33 @@ let params = T.Unsigned in {
   defm vdwdup: vxdup_mc<(? u32:$limit, imm_1248:$step), (? $limit, $step)>;
 }
 
+let params = T.Int in {
+  def vmvnq: Intrinsic;
+}
+let params = T.Signed in {
+  def vnegq: Intrinsic;
+  def vabsq: Intrinsic;
+  def vqnegq: Intrinsic;
+  def vqabsq: Intrinsic;
+}
+let params = T.Float in {
+  def vnegq_f: Intrinsic,
+   NameOverride<"vnegq">;
+  def vabsq_f: Intrinsic $a)>, NameOverride<"vabsq">;
+}
+
 // The bitcasting below is not overcomplicating the IR because while
 // Vector and UVector may be different vector types at the C level i.e.
 // vectors of same size signed/unsigned ints. Once they're lowered

diff  --git a/clang/include/clang/Basic/arm_mve_defs.td 
b/clang/include/clang/Basic/arm_mve_defs.td
index d4e821589cfd..2d080f2653aa 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -98,6 +98,9 @@ def extend: CGHelperFn<"SignOrZeroExtend"> {
   let special_params = [IRBuilderIntParam<2, "bool">];
 }
 def zeroinit: IRFunction<"llvm::Constant::getNullValue">;
+def int_min: CGHelperFn<"ARMMVEConstantSplat<1,0>">;
+def int_max: CGHelperFn<"ARMMVEConstantSplat<0,1>">;
+def uint_max: CGHelperFn<"ARMMVEConstantSplat<1,1>">;
 def undef: IRFunction<"UndefValue::get">;
 def icmp_eq: IRBuilder<"CreateICmpEQ">;
 def icmp_ne: IRBuilder<"CreateICmpNE">;
@@ -117,6 +120,7 @@ def fcmp_lt: IRBuilder<"CreateFCmpOLT">;
 def fcmp_le: IRBuilder<"CreateFCmpOLE">;
 def splat: CGHelperFn<"ARMMVEVectorSplat">;
 def select: IRBuilder<"CreateSelect">;
+def fneg: IRBuilder<"CreateFNeg">;
 
 // A node that makes an Address out of a pointer-typed Value, by
 // providing an alignment as the second argument.

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 5e411bc7aa93..0081740f7280 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7056,6 +7056,19 @@ static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy 
&Builder,
   }
 }
 
+template<bool HighBit, bool OtherBits>
+static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
+  // MVE-specific helper function to make a vector splat of a constant such as
+  // UINT_MAX or INT_MIN, in which all bits below the highest one are equal.
+  llvm::Type *T = VT->getVectorElementType();
+  unsigned LaneBits = T->getPrimitiveSizeInBits();
+  uint32_t Value = HighBit << (LaneBits - 1);
+  if (OtherBits)
+Value |= (1UL << (LaneBits - 1)) - 1;
+  llvm::Value *Lane = llvm::ConstantInt::get(T, Value);
+  return ARMMVEVectorSplat(Builder, Lane);
+}
+
 Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
   const CallExpr *E,
   ReturnValueSlot ReturnValue,

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c 
b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c
new file mode 100644
index ..db4253f3590b
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c
@@ -0,0 +1,338 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp

[clang] b6236e9 - [ARM, MVE] Add the vrev16q, vrev32q, vrev64q family.

2020-02-18 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-02-18T09:34:50Z
New Revision: b6236e94799e43fad1f024e84ed56a85d9a3623f

URL: 
https://github.com/llvm/llvm-project/commit/b6236e94799e43fad1f024e84ed56a85d9a3623f
DIFF: 
https://github.com/llvm/llvm-project/commit/b6236e94799e43fad1f024e84ed56a85d9a3623f.diff

LOG: [ARM,MVE] Add the vrev16q, vrev32q, vrev64q family.

Summary:
These intrinsics just reorder the lanes of a vector, so the natural IR
representation is as a shufflevector operation. Existing LLVM codegen
already recognizes those particular shufflevectors and generates the
MVE VREV instruction.

This commit adds the unpredicated forms only.

Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard

Reviewed By: dmgreen

Subscribers: kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D74334

Added: 
clang/test/CodeGen/arm-mve-intrinsics/vrev.c

Modified: 
clang/include/clang/Basic/arm_mve.td
clang/include/clang/Basic/arm_mve_defs.td
clang/lib/CodeGen/CGBuiltin.cpp

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index a2bf7afad41e..126c2e2214ae 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -1180,6 +1180,13 @@ defm vrmlsldavh : MVEBinaryVectorHoriz64R;
 defm vrmlsldavh : MVEBinaryVectorHoriz64R;
 }
 
+let params = T.All8 in
+def vrev16q : Intrinsic;
+let params = !listconcat(T.All8, T.All16) in
+def vrev32q : Intrinsic;
+let params = T.Usual in
+def vrev64q : Intrinsic;
+
 foreach desttype = T.All in {
   // We want a vreinterpretq between every pair of supported vector types
   // _except_ that there shouldn't be one from a type to itself.

diff  --git a/clang/include/clang/Basic/arm_mve_defs.td 
b/clang/include/clang/Basic/arm_mve_defs.td
index c2e4a4232c23..9f245d0436c4 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -125,6 +125,9 @@ def sitofp: IRBuilder<"CreateSIToFP">;
 def uitofp: IRBuilder<"CreateUIToFP">;
 def fptosi: IRBuilder<"CreateFPToSI">;
 def fptoui: IRBuilder<"CreateFPToUI">;
+def vrev: CGHelperFn<"ARMMVEVectorElementReverse"> {
+  let special_params = [IRBuilderIntParam<1, "unsigned">];
+}
 
 // A node that makes an Address out of a pointer-typed Value, by
 // providing an alignment as the second argument.

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0081740f7280..788f14b37123 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7069,6 +7069,21 @@ static llvm::Value *ARMMVEConstantSplat(CGBuilderTy 
&Builder, llvm::Type *VT) {
   return ARMMVEVectorSplat(Builder, Lane);
 }
 
+static llvm::Value *ARMMVEVectorElementReverse(CGBuilderTy &Builder,
+   llvm::Value *V,
+   unsigned ReverseWidth) {
+  // MVE-specific helper function which reverses the elements of a
+  // vector within every (ReverseWidth)-bit collection of lanes.
+  SmallVector<uint32_t, 16> Indices;
+  unsigned LaneSize = V->getType()->getScalarSizeInBits();
+  unsigned Elements = 128 / LaneSize;
+  unsigned Mask = ReverseWidth / LaneSize - 1;
+  for (unsigned i = 0; i < Elements; i++)
+Indices.push_back(i ^ Mask);
+  return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()),
+ Indices);
+}
+
 Value *CodeGenFunction::EmitARMMVEBuiltinExpr(unsigned BuiltinID,
   const CallExpr *E,
   ReturnValueSlot ReturnValue,

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vrev.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vrev.c
new file mode 100644
index ..384d736d2a6d
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vrev.c
@@ -0,0 +1,215 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa 
-early-cse | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vrev16q_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> 
undef, <16 x i32> 
+// CHECK-NEXT:ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vrev16q_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+return vrev16q(a);
+#else /* POLYMORPHIC */
+return vrev16q_s8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrev16q_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = s

[clang] c8b3196 - [ARM, MVE] Add intrinsics for FP rounding operations.

2020-02-18 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-02-18T09:34:50Z
New Revision: c8b3196e54308b0113d2a0888d13ccc92e3b7ccc

URL: 
https://github.com/llvm/llvm-project/commit/c8b3196e54308b0113d2a0888d13ccc92e3b7ccc
DIFF: 
https://github.com/llvm/llvm-project/commit/c8b3196e54308b0113d2a0888d13ccc92e3b7ccc.diff

LOG: [ARM,MVE] Add intrinsics for FP rounding operations.

Summary:
This adds the unpredicated forms of six different MVE intrinsics which
all round a vector of floating-point numbers to integer values,
leaving them still in FP format, differing only in rounding mode and
exception settings.

Five of them map to existing target-independent intrinsics in LLVM IR,
such as @llvm.trunc and @llvm.rint. The sixth, mapping to the `vrintn`
instruction, is done by inventing a target-specific intrinsic.

(`vrintn` behaves the same as `vrintx` in terms of the output value:
the side effects on the FPSCR flags are the only difference between
the two. But ACLE specifies separate user-callable intrinsics for the
two, so the side effects matter enough to make sure we generate the
right one of the two instructions in each case.)

Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard

Reviewed By: miyuki

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D74333

Added: 
clang/test/CodeGen/arm-mve-intrinsics/vrnd.c
llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll

Modified: 
clang/include/clang/Basic/arm_mve.td
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMInstrMVE.td

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index 5b20f23c75c7..a2bf7afad41e 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -417,6 +417,21 @@ defm : float_int_conversions;
 defm : float_int_conversions;
 defm : float_int_conversions;
 
+let params = T.Float in {
+  def vrndq: Intrinsic $a)>;
+  def vrndmq: Intrinsic $a)>;
+  def vrndpq: Intrinsic $a)>;
+  def vrndaq: Intrinsic $a)>;
+  def vrndxq: Intrinsic $a)>;
+  def vrndnq: Intrinsic $a)>;
+}
+
 multiclass compare_with_pred {
   // Make the predicated and unpredicated versions of a single comparison.

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c
new file mode 100644
index ..a324c36ed838
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c
@@ -0,0 +1,173 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 
-disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa 
-early-cse | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vrndaq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <8 x half> @llvm.round.v8f16(<8 x half> 
[[A:%.*]])
+// CHECK-NEXT:ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vrndaq_f16(float16x8_t a)
+{
+#ifdef POLYMORPHIC
+return vrndaq(a);
+#else /* POLYMORPHIC */
+return vrndaq_f16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndaq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x 
float> [[A:%.*]])
+// CHECK-NEXT:ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vrndaq_f32(float32x4_t a)
+{
+#ifdef POLYMORPHIC
+return vrndaq(a);
+#else /* POLYMORPHIC */
+return vrndaq_f32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndmq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <8 x half> @llvm.floor.v8f16(<8 x half> 
[[A:%.*]])
+// CHECK-NEXT:ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vrndmq_f16(float16x8_t a)
+{
+#ifdef POLYMORPHIC
+return vrndmq(a);
+#else /* POLYMORPHIC */
+return vrndmq_f16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndmq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x 
float> [[A:%.*]])
+// CHECK-NEXT:ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vrndmq_f32(float32x4_t a)
+{
+#ifdef POLYMORPHIC
+return vrndmq(a);
+#else /* POLYMORPHIC */
+return vrndmq_f32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndpq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <8 x half> @llvm.ceil.v8f16(<8 x half> 
[[A:%.*]])
+// CHECK-NEXT:ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vrndpq_f16(float16x8_t a)
+{
+#ifdef POLYMORPHIC
+return vrndpq(a);
+#else /* POLYMORPHIC */
+return vrndpq_f16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CH

[clang] df3ed6c - [ARM, MVE] Add intrinsics for int <-> float conversion.

2020-02-18 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-02-18T09:34:50Z
New Revision: df3ed6c0fe31094941e4cd814cdf924b63993c4e

URL: 
https://github.com/llvm/llvm-project/commit/df3ed6c0fe31094941e4cd814cdf924b63993c4e
DIFF: 
https://github.com/llvm/llvm-project/commit/df3ed6c0fe31094941e4cd814cdf924b63993c4e.diff

LOG: [ARM,MVE] Add intrinsics for int <-> float conversion.

Summary:
This adds the unpredicated versions of the family of vcvtq intrinsics
that convert between a vector of floats and a vector of the same size
of integer. These are represented in IR using the standard fptosi,
fptoui, sitofp and uitofp operations, which existing LLVM codegen
already handles.

Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard

Reviewed By: MarkMurrayARM

Subscribers: kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D74332

Added: 


Modified: 
clang/include/clang/Basic/arm_mve.td
clang/include/clang/Basic/arm_mve_defs.td
clang/test/CodeGen/arm-mve-intrinsics/vcvt.c

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index dfc0ee87bb2f..5b20f23c75c7 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -400,6 +400,23 @@ foreach half = [ "b", "t" ] in {
   } // params = [f32], pnt = PNT_None
 } // loop over half = "b", "t"
 
+multiclass float_int_conversions {
+  defvar FVector = VecOf;
+  defvar IVector = VecOf;
+
+  let params = [IScalar], pnt = PNT_2Type in
+def : Intrinsic,
+  NameOverride<"vcvtq_" # FScalar>;
+  let params = [FScalar], pnt = PNT_None in
+def : Intrinsic,
+  NameOverride<"vcvtq_" # IScalar>;
+}
+
+defm : float_int_conversions;
+defm : float_int_conversions;
+defm : float_int_conversions;
+defm : float_int_conversions;
+
 multiclass compare_with_pred {
   // Make the predicated and unpredicated versions of a single comparison.

diff  --git a/clang/include/clang/Basic/arm_mve_defs.td 
b/clang/include/clang/Basic/arm_mve_defs.td
index 2d080f2653aa..c2e4a4232c23 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -121,6 +121,10 @@ def fcmp_le: IRBuilder<"CreateFCmpOLE">;
 def splat: CGHelperFn<"ARMMVEVectorSplat">;
 def select: IRBuilder<"CreateSelect">;
 def fneg: IRBuilder<"CreateFNeg">;
+def sitofp: IRBuilder<"CreateSIToFP">;
+def uitofp: IRBuilder<"CreateUIToFP">;
+def fptosi: IRBuilder<"CreateFPToSI">;
+def fptoui: IRBuilder<"CreateFPToUI">;
 
 // A node that makes an Address out of a pointer-typed Value, by
 // providing an alignment as the second argument.

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
index a1c99de62ebb..3220100d7b89 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
@@ -4,6 +4,102 @@
 
 #include <arm_mve.h>
 
+// CHECK-LABEL: @test_vcvtq_f16_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = sitofp <8 x i16> [[A:%.*]] to <8 x half>
+// CHECK-NEXT:ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vcvtq_f16_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+return vcvtq(a);
+#else /* POLYMORPHIC */
+return vcvtq_f16_s16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_f16_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = uitofp <8 x i16> [[A:%.*]] to <8 x half>
+// CHECK-NEXT:ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vcvtq_f16_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+return vcvtq(a);
+#else /* POLYMORPHIC */
+return vcvtq_f16_u16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_f32_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vcvtq_f32_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+return vcvtq(a);
+#else /* POLYMORPHIC */
+return vcvtq_f32_s32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_f32_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vcvtq_f32_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+return vcvtq(a);
+#else /* POLYMORPHIC */
+return vcvtq_f32_u32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_s16_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = fptosi <8 x half> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vcvtq_s16_f16(float16x8_t a)
+{
+return vcvtq_s16_f16(a);
+}
+
+// CHECK-LABEL: @test_vcvtq_s32_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = fptosi <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vcvtq_s32_f32(float32x4_t a)
+{
+return vcvtq_s32_f32(a);
+}
+
+// CHECK-LABEL: @test_vcvtq_

[clang] 68b49f7 - [ARM,MVE] Add intrinsics vclzq and vclsq.

2020-02-18 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-02-18T09:34:50Z
New Revision: 68b49f7ef49eec068b7ddcf86c868e2a193e64e1

URL: 
https://github.com/llvm/llvm-project/commit/68b49f7ef49eec068b7ddcf86c868e2a193e64e1
DIFF: 
https://github.com/llvm/llvm-project/commit/68b49f7ef49eec068b7ddcf86c868e2a193e64e1.diff

LOG: [ARM,MVE] Add intrinsics vclzq and vclsq.

Summary:
vclzq maps nicely to the existing target-independent @llvm.ctlz IR
intrinsic. But vclsq ('count leading sign bits') has no corresponding
target-independent intrinsic, so I've made up @llvm.arm.mve.vcls.

This commit adds the unpredicated forms only.

Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard

Reviewed By: miyuki

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D74335

Added: 
clang/test/CodeGen/arm-mve-intrinsics/vclz.c
llvm/test/CodeGen/Thumb2/mve-intrinsics/vcls.ll

Modified: 
clang/include/clang/Basic/arm_mve.td
clang/include/clang/Basic/arm_mve_defs.td
llvm/include/llvm/IR/IntrinsicsARM.td
llvm/lib/Target/ARM/ARMInstrMVE.td

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index 126c2e2214ae..21801b4d448e 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -237,8 +237,11 @@ let params = T.Unsigned in {
 let params = T.Int in {
   def vmvnq: Intrinsic;
+  def vclzq: Intrinsic $a, (i1 0))>;
 }
 let params = T.Signed in {
+  def vclsq: Intrinsic $a)>;
   def vnegq: Intrinsic;
   def vabsq: Intrinsic {
   let special_params = [IRBuilderIntParam<1, "unsigned">];
 }
 
+// Helper for making boolean flags in IR
+def i1: IRBuilderBase {
+  let prefix = "llvm::ConstantInt::get(Builder.getInt1Ty(), ";
+  let special_params = [IRBuilderIntParam<0, "bool">];
+}
+
 // A node that makes an Address out of a pointer-typed Value, by
 // providing an alignment as the second argument.
 def address;

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vclz.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vclz.c
new file mode 100644
index ..7a2ebe0a627a
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vclz.c
@@ -0,0 +1,132 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve 
-mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S 
-emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve 
-mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone 
-DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vclzq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> 
[[A:%.*]], i1 false)
+// CHECK-NEXT:ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vclzq_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+return vclzq(a);
+#else /* POLYMORPHIC */
+return vclzq_s8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> 
[[A:%.*]], i1 false)
+// CHECK-NEXT:ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vclzq_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+return vclzq(a);
+#else /* POLYMORPHIC */
+return vclzq_s16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> 
[[A:%.*]], i1 false)
+// CHECK-NEXT:ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vclzq_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+return vclzq(a);
+#else /* POLYMORPHIC */
+return vclzq_s32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> 
[[A:%.*]], i1 false)
+// CHECK-NEXT:ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vclzq_u8(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+return vclzq(a);
+#else /* POLYMORPHIC */
+return vclzq_u8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> 
[[A:%.*]], i1 false)
+// CHECK-NEXT:ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vclzq_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+return vclzq(a);
+#else /* POLYMORPHIC */
+return vclzq_u16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vclzq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> 
[[A:%.*]], i1 false)
+// CHECK-NEXT:ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vclzq_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+return vclzq(a);
+#else /* POLYMORPHIC */
+ret

[clang] 5e97940 - [ARM, MVE] Add the vmovlbq, vmovltq intrinsic family.

2020-02-18 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-02-18T09:34:50Z
New Revision: 5e97940cd27961a0b872ff551fc98135507288b3

URL: 
https://github.com/llvm/llvm-project/commit/5e97940cd27961a0b872ff551fc98135507288b3
DIFF: 
https://github.com/llvm/llvm-project/commit/5e97940cd27961a0b872ff551fc98135507288b3.diff

LOG: [ARM,MVE] Add the vmovlbq,vmovltq intrinsic family.

Summary:
These intrinsics take a vector of 2n elements, and return a vector of
n wider elements obtained by sign- or zero-extending every other
element of the input vector. They're represented in IR as a
shufflevector that extracts the odd or even elements of the input,
followed by a sext or zext.

Existing LLVM codegen already matches this pattern and generates the
VMOVLB instruction (which widens the even-index input lanes). But no
existing isel rule was generating VMOVLT, so I've added some. However,
the new rules currently only work in little-endian MVE, because the
pattern they expect from isel lowering includes a bitconvert which
doesn't have the right semantics in big-endian.

The output of one existing codegen test is improved by those new
rules.

This commit adds the unpredicated forms only.

Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D74336

Added: 
clang/test/CodeGen/arm-mve-intrinsics/vmovl.c
llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll

Modified: 
clang/include/clang/Basic/arm_mve.td
clang/include/clang/Basic/arm_mve_defs.td
clang/lib/CodeGen/CGBuiltin.cpp
llvm/lib/Target/ARM/ARMInstrMVE.td
llvm/test/CodeGen/Thumb2/mve-shuffleext.ll

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index 21801b4d448e..55ddfc22aa3d 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -420,6 +420,13 @@ defm : float_int_conversions;
 defm : float_int_conversions;
 defm : float_int_conversions;
 
+let params = [s8, u8, s16, u16] in {
+  def vmovlbq: Intrinsic;
+  def vmovltq: Intrinsic;
+}
+
 let params = T.Float in {
   def vrndq: Intrinsic $a)>;

diff  --git a/clang/include/clang/Basic/arm_mve_defs.td 
b/clang/include/clang/Basic/arm_mve_defs.td
index 9e5b7b32c511..f6816cdf45c9 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -128,6 +128,9 @@ def fptoui: IRBuilder<"CreateFPToUI">;
 def vrev: CGHelperFn<"ARMMVEVectorElementReverse"> {
   let special_params = [IRBuilderIntParam<1, "unsigned">];
 }
+def unzip: CGHelperFn<"VectorUnzip"> {
+  let special_params = [IRBuilderIntParam<1, "bool">];
+}
 
 // Helper for making boolean flags in IR
 def i1: IRBuilderBase {

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 788f14b37123..b30a79a0bf10 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7056,6 +7056,17 @@ static llvm::Value *ARMMVEVectorReinterpret(CGBuilderTy 
&Builder,
   }
 }
 
+static llvm::Value *VectorUnzip(CGBuilderTy &Builder, llvm::Value *V, bool 
Odd) {
+  // Make a shufflevector that extracts every other element of a vector (evens
+  // or odds, as desired).
+  SmallVector Indices;
+  unsigned InputElements = V->getType()->getVectorNumElements();
+  for (unsigned i = 0; i < InputElements; i += 2)
+Indices.push_back(i + Odd);
+  return Builder.CreateShuffleVector(V, llvm::UndefValue::get(V->getType()),
+ Indices);
+}
+
 template
 static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
   // MVE-specific helper function to make a vector splat of a constant such as

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c
new file mode 100644
index ..0b8ef596faed
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c
@@ -0,0 +1,126 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve 
-mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S 
-emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve 
-mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone 
-DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vmovlbq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = shufflevector <16 x i8> [[A:%.*]], <16 x i8> 
undef, <8 x i32> 
+// CHECK-NEXT:[[TMP1:%.*]] = sext <8 x i8> [[TMP0]] to <8 x i16>
+// CHECK-NEXT:ret <8 x i16> [[TMP1]]
+//
+int16x8_t test_vmovlbq_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+return vmovlbq(a);
+#else /* P

[clang] c32af44 - [ARM, MVE] Add the vmovnbq, vmovntq intrinsic family.

2020-02-18 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-02-18T09:34:50Z
New Revision: c32af4447f79f5e7f246917fe1c3f58b2f6fc2a6

URL: 
https://github.com/llvm/llvm-project/commit/c32af4447f79f5e7f246917fe1c3f58b2f6fc2a6
DIFF: 
https://github.com/llvm/llvm-project/commit/c32af4447f79f5e7f246917fe1c3f58b2f6fc2a6.diff

LOG: [ARM,MVE] Add the vmovnbq,vmovntq intrinsic family.

Summary:
These are in some sense the inverse of vmovl[bt]q: they take a vector
of n wide elements and truncate each to half its width. So they only
write half a vector's worth of output data, and therefore they also
take an 'inactive' parameter to provide the other half of the data in
the output vector. So vmovnb overwrites the even lanes of 'inactive'
with the narrowed values from the main input, and vmovnt overwrites
the odd lanes.

LLVM had existing codegen which generates these MVE instructions in
response to IR that takes two vectors of wide elements, or two vectors
of narrow ones. But in this case, we have one vector of each. So my
clang codegen strategy is to narrow the input vector of wide elements
by simply reinterpreting it as the output type, and then we have two
narrow vectors and can represent the operation as a vector shuffle
that interleaves lanes from both of them.

Even so, not all the cases I needed ended up being selected as a
single MVE instruction, so I've added a couple more patterns that spot
combinations of the 'MVEvmovn' and 'ARMvrev32' SDNodes which can be
generated as a VMOVN instruction with operands swapped.

This commit adds the unpredicated forms only.

Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard

Reviewed By: dmgreen

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D74337

Added: 
clang/test/CodeGen/arm-mve-intrinsics/vmovn.c
llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovn.ll

Modified: 
clang/include/clang/Basic/arm_mve.td
clang/include/clang/Basic/arm_mve_defs.td
clang/lib/CodeGen/CGBuiltin.cpp
clang/utils/TableGen/MveEmitter.cpp
llvm/lib/Target/ARM/ARMInstrMVE.td

Removed: 




diff  --git a/clang/include/clang/Basic/arm_mve.td 
b/clang/include/clang/Basic/arm_mve.td
index 55ddfc22aa3d..3a6b63199e39 100644
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -427,6 +427,14 @@ let params = [s8, u8, s16, u16] in {
 (extend (unzip $a, 1), DblVector, (unsignedflag Scalar))>;
 }
 
+let params = [s16, u16, s32, u32] in {
+  def vmovnbq: Intrinsic;
+  def vmovntq: Intrinsic;
+}
+
 let params = T.Float in {
   def vrndq: Intrinsic $a)>;

diff  --git a/clang/include/clang/Basic/arm_mve_defs.td 
b/clang/include/clang/Basic/arm_mve_defs.td
index f6816cdf45c9..7f8f717e8163 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -131,6 +131,7 @@ def vrev: CGHelperFn<"ARMMVEVectorElementReverse"> {
 def unzip: CGHelperFn<"VectorUnzip"> {
   let special_params = [IRBuilderIntParam<1, "bool">];
 }
+def zip: CGHelperFn<"VectorZip">;
 
 // Helper for making boolean flags in IR
 def i1: IRBuilderBase {
@@ -187,6 +188,10 @@ def seq;
 // and 0 for a signed (or floating) one.
 def unsignedflag;
 
+// 'bitsize' also takes a scalar type, and expands into an integer
+// constant giving its size in bits.
+def bitsize;
+
 // If you put CustomCodegen<"foo"> in an intrinsic's codegen field, it
 // indicates that the IR generation for that intrinsic is done by handwritten
 // C++ and not autogenerated at all. The effect in the MVE builtin codegen

diff  --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b30a79a0bf10..401c4d8e0539 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7067,6 +7067,19 @@ static llvm::Value *VectorUnzip(CGBuilderTy &Builder, 
llvm::Value *V, bool Odd)
  Indices);
 }
 
+static llvm::Value *VectorZip(CGBuilderTy &Builder, llvm::Value *V0,
+  llvm::Value *V1) {
+  // Make a shufflevector that interleaves two vectors element by element.
+  assert(V0->getType() == V1->getType() && "Can't zip different vector types");
+  SmallVector Indices;
+  unsigned InputElements = V0->getType()->getVectorNumElements();
+  for (unsigned i = 0; i < InputElements; i++) {
+Indices.push_back(i);
+Indices.push_back(i + InputElements);
+  }
+  return Builder.CreateShuffleVector(V0, V1, Indices);
+}
+
 template
 static llvm::Value *ARMMVEConstantSplat(CGBuilderTy &Builder, llvm::Type *VT) {
   // MVE-specific helper function to make a vector splat of a constant such as

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c 
b/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c
new file mode 100644
index ..5d157de0feb8
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c
@@ -0,0 +1,199 @@
+// NOTE: Assert

[clang] 98ea4b3 - [ARM,MVE] Make the MVE intrinsics work in C++!

2020-01-23 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-01-23T14:10:27Z
New Revision: 98ea4b30c2c4e122defce039e29f7023aa2663e7

URL: 
https://github.com/llvm/llvm-project/commit/98ea4b30c2c4e122defce039e29f7023aa2663e7
DIFF: 
https://github.com/llvm/llvm-project/commit/98ea4b30c2c4e122defce039e29f7023aa2663e7.diff

LOG: [ARM,MVE] Make the MVE intrinsics work in C++!

Summary:
Apparently nobody has tried this in months of development. It turns
out that `FunctionDecl::getBuiltinID` will never consider a function
to be a builtin if it is in C++ and not extern "C". So none of the
function declarations in <arm_mve.h> are recognized as builtins when
clang is compiling in C++ mode: it just emits calls to them as
ordinary functions, which then turn out not to exist at link time.

The trivial fix is to wrap most of arm_mve.h in an extern "C".

Added a test in clang/test/CodeGen/arm-mve-intrinsics which checks
basic functioning of the MVE header file in C++ mode. I've filled it
with copies of existing test functions from other files in that
directory, including a few moderately tricky cases of overloading (in
particular one that relies on the strict-polymorphism attribute added
in D72518).

(I considered making //every// test in that directory compile in both
C and C++ mode and check the code generation was identical. But I
think that would increase testing time by more than the value it adds,
and also update_cc_test_checks gets confused when the output function
name varies between RUN lines.)

Reviewers: LukeGeeson, MarkMurrayARM, miyuki, dmgreen

Reviewed By: MarkMurrayARM

Subscribers: kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D73268

Added: 
clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp

Modified: 
clang/utils/TableGen/MveEmitter.cpp

Removed: 




diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp 
b/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp
new file mode 100644
index ..47df53839d15
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/cplusplus.cpp
@@ -0,0 +1,160 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 
-disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature 
+mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O3 
-disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | 
FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @_Z16test_vbicq_n_s1617__simd128_int16_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = and <8 x i16> [[A:%.*]], 
+// CHECK-NEXT:ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vbicq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+return vbicq(a, 0xd500);
+#else /* POLYMORPHIC */
+return vbicq_n_s16(a, 0xd500);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @_Z16test_vbicq_n_u3218__simd128_uint32_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = and <4 x i32> [[A:%.*]], 
+// CHECK-NEXT:ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vbicq_n_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+return vbicq(a, 0x2000);
+#else /* POLYMORPHIC */
+return vbicq_n_u32(a, 0x2000);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @_Z16test_vorrq_n_s3217__simd128_int32_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = or <4 x i32> [[A:%.*]], 
+// CHECK-NEXT:ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vorrq_n_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+return vorrq(a, 0x1);
+#else /* POLYMORPHIC */
+return vorrq_n_s32(a, 0x1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @_Z16test_vorrq_n_u1618__simd128_uint16_t(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = or <8 x i16> [[A:%.*]], 
+// CHECK-NEXT:ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vorrq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+return vorrq(a, 0xf000);
+#else /* POLYMORPHIC */
+return vorrq_n_u16(a, 0xf000);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @_Z16test_vcmpeqq_f1619__simd128_float16_tS_(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = fcmp oeq <8 x half> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:[[TMP1:%.*]] = tail call i32 @llvm.arm.mve.pred.v2i.v8i1(<8 
x i1> [[TMP0]]), !range !3
+// CHECK-NEXT:[[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+// CHECK-NEXT:ret i16 [[TMP2]]
+//
+mve_pred16_t test_vcmpeqq_f16(float16x8_t a, float16x8_t b)
+{
+#ifdef POLYMORPHIC
+return vcmpeqq(a, b);
+#else /* POLYMORPHIC */
+return vcmpeqq_f16(a, b);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @_Z18test_vcmpeqq_n_f1619__simd128_float16_tDh(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = bitcast float [[B_COERCE:%.*]] to i32
+// CHECK-NEXT:[[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
+

[clang] fe0d1b6 - [Clang] Warn about 'z' printf modifier in old MSVC.

2020-01-28 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2020-01-28T09:04:45Z
New Revision: fe0d1b6a8ac5048b8007e5e7cc2aeb4e3291bda0

URL: 
https://github.com/llvm/llvm-project/commit/fe0d1b6a8ac5048b8007e5e7cc2aeb4e3291bda0
DIFF: 
https://github.com/llvm/llvm-project/commit/fe0d1b6a8ac5048b8007e5e7cc2aeb4e3291bda0.diff

LOG: [Clang] Warn about 'z' printf modifier in old MSVC.

Summary:
The 'z' length modifier, signalling that an integer format specifier
takes a `size_t` sized integer, is only supported by the C library of
MSVC 2015 and later. Earlier versions don't recognize the 'z' at all,
and respond to `printf("%zu", x)` by just printing "zu".

So, if the MS compatibility version is set to a value earlier than
MSVC2015, it's useful to warn about 'z' modifiers in printf format
strings we check.

Reviewers: aaron.ballman, lebedev.ri, rnk, majnemer, zturner

Reviewed By: aaron.ballman

Subscribers: amccarth, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D73457

Added: 


Modified: 
clang/lib/AST/FormatString.cpp
clang/test/Sema/format-strings-ms.c

Removed: 




diff  --git a/clang/lib/AST/FormatString.cpp b/clang/lib/AST/FormatString.cpp
index fcc0b3b11e25..2ca8fee67bf0 100644
--- a/clang/lib/AST/FormatString.cpp
+++ b/clang/lib/AST/FormatString.cpp
@@ -748,6 +748,15 @@ bool FormatSpecifier::hasValidLengthModifier(const 
TargetInfo &Target,
 case LengthModifier::AsIntMax:
 case LengthModifier::AsSizeT:
 case LengthModifier::AsPtrDiff:
+  if (LM.getKind() == LengthModifier::AsSizeT &&
+  Target.getTriple().isOSMSVCRT() &&
+  !LO.isCompatibleWithMSVC(LangOptions::MSVC2015)) {
+// The standard libraries before MSVC2015 didn't support the 'z' length
+// modifier for size_t. So if the MS compatibility version is less than
+// that, reject.
+return false;
+  }
+
   switch (CS.getKind()) {
 case ConversionSpecifier::dArg:
 case ConversionSpecifier::DArg:

diff  --git a/clang/test/Sema/format-strings-ms.c 
b/clang/test/Sema/format-strings-ms.c
index 56a349051d42..c4d3e5664db0 100644
--- a/clang/test/Sema/format-strings-ms.c
+++ b/clang/test/Sema/format-strings-ms.c
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -fms-compatibility 
-triple=i386-pc-win32 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -fms-compatibility 
-triple=i386-pc-win32 -fms-compatibility-version=18 %s
+// RUN: %clang_cc1 -fsyntax-only -verify -fms-compatibility 
-triple=i386-pc-win32 -fms-compatibility-version=19 -DSIZE_T_OK %s
 // RUN: %clang_cc1 -fsyntax-only -verify -fms-compatibility 
-triple=i386-pc-win32 -Wformat-non-iso -DNON_ISO_WARNING %s
 
 int printf(const char *format, ...) __attribute__((format(printf, 1, 2)));
@@ -85,4 +87,11 @@ void z_test(void *p) {
   scanf("%Z", p); // expected-warning{{invalid conversion specifier 'Z'}}
 }
 
+void size_t_test(size_t s) {
+  printf("%zu", s);
+#ifndef SIZE_T_OK
+  // expected-warning@-2 {{length modifier 'z' results in undefined behavior 
or no effect with 'u' conversion specifier}}
+#endif
+}
+
 #endif



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 5fba4c4 - [AArch64] Don't #define __ARM_FP when there's no FPU.

2023-03-13 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2023-03-13T16:43:25Z
New Revision: 5fba4c4d08bdb38d0df2fd43afa4bec4f3809b66

URL: 
https://github.com/llvm/llvm-project/commit/5fba4c4d08bdb38d0df2fd43afa4bec4f3809b66
DIFF: 
https://github.com/llvm/llvm-project/commit/5fba4c4d08bdb38d0df2fd43afa4bec4f3809b66.diff

LOG: [AArch64] Don't #define __ARM_FP when there's no FPU.

On some R-profile CPUs, leaving out the FPU is an option. Clang will
accept `-march=armv8-r+nofp`, but it's currently not possible to find
out via the preprocessor whether it's in that mode (e.g. to change or
disable inline asm statements in your code).

The __ARM_FP macro, which has a bit set for each size of floating
point number supported by the hardware, is the natural thing to test.
But Clang was defining it unconditionally on AArch64. Now it checks
for FP support before defining it at all.

Reviewed By: tmatheson, DavidSpickett

Differential Revision: https://reviews.llvm.org/D145781

Added: 


Modified: 
clang/lib/Basic/Targets/AArch64.cpp
clang/lib/Basic/Targets/AArch64.h
clang/test/Preprocessor/aarch64-target-features.c

Removed: 




diff  --git a/clang/lib/Basic/Targets/AArch64.cpp 
b/clang/lib/Basic/Targets/AArch64.cpp
index 7f331004348f1..b274dd2672268 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -373,7 +373,8 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions 
&Opts,
   Builder.defineMacro("__ARM_ALIGN_MAX_STACK_PWR", "4");
 
   // 0xe implies support for half, single and double precision operations.
-  Builder.defineMacro("__ARM_FP", "0xE");
+  if (FPU & FPUMode)
+Builder.defineMacro("__ARM_FP", "0xE");
 
   // PCS specifies this for SysV variants, which is all we support. Other ABIs
   // may choose __ARM_FP16_FORMAT_ALTERNATIVE.
@@ -709,6 +710,8 @@ void 
AArch64TargetInfo::setFeatureEnabled(llvm::StringMap &Features,
 bool AArch64TargetInfo::handleTargetFeatures(std::vector 
&Features,
  DiagnosticsEngine &Diags) {
   for (const auto &Feature : Features) {
+if (Feature == "-fp-armv8")
+  HasNoFP = true;
 if (Feature == "-neon")
   HasNoNeon = true;
 if (Feature == "-sve")
@@ -937,6 +940,11 @@ bool 
AArch64TargetInfo::handleTargetFeatures(std::vector &Features,
   setDataLayout();
   setArchFeatures();
 
+  if (HasNoFP) {
+FPU &= ~FPUMode;
+FPU &= ~NeonMode;
+FPU &= ~SveMode;
+  }
   if (HasNoNeon) {
 FPU &= ~NeonMode;
 FPU &= ~SveMode;

diff  --git a/clang/lib/Basic/Targets/AArch64.h 
b/clang/lib/Basic/Targets/AArch64.h
index ee2c179d7c3df..f6e12176a77ec 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -26,7 +26,11 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public 
TargetInfo {
   static const TargetInfo::GCCRegAlias GCCRegAliases[];
   static const char *const GCCRegNames[];
 
-  enum FPUModeEnum { FPUMode, NeonMode = (1 << 0), SveMode = (1 << 1) };
+  enum FPUModeEnum {
+FPUMode = (1 << 0),
+NeonMode = (1 << 1),
+SveMode = (1 << 2),
+  };
 
   unsigned FPU = FPUMode;
   bool HasCRC = false;
@@ -73,6 +77,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public 
TargetInfo {
   bool HasWFxT = false;
   bool HasJSCVT = false;
   bool HasFCMA = false;
+  bool HasNoFP = false;
   bool HasNoNeon = false;
   bool HasNoSVE = false;
   bool HasFMV = true;

diff  --git a/clang/test/Preprocessor/aarch64-target-features.c 
b/clang/test/Preprocessor/aarch64-target-features.c
index 2a2f7efe34130..09f464466a56c 100644
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -341,6 +341,10 @@
 // CHECK-MARCH-2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" 
"-fp-armv8" "-target-feature" "-neon" "-target-feature" "-crc" 
"-target-feature" "-crypto"
 // CHECK-MARCH-3: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" 
"-neon"
 
+// While we're checking +nofp, also make sure it stops defining __ARM_FP
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-r+nofp -x c -E -dM 
%s -o - | FileCheck -check-prefix=CHECK-NOFP %s
+// CHECK-NOFP-NOT: #define __ARM_FP{{ }}
+
 // Check +sm4:
 //
 // RUN: %clang -target aarch64 -march=armv8.2a+sm4 -### -c %s 2>&1 | FileCheck 
-check-prefix=CHECK-SM4 %s



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 20d6dee - -fsanitize=function: fix alignment fault on Arm targets.

2023-05-25 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2023-05-25T09:22:45+01:00
New Revision: 20d6dee40d507d467d3312d5e7dfdf088f106d31

URL: 
https://github.com/llvm/llvm-project/commit/20d6dee40d507d467d3312d5e7dfdf088f106d31
DIFF: 
https://github.com/llvm/llvm-project/commit/20d6dee40d507d467d3312d5e7dfdf088f106d31.diff

LOG: -fsanitize=function: fix alignment fault on Arm targets.

Function pointers are checked by loading a prefix structure from just
before the function's entry point. However, on Arm, the function
pointer is not always exactly equal to the address of the entry point,
because Thumb function pointers have the low bit set to tell the BX
instruction to enter them in Thumb state. So the generated code loads
from an odd address and suffers an alignment fault.

Fixed by clearing the low bit of the function pointer before
subtracting 8.

Differential Revision: https://reviews.llvm.org/D151308

Added: 


Modified: 
clang/lib/CodeGen/CGExpr.cpp
clang/test/CodeGen/ubsan-function.cpp

Removed: 




diff  --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 2c219d6e8411..c074732df2a7 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -5364,8 +5364,30 @@ RValue CodeGenFunction::EmitCall(QualType CalleeType, 
const CGCallee &OrigCallee
 
   llvm::Value *CalleePtr = Callee.getFunctionPointer();
 
+  // On 32-bit Arm, the low bit of a function pointer indicates whether
+  // it's using the Arm or Thumb instruction set. The actual first
+  // instruction lives at the same address either way, so we must clear
+  // that low bit before using the function address to find the prefix
+  // structure.
+  //
+  // This applies to both Arm and Thumb target triples, because
+  // either one could be used in an interworking context where it
+  // might be passed function pointers of both types.
+  llvm::Value *AlignedCalleePtr;
+  if (CGM.getTriple().isARM() || CGM.getTriple().isThumb()) {
+llvm::Value *CalleeAddress =
+Builder.CreatePtrToInt(CalleePtr, IntPtrTy);
+llvm::Value *Mask = llvm::ConstantInt::get(IntPtrTy, ~1);
+llvm::Value *AlignedCalleeAddress =
+Builder.CreateAnd(CalleeAddress, Mask);
+AlignedCalleePtr =
+Builder.CreateIntToPtr(AlignedCalleeAddress, CalleePtr->getType());
+  } else {
+AlignedCalleePtr = CalleePtr;
+  }
+
   llvm::Value *CalleePrefixStruct = Builder.CreateBitCast(
-  CalleePtr, llvm::PointerType::getUnqual(PrefixStructTy));
+  AlignedCalleePtr, llvm::PointerType::getUnqual(PrefixStructTy));
   llvm::Value *CalleeSigPtr =
   Builder.CreateConstGEP2_32(PrefixStructTy, CalleePrefixStruct, -1, 
0);
   llvm::Value *CalleeSig =

diff  --git a/clang/test/CodeGen/ubsan-function.cpp 
b/clang/test/CodeGen/ubsan-function.cpp
index fc9f60f5b205..ba55ee021cc9 100644
--- a/clang/test/CodeGen/ubsan-function.cpp
+++ b/clang/test/CodeGen/ubsan-function.cpp
@@ -1,11 +1,15 @@
-// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s 
-fsanitize=function -fno-sanitize-recover=all | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm -o - %s 
-fsanitize=function -fno-sanitize-recover=all | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64_be-linux-gnu -emit-llvm -o - %s 
-fsanitize=function -fno-sanitize-recover=all | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm -o - %s 
-fsanitize=function -fno-sanitize-recover=all | FileCheck %s 
--check-prefixes=CHECK,64
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -emit-llvm -o - %s 
-fsanitize=function -fno-sanitize-recover=all | FileCheck %s 
--check-prefixes=CHECK,64
+// RUN: %clang_cc1 -triple aarch64_be-linux-gnu -emit-llvm -o - %s 
-fsanitize=function -fno-sanitize-recover=all | FileCheck %s 
--check-prefixes=CHECK,64
+// RUN: %clang_cc1 -triple arm-none-eabi -emit-llvm -o - %s 
-fsanitize=function -fno-sanitize-recover=all | FileCheck %s 
--check-prefixes=CHECK,ARM,32
 
 // CHECK: define{{.*}} void @_Z3funv() #0 !func_sanitize ![[FUNCSAN:.*]] {
 void fun() {}
 
 // CHECK-LABEL: define{{.*}} void @_Z6callerPFvvE(ptr noundef %f)
+// ARM:   ptrtoint ptr {{.*}} to i32, !nosanitize !5
+// ARM:   and i32 {{.*}}, -2, !nosanitize !5
+// ARM:   inttoptr i32 {{.*}} to ptr, !nosanitize !5
 // CHECK: getelementptr <{ i32, i32 }>, ptr {{.*}}, i32 -1, i32 0, !nosanitize
 // CHECK: load i32, ptr {{.*}}, align {{.*}}, !nosanitize
 // CHECK: icmp eq i32 {{.*}}, -1056584962, !nosanitize
@@ -16,7 +20,8 @@ void fun() {}
 // CHECK: icmp eq i32 {{.*}}, -1522505972, !nosanitize
 // CHECK: br i1 {{.*}}, label %[[LABEL3:.*]], label %[[LABEL2:[^,]*]], 
{{.*}}!nosanitize
 // CHECK: [[LABEL2]]:
-// CHECK: call void @__ubsan_handle_function_type_mismatch_abort(ptr @[[#]], 
i64 %[[#]]) #[[#]], !nosanitize
+// 64:call void @__ubsan_handle_function_type_misma

[clang] 10e4228 - [ARM,AArch64] Add a full set of -mtp= options.

2023-06-15 Thread Simon Tatham via cfe-commits

Author: Simon Tatham
Date: 2023-06-15T09:27:41+01:00
New Revision: 10e42281144ecca019764b554f3f0f709bba0f71

URL: 
https://github.com/llvm/llvm-project/commit/10e42281144ecca019764b554f3f0f709bba0f71
DIFF: 
https://github.com/llvm/llvm-project/commit/10e42281144ecca019764b554f3f0f709bba0f71.diff

LOG: [ARM,AArch64] Add a full set of -mtp= options.

AArch64 has five system registers intended to be useful as thread
pointers: one for each exception level which is RW at that level and
inaccessible to lower ones, and the special TPIDRRO_EL0 which is
readable but not writable at EL0. AArch32 has three, corresponding to
the AArch64 ones that aren't specific to EL2 or EL3.

Currently clang supports only a subset of these registers, and not
even a consistent subset between AArch64 and AArch32:

 - For AArch64, clang permits you to choose between the four TPIDR_ELn
   thread registers, but not the fifth one, TPIDRRO_EL0.

 - In AArch32, on the other hand, the //only// thread register you can
   choose (apart from 'none, use a function call') is TPIDRURO, which
   corresponds to (the bottom 32 bits of) AArch64's TPIDRRO_EL0.

So there is no thread register that you can currently use in both
targets!

For custom and bare-metal purposes, users might very reasonably want
to use any of these thread registers. There's no reason they shouldn't
all be supported as options, even if the default choices follow
existing practice on typical operating systems.

This commit extends the range of values acceptable to the `-mtp=`
clang option, so that you can specify any of these registers by (the
lower-case version of) their official names in the ArmARM:

 - For AArch64: tpidr_el0, tpidrro_el0, tpidr_el1, tpidr_el2, tpidr_el3
 - For AArch32: tpidrurw, tpidruro, tpidrprw

All existing values of the option are still supported and behave the
same as before. Defaults are also unchanged. No command line that
worked already should change behaviour as a result of this.

The new values for the `-mtp=` option have been agreed with Arm's gcc
developers (although I don't know whether they plan to implement them
in the near future).

Reviewed By: nickdesaulniers

Differential Revision: https://reviews.llvm.org/D152433

Added: 
clang/test/Driver/aarch64-thread-pointer.c
clang/test/Driver/arm-thread-pointer.c

Modified: 
clang/include/clang/Driver/Options.td
clang/lib/Driver/ToolChains/Arch/AArch64.cpp
clang/lib/Driver/ToolChains/Arch/ARM.cpp
clang/lib/Driver/ToolChains/Arch/ARM.h
clang/lib/Driver/ToolChains/Clang.cpp
clang/test/Driver/clang-translation.c
llvm/lib/Target/AArch64/AArch64.td
llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
llvm/lib/Target/ARM/ARM.td
llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
llvm/lib/Target/ARM/ARMInstrInfo.td
llvm/lib/Target/ARM/ARMInstrThumb2.td
llvm/lib/Target/ARM/ARMPredicates.td
llvm/lib/Target/ARM/ARMSubtarget.h
llvm/test/CodeGen/AArch64/arm64-builtins-linux.ll
llvm/test/CodeGen/ARM/readtp.ll
llvm/test/CodeGen/ARM/stack-guard-tls.ll
llvm/test/CodeGen/ARM/thread_pointer.ll

Removed: 




diff  --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 750b6ab343852..06f02a05b7f13 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -3595,8 +3595,10 @@ def mexecute_only : Flag<["-"], "mexecute-only">, 
Group,
 def mno_execute_only : Flag<["-"], "mno-execute-only">, 
Group,
   HelpText<"Allow generation of data access to code sections (ARM only)">;
 let Flags = [TargetSpecific] in {
-def mtp_mode_EQ : Joined<["-"], "mtp=">, Group, 
Values<"soft,cp15,el0,el1,el2,el3">,
-  HelpText<"Thread pointer access method (AArch32/AArch64 only)">;
+def mtp_mode_EQ : Joined<["-"], "mtp=">, Group, 
Values<"soft,cp15,tpidrurw,tpidruro,tpidrprw,el0,el1,el2,el3,tpidr_el0,tpidr_el1,tpidr_el2,tpidr_el3,tpidrro_el0">,
+  HelpText<"Thread pointer access method. "
+   "For AArch32: 'soft' uses a function call, or 'tpidrurw', 
'tpidruro' or 'tpidrprw' use the three CP15 registers. 'cp15' is an alias for 
'tpidruro'. "
+   "For AArch64: 'tpidr_el0', 'tpidr_el1', 'tpidr_el2', 'tpidr_el3' or 
'tpidrro_el0' use the five system registers. 'elN' is an alias for 
'tpidr_elN'.">;
 def mpure_code : Flag<["-"], "mpure-code">, Alias; // Alias for 
GCC compatibility
 def mno_pure_code : Flag<["-"], "mno-pure-code">, Alias;
 def mtvos_version_min_EQ : Joined<["-"], "mtvos-version-min=">, Group;

diff  --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp 
b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
index f3bc00188c784..3547031635795 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -291,13 +291,15 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
 
   if (Arg *A = Args.getLastArg(options::OPT_mtp_mode_EQ)) {
 String

[clang] [Clang][Driver] Skip empty strings in getAArch64MultilibFlags (PR #97827)

2024-07-05 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm created 
https://github.com/llvm/llvm-project/pull/97827

In a multilib setting, if you compile with a command line such as `clang 
--target=aarch64-none-elf -march=armv8.9-a+rcpc3`, `getAArch64MultilibFlags` 
returns an ill-formed string containing two consecutive `+` signs, of the form 
`...+rcpc++rcpc3+...`, causing later stages of multilib selection to get 
confused.

The `++` arises from the entry in `AArch64::Extensions` for the 
SubtargetFeature `rcpc-immo`, which is a dependency of the `rcpc3` 
SubtargetFeature, but doesn't have an _extension_ name for the purposes of the 
`-march=foo+bar` option. So its `UserVisibleName` field is the empty string.

To fix this, I've excluded extensions from consideration in 
`getAArch64MultilibFlags` if they have an empty `UserVisibleName`. Since the 
input to this function is not derived from a completely general set of 
SubtargetFeatures, but from a set that has only just been converted _from_ a 
clang driver command line, the only extensions skipped by this check should be 
cases like this one, where the anonymous extension was only included because it 
was a dependency of one mentioned explicitly.

I've also made the analogous change in `getARMMultilibFlags`. I don't think 
it's necessary right now, because the architecture extensions for ARM (defined 
in `ARMTargetParser.def` rather than Tablegen) don't include any anonymous 
ones. But it seems sensible to add the check anyway, in case future refactoring 
introduces anonymous array elements in the same way that AArch64 did, and also 
in case someone writes a function for another platform by using either of these 
as example code.

>From 8b39cbbdbe3646062dd1cdb60eab18339f9ca490 Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Fri, 5 Jul 2024 11:57:19 +0100
Subject: [PATCH] [Clang][Driver] Skip empty strings in getAArch64MultilibFlags

In a multilib setting, if you compile with a command line such as
`clang --target=aarch64-none-elf -march=armv8.9-a+rcpc3`,
`getAArch64MultilibFlags` returns an ill-formed string containing two
consecutive `+` signs, of the form `...+rcpc++rcpc3+...`, causing
later stages of multilib selection to get confused.

The `++` arises from the entry in `AArch64::Extensions` for the
SubtargetFeature `rcpc-immo`, which is a dependency of the `rcpc3`
SubtargetFeature, but doesn't have an _extension_ name for the
purposes of the `-march=foo+bar` option. So its `UserVisibleName`
field is the empty string.

To fix this, I've excluded extensions from consideration in
`getAArch64MultilibFlags` if they have an empty `UserVisibleName`.
Since the input to this function is not derived from a completely
general set of SubtargetFeatures, but from a set that has only just
been converted _from_ a clang driver command line, the only extensions
skipped by this check should be cases like this one, where the
anonymous extension was only included because it was a dependency of
one mentioned explicitly.

I've also made the analogous change in `getARMMultilibFlags`. I don't
think it's necessary right now, because the architecture extensions
for ARM (defined in `ARMTargetParser.def` rather than Tablegen) don't
include any anonymous ones. But it seems sensible to add the check
anyway, in case future refactoring introduces anonymous array elements
in the same way that AArch64 did, and also in case someone writes a
function for another platform by using either of these as example
code.
---
 clang/lib/Driver/ToolChain.cpp | 20 
 clang/test/Driver/aarch64-multilib-rcpc3.c |  4 
 2 files changed, 16 insertions(+), 8 deletions(-)
 create mode 100644 clang/test/Driver/aarch64-multilib-rcpc3.c

diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 977e08390800d..9ac428caab3b9 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -195,11 +195,13 @@ static void getAArch64MultilibFlags(const Driver &D,
UnifiedFeatures.end());
   std::vector MArch;
   for (const auto &Ext : AArch64::Extensions)
-if (FeatureSet.contains(Ext.PosTargetFeature))
-  MArch.push_back(Ext.UserVisibleName.str());
+if (!Ext.UserVisibleName.empty())
+  if (FeatureSet.contains(Ext.PosTargetFeature))
+MArch.push_back(Ext.UserVisibleName.str());
   for (const auto &Ext : AArch64::Extensions)
-if (FeatureSet.contains(Ext.NegTargetFeature))
-  MArch.push_back(("no" + Ext.UserVisibleName).str());
+if (!Ext.UserVisibleName.empty())
+  if (FeatureSet.contains(Ext.NegTargetFeature))
+MArch.push_back(("no" + Ext.UserVisibleName).str());
   StringRef ArchName;
   for (const auto &ArchInfo : AArch64::ArchInfos)
 if (FeatureSet.contains(ArchInfo->ArchFeature))
@@ -221,11 +223,13 @@ static void getARMMultilibFlags(const Driver &D,
UnifiedFeatures.end());
   std::vector MArch;
   for (const auto &Ext : ARM::AR

[clang] [Clang][Driver] Skip empty strings in getAArch64MultilibFlags (PR #97827)

2024-07-05 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm updated 
https://github.com/llvm/llvm-project/pull/97827

>From 81d77bf87dd47684683492ab70cc45ab6eb4364e Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Fri, 5 Jul 2024 11:57:19 +0100
Subject: [PATCH] [Clang][Driver] Skip empty strings in getAArch64MultilibFlags

In a multilib setting, if you compile with a command line such as
`clang --target=aarch64-none-elf -march=armv8.9-a+rcpc3`,
`getAArch64MultilibFlags` returns an ill-formed string containing two
consecutive `+` signs, of the form `...+rcpc++rcpc3+...`, causing
later stages of multilib selection to get confused.

The `++` arises from the entry in `AArch64::Extensions` for the
SubtargetFeature `rcpc-immo`, which is a dependency of the `rcpc3`
SubtargetFeature, but doesn't have an _extension_ name for the
purposes of the `-march=foo+bar` option. So its `UserVisibleName`
field is the empty string.

To fix this, I've excluded extensions from consideration in
`getAArch64MultilibFlags` if they have an empty `UserVisibleName`.
Since the input to this function is not derived from a completely
general set of SubtargetFeatures, but from a set that has only just
been converted _from_ a clang driver command line, the only extensions
skipped by this check should be cases like this one, where the
anonymous extension was only included because it was a dependency of
one mentioned explicitly.

I've also made the analogous change in `getARMMultilibFlags`. I don't
think it's necessary right now, because the architecture extensions
for ARM (defined in `ARMTargetParser.def` rather than Tablegen) don't
include any anonymous ones. But it seems sensible to add the check
anyway, in case future refactoring introduces anonymous array elements
in the same way that AArch64 did, and also in case someone writes a
function for another platform by using either of these as example
code.
---
 clang/lib/Driver/ToolChain.cpp | 20 
 clang/test/Driver/aarch64-multilib-rcpc3.c |  4 
 2 files changed, 16 insertions(+), 8 deletions(-)
 create mode 100644 clang/test/Driver/aarch64-multilib-rcpc3.c

diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 977e08390800d7..85ae4d2a26fee2 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -195,11 +195,13 @@ static void getAArch64MultilibFlags(const Driver &D,
UnifiedFeatures.end());
   std::vector MArch;
   for (const auto &Ext : AArch64::Extensions)
-if (FeatureSet.contains(Ext.PosTargetFeature))
-  MArch.push_back(Ext.UserVisibleName.str());
+if (!Ext.UserVisibleName.empty())
+  if (FeatureSet.contains(Ext.PosTargetFeature))
+MArch.push_back(Ext.UserVisibleName.str());
   for (const auto &Ext : AArch64::Extensions)
-if (FeatureSet.contains(Ext.NegTargetFeature))
-  MArch.push_back(("no" + Ext.UserVisibleName).str());
+if (!Ext.UserVisibleName.empty())
+  if (FeatureSet.contains(Ext.NegTargetFeature))
+MArch.push_back(("no" + Ext.UserVisibleName).str());
   StringRef ArchName;
   for (const auto &ArchInfo : AArch64::ArchInfos)
 if (FeatureSet.contains(ArchInfo->ArchFeature))
@@ -221,11 +223,13 @@ static void getARMMultilibFlags(const Driver &D,
UnifiedFeatures.end());
   std::vector MArch;
   for (const auto &Ext : ARM::ARCHExtNames)
-if (FeatureSet.contains(Ext.Feature))
-  MArch.push_back(Ext.Name.str());
+if (!Ext.Name.empty())
+  if (FeatureSet.contains(Ext.Feature))
+MArch.push_back(Ext.Name.str());
   for (const auto &Ext : ARM::ARCHExtNames)
-if (FeatureSet.contains(Ext.NegFeature))
-  MArch.push_back(("no" + Ext.Name).str());
+if (!Ext.Name.empty())
+  if (FeatureSet.contains(Ext.NegFeature))
+MArch.push_back(("no" + Ext.Name).str());
   MArch.insert(MArch.begin(), ("-march=" + Triple.getArchName()).str());
   Result.push_back(llvm::join(MArch, "+"));
 
diff --git a/clang/test/Driver/aarch64-multilib-rcpc3.c 
b/clang/test/Driver/aarch64-multilib-rcpc3.c
new file mode 100644
index 00..b839079e0442d6
--- /dev/null
+++ b/clang/test/Driver/aarch64-multilib-rcpc3.c
@@ -0,0 +1,4 @@
+// RUN: %clang --target=aarch64-none-elf -march=armv8.9-a+rcpc3 
-print-multi-flags-experimental -c %s 2>&1 | FileCheck %s
+
+// CHECK: -march=armv8.9-a
+// CHECK-SAME: +rcpc+rcpc3+

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Driver] Skip empty strings in getAArch64MultilibFlags (PR #97827)

2024-07-08 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm updated 
https://github.com/llvm/llvm-project/pull/97827

>From 81d77bf87dd47684683492ab70cc45ab6eb4364e Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Fri, 5 Jul 2024 11:57:19 +0100
Subject: [PATCH 1/2] [Clang][Driver] Skip empty strings in
 getAArch64MultilibFlags

In a multilib setting, if you compile with a command line such as
`clang --target=aarch64-none-elf -march=armv8.9-a+rcpc3`,
`getAArch64MultilibFlags` returns an ill-formed string containing two
consecutive `+` signs, of the form `...+rcpc++rcpc3+...`, causing
later stages of multilib selection to get confused.

The `++` arises from the entry in `AArch64::Extensions` for the
SubtargetFeature `rcpc-immo`, which is a dependency of the `rcpc3`
SubtargetFeature, but doesn't have an _extension_ name for the
purposes of the `-march=foo+bar` option. So its `UserVisibleName`
field is the empty string.

To fix this, I've excluded extensions from consideration in
`getAArch64MultilibFlags` if they have an empty `UserVisibleName`.
Since the input to this function is not derived from a completely
general set of SubtargetFeatures, but from a set that has only just
been converted _from_ a clang driver command line, the only extensions
skipped by this check should be cases like this one, where the
anonymous extension was only included because it was a dependency of
one mentioned explicitly.

I've also made the analogous change in `getARMMultilibFlags`. I don't
think it's necessary right now, because the architecture extensions
for ARM (defined in `ARMTargetParser.def` rather than Tablegen) don't
include any anonymous ones. But it seems sensible to add the check
anyway, in case future refactoring introduces anonymous array elements
in the same way that AArch64 did, and also in case someone writes a
function for another platform by using either of these as example
code.
---
 clang/lib/Driver/ToolChain.cpp | 20 
 clang/test/Driver/aarch64-multilib-rcpc3.c |  4 
 2 files changed, 16 insertions(+), 8 deletions(-)
 create mode 100644 clang/test/Driver/aarch64-multilib-rcpc3.c

diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 977e08390800d..85ae4d2a26fee 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -195,11 +195,13 @@ static void getAArch64MultilibFlags(const Driver &D,
UnifiedFeatures.end());
   std::vector MArch;
   for (const auto &Ext : AArch64::Extensions)
-if (FeatureSet.contains(Ext.PosTargetFeature))
-  MArch.push_back(Ext.UserVisibleName.str());
+if (!Ext.UserVisibleName.empty())
+  if (FeatureSet.contains(Ext.PosTargetFeature))
+MArch.push_back(Ext.UserVisibleName.str());
   for (const auto &Ext : AArch64::Extensions)
-if (FeatureSet.contains(Ext.NegTargetFeature))
-  MArch.push_back(("no" + Ext.UserVisibleName).str());
+if (!Ext.UserVisibleName.empty())
+  if (FeatureSet.contains(Ext.NegTargetFeature))
+MArch.push_back(("no" + Ext.UserVisibleName).str());
   StringRef ArchName;
   for (const auto &ArchInfo : AArch64::ArchInfos)
 if (FeatureSet.contains(ArchInfo->ArchFeature))
@@ -221,11 +223,13 @@ static void getARMMultilibFlags(const Driver &D,
UnifiedFeatures.end());
   std::vector MArch;
   for (const auto &Ext : ARM::ARCHExtNames)
-if (FeatureSet.contains(Ext.Feature))
-  MArch.push_back(Ext.Name.str());
+if (!Ext.Name.empty())
+  if (FeatureSet.contains(Ext.Feature))
+MArch.push_back(Ext.Name.str());
   for (const auto &Ext : ARM::ARCHExtNames)
-if (FeatureSet.contains(Ext.NegFeature))
-  MArch.push_back(("no" + Ext.Name).str());
+if (!Ext.Name.empty())
+  if (FeatureSet.contains(Ext.NegFeature))
+MArch.push_back(("no" + Ext.Name).str());
   MArch.insert(MArch.begin(), ("-march=" + Triple.getArchName()).str());
   Result.push_back(llvm::join(MArch, "+"));
 
diff --git a/clang/test/Driver/aarch64-multilib-rcpc3.c 
b/clang/test/Driver/aarch64-multilib-rcpc3.c
new file mode 100644
index 0..b839079e0442d
--- /dev/null
+++ b/clang/test/Driver/aarch64-multilib-rcpc3.c
@@ -0,0 +1,4 @@
+// RUN: %clang --target=aarch64-none-elf -march=armv8.9-a+rcpc3 
-print-multi-flags-experimental -c %s 2>&1 | FileCheck %s
+
+// CHECK: -march=armv8.9-a
+// CHECK-SAME: +rcpc+rcpc3+

>From 7179fc771dcf15c4ab5be24b985fd7be56960cc9 Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Mon, 8 Jul 2024 17:30:56 +0100
Subject: [PATCH 2/2] fixup! [Clang][Driver] Skip empty strings in
 getAArch64MultilibFlags

---
 clang/test/Driver/aarch64-multilib-rcpc3.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/clang/test/Driver/aarch64-multilib-rcpc3.c 
b/clang/test/Driver/aarch64-multilib-rcpc3.c
index b839079e0442d..88b23de5a6510 100644
--- a/clang/test/Driver/aarch64-multilib-rcpc3.c
+++ b/clang/test/Driver/aarch64-multilib-rcpc3.c
@@ -1,4 +

[clang] [Clang][Driver] Skip empty strings in getAArch64MultilibFlags (PR #97827)

2024-07-08 Thread Simon Tatham via cfe-commits


@@ -0,0 +1,4 @@
+// RUN: %clang --target=aarch64-none-elf -march=armv8.9-a+rcpc3 
-print-multi-flags-experimental -c %s 2>&1 | FileCheck %s
+
+// CHECK: -march=armv8.9-a
+// CHECK-SAME: +rcpc+rcpc3+

statham-arm wrote:

Done. I wasn't sure whether it would need to come before or after the 
`rcpc+rcpc3` check. The answer turns out to be both.

https://github.com/llvm/llvm-project/pull/97827
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][Driver] Add a custom error option in multilib.yaml. (PR #105684)

2024-09-05 Thread Simon Tatham via cfe-commits

statham-arm wrote:

To be clear, are you asking _me_ to make a followup PR to change that 
identifier in this already-landed patch, or are you going to do it?

(Just to avoid the situation where both of us do it, or both of us think the 
other is going to)

https://github.com/llvm/llvm-project/pull/105684
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Fix silent truncation of inline ASM `srcloc` cookie when going through a `DiagnosticInfoSrcMgr` (PR #84559)

2024-06-14 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm approved this pull request.

LGTM, thanks!

https://github.com/llvm/llvm-project/pull/84559
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Fix silent truncation of inline ASM `srcloc` cookie when going through a `DiagnosticInfoSrcMgr` (PR #84559)

2024-06-14 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm closed 
https://github.com/llvm/llvm-project/pull/84559
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [Clang][Driver] Skip empty strings in getAArch64MultilibFlags (PR #97827)

2024-07-17 Thread Simon Tatham via cfe-commits

statham-arm wrote:

@asmok-g , I'm confused. This commit doesn't have anything to do with the 
processing of `-W` options on the clang command line.

Are you sure you've commented on the right PR? If you have, can you provide a 
full example command line? What was the behaviour before, and what is it 
afterwards? Why do you say that _this_ commit has caused the change?

https://github.com/llvm/llvm-project/pull/97827
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang][Driver] Add a custom error option in multilib.yaml. (PR #105684)

2024-08-27 Thread Simon Tatham via cfe-commits

https://github.com/statham-arm updated 
https://github.com/llvm/llvm-project/pull/105684

>From 806ac0bee0478fda32ec0bf5bfb9e28e1bef618d Mon Sep 17 00:00:00 2001
From: Simon Tatham 
Date: Wed, 21 Aug 2024 15:50:32 +0100
Subject: [PATCH 1/3] [clang][Driver] Add a custom error option in
 multilib.yaml.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sometimes a collection of multilibs has a gap in it, where a set of
driver command-line options can't work with any of the available
libraries.

For example, the Arm MVE extension requires special startup code (you
need to initialize FPSCR.LTPSIZE), and also benefits greatly from
-mfloat-abi=hard. So a multilib provider might build a library for
systems without MVE, and another for MVE with -mfloat-abi=hard,
anticipating that that's what most MVE users would want. But then if a
user compiles for MVE _without_ -mfloat-abi=hard, they can't use
either of those libraries – one has an ABI mismatch, and the other
will fail to set up LTPSIZE.

In that situation, it's useful to include a multilib.yaml entry for
the unworkable intermediate situation, and have it map to a fatal
error message rather than a set of actual libraries. Then the user
gets a build failure with a sensible explanation, instead of selecting
an unworkable library and silently generating bad output. The new
regression test demonstrates this case.

This patch introduces extra syntax into multilib.yaml, so that a
record in the `Variants` list can omit the `Dir` key, and in its
place, provide a `FatalError` key. Then, if that variant is selected,
the error message is emitted as a clang diagnostic, and multilib
selection fails.

In order to emit the error message in `MultilibSet::select`, I had to
pass a `Driver &` to that function, which involved plumbing one
through to every call site, and in the unit tests, constructing one
specially.
---
 clang/docs/Multilib.rst   |  6 ++
 .../clang/Basic/DiagnosticDriverKinds.td  |  2 +
 clang/include/clang/Driver/Multilib.h | 17 +++-
 clang/lib/Driver/Driver.cpp   |  3 +-
 clang/lib/Driver/Multilib.cpp | 47 +++---
 clang/lib/Driver/ToolChains/BareMetal.cpp |  9 +-
 clang/lib/Driver/ToolChains/Fuchsia.cpp   |  2 +-
 clang/lib/Driver/ToolChains/Gnu.cpp   | 57 ++--
 clang/lib/Driver/ToolChains/OHOS.cpp  |  7 +-
 .../baremetal-multilib-custom-error.yaml  | 63 ++
 .../unittests/Driver/MultilibBuilderTest.cpp  |  9 +-
 clang/unittests/Driver/MultilibTest.cpp   | 87 +++
 .../Driver/SimpleDiagnosticConsumer.h | 16 
 13 files changed, 236 insertions(+), 89 deletions(-)
 create mode 100644 clang/test/Driver/baremetal-multilib-custom-error.yaml

diff --git a/clang/docs/Multilib.rst b/clang/docs/Multilib.rst
index 063fe9a336f2fe..6d77fda3623b20 100644
--- a/clang/docs/Multilib.rst
+++ b/clang/docs/Multilib.rst
@@ -200,6 +200,12 @@ For a more comprehensive example see
 # to be a match.
 Flags: [--target=thumbv7m-none-eabi, -mfpu=fpv4-sp-d16]
 
+  # If there is no multilib available for a particular set of flags, and the
+  # other multilibs are not adequate fallbacks, then you can define a variant
+  # record with a FatalError key in place of the Dir key.
+  - FatalError: this multilib collection has no hard-float ABI support 
+Flags: [--target=thumbv7m-none-eabi, -mfloat-abi=hard]
+
 
   # The second section of the file is a list of regular expressions that are
   # used to map from flags generated from command line options to custom flags.
diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td 
b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index ba90742fbdaabc..97573fcf20c1fb 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -810,6 +810,8 @@ def warn_drv_missing_multilib : Warning<
   InGroup>;
 def note_drv_available_multilibs : Note<
   "available multilibs are:%0">;
+def err_drv_multilib_custom_error : Error<
+  "multilib configuration error: %0">;
 
 def err_drv_experimental_crel : Error<
   "-Wa,--allow-experimental-crel must be specified to use -Wa,--crel. "
diff --git a/clang/include/clang/Driver/Multilib.h 
b/clang/include/clang/Driver/Multilib.h
index 9a2cc9bb1ba134..2b6a64187f7783 100644
--- a/clang/include/clang/Driver/Multilib.h
+++ b/clang/include/clang/Driver/Multilib.h
@@ -18,6 +18,7 @@
 #include "llvm/Support/SourceMgr.h"
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -25,6 +26,8 @@
 namespace clang {
 namespace driver {
 
+class Driver;
+
 /// This corresponds to a single GCC Multilib, or a segment of one controlled
 /// by a command line flag.
 /// See also MultilibBuilder for building a multilib by mutating it
@@ -48,13 +51,19 @@ class Multilib {
   // directory is not mutually exclusive with anything else.
   std::string ExclusiveGroup;
 
+  // So

  1   2   >