simon_tatham created this revision.
simon_tatham added reviewers: ostannard, MarkMurrayARM, dmgreen.
Herald added subscribers: cfe-commits, kristof.beyls.
Herald added a project: clang.
simon_tatham added a parent revision: D70088: [ARM,MVE] Add intrinsics for
contiguous load/stores.

This batch of intrinsics includes lots of things that move vector data
around or change its type without really affecting its value very
much. It includes the `vreinterpretq` family (cast one vector type to
another); `vuninitializedq` (create a vector of a given type with
don't-care contents); `vcreateq` (make a 128-bit vector out of two
`uint64_t` halves); and the `vgetq_lane` and `vsetq_lane` families, to
read and write an individual lane of a vector.

These are all implemented using completely standard IR that's already
tested in existing LLVM unit tests, so I've just written a clang test
to check the IR is correct, and left it at that.

One of the new `vgetq_lane` intrinsics returns a `float16_t`, which
causes a compile error if `%clang_cc1` doesn't get the option
`-fallow-half-arguments-and-returns`. The driver passes that option to
cc1 already, but I've had to edit all the explicit cc1 command lines
in the existing MVE intrinsics tests.

I've also added some richer infrastructure to the MveEmitter Tablegen
backend, to make it specify the exact integer type of integer
arguments passed to IR construction functions, and wrap those
arguments in a `static_cast` in the autogenerated C++. That was
necessary to prevent an overloading ambiguity when passing the integer
literal `0` to `IRBuilder::CreateInsertElement`, because otherwise, it
could mean either a null pointer `llvm::Value *` or a zero `uint64_t`.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D70133

Files:
  clang/include/clang/Basic/arm_mve.td
  clang/include/clang/Basic/arm_mve_defs.td
  clang/test/CodeGen/arm-mve-intrinsics/admin.c
  clang/test/CodeGen/arm-mve-intrinsics/load-store.c
  clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
  clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c
  clang/test/CodeGen/arm-mve-intrinsics/vadc.c
  clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
  clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
  clang/test/CodeGen/arm-mve-intrinsics/vld24.c
  clang/test/CodeGen/arm-mve-intrinsics/vldr.c
  clang/test/CodeGen/arm-mve-intrinsics/vminvq.c
  clang/test/Sema/arm-mve-immediates.c
  clang/utils/TableGen/MveEmitter.cpp

Index: clang/utils/TableGen/MveEmitter.cpp
===================================================================
--- clang/utils/TableGen/MveEmitter.cpp
+++ clang/utils/TableGen/MveEmitter.cpp
@@ -632,10 +632,10 @@
   StringRef CallPrefix;
   std::vector<Ptr> Args;
   std::set<unsigned> AddressArgs;
-  std::set<unsigned> IntConstantArgs;
+  std::map<unsigned, std::string> IntConstantArgs;
   IRBuilderResult(StringRef CallPrefix, std::vector<Ptr> Args,
                   std::set<unsigned> AddressArgs,
-                  std::set<unsigned> IntConstantArgs)
+                  std::map<unsigned, std::string> IntConstantArgs)
     : CallPrefix(CallPrefix), Args(Args), AddressArgs(AddressArgs),
         IntConstantArgs(IntConstantArgs) {}
   void genCode(raw_ostream &OS,
@@ -644,11 +644,13 @@
     const char *Sep = "";
     for (unsigned i = 0, e = Args.size(); i < e; ++i) {
       Ptr Arg = Args[i];
-      if (IntConstantArgs.find(i) != IntConstantArgs.end()) {
+      auto it = IntConstantArgs.find(i);
+      if (it != IntConstantArgs.end()) {
         assert(Arg->hasIntegerConstantValue());
-        OS << Sep
+        OS << Sep << "static_cast<" << it->second << ">("
            << ParamAlloc.allocParam("unsigned",
-                                    utostr(Arg->integerConstantValue()));
+                                    utostr(Arg->integerConstantValue()))
+           << ")";
       } else {
         OS << Sep << Arg->varname();
       }
@@ -763,6 +765,14 @@
   // shares with at least one other intrinsic.
   std::string ShortName, FullName;
 
+  // A very small number of intrinsics _only_ have a polymorphic
+  // variant (vuninitializedq taking an unevaluated argument).
+  bool PolymorphicOnly;
+
+  // Another rarely-used flag indicating that the builtin doesn't
+  // evaluate its argument(s) at all.
+  bool NonEvaluating;
+
   const Type *ReturnType;
   std::vector<const Type *> ArgTypes;
   std::map<unsigned, ImmediateArg> ImmediateArgs;
@@ -796,6 +806,8 @@
     return false;
   }
   bool polymorphic() const { return ShortName != FullName; }
+  bool polymorphicOnly() const { return PolymorphicOnly; }
+  bool nonEvaluating() const { return NonEvaluating; }
 
   // External entry point for code generation, called from MveEmitter.
   void genCode(raw_ostream &OS, CodeGenParamAllocator &ParamAlloc,
@@ -1126,11 +1138,15 @@
       Args.push_back(getCodeForDagArg(D, i, Scope, Param));
     if (Op->isSubClassOf("IRBuilderBase")) {
       std::set<unsigned> AddressArgs;
-      for (unsigned i : Op->getValueAsListOfInts("address_params"))
-        AddressArgs.insert(i);
-      std::set<unsigned> IntConstantArgs;
-      for (unsigned i : Op->getValueAsListOfInts("int_constant_params"))
-        IntConstantArgs.insert(i);
+      std::map<unsigned, std::string> IntConstantArgs;
+      for (Record *sp : Op->getValueAsListOfDefs("special_params")) {
+        unsigned Index = sp->getValueAsInt("index");
+        if (sp->isSubClassOf("IRBuilderAddrParam")) {
+          AddressArgs.insert(Index);
+        } else if (sp->isSubClassOf("IRBuilderIntParam")) {
+          IntConstantArgs[Index] = sp->getValueAsString("type");
+        }
+      }
       return std::make_shared<IRBuilderResult>(
           Op->getValueAsString("prefix"), Args, AddressArgs, IntConstantArgs);
     } else if (Op->isSubClassOf("IRIntBase")) {
@@ -1235,6 +1251,9 @@
   }
   ShortName = join(std::begin(NameParts), std::end(NameParts), "_");
 
+  PolymorphicOnly = R->getValueAsBit("polymorphicOnly");
+  NonEvaluating = R->getValueAsBit("nonEvaluating");
+
   // Process the intrinsic's argument list.
   DagInit *ArgsDag = R->getValueAsDag("args");
   Result::Scope Scope;
@@ -1269,7 +1288,7 @@
         } else if (Bounds->getName() == "IB_LaneIndex") {
           IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
           IA.i1 = 0;
-          IA.i2 = 128 / Param->sizeInBits();
+          IA.i2 = 128 / Param->sizeInBits() - 1;
         } else if (Bounds->getName() == "IB_EltBit") {
           IA.boundsType = ImmediateArg::BoundsType::ExplicitRange;
           IA.i1 = Bounds->getValueAsInt("base");
@@ -1404,6 +1423,8 @@
     for (bool Polymorphic : {false, true}) {
       if (Polymorphic && !Int.polymorphic())
         continue;
+      if (!Polymorphic && Int.polymorphicOnly())
+        continue;
 
       // We also generate each intrinsic under a name like __arm_vfooq
       // (which is in C language implementation namespace, so it's
@@ -1557,7 +1578,10 @@
     if (Int.polymorphic()) {
       StringRef Name = Int.shortName();
       if (ShortNamesSeen.find(Name) == ShortNamesSeen.end()) {
-        OS << "BUILTIN(__builtin_arm_mve_" << Name << ", \"vi.\", \"nt\")\n";
+        OS << "BUILTIN(__builtin_arm_mve_" << Name << ", \"vi.\", \"nt";
+        if (Int.nonEvaluating())
+          OS << "u"; // indicate that this builtin doesn't evaluate its args
+        OS << "\")\n";
         ShortNamesSeen.insert(Name);
       }
     }
Index: clang/test/Sema/arm-mve-immediates.c
===================================================================
--- clang/test/Sema/arm-mve-immediates.c
+++ clang/test/Sema/arm-mve-immediates.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -verify -fsyntax-only %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -fallow-half-arguments-and-returns -target-feature +mve.fp -verify -fsyntax-only %s
 
 #include <arm_mve.h>
 
@@ -54,3 +54,47 @@
   vstrwq_scatter_base(addr32, 2, addr32); // expected-error {{argument should be a multiple of 4}}
   vstrwq_scatter_base(addr32, 1, addr32); // expected-error {{argument should be a multiple of 4}}
 }
+
+void test_lane_indices(uint8x16_t v16, uint16x8_t v8,
+                       uint32x4_t v4, uint64x2_t v2)
+{
+  vgetq_lane_u8(v16, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}}
+  vgetq_lane_u8(v16, 0);
+  vgetq_lane_u8(v16, 15);
+  vgetq_lane_u8(v16, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+
+  vgetq_lane_u16(v8, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}}
+  vgetq_lane_u16(v8, 0);
+  vgetq_lane_u16(v8, 7);
+  vgetq_lane_u16(v8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+
+  vgetq_lane_u32(v4, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
+  vgetq_lane_u32(v4, 0);
+  vgetq_lane_u32(v4, 3);
+  vgetq_lane_u32(v4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+
+  vgetq_lane_u64(v2, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
+  vgetq_lane_u64(v2, 0);
+  vgetq_lane_u64(v2, 1);
+  vgetq_lane_u64(v2, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+
+  vsetq_lane_u8(23, v16, -1); // expected-error {{argument value -1 is outside the valid range [0, 15]}}
+  vsetq_lane_u8(23, v16, 0);
+  vsetq_lane_u8(23, v16, 15);
+  vsetq_lane_u8(23, v16, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
+
+  vsetq_lane_u16(23, v8, -1); // expected-error {{argument value -1 is outside the valid range [0, 7]}}
+  vsetq_lane_u16(23, v8, 0);
+  vsetq_lane_u16(23, v8, 7);
+  vsetq_lane_u16(23, v8, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
+
+  vsetq_lane_u32(23, v4, -1); // expected-error {{argument value -1 is outside the valid range [0, 3]}}
+  vsetq_lane_u32(23, v4, 0);
+  vsetq_lane_u32(23, v4, 3);
+  vsetq_lane_u32(23, v4, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
+
+  vsetq_lane_u64(23, v2, -1); // expected-error {{argument value -1 is outside the valid range [0, 1]}}
+  vsetq_lane_u64(23, v2, 0);
+  vsetq_lane_u64(23, v2, 1);
+  vsetq_lane_u64(23, v2, 2); // expected-error {{argument value 2 is outside the valid range [0, 1]}}
+}
Index: clang/test/CodeGen/arm-mve-intrinsics/vminvq.c
===================================================================
--- clang/test/CodeGen/arm-mve-intrinsics/vminvq.c
+++ clang/test/CodeGen/arm-mve-intrinsics/vminvq.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 
Index: clang/test/CodeGen/arm-mve-intrinsics/vldr.c
===================================================================
--- clang/test/CodeGen/arm-mve-intrinsics/vldr.c
+++ clang/test/CodeGen/arm-mve-intrinsics/vldr.c
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 
Index: clang/test/CodeGen/arm-mve-intrinsics/vld24.c
===================================================================
--- clang/test/CodeGen/arm-mve-intrinsics/vld24.c
+++ clang/test/CodeGen/arm-mve-intrinsics/vld24.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
 
 #include <arm_mve.h>
 
Index: clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
===================================================================
--- clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
+++ clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 
Index: clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
===================================================================
--- clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
+++ clang/test/CodeGen/arm-mve-intrinsics/vaddq.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 
Index: clang/test/CodeGen/arm-mve-intrinsics/vadc.c
===================================================================
--- clang/test/CodeGen/arm-mve-intrinsics/vadc.c
+++ clang/test/CodeGen/arm-mve-intrinsics/vadc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 
Index: clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c
===================================================================
--- clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c
+++ clang/test/CodeGen/arm-mve-intrinsics/scatter-gather.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 
Index: clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
===================================================================
--- clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
+++ clang/test/CodeGen/arm-mve-intrinsics/scalar-shifts.c
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 
Index: clang/test/CodeGen/arm-mve-intrinsics/load-store.c
===================================================================
--- clang/test/CodeGen/arm-mve-intrinsics/load-store.c
+++ clang/test/CodeGen/arm-mve-intrinsics/load-store.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
-// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
 
 #include <arm_mve.h>
 
Index: clang/test/CodeGen/arm-mve-intrinsics/admin.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/arm-mve-intrinsics/admin.c
@@ -0,0 +1,1846 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vcreateq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <8 x half>
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+float16x8_t test_vcreateq_f16(uint64_t a, uint64_t b)
+{
+    return vcreateq_f16(a, b);
+}
+
+// CHECK-LABEL: @test_vcreateq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x float>
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float32x4_t test_vcreateq_f32(uint64_t a, uint64_t b)
+{
+    return vcreateq_f32(a, b);
+}
+
+// CHECK-LABEL: @test_vcreateq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vcreateq_s16(uint64_t a, uint64_t b)
+{
+    return vcreateq_s16(a, b);
+}
+
+// CHECK-LABEL: @test_vcreateq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vcreateq_s32(uint64_t a, uint64_t b)
+{
+    return vcreateq_s32(a, b);
+}
+
+// CHECK-LABEL: @test_vcreateq_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+int64x2_t test_vcreateq_s64(uint64_t a, uint64_t b)
+{
+    return vcreateq_s64(a, b);
+}
+
+// CHECK-LABEL: @test_vcreateq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vcreateq_s8(uint64_t a, uint64_t b)
+{
+    return vcreateq_s8(a, b);
+}
+
+// CHECK-LABEL: @test_vcreateq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vcreateq_u16(uint64_t a, uint64_t b)
+{
+    return vcreateq_u16(a, b);
+}
+
+// CHECK-LABEL: @test_vcreateq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vcreateq_u32(uint64_t a, uint64_t b)
+{
+    return vcreateq_u32(a, b);
+}
+
+// CHECK-LABEL: @test_vcreateq_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-NEXT:    ret <2 x i64> [[TMP1]]
+//
+uint64x2_t test_vcreateq_u64(uint64_t a, uint64_t b)
+{
+    return vcreateq_u64(a, b);
+}
+
+// CHECK-LABEL: @test_vcreateq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> undef, i64 [[A:%.*]], i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> [[TMP0]], i64 [[B:%.*]], i64 1
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vcreateq_u8(uint64_t a, uint64_t b)
+{
+    return vcreateq_u8(a, b);
+}
+
+// CHECK-LABEL: @test_vgetq_lane_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <8 x half> [[A:%.*]], i32 2
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast half [[TMP0]] to i16
+// CHECK-NEXT:    [[TMP_0_INSERT_EXT:%.*]] = zext i16 [[TMP1]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[TMP_0_INSERT_EXT]] to float
+// CHECK-NEXT:    ret float [[TMP2]]
+//
+float16_t test_vgetq_lane_f16(float16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 2);
+#else /* POLYMORPHIC */
+    return vgetq_lane_f16(a, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 3
+// CHECK-NEXT:    ret float [[TMP0]]
+//
+float32_t test_vgetq_lane_f32(float32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 3);
+#else /* POLYMORPHIC */
+    return vgetq_lane_f32(a, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <8 x i16> [[A:%.*]], i32 4
+// CHECK-NEXT:    ret i16 [[TMP0]]
+//
+int16_t test_vgetq_lane_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 4);
+#else /* POLYMORPHIC */
+    return vgetq_lane_s16(a, 4);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 0
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int32_t test_vgetq_lane_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 0);
+#else /* POLYMORPHIC */
+    return vgetq_lane_s32(a, 0);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[A:%.*]], i32 0
+// CHECK-NEXT:    ret i64 [[TMP0]]
+//
+int64_t test_vgetq_lane_s64(int64x2_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 0);
+#else /* POLYMORPHIC */
+    return vgetq_lane_s64(a, 0);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <16 x i8> [[A:%.*]], i32 10
+// CHECK-NEXT:    ret i8 [[TMP0]]
+//
+int8_t test_vgetq_lane_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 10);
+#else /* POLYMORPHIC */
+    return vgetq_lane_s8(a, 10);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <8 x i16> [[A:%.*]], i32 3
+// CHECK-NEXT:    ret i16 [[TMP0]]
+//
+uint16_t test_vgetq_lane_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 3);
+#else /* POLYMORPHIC */
+    return vgetq_lane_u16(a, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 3
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t test_vgetq_lane_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 3);
+#else /* POLYMORPHIC */
+    return vgetq_lane_u32(a, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[A:%.*]], i32 1
+// CHECK-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t test_vgetq_lane_u64(uint64x2_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 1);
+#else /* POLYMORPHIC */
+    return vgetq_lane_u64(a, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vgetq_lane_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = extractelement <16 x i8> [[A:%.*]], i32 1
+// CHECK-NEXT:    ret i8 [[TMP0]]
+//
+uint8_t test_vgetq_lane_u8(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vgetq_lane(a, 1);
+#else /* POLYMORPHIC */
+    return vgetq_lane_u8(a, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f16_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <8 x half>
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vreinterpretq_f16_f32(float32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vreinterpretq_f16(a);
+#else /* POLYMORPHIC */
+    return vreinterpretq_f16_f32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f16_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <8 x half>
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vreinterpretq_f16_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vreinterpretq_f16(a);
+#else /* POLYMORPHIC */
+    return vreinterpretq_f16_s16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f16_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x half>
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vreinterpretq_f16_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vreinterpretq_f16(a);
+#else /* POLYMORPHIC */
+    return vreinterpretq_f16_s32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f16_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x half>
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vreinterpretq_f16_s64(int64x2_t a)
+{
+#ifdef POLYMORPHIC
+    return vreinterpretq_f16(a);
+#else /* POLYMORPHIC */
+    return vreinterpretq_f16_s64(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f16_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x half>
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vreinterpretq_f16_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vreinterpretq_f16(a);
+#else /* POLYMORPHIC */
+    return vreinterpretq_f16_s8(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f16_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <8 x half>
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vreinterpretq_f16(a);
+#else /* POLYMORPHIC */
+    return vreinterpretq_f16_u16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f16_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x half>
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vreinterpretq_f16(a);
+#else /* POLYMORPHIC */
+    return vreinterpretq_f16_u32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f16_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x half>
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a)
+{
+#ifdef POLYMORPHIC
+    return vreinterpretq_f16(a);
+#else /* POLYMORPHIC */
+    return vreinterpretq_f16_u64(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f16_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x half>
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+// Reinterprets uint8x16_t as float16x8_t (overloaded form when POLYMORPHIC is defined).
+float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_f16_u8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_f16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f32_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+// Reinterprets float16x8_t as float32x4_t (overloaded form when POLYMORPHIC is defined).
+float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_f32_f16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_f32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f32_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+// Reinterprets int16x8_t as float32x4_t (overloaded form when POLYMORPHIC is defined).
+float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_f32_s16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_f32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f32_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+// Reinterprets int32x4_t as float32x4_t (overloaded form when POLYMORPHIC is defined).
+float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_f32_s32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_f32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f32_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+// Reinterprets int64x2_t as float32x4_t (overloaded form when POLYMORPHIC is defined).
+float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_f32_s64(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_f32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f32_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+// Reinterprets int8x16_t as float32x4_t (overloaded form when POLYMORPHIC is defined).
+float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_f32_s8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_f32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f32_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+// Reinterprets uint16x8_t as float32x4_t (overloaded form when POLYMORPHIC is defined).
+float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_f32_u16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_f32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f32_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+// Reinterprets uint32x4_t as float32x4_t (overloaded form when POLYMORPHIC is defined).
+float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_f32_u32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_f32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f32_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+// Reinterprets uint64x2_t as float32x4_t (overloaded form when POLYMORPHIC is defined).
+float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_f32_u64(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_f32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_f32_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+// Reinterprets uint8x16_t as float32x4_t (overloaded form when POLYMORPHIC is defined).
+float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_f32_u8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_f32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s16_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// Reinterprets float16x8_t as int16x8_t (overloaded form when POLYMORPHIC is defined).
+int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s16_f16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s16_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// Reinterprets float32x4_t as int16x8_t (overloaded form when POLYMORPHIC is defined).
+int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s16_f32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s16_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// Reinterprets int32x4_t as int16x8_t (overloaded form when POLYMORPHIC is defined).
+int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s16_s32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s16_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// Reinterprets int64x2_t as int16x8_t (overloaded form when POLYMORPHIC is defined).
+int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s16_s64(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s16_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// Reinterprets int8x16_t as int16x8_t (overloaded form when POLYMORPHIC is defined).
+int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s16_s8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s16_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x i16> [[A:%.*]]
+//
+// Reinterprets uint16x8_t as int16x8_t (overloaded form when POLYMORPHIC is defined).
+int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s16_u16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s16_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// Reinterprets uint32x4_t as int16x8_t (overloaded form when POLYMORPHIC is defined).
+int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s16_u32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s16_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// Reinterprets uint64x2_t as int16x8_t (overloaded form when POLYMORPHIC is defined).
+int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s16_u64(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s16_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// Reinterprets uint8x16_t as int16x8_t (overloaded form when POLYMORPHIC is defined).
+int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s16_u8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s32_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// Reinterprets float16x8_t as int32x4_t (overloaded form when POLYMORPHIC is defined).
+int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s32_f16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s32_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// Reinterprets float32x4_t as int32x4_t (overloaded form when POLYMORPHIC is defined).
+int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s32_f32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s32_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// Reinterprets int16x8_t as int32x4_t (overloaded form when POLYMORPHIC is defined).
+int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s32_s16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s32_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// Reinterprets int64x2_t as int32x4_t (overloaded form when POLYMORPHIC is defined).
+int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s32_s64(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s32_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// Reinterprets int8x16_t as int32x4_t (overloaded form when POLYMORPHIC is defined).
+int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s32_s8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s32_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// Reinterprets uint16x8_t as int32x4_t (overloaded form when POLYMORPHIC is defined).
+int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s32_u16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s32_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> [[A:%.*]]
+//
+// Reinterprets uint32x4_t as int32x4_t (overloaded form when POLYMORPHIC is defined).
+int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s32_u32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s32_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// Reinterprets uint64x2_t as int32x4_t (overloaded form when POLYMORPHIC is defined).
+int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s32_u64(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s32_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// Reinterprets uint8x16_t as int32x4_t (overloaded form when POLYMORPHIC is defined).
+int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s32_u8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s64_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// Reinterprets float16x8_t as int64x2_t (overloaded form when POLYMORPHIC is defined).
+int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s64_f16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s64_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// Reinterprets float32x4_t as int64x2_t (overloaded form when POLYMORPHIC is defined).
+int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s64_f32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s64_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// Reinterprets int16x8_t as int64x2_t (overloaded form when POLYMORPHIC is defined).
+int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s64_s16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s64_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// Reinterprets int32x4_t as int64x2_t (overloaded form when POLYMORPHIC is defined).
+int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s64_s32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s64_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// Reinterprets int8x16_t as int64x2_t (overloaded form when POLYMORPHIC is defined).
+int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s64_s8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s64_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// Reinterprets uint16x8_t as int64x2_t (overloaded form when POLYMORPHIC is defined).
+int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s64_u16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s64_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// Reinterprets uint32x4_t as int64x2_t (overloaded form when POLYMORPHIC is defined).
+int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s64_u32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s64_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <2 x i64> [[A:%.*]]
+//
+// Reinterprets uint64x2_t as int64x2_t (overloaded form when POLYMORPHIC is defined).
+int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s64_u64(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s64_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// Reinterprets uint8x16_t as int64x2_t (overloaded form when POLYMORPHIC is defined).
+int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s64_u8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s8_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// Reinterprets float16x8_t as int8x16_t (overloaded form when POLYMORPHIC is defined).
+int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s8_f16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s8_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// Reinterprets float32x4_t as int8x16_t (overloaded form when POLYMORPHIC is defined).
+int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s8_f32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s8_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// Reinterprets int16x8_t as int8x16_t (overloaded form when POLYMORPHIC is defined).
+int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s8_s16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s8_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// Reinterprets int32x4_t as int8x16_t (overloaded form when POLYMORPHIC is defined).
+int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s8_s32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s8_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// Reinterprets int64x2_t as int8x16_t (overloaded form when POLYMORPHIC is defined).
+int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s8_s64(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s8_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// Reinterprets uint16x8_t as int8x16_t (overloaded form when POLYMORPHIC is defined).
+int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s8_u16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s8_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// Reinterprets uint32x4_t as int8x16_t (overloaded form when POLYMORPHIC is defined).
+int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s8_u32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s8_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// Reinterprets uint64x2_t as int8x16_t (overloaded form when POLYMORPHIC is defined).
+int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s8_u64(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_s8_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <16 x i8> [[A:%.*]]
+//
+// Reinterprets uint8x16_t as int8x16_t (overloaded form when POLYMORPHIC is defined).
+int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_s8_u8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_s8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u16_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// Reinterprets float16x8_t as uint16x8_t (overloaded form when POLYMORPHIC is defined).
+uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u16_f16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u16_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// Reinterprets float32x4_t as uint16x8_t (overloaded form when POLYMORPHIC is defined).
+uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u16_f32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u16_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x i16> [[A:%.*]]
+//
+// Reinterprets int16x8_t as uint16x8_t (overloaded form when POLYMORPHIC is defined).
+uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u16_s16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u16_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// Reinterprets int32x4_t as uint16x8_t (overloaded form when POLYMORPHIC is defined).
+uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u16_s32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u16_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// Reinterprets int64x2_t as uint16x8_t (overloaded form when POLYMORPHIC is defined).
+uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u16_s64(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u16_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// Reinterprets int8x16_t as uint16x8_t (overloaded form when POLYMORPHIC is defined).
+uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u16_s8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u16_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// Reinterprets uint32x4_t as uint16x8_t (overloaded form when POLYMORPHIC is defined).
+uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u16_u32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u16_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// Reinterprets uint64x2_t as uint16x8_t (overloaded form when POLYMORPHIC is defined).
+uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u16_u64(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u16_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+// Reinterprets uint8x16_t as uint16x8_t (overloaded form when POLYMORPHIC is defined).
+uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u16_u8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u16(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u32_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// Reinterprets float16x8_t as uint32x4_t (overloaded form when POLYMORPHIC is defined).
+uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u32_f16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u32_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// Reinterprets float32x4_t as uint32x4_t (overloaded form when POLYMORPHIC is defined).
+uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u32_f32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u32_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// Reinterprets int16x8_t as uint32x4_t (overloaded form when POLYMORPHIC is defined).
+uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u32_s16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u32_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> [[A:%.*]]
+//
+// Reinterprets int32x4_t as uint32x4_t (overloaded form when POLYMORPHIC is defined).
+uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u32_s32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u32_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// Reinterprets int64x2_t as uint32x4_t (overloaded form when POLYMORPHIC is defined).
+uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u32_s64(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u32_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// Reinterprets int8x16_t as uint32x4_t (overloaded form when POLYMORPHIC is defined).
+uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u32_s8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u32_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// Reinterprets uint16x8_t as uint32x4_t (overloaded form when POLYMORPHIC is defined).
+uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u32_u16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u32_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// Reinterprets uint64x2_t as uint32x4_t (overloaded form when POLYMORPHIC is defined).
+uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u32_u64(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u32_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+// Reinterprets uint8x16_t as uint32x4_t (overloaded form when POLYMORPHIC is defined).
+uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u32_u8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u32(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u64_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// Reinterprets float16x8_t as uint64x2_t (overloaded form when POLYMORPHIC is defined).
+uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u64_f16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u64_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// Reinterprets float32x4_t as uint64x2_t (overloaded form when POLYMORPHIC is defined).
+uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u64_f32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u64_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// Reinterprets int16x8_t as uint64x2_t (overloaded form when POLYMORPHIC is defined).
+uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u64_s16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u64_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// Reinterprets int32x4_t as uint64x2_t (overloaded form when POLYMORPHIC is defined).
+uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u64_s32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u64_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <2 x i64> [[A:%.*]]
+//
+// Reinterprets int64x2_t as uint64x2_t (overloaded form when POLYMORPHIC is defined).
+uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u64_s64(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u64_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// Reinterprets int8x16_t as uint64x2_t (overloaded form when POLYMORPHIC is defined).
+uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u64_s8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u64_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// Reinterprets uint16x8_t as uint64x2_t (overloaded form when POLYMORPHIC is defined).
+uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u64_u16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u64_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// Reinterprets uint32x4_t as uint64x2_t (overloaded form when POLYMORPHIC is defined).
+uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u64_u32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u64_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <16 x i8> [[A:%.*]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+//
+// Reinterprets uint8x16_t as uint64x2_t (overloaded form when POLYMORPHIC is defined).
+uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u64_u8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u64(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u8_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x half> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// Reinterprets float16x8_t as uint8x16_t (overloaded form when POLYMORPHIC is defined).
+uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u8_f16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u8_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// Reinterprets float32x4_t as uint8x16_t (overloaded form when POLYMORPHIC is defined).
+uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u8_f32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u8_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// Reinterprets int16x8_t as uint8x16_t (overloaded form when POLYMORPHIC is defined).
+uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u8_s16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u8_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// Reinterprets int32x4_t as uint8x16_t (overloaded form when POLYMORPHIC is defined).
+uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u8_s32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u8_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// Reinterprets int64x2_t as uint8x16_t (overloaded form when POLYMORPHIC is defined).
+uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u8_s64(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u8_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <16 x i8> [[A:%.*]]
+//
+// Reinterprets int8x16_t as uint8x16_t (overloaded form when POLYMORPHIC is defined).
+uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u8_s8(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u8_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// Reinterprets uint16x8_t as uint8x16_t (overloaded form when POLYMORPHIC is defined).
+uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u8_u16(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u8_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// Reinterprets uint32x4_t as uint8x16_t (overloaded form when POLYMORPHIC is defined).
+uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u8_u32(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vreinterpretq_u8_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+// Reinterprets uint64x2_t as uint8x16_t (overloaded form when POLYMORPHIC is defined).
+uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
+#if !defined(POLYMORPHIC)
+    return vreinterpretq_u8_u64(a);
+#else /* defined(POLYMORPHIC) */
+    return vreinterpretq_u8(a);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast float [[A_COERCE:%.*]] to i32
+// CHECK-NEXT:    [[TMP_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[TMP_0_EXTRACT_TRUNC]] to half
+// CHECK-NEXT:    [[TMP2:%.*]] = insertelement <8 x half> [[B:%.*]], half [[TMP1]], i32 4
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
+//
+// Stores scalar a into lane 4 of b (overloaded form when POLYMORPHIC is defined).
+float16x8_t test_vsetq_lane_f16(float16_t a, float16x8_t b) {
+#if !defined(POLYMORPHIC)
+    return vsetq_lane_f16(a, b, 4);
+#else /* defined(POLYMORPHIC) */
+    return vsetq_lane(a, b, 4);
+#endif /* !defined(POLYMORPHIC) */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A:%.*]], i32 2
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+// A single insertelement writes `a` into lane 2 of `b`; no conversion.
+float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 2);
+#else /* POLYMORPHIC */
+    return vsetq_lane_f32(a, b, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[A:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i8> [[B:%.*]], i8 [[A]], i32 12
+// CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+// `a` goes into lane 12 as is; the zext of `a` to i32 (TMP0) is left unused.
+int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 12);
+#else /* POLYMORPHIC */
+    return vsetq_lane_s8(a, b, 12);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i16> [[B:%.*]], i16 [[A]], i32 6
+// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+// `a` goes into lane 6 as is; the zext of `a` to i32 (TMP0) is left unused.
+int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 6);
+#else /* POLYMORPHIC */
+    return vsetq_lane_s16(a, b, 6);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> [[B:%.*]], i32 [[A:%.*]], i32 2
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+// A single insertelement writes `a` into lane 2 of `b`.
+int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 2);
+#else /* POLYMORPHIC */
+    return vsetq_lane_s32(a, b, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> [[B:%.*]], i64 [[A:%.*]], i32 0
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+// A single insertelement writes `a` into lane 0 of `b`.
+int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 0);
+#else /* POLYMORPHIC */
+    return vsetq_lane_s64(a, b, 0);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i8 [[A:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <16 x i8> [[B:%.*]], i8 [[A]], i32 2
+// CHECK-NEXT:    ret <16 x i8> [[TMP1]]
+// `a` goes into lane 2 as is; the zext of `a` to i32 (TMP0) is left unused.
+uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 2);
+#else /* POLYMORPHIC */
+    return vsetq_lane_u8(a, b, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[A:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i16> [[B:%.*]], i16 [[A]], i32 7
+// CHECK-NEXT:    ret <8 x i16> [[TMP1]]
+// `a` goes into lane 7 as is; the zext of `a` to i32 (TMP0) is left unused.
+uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 7);
+#else /* POLYMORPHIC */
+    return vsetq_lane_u16(a, b, 7);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> [[B:%.*]], i32 [[A:%.*]], i32 0
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+// A single insertelement writes `a` into lane 0 of `b`.
+uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 0);
+#else /* POLYMORPHIC */
+    return vsetq_lane_u32(a, b, 0);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vsetq_lane_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i64> [[B:%.*]], i64 [[A:%.*]], i32 1
+// CHECK-NEXT:    ret <2 x i64> [[TMP0]]
+// A single insertelement writes `a` into lane 1 of `b`.
+uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b)
+{
+#ifdef POLYMORPHIC
+    return vsetq_lane(a, b, 1);
+#else /* POLYMORPHIC */
+    return vsetq_lane_u64(a, b, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vuninitializedq_polymorphic_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x half> undef
+// funcptr() only picks the type: no call is emitted, just undef <8 x half>.
+float16x8_t test_vuninitializedq_polymorphic_f16(float16x8_t (*funcptr)(void))
+{
+    return vuninitializedq(funcptr());
+}
+
+// CHECK-LABEL: @test_vuninitializedq_polymorphic_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x float> undef
+// funcptr() only picks the type: no call is emitted, just undef <4 x float>.
+float32x4_t test_vuninitializedq_polymorphic_f32(float32x4_t (*funcptr)(void))
+{
+    return vuninitializedq(funcptr());
+}
+
+// CHECK-LABEL: @test_vuninitializedq_polymorphic_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <16 x i8> undef
+// funcptr() only picks the type: no call is emitted, just undef <16 x i8>.
+int8x16_t test_vuninitializedq_polymorphic_s8(int8x16_t (*funcptr)(void))
+{
+    return vuninitializedq(funcptr());
+}
+
+// CHECK-LABEL: @test_vuninitializedq_polymorphic_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x i16> undef
+// funcptr() only picks the type: no call is emitted, just undef <8 x i16>.
+int16x8_t test_vuninitializedq_polymorphic_s16(int16x8_t (*funcptr)(void))
+{
+    return vuninitializedq(funcptr());
+}
+
+// CHECK-LABEL: @test_vuninitializedq_polymorphic_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> undef
+// funcptr() only picks the type: no call is emitted, just undef <4 x i32>.
+int32x4_t test_vuninitializedq_polymorphic_s32(int32x4_t (*funcptr)(void))
+{
+    return vuninitializedq(funcptr());
+}
+
+// CHECK-LABEL: @test_vuninitializedq_polymorphic_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <2 x i64> undef
+// funcptr() only picks the type: no call is emitted, just undef <2 x i64>.
+int64x2_t test_vuninitializedq_polymorphic_s64(int64x2_t (*funcptr)(void))
+{
+    return vuninitializedq(funcptr());
+}
+
+// CHECK-LABEL: @test_vuninitializedq_polymorphic_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <16 x i8> undef
+// funcptr() only picks the type: no call is emitted, just undef <16 x i8>.
+uint8x16_t test_vuninitializedq_polymorphic_u8(uint8x16_t (*funcptr)(void))
+{
+    return vuninitializedq(funcptr());
+}
+
+// CHECK-LABEL: @test_vuninitializedq_polymorphic_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x i16> undef
+// funcptr() only picks the type: no call is emitted, just undef <8 x i16>.
+uint16x8_t test_vuninitializedq_polymorphic_u16(uint16x8_t (*funcptr)(void))
+{
+    return vuninitializedq(funcptr());
+}
+
+// CHECK-LABEL: @test_vuninitializedq_polymorphic_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> undef
+// funcptr() only picks the type: no call is emitted, just undef <4 x i32>.
+uint32x4_t test_vuninitializedq_polymorphic_u32(uint32x4_t (*funcptr)(void))
+{
+    return vuninitializedq(funcptr());
+}
+
+// CHECK-LABEL: @test_vuninitializedq_polymorphic_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <2 x i64> undef
+// funcptr() only picks the type: no call is emitted, just undef <2 x i64>.
+uint64x2_t test_vuninitializedq_polymorphic_u64(uint64x2_t (*funcptr)(void))
+{
+    return vuninitializedq(funcptr());
+}
+
+// CHECK-LABEL: @test_vuninitializedq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x half> undef
+// Explicitly typed, argument-free form: yields an undef <8 x half>.
+float16x8_t test_vuninitializedq_f16(void)
+{
+    return vuninitializedq_f16();
+}
+
+// CHECK-LABEL: @test_vuninitializedq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x float> undef
+// Explicitly typed, argument-free form: yields an undef <4 x float>.
+float32x4_t test_vuninitializedq_f32(void)
+{
+    return vuninitializedq_f32();
+}
+
+// CHECK-LABEL: @test_vuninitializedq_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x i16> undef
+// Explicitly typed, argument-free form: yields an undef <8 x i16>.
+int16x8_t test_vuninitializedq_s16(void)
+{
+    return vuninitializedq_s16();
+}
+
+// CHECK-LABEL: @test_vuninitializedq_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> undef
+// Explicitly typed, argument-free form: yields an undef <4 x i32>.
+int32x4_t test_vuninitializedq_s32(void)
+{
+    return vuninitializedq_s32();
+}
+
+// CHECK-LABEL: @test_vuninitializedq_s64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <2 x i64> undef
+// Explicitly typed, argument-free form: yields an undef <2 x i64>.
+int64x2_t test_vuninitializedq_s64(void)
+{
+    return vuninitializedq_s64();
+}
+
+// CHECK-LABEL: @test_vuninitializedq_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <16 x i8> undef
+// Explicitly typed, argument-free form: yields an undef <16 x i8>.
+int8x16_t test_vuninitializedq_s8(void)
+{
+    return vuninitializedq_s8();
+}
+
+// CHECK-LABEL: @test_vuninitializedq_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x i16> undef
+// Explicitly typed, argument-free form: yields an undef <8 x i16>.
+uint16x8_t test_vuninitializedq_u16(void)
+{
+    return vuninitializedq_u16();
+}
+
+// CHECK-LABEL: @test_vuninitializedq_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> undef
+// Explicitly typed, argument-free form: yields an undef <4 x i32>.
+uint32x4_t test_vuninitializedq_u32(void)
+{
+    return vuninitializedq_u32();
+}
+
+// CHECK-LABEL: @test_vuninitializedq_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <2 x i64> undef
+// Explicitly typed, argument-free form: yields an undef <2 x i64>.
+uint64x2_t test_vuninitializedq_u64(void)
+{
+    return vuninitializedq_u64();
+}
+
+// CHECK-LABEL: @test_vuninitializedq_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <16 x i8> undef
+// Explicitly typed, argument-free form: yields an undef <16 x i8>.
+uint8x16_t test_vuninitializedq_u8(void)
+{
+    return vuninitializedq_u8();
+}
+
Index: clang/include/clang/Basic/arm_mve_defs.td
===================================================================
--- clang/include/clang/Basic/arm_mve_defs.td
+++ clang/include/clang/Basic/arm_mve_defs.td
@@ -29,6 +29,11 @@
 // -----------------------------------------------------------------------------
 // Family of nodes for use in the codegen dag for an intrinsic, corresponding
 // to function calls that return LLVM IR nodes.
+class IRBuilderParam<int index_> { int index = index_; } // 0-based arg index
+class IRBuilderAddrParam<int index_> : IRBuilderParam<index_>; // an Address
+class IRBuilderIntParam<int index_, string type_> : IRBuilderParam<index_> {
+  string type = type_; // C++ integer type the argument is static_cast to
+}
 class IRBuilderBase {
   // The prefix of the function call, including an open parenthesis.
   string prefix;
@@ -36,8 +41,7 @@
   // Any parameters that have types that have to be treated specially by the
   // Tablegen back end. Generally these will be types other than llvm::Value *,
   // although not all other types need special treatment (e.g. llvm::Type *).
-  list<int> address_params = []; // indices of parameters with type Address
-  list<int> int_constant_params = []; // indices of plain integer parameters
+  list<IRBuilderParam> special_params = [];
 }
 class IRBuilder<string func> : IRBuilderBase {
   // The usual case: a method called on the code gen function's instance of
@@ -61,12 +65,27 @@
 def lshr: IRBuilder<"CreateLShr">;
 def fadd: IRBuilder<"CreateFAdd">;
 def fsub: IRBuilder<"CreateFSub">;
-def load: IRBuilder<"CreateLoad"> { let address_params = [0]; }
-def store: IRBuilder<"CreateStore"> { let address_params = [1]; }
-def xval: IRBuilder<"CreateExtractValue"> { let int_constant_params = [1]; }
+def load: IRBuilder<"CreateLoad"> {
+  let special_params = [IRBuilderAddrParam<0>]; // argument 0 is an Address
+}
+def store: IRBuilder<"CreateStore"> {
+  let special_params = [IRBuilderAddrParam<1>]; // argument 1 is an Address
+}
+def xval: IRBuilder<"CreateExtractValue"> {
+  let special_params = [IRBuilderIntParam<1, "unsigned">]; // plain index
+}
+def ielt_const: IRBuilder<"CreateInsertElement"> {
+  let special_params = [IRBuilderIntParam<2, "uint64_t">]; // literal lane
+}
+def ielt_var: IRBuilder<"CreateInsertElement">; // no specially-typed args
+def xelt_var: IRBuilder<"CreateExtractElement">; // no specially-typed args
 def trunc: IRBuilder<"CreateTrunc">;
-def extend: CGHelperFn<"SignOrZeroExtend"> { let int_constant_params = [2]; }
+def bitcast: IRBuilder<"CreateBitCast">; // used by vreinterpretq, vcreateq
+def extend: CGHelperFn<"SignOrZeroExtend"> {
+  let special_params = [IRBuilderIntParam<2, "bool">]; // argument 2: a bool
+}
 def zeroinit: IRFunction<"llvm::Constant::getNullValue">;
+def undef: IRFunction<"UndefValue::get">; // used by vcreateq, vuninitializedq
 
 // A node that makes an Address out of a pointer-typed Value, by
 // providing an alignment as the second argument.
@@ -268,7 +287,7 @@
 
 // imm_lane has to be the index of a vector lane in the main vector type, i.e
 // it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane)
-def imm_lane : Immediate<u32, IB_LaneIndex>;
+def imm_lane : Immediate<sint, IB_LaneIndex>;
 
 // imm_1to32 can be in the range 1 to 32, unconditionally. (e.g. scalar shift
 // intrinsics)
@@ -350,6 +369,12 @@
 
   // Default to PNT_Type, which is by far the most common case.
   PolymorphicNameType pnt = PNT_Type;
+
+  // A very few intrinsics _only_ have a polymorphic name.
+  bit polymorphicOnly = 0; // set by vuninitializedq_polymorphic
+
+  // True if the builtin has to avoid evaluating its arguments.
+  bit nonEvaluating = 0; // set by vuninitializedq_polymorphic
 }
 
 // Sometimes you have to use two separate Intrinsic declarations to
Index: clang/include/clang/Basic/arm_mve.td
===================================================================
--- clang/include/clang/Basic/arm_mve.td
+++ clang/include/clang/Basic/arm_mve.td
@@ -373,3 +373,48 @@
          (store (and 1, (lshr (xval $pair, 1), 29)), $carry),
          (xval $pair, 0))>;
 }
+
+foreach desttype = T.All in {
+  // We want a vreinterpretq between every pair of supported vector types
+  // _except_ that there shouldn't be one from a type to itself.
+  //
+  // So this foldl expression implements what you'd write in Python as
+  // [srctype for srctype in T.All if srctype != desttype]
+  let params = !foldl([]<Type>, T.All, tlist, srctype, !listconcat(tlist,
+  !if(!eq(!cast<string>(desttype),!cast<string>(srctype)),[],[srctype]))) in {
+    def "vreinterpretq_" # desttype: Intrinsic< // e.g. vreinterpretq_u8
+        VecOf<desttype>, (args Vector:$x), (bitcast $x, VecOf<desttype>)>;
+  }
+}
+
+let params = T.All in {
+  let pnt = PNT_None in {
+    def vcreateq: Intrinsic<Vector, (args u64:$a, u64:$b),
+        (bitcast (ielt_const (ielt_const (undef VecOf<u64>), $a, 0),
+                             $b, 1), Vector)>; // $a -> lane 0, $b -> lane 1
+    def vuninitializedq: Intrinsic<Vector, (args), (undef Vector)>;
+  }
+
+  // This is the polymorphic form of vuninitializedq, which takes no type
+  // suffix, but takes an _unevaluated_ vector parameter and returns an
+  // uninitialized vector of the same vector type.
+  //
+  // This intrinsic has no _non_-polymorphic form exposed to the user. But each
+  // separately typed version of it still has to have its own clang builtin id,
+  // which can't be called vuninitializedq_u32 or similar because that would
+  // collide with the explicit nullary versions above. So I'm calling them
+  // vuninitializedq_polymorphic_u32 (and so on) for builtin id purposes; that
+  // full name never appears in the header file due to the polymorphicOnly
+  // flag, and the _polymorphic suffix is omitted from the shortened name by
+  // the custom PolymorphicNameType here.
+  let polymorphicOnly = 1, nonEvaluating = 1,
+      pnt = PolymorphicNameType<1, "polymorphic"> in {
+    def vuninitializedq_polymorphic: Intrinsic<
+        Vector, (args Vector), (undef Vector)>; // the argument is never used
+  }
+
+  def vgetq_lane: Intrinsic<Scalar, (args Vector:$v, imm_lane:$lane),
+                            (xelt_var $v, $lane)>; // read lane $lane of $v
+  def vsetq_lane: Intrinsic<Vector, (args Scalar:$e, Vector:$v, imm_lane:$lane),
+                            (ielt_var $v, (trunc $e, Scalar), $lane)>;
+}
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to