Author: Eric Christopher Date: 2019-12-09T16:47:38-08:00 New Revision: 9c6b7f68b807250e7c3aa01938339fdbd239c4ea
URL: https://github.com/llvm/llvm-project/commit/9c6b7f68b807250e7c3aa01938339fdbd239c4ea DIFF: https://github.com/llvm/llvm-project/commit/9c6b7f68b807250e7c3aa01938339fdbd239c4ea.diff LOG: Revert "[ARM][MVE] Add intrinsics for immediate shifts." and two follow-on commits: one warning fix and one functionality. As it's breaking at least the lto bot: http://lab.llvm.org:8011/builders/clang-with-lto-ubuntu/builds/15132/steps/test-stage1-compiler/logs/stdio This reverts commits: 8d70f3c933a5b81a87a5ab1af0e3e98ee2cd7c67 ff4dceef9201c5ae3924e92f6955977f243ac71d d97b3e3e65cd77a81b39732af84a1a4229e95091 Added: Modified: clang/include/clang/Basic/arm_mve.td clang/include/clang/Basic/arm_mve_defs.td clang/lib/CodeGen/CGBuiltin.cpp clang/utils/TableGen/MveEmitter.cpp llvm/include/llvm/IR/IntrinsicsARM.td llvm/lib/Target/ARM/ARMInstrMVE.td Removed: clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll ################################################################################ diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index 426d3b5a2f44..f3d3f4124101 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -609,33 +609,6 @@ defm vstrhq: scatter_offset_both<!listconcat(T.All16, T.Int32), u16, 1>; defm vstrwq: scatter_offset_both<T.All32, u32, 2>; defm vstrdq: scatter_offset_both<T.Int64, u64, 3>; -multiclass PredicatedImmediateVectorShift< - Immediate immtype, string predIntrName, list<dag> unsignedFlag = []> { - foreach predIntr = [IRInt<predIntrName, [Vector, Predicate]>] in { - def _m_n: Intrinsic<Vector, (args Vector:$inactive, Vector:$v, - immtype:$sh, Predicate:$pred), - !con((predIntr $v, $sh), !dag(predIntr, unsignedFlag, ?), - (predIntr $pred, $inactive))>; - def _x_n: Intrinsic<Vector, (args Vector:$v, immtype:$sh, - Predicate:$pred), - !con((predIntr $v, $sh), !dag(predIntr, unsignedFlag, ?), - (predIntr $pred, (undef Vector)))>; - } -} - -let params = T.Int in { - def vshlq_n: Intrinsic<Vector, (args Vector:$v, imm_0toNm1:$sh), - (shl $v, (splat (Scalar $sh)))>; - defm vshlq: PredicatedImmediateVectorShift<imm_0toNm1, "shl_imm_predicated">; - - let pnt = PNT_NType in { - def vshrq_n: Intrinsic<Vector, (args Vector:$v, imm_1toN:$sh), - (immshr $v, $sh, (unsignedflag Scalar))>; - defm vshrq: PredicatedImmediateVectorShift<imm_1toN, "shr_imm_predicated", - [(unsignedflag Scalar)]>; - } -} - // Base class for the scalar shift intrinsics. class ScalarShift<Type argtype, dag shiftCountArg, dag shiftCodeGen>: Intrinsic<argtype, !con((args argtype:$value), shiftCountArg), shiftCodeGen> { diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td index 6bc9b35f0fc4..1d72cc45796c 100644 --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -66,10 +66,6 @@ def xor: IRBuilder<"CreateXor">; def sub: IRBuilder<"CreateSub">; def shl: IRBuilder<"CreateShl">; def lshr: IRBuilder<"CreateLShr">; -def immshr: CGHelperFn<"MVEImmediateShr"> { - let special_params = [IRBuilderIntParam<1, "unsigned">, - IRBuilderIntParam<2, "bool">]; -} def fadd: IRBuilder<"CreateFAdd">; def fmul: IRBuilder<"CreateFMul">; def fsub: IRBuilder<"CreateFSub">; @@ -322,8 +318,8 @@ def imm_simd_vmvn : Immediate<u32, IB_UEltValue> { // // imm_0toNm1 is the same but with the range offset by 1, i.e. 0 to N-1 // inclusive. -def imm_1toN : Immediate<sint, IB_EltBit<1>>; -def imm_0toNm1 : Immediate<sint, IB_EltBit<0>>; +def imm_1toN : Immediate<u32, IB_EltBit<1>>; +def imm_0toNm1 : Immediate<u32, IB_EltBit<0>>; // imm_lane has to be the index of a vector lane in the main vector type, i.e // it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 890019ac51c2..7447a5841599 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6802,14 +6802,6 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } } -template<typename Integer> -static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) { - llvm::APSInt IntVal; - assert(E->isIntegerConstantExpr(IntVal, Context) && - "Sema should have checked this was a constant"); - return IntVal.getExtValue(); -} - static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned) { // Helper function called by Tablegen-constructed ARM MVE builtin codegen, @@ -6817,27 +6809,6 @@ static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T); } -static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, - uint32_t Shift, bool Unsigned) { - // MVE helper function for integer shift right. This must handle signed vs - // unsigned, and also deal specially with the case where the shift count is - // equal to the lane size. In LLVM IR, an LShr with that parameter would be - // undefined behavior, but in MVE it's legal, so we must convert it to code - // that is not undefined in IR. - unsigned LaneBits = - V->getType()->getVectorElementType()->getPrimitiveSizeInBits(); - if (Shift == LaneBits) { - // An unsigned shift of the full lane size always generates zero, so we can - // simply emit a zero vector. A signed shift of the full lane size does the - // same thing as shifting by one bit fewer. - if (Unsigned) - return llvm::Constant::getNullValue(V->getType()); - else - --Shift; - } - return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift); -} - static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) { // MVE-specific helper function for a vector splat, which infers the element // count of the output vector by knowing that MVE vectors are all 128 bits diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c deleted file mode 100644 index 200273c03654..000000000000 --- a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c +++ /dev/null @@ -1,722 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s -// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s - -#include <arm_mve.h> - -// CHECK-LABEL: @test_vshlq_n_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5> -// CHECK-NEXT: ret <16 x i8> [[TMP0]] -// -int8x16_t test_vshlq_n_s8(int8x16_t a) -{ -#ifdef POLYMORPHIC - return vshlq_n(a, 5); -#else /* POLYMORPHIC */ - return vshlq_n_s8(a, 5); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_n_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5> -// CHECK-NEXT: ret <8 x i16> [[TMP0]] -// -int16x8_t test_vshlq_n_s16(int16x8_t a) -{ -#ifdef POLYMORPHIC - return vshlq_n(a, 5); -#else /* POLYMORPHIC */ - return vshlq_n_s16(a, 5); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_n_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], <i32 18, i32 18, i32 18, i32 18> -// CHECK-NEXT: ret <4 x i32> [[TMP0]] -// -int32x4_t test_vshlq_n_s32(int32x4_t a) -{ -#ifdef POLYMORPHIC - return vshlq_n(a, 18); -#else /* POLYMORPHIC */ - return vshlq_n_s32(a, 18); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_n_s8_trivial( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], zeroinitializer -// CHECK-NEXT: ret <16 x i8> [[TMP0]] -// -int8x16_t test_vshlq_n_s8_trivial(int8x16_t a) -{ -#ifdef POLYMORPHIC - return vshlq_n(a, 0); -#else /* POLYMORPHIC */ - return vshlq_n_s8(a, 0); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_n_s16_trivial( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], zeroinitializer -// CHECK-NEXT: ret <8 x i16> [[TMP0]] -// -int16x8_t test_vshlq_n_s16_trivial(int16x8_t a) -{ -#ifdef POLYMORPHIC - return vshlq_n(a, 0); -#else /* POLYMORPHIC */ - return vshlq_n_s16(a, 0); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_n_s32_trivial( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], zeroinitializer -// CHECK-NEXT: ret <4 x i32> [[TMP0]] -// -int32x4_t test_vshlq_n_s32_trivial(int32x4_t a) -{ -#ifdef POLYMORPHIC - return vshlq_n(a, 0); -#else /* POLYMORPHIC */ - return vshlq_n_s32(a, 0); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_n_u8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> -// CHECK-NEXT: ret <16 x i8> [[TMP0]] -// -uint8x16_t test_vshlq_n_u8(uint8x16_t a) -{ -#ifdef POLYMORPHIC - return vshlq_n(a, 3); -#else /* POLYMORPHIC */ - return vshlq_n_u8(a, 3); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_n_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> -// CHECK-NEXT: ret <8 x i16> [[TMP0]] -// -uint16x8_t test_vshlq_n_u16(uint16x8_t a) -{ -#ifdef POLYMORPHIC - return vshlq_n(a, 11); -#else /* POLYMORPHIC */ - return vshlq_n_u16(a, 11); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_n_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], <i32 7, i32 7, i32 7, i32 7> -// CHECK-NEXT: ret <4 x i32> [[TMP0]] -// -uint32x4_t test_vshlq_n_u32(uint32x4_t a) -{ -#ifdef POLYMORPHIC - return vshlq_n(a, 7); -#else /* POLYMORPHIC */ - return vshlq_n_u32(a, 7); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_n_u8_trivial( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], zeroinitializer -// CHECK-NEXT: ret <16 x i8> [[TMP0]] -// -uint8x16_t test_vshlq_n_u8_trivial(uint8x16_t a) -{ -#ifdef POLYMORPHIC - return vshlq_n(a, 0); -#else /* POLYMORPHIC */ - return vshlq_n_u8(a, 0); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_n_u16_trivial( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], zeroinitializer -// CHECK-NEXT: ret <8 x i16> [[TMP0]] -// -uint16x8_t test_vshlq_n_u16_trivial(uint16x8_t a) -{ -#ifdef POLYMORPHIC - return vshlq_n(a, 0); -#else /* POLYMORPHIC */ - return vshlq_n_u16(a, 0); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_n_u32_trivial( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], zeroinitializer -// CHECK-NEXT: ret <4 x i32> [[TMP0]] -// -uint32x4_t test_vshlq_n_u32_trivial(uint32x4_t a) -{ -#ifdef POLYMORPHIC - return vshlq_n(a, 0); -#else /* POLYMORPHIC */ - return vshlq_n_u32(a, 0); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_n_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4> -// CHECK-NEXT: ret <16 x i8> [[TMP0]] -// -int8x16_t test_vshrq_n_s8(int8x16_t a) -{ -#ifdef POLYMORPHIC - return vshrq(a, 4); -#else /* POLYMORPHIC */ - return vshrq_n_s8(a, 4); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_n_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10> -// CHECK-NEXT: ret <8 x i16> [[TMP0]] -// -int16x8_t test_vshrq_n_s16(int16x8_t a) -{ -#ifdef POLYMORPHIC - return vshrq(a, 10); -#else /* POLYMORPHIC */ - return vshrq_n_s16(a, 10); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_n_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], <i32 19, i32 19, i32 19, i32 19> -// CHECK-NEXT: ret <4 x i32> [[TMP0]] -// -int32x4_t test_vshrq_n_s32(int32x4_t a) -{ -#ifdef POLYMORPHIC - return vshrq(a, 19); -#else /* POLYMORPHIC */ - return vshrq_n_s32(a, 19); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_n_s8_trivial( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7> -// CHECK-NEXT: ret <16 x i8> [[TMP0]] -// -int8x16_t test_vshrq_n_s8_trivial(int8x16_t a) -{ -#ifdef POLYMORPHIC - return vshrq(a, 8); -#else /* POLYMORPHIC */ - return vshrq_n_s8(a, 8); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_n_s16_trivial( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> -// CHECK-NEXT: ret <8 x i16> [[TMP0]] -// -int16x8_t test_vshrq_n_s16_trivial(int16x8_t a) -{ -#ifdef POLYMORPHIC - return vshrq(a, 16); -#else /* POLYMORPHIC */ - return vshrq_n_s16(a, 16); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_n_s32_trivial( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], <i32 31, i32 31, i32 31, i32 31> -// CHECK-NEXT: ret <4 x i32> [[TMP0]] -// -int32x4_t test_vshrq_n_s32_trivial(int32x4_t a) -{ -#ifdef POLYMORPHIC - return vshrq(a, 32); -#else /* POLYMORPHIC */ - return vshrq_n_s32(a, 32); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_n_u8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = lshr <16 x i8> [[A:%.*]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> -// CHECK-NEXT: ret <16 x i8> [[TMP0]] -// -uint8x16_t test_vshrq_n_u8(uint8x16_t a) -{ -#ifdef POLYMORPHIC - return vshrq(a, 1); -#else /* POLYMORPHIC */ - return vshrq_n_u8(a, 1); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_n_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = lshr <8 x i16> [[A:%.*]], <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10> -// CHECK-NEXT: ret <8 x i16> [[TMP0]] -// -uint16x8_t test_vshrq_n_u16(uint16x8_t a) -{ -#ifdef POLYMORPHIC - return vshrq(a, 10); -#else /* POLYMORPHIC */ - return vshrq_n_u16(a, 10); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_n_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = lshr <4 x i32> [[A:%.*]], <i32 10, i32 10, i32 10, i32 10> -// CHECK-NEXT: ret <4 x i32> [[TMP0]] -// -uint32x4_t test_vshrq_n_u32(uint32x4_t a) -{ -#ifdef POLYMORPHIC - return vshrq(a, 10); -#else /* POLYMORPHIC */ - return vshrq_n_u32(a, 10); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_n_u8_trivial( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret <16 x i8> zeroinitializer -// -uint8x16_t test_vshrq_n_u8_trivial(uint8x16_t a) -{ -#ifdef POLYMORPHIC - return vshrq(a, 8); -#else /* POLYMORPHIC */ - return vshrq_n_u8(a, 8); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_n_u16_trivial( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret <8 x i16> zeroinitializer -// -uint16x8_t test_vshrq_n_u16_trivial(uint16x8_t a) -{ -#ifdef POLYMORPHIC - return vshrq(a, 16); -#else /* POLYMORPHIC */ - return vshrq_n_u16(a, 16); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_n_u32_trivial( -// CHECK-NEXT: entry: -// CHECK-NEXT: ret <4 x i32> zeroinitializer -// -uint32x4_t test_vshrq_n_u32_trivial(uint32x4_t a) -{ -#ifdef POLYMORPHIC - return vshrq(a, 32); -#else /* POLYMORPHIC */ - return vshrq_n_u32(a, 32); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_m_n_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 6, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <16 x i8> [[TMP2]] -// -int8x16_t test_vshlq_m_n_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshlq_m_n(inactive, a, 6, p); -#else /* POLYMORPHIC */ - return vshlq_m_n_s8(inactive, a, 6, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_m_n_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 13, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] -// -int16x8_t test_vshlq_m_n_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshlq_m_n(inactive, a, 13, p); -#else /* POLYMORPHIC */ - return vshlq_m_n_s16(inactive, a, 13, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_m_n_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] -// -int32x4_t test_vshlq_m_n_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshlq_m_n(inactive, a, 0, p); -#else /* POLYMORPHIC */ - return vshlq_m_n_s32(inactive, a, 0, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_m_n_u8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 3, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <16 x i8> [[TMP2]] -// -uint8x16_t test_vshlq_m_n_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshlq_m_n(inactive, a, 3, p); -#else /* POLYMORPHIC */ - return vshlq_m_n_u8(inactive, a, 3, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_m_n_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] -// -uint16x8_t test_vshlq_m_n_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshlq_m_n(inactive, a, 1, p); -#else /* POLYMORPHIC */ - return vshlq_m_n_u16(inactive, a, 1, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_m_n_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 24, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] -// -uint32x4_t test_vshlq_m_n_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshlq_m_n(inactive, a, 24, p); -#else /* POLYMORPHIC */ - return vshlq_m_n_u32(inactive, a, 24, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_m_n_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 2, i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <16 x i8> [[TMP2]] -// -int8x16_t test_vshrq_m_n_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshrq_m(inactive, a, 2, p); -#else /* POLYMORPHIC */ - return vshrq_m_n_s8(inactive, a, 2, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_m_n_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 3, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] -// -int16x8_t test_vshrq_m_n_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshrq_m(inactive, a, 3, p); -#else /* POLYMORPHIC */ - return vshrq_m_n_s16(inactive, a, 3, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_m_n_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 13, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] -// -int32x4_t test_vshrq_m_n_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshrq_m(inactive, a, 13, p); -#else /* POLYMORPHIC */ - return vshrq_m_n_s32(inactive, a, 13, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_m_n_u8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <16 x i8> [[TMP2]] -// -uint8x16_t test_vshrq_m_n_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshrq_m(inactive, a, 4, p); -#else /* POLYMORPHIC */ - return vshrq_m_n_u8(inactive, a, 4, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_m_n_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 14, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] -// -uint16x8_t test_vshrq_m_n_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshrq_m(inactive, a, 14, p); -#else /* POLYMORPHIC */ - return vshrq_m_n_u16(inactive, a, 14, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_m_n_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 21, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] -// -uint32x4_t test_vshrq_m_n_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshrq_m(inactive, a, 21, p); -#else /* POLYMORPHIC */ - return vshrq_m_n_u32(inactive, a, 21, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_x_n_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) -// CHECK-NEXT: ret <16 x i8> [[TMP2]] -// -int8x16_t test_vshlq_x_n_s8(int8x16_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshlq_x_n(a, 1, p); -#else /* POLYMORPHIC */ - return vshlq_x_n_s8(a, 1, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_x_n_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 15, <8 x i1> [[TMP1]], <8 x i16> undef) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] -// -int16x8_t test_vshlq_x_n_s16(int16x8_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshlq_x_n(a, 15, p); -#else /* POLYMORPHIC */ - return vshlq_x_n_s16(a, 15, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_x_n_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 13, <4 x i1> [[TMP1]], <4 x i32> undef) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] -// -int32x4_t test_vshlq_x_n_s32(int32x4_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshlq_x_n(a, 13, p); -#else /* POLYMORPHIC */ - return vshlq_x_n_s32(a, 13, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_x_n_u8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, <16 x i1> [[TMP1]], <16 x i8> undef) -// CHECK-NEXT: ret <16 x i8> [[TMP2]] -// -uint8x16_t test_vshlq_x_n_u8(uint8x16_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshlq_x_n(a, 4, p); -#else /* POLYMORPHIC */ - return vshlq_x_n_u8(a, 4, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_x_n_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, <8 x i1> [[TMP1]], <8 x i16> undef) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] -// -uint16x8_t test_vshlq_x_n_u16(uint16x8_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshlq_x_n(a, 10, p); -#else /* POLYMORPHIC */ - return vshlq_x_n_u16(a, 10, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshlq_x_n_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 30, <4 x i1> [[TMP1]], <4 x i32> undef) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] -// -uint32x4_t test_vshlq_x_n_u32(uint32x4_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshlq_x_n(a, 30, p); -#else /* POLYMORPHIC */ - return vshlq_x_n_u32(a, 30, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_x_n_s8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 0, <16 x i1> [[TMP1]], <16 x i8> undef) -// CHECK-NEXT: ret <16 x i8> [[TMP2]] -// -int8x16_t test_vshrq_x_n_s8(int8x16_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshrq_x(a, 4, p); -#else /* POLYMORPHIC */ - return vshrq_x_n_s8(a, 4, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_x_n_s16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] -// -int16x8_t test_vshrq_x_n_s16(int16x8_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshrq_x(a, 10, p); -#else /* POLYMORPHIC */ - return vshrq_x_n_s16(a, 10, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_x_n_s32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 7, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] -// -int32x4_t test_vshrq_x_n_s32(int32x4_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshrq_x(a, 7, p); -#else /* POLYMORPHIC */ - return vshrq_x_n_s32(a, 7, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_x_n_u8( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 7, i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) -// CHECK-NEXT: ret <16 x i8> [[TMP2]] -// -uint8x16_t test_vshrq_x_n_u8(uint8x16_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshrq_x(a, 7, p); -#else /* POLYMORPHIC */ - return vshrq_x_n_u8(a, 7, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_x_n_u16( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 7, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) -// CHECK-NEXT: ret <8 x i16> [[TMP2]] -// -uint16x8_t test_vshrq_x_n_u16(uint16x8_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshrq_x(a, 7, p); -#else /* POLYMORPHIC */ - return vshrq_x_n_u16(a, 7, p); -#endif /* POLYMORPHIC */ -} - -// CHECK-LABEL: @test_vshrq_x_n_u32( -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 -// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) -// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 6, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) -// CHECK-NEXT: ret <4 x i32> [[TMP2]] -// -uint32x4_t test_vshrq_x_n_u32(uint32x4_t a, mve_pred16_t p) -{ -#ifdef POLYMORPHIC - return vshrq_x(a, 6, p); -#else /* POLYMORPHIC */ - return vshrq_x_n_u32(a, 6, p); -#endif /* POLYMORPHIC */ -} diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp index 35a5e52bf4a1..81c08a2baa71 100644 --- a/clang/utils/TableGen/MveEmitter.cpp +++ b/clang/utils/TableGen/MveEmitter.cpp @@ -470,10 +470,6 @@ class Result { virtual void genCode(raw_ostream &OS, CodeGenParamAllocator &) const = 0; virtual bool hasIntegerConstantValue() const { return false; } virtual uint32_t integerConstantValue() const { return 0; } - virtual bool hasIntegerValue() const { return false; } - virtual std::string getIntegerValue(const std::string &) { - llvm_unreachable("non-working Result::getIntegerValue called"); - } virtual std::string typeName() const { return "Value *"; } // Mostly, when a code-generation operation has a dependency on prior @@ -548,9 +544,8 @@ class BuiltinArgResult : public Result { public: unsigned ArgNum; bool AddressType; - bool Immediate; - BuiltinArgResult(unsigned ArgNum, bool AddressType, bool Immediate) - : ArgNum(ArgNum), AddressType(AddressType), Immediate(Immediate) {} + BuiltinArgResult(unsigned ArgNum, bool AddressType) + : ArgNum(ArgNum), AddressType(AddressType) {} void genCode(raw_ostream &OS, CodeGenParamAllocator &) const override { OS << (AddressType ? "EmitPointerWithAlignment" : "EmitScalarExpr") << "(E->getArg(" << ArgNum << "))"; @@ -564,11 +559,6 @@ class BuiltinArgResult : public Result { return "(" + varname() + ".getPointer())"; return Result::asValue(); } - bool hasIntegerValue() const override { return Immediate; } - std::string getIntegerValue(const std::string &IntType) override { - return "GetIntegerConstantValue<" + IntType + ">(E->getArg(" + - utostr(ArgNum) + "), getContext())"; - } }; // Result subclass for an integer literal appearing in Tablegen. This may need @@ -643,34 +633,27 @@ class IRBuilderResult : public Result { StringRef CallPrefix; std::vector<Ptr> Args; std::set<unsigned> AddressArgs; - std::map<unsigned, std::string> IntegerArgs; + std::map<unsigned, std::string> IntConstantArgs; IRBuilderResult(StringRef CallPrefix, std::vector<Ptr> Args, std::set<unsigned> AddressArgs, - std::map<unsigned, std::string> IntegerArgs) - : CallPrefix(CallPrefix), Args(Args), AddressArgs(AddressArgs), - IntegerArgs(IntegerArgs) {} + std::map<unsigned, std::string> IntConstantArgs) + : CallPrefix(CallPrefix), Args(Args), AddressArgs(AddressArgs), + IntConstantArgs(IntConstantArgs) {} void genCode(raw_ostream &OS, CodeGenParamAllocator &ParamAlloc) const override { OS << CallPrefix; const char *Sep = ""; for (unsigned i = 0, e = Args.size(); i < e; ++i) { Ptr Arg = Args[i]; - auto it = IntegerArgs.find(i); - - OS << Sep; - Sep = ", "; - - if (it != IntegerArgs.end()) { - if (Arg->hasIntegerConstantValue()) - OS << "static_cast<" << it->second << ">(" - << ParamAlloc.allocParam(it->second, - utostr(Arg->integerConstantValue())) - << ")"; - else if (Arg->hasIntegerValue()) - OS << ParamAlloc.allocParam(it->second, - Arg->getIntegerValue(it->second)); + auto it = IntConstantArgs.find(i); + if (it != IntConstantArgs.end()) { + assert(Arg->hasIntegerConstantValue()); + OS << Sep << "static_cast<" << it->second << ">(" + << ParamAlloc.allocParam("unsigned", + utostr(Arg->integerConstantValue())) + << ")"; } else { - OS << Arg->varname(); + OS << Sep << Arg->varname(); } Sep = ", "; } @@ -679,8 +662,7 @@ class IRBuilderResult : public Result { void morePrerequisites(std::vector<Ptr> &output) const override { for (unsigned i = 0, e = Args.size(); i < e; ++i) { Ptr Arg = Args[i]; - if (IntegerArgs.find(i) != IntegerArgs.end() && - Arg->hasIntegerConstantValue()) + if (IntConstantArgs.find(i) != IntConstantArgs.end()) continue; output.push_back(Arg); } @@ -999,8 +981,8 @@ class MveEmitter { const Type *Param); Result::Ptr getCodeForDagArg(DagInit *D, unsigned ArgNum, const Result::Scope &Scope, const Type *Param); - Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType, bool Promote, - bool Immediate); + Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType, + bool Promote); // Constructor and top-level functions. @@ -1173,17 +1155,17 @@ Result::Ptr MveEmitter::getCodeForDag(DagInit *D, const Result::Scope &Scope, Args.push_back(getCodeForDagArg(D, i, Scope, Param)); if (Op->isSubClassOf("IRBuilderBase")) { std::set<unsigned> AddressArgs; - std::map<unsigned, std::string> IntegerArgs; + std::map<unsigned, std::string> IntConstantArgs; for (Record *sp : Op->getValueAsListOfDefs("special_params")) { unsigned Index = sp->getValueAsInt("index"); if (sp->isSubClassOf("IRBuilderAddrParam")) { AddressArgs.insert(Index); } else if (sp->isSubClassOf("IRBuilderIntParam")) { - IntegerArgs[Index] = sp->getValueAsString("type"); + IntConstantArgs[Index] = sp->getValueAsString("type"); } } - return std::make_shared<IRBuilderResult>(Op->getValueAsString("prefix"), - Args, AddressArgs, IntegerArgs); + return std::make_shared<IRBuilderResult>( + Op->getValueAsString("prefix"), Args, AddressArgs, IntConstantArgs); } else if (Op->isSubClassOf("IRIntBase")) { std::vector<const Type *> ParamTypes; for (Record *RParam : Op->getValueAsListOfDefs("params")) @@ -1233,9 +1215,9 @@ Result::Ptr MveEmitter::getCodeForDagArg(DagInit *D, unsigned ArgNum, } Result::Ptr MveEmitter::getCodeForArg(unsigned ArgNum, const Type *ArgType, - bool Promote, bool Immediate) { - Result::Ptr V = std::make_shared<BuiltinArgResult>( - ArgNum, isa<PointerType>(ArgType), Immediate); + bool Promote) { + Result::Ptr V = + std::make_shared<BuiltinArgResult>(ArgNum, isa<PointerType>(ArgType)); if (Promote) { if (const auto *ST = dyn_cast<ScalarType>(ArgType)) { @@ -1309,14 +1291,17 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param) const Type *ArgType = ME.getType(TypeInit, Param); ArgTypes.push_back(ArgType); + // The argument will usually have a name in the arguments dag, which goes + // into the variable-name scope that the code gen will refer to. + StringRef ArgName = ArgsDag->getArgNameStr(i); + if (!ArgName.empty()) + Scope[ArgName] = ME.getCodeForArg(i, ArgType, Promote); + // If the argument is a subclass of Immediate, record the details about // what values it can take, for Sema checking. - bool Immediate = false; if (auto TypeDI = dyn_cast<DefInit>(TypeInit)) { Record *TypeRec = TypeDI->getDef(); if (TypeRec->isSubClassOf("Immediate")) { - Immediate = true; - Record *Bounds = TypeRec->getValueAsDef("bounds"); ImmediateArg &IA = ImmediateArgs[i]; if (Bounds->isSubClassOf("IB_ConstRange")) { @@ -1330,7 +1315,7 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param) IA.boundsType = ImmediateArg::BoundsType::ExplicitRange; IA.i1 = 0; IA.i2 = 128 / Param->sizeInBits() - 1; - } else if (Bounds->isSubClassOf("IB_EltBit")) { + } else if (Bounds->getName() == "IB_EltBit") { IA.boundsType = ImmediateArg::BoundsType::ExplicitRange; IA.i1 = Bounds->getValueAsInt("base"); IA.i2 = IA.i1 + Param->sizeInBits() - 1; @@ -1347,12 +1332,6 @@ ACLEIntrinsic::ACLEIntrinsic(MveEmitter &ME, Record *R, const Type *Param) } } } - - // The argument will usually have a name in the arguments dag, which goes - // into the variable-name scope that the code gen will refer to. - StringRef ArgName = ArgsDag->getArgNameStr(i); - if (!ArgName.empty()) - Scope[ArgName] = ME.getCodeForArg(i, ArgType, Promote, Immediate); } // Finally, go through the codegen dag and translate it into a Result object diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index d20540480a82..6e63022d4cf5 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -913,14 +913,6 @@ defm int_arm_mve_vstr_scatter_offset: MVEPredicated< [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_anyvector_ty, llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrWriteMem]>; -def int_arm_mve_shl_imm_predicated: Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], - [IntrNoMem]>; -def int_arm_mve_shr_imm_predicated: Intrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, // extra i32 is unsigned flag - llvm_anyvector_ty, LLVMMatchType<0>], - [IntrNoMem]>; - // MVE scalar shifts. class ARM_MVE_qrshift_single<list<LLVMType> value, list<LLVMType> saturate = []> : diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 2f56d183e11a..d351ae8905b6 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2816,39 +2816,27 @@ def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> { let Inst{21} = 0b1; } -multiclass MVE_immediate_shift_patterns_inner< - MVEVectorVTInfo VTI, Operand imm_operand_type, SDNode unpred_op, - Intrinsic pred_int, Instruction inst, list<int> unsignedFlag = []> { - - def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src), imm_operand_type:$imm)), - (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm))>; - - def : Pat<(VTI.Vec !con((pred_int (VTI.Vec MQPR:$src), imm_operand_type:$imm), - !dag(pred_int, unsignedFlag, ?), - (pred_int (VTI.Pred VCCR:$mask), - (VTI.Vec MQPR:$inactive)))), - (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm, - ARMVCCThen, (VTI.Pred VCCR:$mask), - (VTI.Vec MQPR:$inactive)))>; -} - -multiclass MVE_immediate_shift_patterns<MVEVectorVTInfo VTI, - Operand imm_operand_type> { - defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type, - ARMvshlImm, int_arm_mve_shl_imm_predicated, - !cast<Instruction>("MVE_VSHL_immi" # VTI.BitsSuffix)>; - defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type, - ARMvshruImm, int_arm_mve_shr_imm_predicated, - !cast<Instruction>("MVE_VSHR_immu" # VTI.BitsSuffix), [1]>; - defm : MVE_immediate_shift_patterns_inner<VTI, imm_operand_type, - ARMvshrsImm, int_arm_mve_shr_imm_predicated, - !cast<Instruction>("MVE_VSHR_imms" # VTI.BitsSuffix), [0]>; -} - let Predicates = [HasMVEInt] in { - defm : MVE_immediate_shift_patterns<MVE_v16i8, imm0_7>; - defm : MVE_immediate_shift_patterns<MVE_v8i16, imm0_15>; - defm : MVE_immediate_shift_patterns<MVE_v4i32, imm0_31>; + def : Pat<(v4i32 (ARMvshlImm (v4i32 MQPR:$src), imm0_31:$imm)), + (v4i32 (MVE_VSHL_immi32 (v4i32 MQPR:$src), imm0_31:$imm))>; + def : Pat<(v8i16 (ARMvshlImm (v8i16 MQPR:$src), imm0_15:$imm)), + (v8i16 (MVE_VSHL_immi16 (v8i16 MQPR:$src), imm0_15:$imm))>; + def : Pat<(v16i8 (ARMvshlImm (v16i8 MQPR:$src), imm0_7:$imm)), + (v16i8 (MVE_VSHL_immi8 (v16i8 MQPR:$src), imm0_7:$imm))>; + + def : Pat<(v4i32 (ARMvshruImm (v4i32 MQPR:$src), imm0_31:$imm)), + (v4i32 (MVE_VSHR_immu32 (v4i32 MQPR:$src), imm0_31:$imm))>; + def : Pat<(v8i16 (ARMvshruImm (v8i16 MQPR:$src), imm0_15:$imm)), + (v8i16 (MVE_VSHR_immu16 (v8i16 MQPR:$src), imm0_15:$imm))>; + def : Pat<(v16i8 (ARMvshruImm (v16i8 MQPR:$src), imm0_7:$imm)), + (v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm))>; + + def : Pat<(v4i32 (ARMvshrsImm (v4i32 MQPR:$src), imm0_31:$imm)), + (v4i32 (MVE_VSHR_imms32 (v4i32 MQPR:$src), imm0_31:$imm))>; + def : Pat<(v8i16 (ARMvshrsImm (v8i16 MQPR:$src), imm0_15:$imm)), + (v8i16 (MVE_VSHR_imms16 (v8i16 MQPR:$src), imm0_15:$imm))>; + def : Pat<(v16i8 (ARMvshrsImm (v16i8 MQPR:$src), imm0_7:$imm)), + (v16i8 (MVE_VSHR_imms8 (v16i8 MQPR:$src), imm0_7:$imm))>; } // end of mve_shift instructions diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll deleted file mode 100644 index 86228ef94b38..000000000000 --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll +++ /dev/null @@ -1,398 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s - -define arm_aapcs_vfpcc <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) { -; CHECK-LABEL: test_vshlq_n_s8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vshl.i8 q0, q0, #5 -; CHECK-NEXT: bx lr -entry: - %0 = shl <16 x i8> %a, <i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5, i8 5> - ret <16 x i8> %0 -} - -define arm_aapcs_vfpcc <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) { -; CHECK-LABEL: test_vshlq_n_s16: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vshl.i16 q0, q0, #5 -; CHECK-NEXT: bx lr -entry: - %0 = shl <8 x i16> %a, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5> - ret <8 x i16> %0 -} - -define arm_aapcs_vfpcc <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) { -; CHECK-LABEL: test_vshlq_n_s32: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vshl.i32 q0, q0, #18 -; CHECK-NEXT: bx lr -entry: - %0 = shl <4 x i32> %a, <i32 18, i32 18, i32 18, i32 18> - ret <4 x i32> %0 -} - -define arm_aapcs_vfpcc <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) { -; CHECK-LABEL: test_vshrq_n_s8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vshr.s8 q0, q0, #4 -; CHECK-NEXT: bx lr -entry: - %0 = ashr <16 x i8> %a, <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4> - ret <16 x i8> %0 -} - -define arm_aapcs_vfpcc <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) { -; CHECK-LABEL: test_vshrq_n_s16: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vshr.s16 q0, q0, #10 -; CHECK-NEXT: bx lr -entry: - %0 = ashr <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10> - ret <8 x i16> %0 -} - -define arm_aapcs_vfpcc <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) { -; CHECK-LABEL: test_vshrq_n_s32: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vshr.s32 q0, q0, #19 -; CHECK-NEXT: bx lr -entry: - %0 = ashr <4 x i32> %a, <i32 19, i32 19, i32 19, i32 19> - ret <4 x i32> %0 -} - -define arm_aapcs_vfpcc <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) { -; CHECK-LABEL: test_vshrq_n_u8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vshr.u8 q0, q0, #1 -; CHECK-NEXT: bx lr -entry: - %0 = lshr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> - ret <16 x i8> %0 -} - -define arm_aapcs_vfpcc <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) { -; CHECK-LABEL: test_vshrq_n_u16: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vshr.u16 q0, q0, #10 -; CHECK-NEXT: bx lr -entry: - %0 = lshr <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10> - ret <8 x i16> %0 -} - -define arm_aapcs_vfpcc <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) { -; CHECK-LABEL: test_vshrq_n_u32: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vshr.u32 q0, q0, #10 -; CHECK-NEXT: bx lr -entry: - %0 = lshr <4 x i32> %a, <i32 10, i32 10, i32 10, i32 10> - ret <4 x i32> %0 -} - -define arm_aapcs_vfpcc <16 x i8> @test_vshlq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshlq_m_n_s8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshlt.i8 q0, q1, #6 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) - %2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 6, <16 x i1> %1, <16 x i8> %inactive) - ret <16 x i8> %2 -} - -define arm_aapcs_vfpcc <8 x i16> @test_vshlq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshlq_m_n_s16: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshlt.i16 q0, q1, #13 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) - %2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, <8 x i1> %1, <8 x i16> %inactive) - ret <8 x i16> %2 -} - -define arm_aapcs_vfpcc <4 x i32> @test_vshlq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshlq_m_n_s32: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshlt.i32 q0, q1, #0 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) - %2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1, <4 x i32> %inactive) - ret <4 x i32> %2 -} - -define arm_aapcs_vfpcc <16 x i8> @test_vshrq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshrq_m_n_s8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshrt.s8 q0, q1, #2 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) - %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, <16 x i1> %1, <16 x i8> %inactive) - ret <16 x i8> %2 -} - -define arm_aapcs_vfpcc <8 x i16> @test_vshrq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshrq_m_n_s16: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshrt.s16 q0, q1, #3 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) - %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 3, i32 0, <8 x i1> %1, <8 x i16> %inactive) - ret <8 x i16> %2 -} - -define arm_aapcs_vfpcc <4 x i32> @test_vshrq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshrq_m_n_s32: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshrt.s32 q0, q1, #13 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) - %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 13, i32 0, <4 x i1> %1, <4 x i32> %inactive) - ret <4 x i32> %2 -} - -define arm_aapcs_vfpcc <16 x i8> @test_vshrq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshrq_m_n_u8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshrt.u8 q0, q1, #4 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) - %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 1, <16 x i1> %1, <16 x i8> %inactive) - ret <16 x i8> %2 -} - -define arm_aapcs_vfpcc <8 x i16> @test_vshrq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshrq_m_n_u16: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshrt.u16 q0, q1, #14 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) - %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 14, i32 1, <8 x i1> %1, <8 x i16> %inactive) - ret <8 x i16> %2 -} - -define arm_aapcs_vfpcc <4 x i32> @test_vshrq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshrq_m_n_u32: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshrt.u32 q0, q1, #21 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) - %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 21, i32 1, <4 x i1> %1, <4 x i32> %inactive) - ret <4 x i32> %2 -} - -define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_n_s8(<16 x i8> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshlq_x_n_s8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshlt.i8 q0, q0, #1 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) - %2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 1, <16 x i1> %1, <16 x i8> undef) - ret <16 x i8> %2 -} - -define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_n_s16(<8 x i16> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshlq_x_n_s16: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshlt.i16 q0, q0, #15 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) - %2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 15, <8 x i1> %1, <8 x i16> undef) - ret <8 x i16> %2 -} - -define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_n_s32(<4 x i32> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshlq_x_n_s32: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshlt.i32 q0, q0, #13 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) - %2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 13, <4 x i1> %1, <4 x i32> undef) - ret <4 x i32> %2 -} - -define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_n_u8(<16 x i8> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshlq_x_n_u8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshlt.i8 q0, q0, #4 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) - %2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, <16 x i1> %1, <16 x i8> undef) - ret <16 x i8> %2 -} - -define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_n_u16(<8 x i16> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshlq_x_n_u16: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshlt.i16 q0, q0, #10 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) - %2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 10, <8 x i1> %1, <8 x i16> undef) - ret <8 x i16> %2 -} - -define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_n_u32(<4 x i32> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshlq_x_n_u32: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshlt.i32 q0, q0, #30 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) - %2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 30, <4 x i1> %1, <4 x i32> undef) - ret <4 x i32> %2 -} - -define arm_aapcs_vfpcc <16 x i8> @test_vshrq_x_n_s8(<16 x i8> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshrq_x_n_s8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshrt.s8 q0, q0, #4 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) - %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 0, <16 x i1> %1, <16 x i8> undef) - ret <16 x i8> %2 -} - -define arm_aapcs_vfpcc <8 x i16> @test_vshrq_x_n_s16(<8 x i16> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshrq_x_n_s16: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshrt.s16 q0, q0, #10 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) - %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 10, i32 0, <8 x i1> %1, <8 x i16> undef) - ret <8 x i16> %2 -} - -define arm_aapcs_vfpcc <4 x i32> @test_vshrq_x_n_s32(<4 x i32> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshrq_x_n_s32: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshrt.s32 q0, q0, #7 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) - %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 7, i32 0, <4 x i1> %1, <4 x i32> undef) - ret <4 x i32> %2 -} - -define arm_aapcs_vfpcc <16 x i8> @test_vshrq_x_n_u8(<16 x i8> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshrq_x_n_u8: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshrt.u8 q0, q0, #7 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) - %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 7, i32 1, <16 x i1> %1, <16 x i8> undef) - ret <16 x i8> %2 -} - -define arm_aapcs_vfpcc <8 x i16> @test_vshrq_x_n_u16(<8 x i16> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshrq_x_n_u16: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshrt.u16 q0, q0, #7 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) - %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 7, i32 1, <8 x i1> %1, <8 x i16> undef) - ret <8 x i16> %2 -} - -define arm_aapcs_vfpcc <4 x i32> @test_vshrq_x_n_u32(<4 x i32> %a, i16 zeroext %p) { -; CHECK-LABEL: test_vshrq_x_n_u32: -; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmsr p0, r0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vshrt.u32 q0, q0, #6 -; CHECK-NEXT: bx lr -entry: - %0 = zext i16 %p to i32 - %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) - %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 6, i32 1, <4 x i1> %1, <4 x i32> undef) - ret <4 x i32> %2 -} - -declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) -declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) -declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) - -declare <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>) -declare <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>) -declare <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>) - -declare <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>) -declare <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>) -declare <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits