Author: Benjamin Kramer Date: 2020-04-15T15:52:49+02:00 New Revision: 316b49d37336258e11f16b5822d078c6407cd34f
URL: https://github.com/llvm/llvm-project/commit/316b49d37336258e11f16b5822d078c6407cd34f DIFF: https://github.com/llvm/llvm-project/commit/316b49d37336258e11f16b5822d078c6407cd34f.diff LOG: Pass shufflevector indices as int instead of unsigned. No functionality change intended. Added: Modified: clang/lib/CodeGen/CGBuiltin.cpp llvm/lib/IR/AutoUpgrade.cpp llvm/lib/Target/X86/X86InterleavedAccess.cpp Removed: ################################################################################ diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 87f52102fff4..f4832ef4afb2 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -11747,7 +11747,7 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // Splat the 8-bits of immediate 4 times to help the loop wrap around. Imm = (Imm & 0xff) * 0x01010101; - uint32_t Indices[16]; + int Indices[16]; for (unsigned l = 0; l != NumElts; l += NumLaneElts) { for (unsigned i = 0; i != NumLaneElts; ++i) { unsigned Index = Imm % NumLaneElts; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index e2997df3cc51..d61b79725d5e 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -912,7 +912,7 @@ static Value *UpgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, // we'll just return the zero vector. if (Shift < 16) { - uint32_t Idxs[64]; + int Idxs[64]; // 256/512-bit version is split into 2/4 16-byte lanes. for (unsigned l = 0; l != NumElts; l += 16) for (unsigned i = 0; i != 16; ++i) { @@ -946,7 +946,7 @@ static Value *UpgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, // If shift is less than 16, emit a shuffle to move the bytes. Otherwise, // we'll just return the zero vector. if (Shift < 16) { - uint32_t Idxs[64]; + int Idxs[64]; // 256/512-bit version is split into 2/4 16-byte lanes. for (unsigned l = 0; l != NumElts; l += 16) for (unsigned i = 0; i != 16; ++i) { @@ -972,7 +972,7 @@ static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask, // If we have less than 8 elements, then the starting mask was an i8 and // we need to extract down to the right number of elements. if (NumElts < 8) { - uint32_t Indices[4]; + int Indices[4]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i; Mask = Builder.CreateShuffleVector(Mask, Mask, @@ -1041,7 +1041,7 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Op0 = llvm::Constant::getNullValue(Op0->getType()); } - uint32_t Indices[64]; + int Indices[64]; // 256-bit palignr operates on 128-bit lanes so we need to handle that for (unsigned l = 0; l < NumElts; l += 16) { for (unsigned i = 0; i != 16; ++i) { @@ -1352,7 +1352,7 @@ static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, } if (NumElts < 8) { - uint32_t Indices[8]; + int Indices[8]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i; for (unsigned i = NumElts; i != 8; ++i) @@ -1878,7 +1878,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { unsigned NumElts = CI->getType()->getScalarSizeInBits(); Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts); Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts); - uint32_t Indices[64]; + int Indices[64]; for (unsigned i = 0; i != NumElts; ++i) Indices[i] = i; @@ -2127,8 +2127,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { unsigned NumDstElts = DstTy->getNumElements(); if (NumDstElts < SrcTy->getNumElements()) { assert(NumDstElts == 2 && "Unexpected vector size"); - uint32_t ShuffleMask[2] = { 0, 1 }; - Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask); + Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1}); } bool IsPS2PD = SrcTy->getElementType()->isFloatTy(); @@ -2159,8 +2158,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { unsigned NumDstElts = DstTy->getNumElements(); if (NumDstElts != SrcTy->getNumElements()) { assert(NumDstElts == 4 && "Unexpected vector size"); - uint32_t ShuffleMask[4] = {0, 1, 2, 3}; - Rep = Builder.CreateShuffleVector(Rep, Rep, ShuffleMask); + Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3}); } Rep = Builder.CreateBitCast( Rep, VectorType::get(Type::getHalfTy(C), NumDstElts)); @@ -2310,7 +2308,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { unsigned NumDstElts = DstTy->getNumElements(); // Extract a subvector of the first NumDstElts lanes and sign/zero extend. - SmallVector<uint32_t, 8> ShuffleMask(NumDstElts); + SmallVector<int, 8> ShuffleMask(NumDstElts); for (unsigned i = 0; i != NumDstElts; ++i) ShuffleMask[i] = i; @@ -2356,7 +2354,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits(); unsigned ControlBitsMask = NumLanes - 1; unsigned NumControlBits = NumLanes / 2; - SmallVector<uint32_t, 8> ShuffleMask(0); + SmallVector<int, 8> ShuffleMask(0); for (unsigned l = 0; l != NumLanes; ++l) { unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask; @@ -2376,7 +2374,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { cast<VectorType>(CI->getArgOperand(0)->getType())->getNumElements(); unsigned NumDstElts = cast<VectorType>(CI->getType())->getNumElements(); - SmallVector<uint32_t, 8> ShuffleMask(NumDstElts); + SmallVector<int, 8> ShuffleMask(NumDstElts); for (unsigned i = 0; i != NumDstElts; ++i) ShuffleMask[i] = i % NumSrcElts; @@ -2466,7 +2464,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { VectorType *VecTy = cast<VectorType>(CI->getType()); unsigned NumElts = VecTy->getNumElements(); - SmallVector<uint32_t, 16> Idxs(NumElts); + SmallVector<int, 16> Idxs(NumElts); for (unsigned i = 0; i != NumElts; ++i) Idxs[i] = ((Imm >> (i%8)) & 1) ? i + NumElts : i; @@ -2486,7 +2484,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Extend the second operand into a vector the size of the destination. Value *UndefV = UndefValue::get(Op1->getType()); - SmallVector<uint32_t, 8> Idxs(DstNumElts); + SmallVector<int, 8> Idxs(DstNumElts); for (unsigned i = 0; i != SrcNumElts; ++i) Idxs[i] = i; for (unsigned i = SrcNumElts; i != DstNumElts; ++i) @@ -2529,7 +2527,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Imm = Imm % Scale; // Get indexes for the subvector of the input vector. - SmallVector<uint32_t, 8> Idxs(DstNumElts); + SmallVector<int, 8> Idxs(DstNumElts); for (unsigned i = 0; i != DstNumElts; ++i) { Idxs[i] = i + (Imm * DstNumElts); } @@ -2548,7 +2546,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { VectorType *VecTy = cast<VectorType>(CI->getType()); unsigned NumElts = VecTy->getNumElements(); - SmallVector<uint32_t, 8> Idxs(NumElts); + SmallVector<int, 8> Idxs(NumElts); for (unsigned i = 0; i != NumElts; ++i) Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3); @@ -2571,7 +2569,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements(); unsigned HalfSize = NumElts / 2; - SmallVector<uint32_t, 8> ShuffleMask(NumElts); + SmallVector<int, 8> ShuffleMask(NumElts); // Determine which operand(s) are actually in use for this instruction. Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0); @@ -2605,7 +2603,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { unsigned IdxSize = 64 / VecTy->getScalarSizeInBits(); unsigned IdxMask = ((1 << IdxSize) - 1); - SmallVector<uint32_t, 8> Idxs(NumElts); + SmallVector<int, 8> Idxs(NumElts); // Lookup the bits for this element, wrapping around the immediate every // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need // to offset by the first index of each group. @@ -2623,7 +2621,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements(); - SmallVector<uint32_t, 16> Idxs(NumElts); + SmallVector<int, 16> Idxs(NumElts); for (unsigned l = 0; l != NumElts; l += 8) { for (unsigned i = 0; i != 4; ++i) Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l; @@ -2642,7 +2640,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue(); unsigned NumElts = cast<VectorType>(CI->getType())->getNumElements(); - SmallVector<uint32_t, 16> Idxs(NumElts); + SmallVector<int, 16> Idxs(NumElts); for (unsigned l = 0; l != NumElts; l += 8) { for (unsigned i = 0; i != 4; ++i) Idxs[i + l] = i + l; @@ -2664,7 +2662,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); unsigned HalfLaneElts = NumLaneElts / 2; - SmallVector<uint32_t, 16> Idxs(NumElts); + SmallVector<int, 16> Idxs(NumElts); for (unsigned i = 0; i != NumElts; ++i) { // Base index is the starting element of the lane. Idxs[i] = i - (i % NumLaneElts); @@ -2691,7 +2689,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { if (Name.startswith("avx512.mask.movshdup.")) Offset = 1; - SmallVector<uint32_t, 16> Idxs(NumElts); + SmallVector<int, 16> Idxs(NumElts); for (unsigned l = 0; l != NumElts; l += NumLaneElts) for (unsigned i = 0; i != NumLaneElts; i += 2) { Idxs[i + l + 0] = i + l + Offset; @@ -2709,7 +2707,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { int NumElts = cast<VectorType>(CI->getType())->getNumElements(); int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); - SmallVector<uint32_t, 64> Idxs(NumElts); + SmallVector<int, 64> Idxs(NumElts); for (int l = 0; l != NumElts; l += NumLaneElts) for (int i = 0; i != NumLaneElts; ++i) Idxs[i + l] = l + (i / 2) + NumElts * (i % 2); @@ -2725,7 +2723,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { int NumElts = cast<VectorType>(CI->getType())->getNumElements(); int NumLaneElts = 128/CI->getType()->getScalarSizeInBits(); - SmallVector<uint32_t, 64> Idxs(NumElts); + SmallVector<int, 64> Idxs(NumElts); for (int l = 0; l != NumElts; l += NumLaneElts) for (int i = 0; i != NumLaneElts; ++i) Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2); @@ -3304,7 +3302,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { if (IsSubAdd) std::swap(Even, Odd); - SmallVector<uint32_t, 32> Idxs(NumElts); + SmallVector<int, 32> Idxs(NumElts); for (int i = 0; i != NumElts; ++i) Idxs[i] = i + (i % 2) * NumElts; diff --git a/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/llvm/lib/Target/X86/X86InterleavedAccess.cpp index 8463315756b2..81879b590b16 100644 --- a/llvm/lib/Target/X86/X86InterleavedAccess.cpp +++ b/llvm/lib/Target/X86/X86InterleavedAccess.cpp @@ -229,11 +229,11 @@ static MVT scaleVectorType(MVT VT) { VT.getVectorNumElements() / 2); } -static uint32_t Concat[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 }; +static constexpr int Concat[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63}; // genShuffleBland - Creates shuffle according to two vectors.This function is // only works on instructions with lane inside 256 registers. According to @@ -251,9 +251,9 @@ static uint32_t Concat[] = { // By computing the shuffle on a sequence of 16 elements(one lane) and add the // correct offset. We are creating a vpsuffed + blend sequence between two // shuffles. -static void genShuffleBland(MVT VT, ArrayRef<uint32_t> Mask, - SmallVectorImpl<uint32_t> &Out, int LowOffset, - int HighOffset) { +static void genShuffleBland(MVT VT, ArrayRef<int> Mask, + SmallVectorImpl<int> &Out, int LowOffset, + int HighOffset) { assert(VT.getSizeInBits() >= 256 && "This function doesn't accept width smaller then 256"); unsigned NumOfElm = VT.getVectorNumElements(); @@ -282,9 +282,9 @@ static void genShuffleBland(MVT VT, ArrayRef<uint32_t> Mask, // Invec[2] - |2|5|8|11| TransposedMatrix[2] - |8|9|10|11| static void reorderSubVector(MVT VT, SmallVectorImpl<Value *> &TransposedMatrix, - ArrayRef<Value *> Vec, ArrayRef<uint32_t> VPShuf, - unsigned VecElems, unsigned Stride, - IRBuilder<> &Builder) { + ArrayRef<Value *> Vec, ArrayRef<int> VPShuf, + unsigned VecElems, unsigned Stride, + IRBuilder<> &Builder) { if (VecElems == 16) { for (unsigned i = 0; i < Stride; i++) @@ -293,7 +293,7 @@ static void reorderSubVector(MVT VT, SmallVectorImpl<Value *> &TransposedMatrix, return; } - SmallVector<uint32_t, 32> OptimizeShuf; + SmallVector<int, 32> OptimizeShuf; Value *Temp[8]; for (unsigned i = 0; i < (VecElems / 16) * Stride; i += 2) { @@ -433,7 +433,7 @@ void X86InterleavedAccessGroup::interleave8bitStride4( // For example shuffle pattern for VF 16 register size 256 -> lanes = 2 // {<[0|3|6|1|4|7|2|5]-[8|11|14|9|12|15|10|13]>} static void createShuffleStride(MVT VT, int Stride, - SmallVectorImpl<uint32_t> &Mask) { + SmallVectorImpl<int> &Mask) { int VectorSize = VT.getSizeInBits(); int VF = VT.getVectorNumElements(); int LaneCount = std::max(VectorSize / 128, 1); @@ -446,7 +446,7 @@ static void createShuffleStride(MVT VT, int Stride, // inside mask a shuffleMask. A mask contains exactly 3 groups, where // each group is a monotonically increasing sequence with stride 3. // For example shuffleMask {0,3,6,1,4,7,2,5} => {3,3,2} -static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) { +static void setGroupSize(MVT VT, SmallVectorImpl<int> &SizeInfo) { int VectorSize = VT.getSizeInBits(); int VF = VT.getVectorNumElements() / std::max(VectorSize / 128, 1); for (int i = 0, FirstGroupElement = 0; i < 3; i++) { @@ -470,7 +470,7 @@ static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) { // direction of the alignment. (false - align to the "right" side while true - // align to the "left" side) static void DecodePALIGNRMask(MVT VT, unsigned Imm, - SmallVectorImpl<uint32_t> &ShuffleMask, + SmallVectorImpl<int> &ShuffleMask, bool AlignDirection = true, bool Unary = false) { unsigned NumElts = VT.getVectorNumElements(); unsigned NumLanes = std::max((int)VT.getSizeInBits() / 128, 1); @@ -547,11 +547,11 @@ void X86InterleavedAccessGroup::deinterleave8bitStride3( // Matrix[2]= b5 c5 a6 b6 c6 a7 b7 c7 TransposedMatrix.resize(3); - SmallVector<uint32_t, 32> VPShuf; - SmallVector<uint32_t, 32> VPAlign[2]; - SmallVector<uint32_t, 32> VPAlign2; - SmallVector<uint32_t, 32> VPAlign3; - SmallVector<uint32_t, 3> GroupSize; + SmallVector<int, 32> VPShuf; + SmallVector<int, 32> VPAlign[2]; + SmallVector<int, 32> VPAlign2; + SmallVector<int, 32> VPAlign3; + SmallVector<int, 3> GroupSize; Value *Vec[6], *TempVector[3]; MVT VT = MVT::getVT(Shuffles[0]->getType()); @@ -605,8 +605,8 @@ void X86InterleavedAccessGroup::deinterleave8bitStride3( // group2Shuffle reorder the shuffle stride back into continuous order. // For example For VF16 with Mask1 = {0,3,6,9,12,15,2,5,8,11,14,1,4,7,10,13} => // MaskResult = {0,11,6,1,12,7,2,13,8,3,14,9,4,15,10,5}. -static void group2Shuffle(MVT VT, SmallVectorImpl<uint32_t> &Mask, - SmallVectorImpl<uint32_t> &Output) { +static void group2Shuffle(MVT VT, SmallVectorImpl<int> &Mask, + SmallVectorImpl<int> &Output) { int IndexGroup[3] = {0, 0, 0}; int Index = 0; int VectorWidth = VT.getSizeInBits(); @@ -633,11 +633,11 @@ void X86InterleavedAccessGroup::interleave8bitStride3( // Matrix[2]= c0 c1 c2 c3 c3 a7 b7 c7 TransposedMatrix.resize(3); - SmallVector<uint32_t, 3> GroupSize; - SmallVector<uint32_t, 32> VPShuf; - SmallVector<uint32_t, 32> VPAlign[3]; - SmallVector<uint32_t, 32> VPAlign2; - SmallVector<uint32_t, 32> VPAlign3; + SmallVector<int, 3> GroupSize; + SmallVector<int, 32> VPShuf; + SmallVector<int, 32> VPAlign[3]; + SmallVector<int, 32> VPAlign2; + SmallVector<int, 32> VPAlign3; Value *Vec[3], *TempVector[3]; MVT VT = MVT::getVectorVT(MVT::i8, VecElems); @@ -692,25 +692,25 @@ void X86InterleavedAccessGroup::transpose_4x4( TransposedMatrix.resize(4); // dst = src1[0,1],src2[0,1] - uint32_t IntMask1[] = {0, 1, 4, 5}; - ArrayRef<uint32_t> Mask = makeArrayRef(IntMask1, 4); + static constexpr int IntMask1[] = {0, 1, 4, 5}; + ArrayRef<int> Mask = makeArrayRef(IntMask1, 4); Value *IntrVec1 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask); Value *IntrVec2 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask); // dst = src1[2,3],src2[2,3] - uint32_t IntMask2[] = {2, 3, 6, 7}; + static constexpr int IntMask2[] = {2, 3, 6, 7}; Mask = makeArrayRef(IntMask2, 4); Value *IntrVec3 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask); Value *IntrVec4 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask); // dst = src1[0],src2[0],src1[2],src2[2] - uint32_t IntMask3[] = {0, 4, 2, 6}; + static constexpr int IntMask3[] = {0, 4, 2, 6}; Mask = makeArrayRef(IntMask3, 4); TransposedMatrix[0] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask); TransposedMatrix[2] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask); // dst = src1[1],src2[1],src1[3],src2[3] - uint32_t IntMask4[] = {1, 5, 3, 7}; + static constexpr int IntMask4[] = {1, 5, 3, 7}; Mask = makeArrayRef(IntMask4, 4); TransposedMatrix[1] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask); TransposedMatrix[3] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits