craig.topper created this revision. craig.topper added reviewers: chandlerc, RKSimon, spatel.
Chandler is working on changes that will loosen some of the range checks in SemaChecking to be only warnings that can be disabled. This patch adds explicit masking to avoid using the upper bits of immediates. https://reviews.llvm.org/D48462 Files: lib/CodeGen/CGBuiltin.cpp test/CodeGen/avx-builtins.c test/CodeGen/sse2-builtins.c test/CodeGen/sse41-builtins.c test/CodeGen/vector.c
Index: test/CodeGen/vector.c =================================================================== --- test/CodeGen/vector.c +++ test/CodeGen/vector.c @@ -45,13 +45,13 @@ unsigned long test_epi8(__m128i x) { return _mm_extract_epi8(x, 4); } // CHECK: @test_epi8 -// CHECK: extractelement <16 x i8> {{.*}}, i32 4 +// CHECK: extractelement <16 x i8> {{.*}}, {{i32|i64}} 4 // CHECK: zext i8 {{.*}} to i32 unsigned long test_epi16(__m128i x) { return _mm_extract_epi16(x, 3); } // CHECK: @test_epi16 -// CHECK: extractelement <8 x i16> {{.*}}, i32 3 +// CHECK: extractelement <8 x i16> {{.*}}, {{i32|i64}} 3 // CHECK: zext i16 {{.*}} to i32 void extractinttypes() { Index: test/CodeGen/sse41-builtins.c =================================================================== --- test/CodeGen/sse41-builtins.c +++ test/CodeGen/sse41-builtins.c @@ -171,26 +171,26 @@ int test_mm_extract_epi8(__m128i x) { // CHECK-LABEL: test_mm_extract_epi8 - // CHECK: extractelement <16 x i8> %{{.*}}, i32 1 + // CHECK: extractelement <16 x i8> %{{.*}}, {{i32|i64}} 1 // CHECK: zext i8 %{{.*}} to i32 return _mm_extract_epi8(x, 1); } int test_mm_extract_epi32(__m128i x) { // CHECK-LABEL: test_mm_extract_epi32 - // CHECK: extractelement <4 x i32> %{{.*}}, i32 1 + // CHECK: extractelement <4 x i32> %{{.*}}, {{i32|i64}} 1 return _mm_extract_epi32(x, 1); } long long test_mm_extract_epi64(__m128i x) { // CHECK-LABEL: test_mm_extract_epi64 - // CHECK: extractelement <2 x i64> %{{.*}}, i32 1 + // CHECK: extractelement <2 x i64> %{{.*}}, {{i32|i64}} 1 return _mm_extract_epi64(x, 1); } int test_mm_extract_ps(__m128 x) { // CHECK-LABEL: test_mm_extract_ps - // CHECK: extractelement <4 x float> %{{.*}}, i32 1 + // CHECK: extractelement <4 x float> %{{.*}}, {{i32|i64}} 1 return _mm_extract_ps(x, 1); } @@ -220,19 +220,19 @@ __m128i test_mm_insert_epi8(__m128i x, char b) { // CHECK-LABEL: test_mm_insert_epi8 - // CHECK: insertelement <16 x i8> %{{.*}}, i8 %{{.*}}, i32 1 + // CHECK: insertelement <16 x i8> %{{.*}}, i8 
%{{.*}}, {{i32|i64}} 1 return _mm_insert_epi8(x, b, 1); } __m128i test_mm_insert_epi32(__m128i x, int b) { // CHECK-LABEL: test_mm_insert_epi32 - // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 1 + // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, {{i32|i64}} 1 return _mm_insert_epi32(x, b, 1); } __m128i test_mm_insert_epi64(__m128i x, long long b) { // CHECK-LABEL: test_mm_insert_epi64 - // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, i32 1 + // CHECK: insertelement <2 x i64> %{{.*}}, i64 %{{.*}}, {{i32|i64}} 1 return _mm_insert_epi64(x, b, 1); } Index: test/CodeGen/sse2-builtins.c =================================================================== --- test/CodeGen/sse2-builtins.c +++ test/CodeGen/sse2-builtins.c @@ -613,14 +613,14 @@ // Lowering to pextrw requires optimization. int test_mm_extract_epi16(__m128i A) { // CHECK-LABEL: test_mm_extract_epi16 - // CHECK: extractelement <8 x i16> %{{.*}}, i32 1 + // CHECK: extractelement <8 x i16> %{{.*}}, {{i32|i64}} 1 // CHECK: zext i16 %{{.*}} to i32 return _mm_extract_epi16(A, 1); } __m128i test_mm_insert_epi16(__m128i A, int B) { // CHECK-LABEL: test_mm_insert_epi16 - // CHECK: insertelement <8 x i16> %{{.*}}, i32 0 + // CHECK: insertelement <8 x i16> %{{.*}}, {{i32|i64}} 0 return _mm_insert_epi16(A, B, 0); } Index: test/CodeGen/avx-builtins.c =================================================================== --- test/CodeGen/avx-builtins.c +++ test/CodeGen/avx-builtins.c @@ -316,27 +316,27 @@ int test_mm256_extract_epi8(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi8 - // CHECK: extractelement <32 x i8> %{{.*}}, i32 31 + // CHECK: extractelement <32 x i8> %{{.*}}, {{i32|i64}} 31 // CHECK: zext i8 %{{.*}} to i32 return _mm256_extract_epi8(A, 31); } int test_mm256_extract_epi16(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi16 - // CHECK: extractelement <16 x i16> %{{.*}}, i32 15 + // CHECK: extractelement <16 x i16> %{{.*}}, {{i32|i64}} 15 // CHECK: zext i16 %{{.*}} to i32 return 
_mm256_extract_epi16(A, 15); } int test_mm256_extract_epi32(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi32 - // CHECK: extractelement <8 x i32> %{{.*}}, i32 7 + // CHECK: extractelement <8 x i32> %{{.*}}, {{i32|i64}} 7 return _mm256_extract_epi32(A, 7); } long long test_mm256_extract_epi64(__m256i A) { // CHECK-LABEL: test_mm256_extract_epi64 - // CHECK: extractelement <4 x i64> %{{.*}}, i32 3 + // CHECK: extractelement <4 x i64> %{{.*}}, {{i32|i64}} 3 return _mm256_extract_epi64(A, 3); } @@ -396,25 +396,25 @@ __m256i test_mm256_insert_epi8(__m256i x, char b) { // CHECK-LABEL: test_mm256_insert_epi8 - // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, i32 14 + // CHECK: insertelement <32 x i8> %{{.*}}, i8 %{{.*}}, {{i32|i64}} 14 return _mm256_insert_epi8(x, b, 14); } __m256i test_mm256_insert_epi16(__m256i x, int b) { // CHECK-LABEL: test_mm256_insert_epi16 - // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, i32 4 + // CHECK: insertelement <16 x i16> %{{.*}}, i16 %{{.*}}, {{i32|i64}} 4 return _mm256_insert_epi16(x, b, 4); } __m256i test_mm256_insert_epi32(__m256i x, int b) { // CHECK-LABEL: test_mm256_insert_epi32 - // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, i32 5 + // CHECK: insertelement <8 x i32> %{{.*}}, i32 %{{.*}}, {{i32|i64}} 5 return _mm256_insert_epi32(x, b, 5); } __m256i test_mm256_insert_epi64(__m256i x, long long b) { // CHECK-LABEL: test_mm256_insert_epi64 - // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, i32 2 + // CHECK: insertelement <4 x i64> %{{.*}}, i64 %{{.*}}, {{i32|i64}} 2 return _mm256_insert_epi64(x, b, 2); } Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -9031,21 +9031,29 @@ case X86::BI__builtin_ia32_vec_ext_v32qi: case X86::BI__builtin_ia32_vec_ext_v16hi: case X86::BI__builtin_ia32_vec_ext_v8si: - case X86::BI__builtin_ia32_vec_ext_v4di: + case X86::BI__builtin_ia32_vec_ext_v4di: { + 
unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue(); + Index &= NumElts - 1; // These builtins exist so we can ensure the index is an ICE and in range. // Otherwise we could just do this in the header file. - return Builder.CreateExtractElement(Ops[0], Ops[1]); + return Builder.CreateExtractElement(Ops[0], Index); + } case X86::BI__builtin_ia32_vec_set_v16qi: case X86::BI__builtin_ia32_vec_set_v8hi: case X86::BI__builtin_ia32_vec_set_v4si: case X86::BI__builtin_ia32_vec_set_v2di: case X86::BI__builtin_ia32_vec_set_v32qi: case X86::BI__builtin_ia32_vec_set_v16hi: case X86::BI__builtin_ia32_vec_set_v8si: - case X86::BI__builtin_ia32_vec_set_v4di: + case X86::BI__builtin_ia32_vec_set_v4di: { + unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue(); + Index &= NumElts - 1; // These builtins exist so we can ensure the index is an ICE and in range. // Otherwise we could just do this in the header file. - return Builder.CreateInsertElement(Ops[0], Ops[1], Ops[2]); + return Builder.CreateInsertElement(Ops[0], Ops[1], Index); + } case X86::BI_mm_setcsr: case X86::BI__builtin_ia32_ldmxcsr: { Address Tmp = CreateMemTemp(E->getArg(0)->getType()); @@ -9311,8 +9319,7 @@ // extract (0, 1) unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 
0 : 1; - llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index); - Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract"); + Ops[1] = Builder.CreateExtractElement(Ops[1], Index, "extract"); // cast pointer to i64 & store Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy); @@ -9336,7 +9343,12 @@ case X86::BI__builtin_ia32_extracti64x2_512_mask: { llvm::Type *DstTy = ConvertType(E->getType()); unsigned NumElts = DstTy->getVectorNumElements(); - unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue() * NumElts; + unsigned SrcNumElts = Ops[0]->getType()->getVectorNumElements(); + unsigned SubVectors = SrcNumElts / NumElts; + unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue(); + assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); + Index &= SubVectors - 1; // Remove any extra bits. + Index *= NumElts; uint32_t Indices[16]; for (unsigned i = 0; i != NumElts; ++i) @@ -9370,7 +9382,11 @@ case X86::BI__builtin_ia32_inserti64x2_512: { unsigned DstNumElts = Ops[0]->getType()->getVectorNumElements(); unsigned SrcNumElts = Ops[1]->getType()->getVectorNumElements(); - unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue() * SrcNumElts; + unsigned SubVectors = DstNumElts / SrcNumElts; + unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue(); + assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors"); + Index &= SubVectors - 1; // Remove any extra bits. 
+ Index *= SrcNumElts; uint32_t Indices[16]; for (unsigned i = 0; i != DstNumElts; ++i) @@ -9571,7 +9587,7 @@ case X86::BI__builtin_ia32_palignr128: case X86::BI__builtin_ia32_palignr256: case X86::BI__builtin_ia32_palignr512: { - unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); + unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff; unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); assert(NumElts % 16 == 0); @@ -9611,7 +9627,7 @@ case X86::BI__builtin_ia32_alignq256: case X86::BI__builtin_ia32_alignq512: { unsigned NumElts = Ops[0]->getType()->getVectorNumElements(); - unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); + unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff; // Mask the shift amount to width of two vectors. ShiftVal &= (2 * NumElts) - 1; @@ -9696,7 +9712,7 @@ case X86::BI__builtin_ia32_pslldqi128_byteshift: case X86::BI__builtin_ia32_pslldqi256_byteshift: case X86::BI__builtin_ia32_pslldqi512_byteshift: { - unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); + unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; llvm::Type *ResultType = Ops[0]->getType(); // Builtin type is vXi64 so multiply by 8 to get bytes. unsigned NumElts = ResultType->getVectorNumElements() * 8; @@ -9726,7 +9742,7 @@ case X86::BI__builtin_ia32_psrldqi128_byteshift: case X86::BI__builtin_ia32_psrldqi256_byteshift: case X86::BI__builtin_ia32_psrldqi512_byteshift: { - unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue(); + unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff; llvm::Type *ResultType = Ops[0]->getType(); // Builtin type is vXi64 so multiply by 8 to get bytes. 
unsigned NumElts = ResultType->getVectorNumElements() * 8; @@ -10170,7 +10186,7 @@ case X86::BI__builtin_ia32_cmpps256: case X86::BI__builtin_ia32_cmppd: case X86::BI__builtin_ia32_cmppd256: { - unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue(); + unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f; // If this one of the SSE immediates, we can use native IR. if (CC < 8) { FCmpInst::Predicate Pred;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits