Author: Anton Afanasyev Date: 2020-12-14T15:51:43+03:00 New Revision: fac7c7ec3ccd64d19b6d33af0a8bc2f3f7f7b047
URL: https://github.com/llvm/llvm-project/commit/fac7c7ec3ccd64d19b6d33af0a8bc2f3f7f7b047 DIFF: https://github.com/llvm/llvm-project/commit/fac7c7ec3ccd64d19b6d33af0a8bc2f3f7f7b047.diff LOG: [SLP] Fix vector element size for the store chains The vector element size can differ between store chains. This patch prevents an incorrect computation of the maximum number of elements in that case. Differential Revision: https://reviews.llvm.org/D93192 Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll Removed: ################################################################################ diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index c5ba3709f6b1..e1c1c6edf08c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -6076,7 +6076,7 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores, // If a vector register can't hold 1 element, we are done. 
unsigned MaxVecRegSize = R.getMaxVecRegSize(); - unsigned EltSize = R.getVectorElementSize(Stores[0]); + unsigned EltSize = R.getVectorElementSize(Operands[0]); if (MaxVecRegSize % EltSize != 0) continue; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll b/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll index 63e3178c0278..2fdef624d48f 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/combined-stores-chains.ll @@ -23,28 +23,21 @@ define void @foo(i8* %v0, i8* readonly %v1) { ; CHECK-NEXT: [[T252:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 9 ; CHECK-NEXT: [[T292:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 10 ; CHECK-NEXT: [[T322:%.*]] = getelementptr inbounds i64, i64* [[T02]], i64 11 -; CHECK-NEXT: [[T19:%.*]] = load i32, i32* [[T14]], align 4 -; CHECK-NEXT: [[T23:%.*]] = load i32, i32* [[T18]], align 4 -; CHECK-NEXT: [[T27:%.*]] = load i32, i32* [[T22]], align 4 -; CHECK-NEXT: [[T30:%.*]] = load i32, i32* [[T26]], align 4 -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[T142]] to <2 x i64>* -; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[T222]] to <2 x i64>* +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[T14]] to <4 x i32>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i64* [[T142]] to <2 x i64>* ; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8 -; CHECK-NEXT: [[T20:%.*]] = add nsw i32 [[T19]], 4 -; CHECK-NEXT: [[T24:%.*]] = add nsw i32 [[T23]], 4 -; CHECK-NEXT: [[T28:%.*]] = add nsw i32 [[T27]], 6 -; CHECK-NEXT: [[T31:%.*]] = add nsw i32 [[T30]], 7 -; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i64> [[TMP2]], <i64 4, i64 4> -; CHECK-NEXT: [[TMP6:%.*]] = add nsw <2 x i64> [[TMP4]], <i64 6, i64 7> -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i64* [[T212]] to <2 x i64>* -; CHECK-NEXT: 
store <2 x i64> [[TMP5]], <2 x i64>* [[TMP7]], align 8 -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i64* [[T292]] to <2 x i64>* -; CHECK-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP8]], align 8 -; CHECK-NEXT: store i32 [[T20]], i32* [[T21]], align 4 -; CHECK-NEXT: store i32 [[T24]], i32* [[T25]], align 4 -; CHECK-NEXT: store i32 [[T28]], i32* [[T29]], align 4 -; CHECK-NEXT: store i32 [[T31]], i32* [[T32]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i64* [[T222]] to <2 x i64>* +; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP2]], <i32 4, i32 4, i32 6, i32 7> +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <2 x i64> [[TMP4]], <i64 4, i64 4> +; CHECK-NEXT: [[TMP9:%.*]] = add nsw <2 x i64> [[TMP6]], <i64 6, i64 7> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64* [[T212]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP8]], <2 x i64>* [[TMP10]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64* [[T292]] to <2 x i64>* +; CHECK-NEXT: store <2 x i64> [[TMP9]], <2 x i64>* [[TMP11]], align 8 +; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[T21]] to <4 x i32>* +; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP12]], align 4 ; CHECK-NEXT: ret void ; %t0 = bitcast i8* %v0 to i32* _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits