Author: Bing1 Yu
Date: 2020-12-22T13:27:38+08:00
New Revision: e8ade4569b7b5343ae8d4d7c9d83706eca0e8e90
URL:
https://github.com/llvm/llvm-project/commit/e8ade4569b7b5343ae8d4d7c9d83706eca0e8e90
DIFF:
https://github.com/llvm/llvm-project/commit/e8ade4569b7b5343ae8d4d7c9d83706eca0e8e90.diff
LOG: [LegalizeType] When LegalizeType procedure widens a masked_gather, set
MemoryType's EltNum equal to Result's EltNum
When LegalizeType procedure widens a masked_gather, set MemoryType's EltNum
equal to Result's EltNum.
As I mentioned in https://reviews.llvm.org/D91092, in the previous code, if we
have a v17i32 masked_gather in avx512, we widen it to a v32i32 masked_gather
with a v17i32 MemoryType. When SplitVecRes_MGATHER processes this v32i32
masked_gather, GetSplitDestVTs will fail an assertion, since what it is asked
to split is v17i32.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D93610
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
Removed:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f21ec1dbdfe5..57cb364f1939 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4044,10 +4044,13 @@ SDValue
DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
Index = ModifyToType(Index, WideIndexVT);
SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
Scale };
+
+ // Widen the MemoryType
+ EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(),
+ N->getMemoryVT().getScalarType(), NumElts);
SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
-N->getMemoryVT(), dl, Ops,
-N->getMemOperand(), N->getIndexType(),
-N->getExtensionType());
+WideMemVT, dl, Ops, N->getMemOperand(),
+N->getIndexType(), N->getExtensionType());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -4881,6 +4884,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N,
unsigned OpNo) {
SDValue Mask = MSC->getMask();
SDValue Index = MSC->getIndex();
SDValue Scale = MSC->getScale();
+ EVT WideMemVT = MSC->getMemoryVT();
if (OpNo == 1) {
DataOp = GetWidenedVector(DataOp);
@@ -4897,6 +4901,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N,
unsigned OpNo) {
EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
MaskVT.getVectorElementType(), NumElts);
Mask = ModifyToType(Mask, WideMaskVT, true);
+
+// Widen the MemoryType
+WideMemVT = EVT::getVectorVT(*DAG.getContext(),
+ MSC->getMemoryVT().getScalarType(), NumElts);
} else if (OpNo == 4) {
// Just widen the index. It's allowed to have extra elements.
Index = GetWidenedVector(Index);
@@ -4905,9 +4913,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N,
unsigned OpNo) {
SDValue Ops[] = {MSC->getChain(), DataOp, Mask, MSC->getBasePtr(), Index,
Scale};
- return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
- MSC->getMemoryVT(), SDLoc(N), Ops,
- MSC->getMemOperand(), MSC->getIndexType(),
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), WideMemVT, SDLoc(N),
+ Ops, MSC->getMemOperand(), MSC->getIndexType(),
MSC->isTruncatingStore());
}
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
b/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
index ab62c3b92692..517553d455ae 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter_widen.ll
@@ -247,6 +247,303 @@ define void @test_scatter_v2i32_data_index(<2 x i32> %a1,
i32* %base, <2 x i32>
ret void
}
+define void @test_mscatter_v17f32(float* %base, <17 x i32> %index, <17 x
float> %val)
+; WIDEN_SKX-LABEL: test_mscatter_v17f32:
+; WIDEN_SKX: # %bb.0:
+; WIDEN_SKX-NEXT:vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
+; WIDEN_SKX-NEXT:vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
+; WIDEN_SKX-NEXT:vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
+; WIDEN_SKX-NEXT:vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; WIDEN_SKX-NEXT:vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
+; WIDEN_SKX-NEXT:vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
+; WIDEN_SKX-NEXT:vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; WIDEN_SKX-NEXT:vmovss {{.*#