ABataev created this revision.
Herald added subscribers: hiraditya, inglorion.
ABataev requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.
Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D108826 Files: clang/lib/Driver/ToolChains/Clang.cpp llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -172,6 +172,10 @@ cl::desc("The maximum number of users to visit while visiting the " "predecessors. This prevents compilation time increase.")); +static cl::opt<bool> SLPLimitToRegSize( + "slp-limit-to-reg-size", cl::init(false), cl::Hidden, + cl::desc("Try to vectorize using only maximal vector register size.")); + static cl::opt<bool> ViewSLPTree("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz")); @@ -7453,7 +7457,8 @@ const unsigned MinVF = R.getMinVecRegSize() / Sz; unsigned VF = Chain.size(); - if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF) + if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF || + (SLPLimitToRegSize && VF < R.getMaxVecRegSize() / Sz)) return false; LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << Idx @@ -7717,6 +7722,7 @@ Type *ScalarTy = VL[0]->getType(); if (auto *IE = dyn_cast<InsertElementInst>(VL[0])) ScalarTy = IE->getOperand(1)->getType(); + unsigned MaxRegSz = R.getMaxVecRegSize() / Sz; unsigned NextInst = 0, MaxInst = VL.size(); for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) { @@ -7737,7 +7743,8 @@ if (!isPowerOf2_32(OpsWidth)) continue; - if ((VF > MinVF && OpsWidth <= VF / 2) || (VF == MinVF && OpsWidth < 2)) + if ((SLPLimitToRegSize && OpsWidth < MaxRegSz) || + (VF > MinVF && OpsWidth <= VF / 2) || (VF == MinVF && OpsWidth < 2)) break; ArrayRef<Value *> Ops = VL.slice(I, OpsWidth); Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- 
clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -6441,8 +6441,13 @@ OptSpecifier SLPVectAliasOption = EnableSLPVec ? options::OPT_O_Group : options::OPT_fslp_vectorize; if (Args.hasFlag(options::OPT_fslp_vectorize, SLPVectAliasOption, - options::OPT_fno_slp_vectorize, EnableSLPVec)) + options::OPT_fno_slp_vectorize, EnableSLPVec)) { CmdArgs.push_back("-vectorize-slp"); + if (IsUsingLTO) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-slp-limit-to-reg-size"); + } + } ParseMPreferVectorWidth(D, Args, CmdArgs);
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -172,6 +172,10 @@ cl::desc("The maximum number of users to visit while visiting the " "predecessors. This prevents compilation time increase.")); +static cl::opt<bool> SLPLimitToRegSize( + "slp-limit-to-reg-size", cl::init(false), cl::Hidden, + cl::desc("Try to vectorize using only maximal vector register size.")); + static cl::opt<bool> ViewSLPTree("view-slp-tree", cl::Hidden, cl::desc("Display the SLP trees with Graphviz")); @@ -7453,7 +7457,8 @@ const unsigned MinVF = R.getMinVecRegSize() / Sz; unsigned VF = Chain.size(); - if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF) + if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF || + (SLPLimitToRegSize && VF < R.getMaxVecRegSize() / Sz)) return false; LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << Idx @@ -7717,6 +7722,7 @@ Type *ScalarTy = VL[0]->getType(); if (auto *IE = dyn_cast<InsertElementInst>(VL[0])) ScalarTy = IE->getOperand(1)->getType(); + unsigned MaxRegSz = R.getMaxVecRegSize() / Sz; unsigned NextInst = 0, MaxInst = VL.size(); for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF; VF /= 2) { @@ -7737,7 +7743,8 @@ if (!isPowerOf2_32(OpsWidth)) continue; - if ((VF > MinVF && OpsWidth <= VF / 2) || (VF == MinVF && OpsWidth < 2)) + if ((SLPLimitToRegSize && OpsWidth < MaxRegSz) || + (VF > MinVF && OpsWidth <= VF / 2) || (VF == MinVF && OpsWidth < 2)) break; ArrayRef<Value *> Ops = VL.slice(I, OpsWidth); Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -6441,8 +6441,13 @@ OptSpecifier SLPVectAliasOption = EnableSLPVec ? 
options::OPT_O_Group : options::OPT_fslp_vectorize; if (Args.hasFlag(options::OPT_fslp_vectorize, SLPVectAliasOption, - options::OPT_fno_slp_vectorize, EnableSLPVec)) + options::OPT_fno_slp_vectorize, EnableSLPVec)) { CmdArgs.push_back("-vectorize-slp"); + if (IsUsingLTO) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-slp-limit-to-reg-size"); + } + } ParseMPreferVectorWidth(D, Args, CmdArgs);
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits