The attached patch should fix the aforementioned bug. It passes DejaGnu testsuite. Nick also checked that it passes llvm-test and llvm-gcc bootstrap (thanks!).
The patch: 1) changes SCEVSDivExpr into SCEVUDivExpr, 2) replaces PartialFact() function with BinomialCoefficient(); the computations in BinomialCoefficient() are performed with the apprioprate bitwidth necessary to avoid overflow. The short explanation why the patch should be correct is contained in the comments. The longer can be found in the bugzilla discussion: http://llvm.org/bugs/show_bug.cgi?id=1798. However, there is one problem. The fix needs support for integers of arbitrary bitwitdth to work in every possible case. Here is a short explanation why: To evaluate a chrec of length K at a given iteration we need, in general, to generate LLVM code performing accurate multiplication of K numbers. Suppose, W is their bitwidth. Then, multiplication need to use K*W bits, what can potentially be an arbitrary number. I can see two ways what we can do now: 1) wait for the backend support, 2) make the patch unoptimal by using the more bits than needed to perform the multiplication (the minimum power of 2 greater or equal to K*W) What do you think? -Wojtek
Index: test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll =================================================================== --- test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll (revision 46007) +++ test/Analysis/ScalarEvolution/2007-11-14-SignedAddRec.ll (working copy) @@ -1,6 +1,5 @@ ; RUN: llvm-as < %s | opt -indvars | llvm-dis | grep printd | grep 1206807378 ; PR1798 -; XFAIL: * declare void @printd(i32) Index: include/llvm/Analysis/ScalarEvolutionExpander.h =================================================================== --- include/llvm/Analysis/ScalarEvolutionExpander.h (revision 46007) +++ include/llvm/Analysis/ScalarEvolutionExpander.h (working copy) @@ -126,10 +126,10 @@ Value *visitMulExpr(SCEVMulExpr *S); - Value *visitSDivExpr(SCEVSDivExpr *S) { + Value *visitUDivExpr(SCEVUDivExpr *S) { Value *LHS = expand(S->getLHS()); Value *RHS = expand(S->getRHS()); - return InsertBinop(Instruction::SDiv, LHS, RHS, InsertPt); + return InsertBinop(Instruction::UDiv, LHS, RHS, InsertPt); } Value *visitAddRecExpr(SCEVAddRecExpr *S); Index: include/llvm/Analysis/ScalarEvolution.h =================================================================== --- include/llvm/Analysis/ScalarEvolution.h (revision 46007) +++ include/llvm/Analysis/ScalarEvolution.h (working copy) @@ -225,7 +225,7 @@ Ops.push_back(RHS); return getMulExpr(Ops); } - SCEVHandle getSDivExpr(const SCEVHandle &LHS, const SCEVHandle &RHS); + SCEVHandle getUDivExpr(const SCEVHandle &LHS, const SCEVHandle &RHS); SCEVHandle getAddRecExpr(const SCEVHandle &Start, const SCEVHandle &Step, const Loop *L); SCEVHandle getAddRecExpr(std::vector<SCEVHandle> &Operands, Index: include/llvm/Analysis/ScalarEvolutionExpressions.h =================================================================== --- include/llvm/Analysis/ScalarEvolutionExpressions.h (revision 46007) +++ include/llvm/Analysis/ScalarEvolutionExpressions.h (working copy) @@ -25,7 +25,7 @@ // These should be ordered in terms of increasing complexity to make the // folders simpler. scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr, - scSDivExpr, scAddRecExpr, scSMaxExpr, scUnknown, scCouldNotCompute + scUDivExpr, scAddRecExpr, scSMaxExpr, scUnknown, scCouldNotCompute }; //===--------------------------------------------------------------------===// @@ -322,16 +322,16 @@ //===--------------------------------------------------------------------===// - /// SCEVSDivExpr - This class represents a binary signed division operation. + /// SCEVUDivExpr - This class represents a binary unsigned division operation. /// - class SCEVSDivExpr : public SCEV { + class SCEVUDivExpr : public SCEV { friend class ScalarEvolution; SCEVHandle LHS, RHS; - SCEVSDivExpr(const SCEVHandle &lhs, const SCEVHandle &rhs) - : SCEV(scSDivExpr), LHS(lhs), RHS(rhs) {} + SCEVUDivExpr(const SCEVHandle &lhs, const SCEVHandle &rhs) + : SCEV(scUDivExpr), LHS(lhs), RHS(rhs) {} - virtual ~SCEVSDivExpr(); + virtual ~SCEVUDivExpr(); public: const SCEVHandle &getLHS() const { return LHS; } const SCEVHandle &getRHS() const { return RHS; } @@ -353,7 +353,7 @@ if (L == LHS && R == RHS) return this; else - return SE.getSDivExpr(L, R); + return SE.getUDivExpr(L, R); } @@ -363,9 +363,9 @@ void print(std::ostream *OS) const { if (OS) print(*OS); } /// Methods for support type inquiry through isa, cast, and dyn_cast: - static inline bool classof(const SCEVSDivExpr *S) { return true; } + static inline bool classof(const SCEVUDivExpr *S) { return true; } static inline bool classof(const SCEV *S) { - return S->getSCEVType() == scSDivExpr; + return S->getSCEVType() == scUDivExpr; } }; @@ -540,8 +540,8 @@ return ((SC*)this)->visitAddExpr((SCEVAddExpr*)S); case scMulExpr: return ((SC*)this)->visitMulExpr((SCEVMulExpr*)S); - case scSDivExpr: - return ((SC*)this)->visitSDivExpr((SCEVSDivExpr*)S); + case scUDivExpr: + return ((SC*)this)->visitUDivExpr((SCEVUDivExpr*)S); case scAddRecExpr: return ((SC*)this)->visitAddRecExpr((SCEVAddRecExpr*)S); case scSMaxExpr: Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp (revision 46007) +++ lib/Analysis/ScalarEvolution.cpp (working copy) @@ -328,21 +328,21 @@ } -// SCEVSDivs - Only allow the creation of one SCEVSDivExpr for any particular +// SCEVUDivs - Only allow the creation of one SCEVUDivExpr for any particular // input. Don't use a SCEVHandle here, or else the object will never be // deleted! static ManagedStatic<std::map<std::pair<SCEV*, SCEV*>, - SCEVSDivExpr*> > SCEVSDivs; + SCEVUDivExpr*> > SCEVUDivs; -SCEVSDivExpr::~SCEVSDivExpr() { - SCEVSDivs->erase(std::make_pair(LHS, RHS)); +SCEVUDivExpr::~SCEVUDivExpr() { + SCEVUDivs->erase(std::make_pair(LHS, RHS)); } -void SCEVSDivExpr::print(std::ostream &OS) const { - OS << "(" << *LHS << " /s " << *RHS << ")"; +void SCEVUDivExpr::print(std::ostream &OS) const { + OS << "(" << *LHS << " /u " << *RHS << ")"; } -const Type *SCEVSDivExpr::getType() const { +const Type *SCEVUDivExpr::getType() const { return LHS->getType(); } @@ -532,57 +532,87 @@ } -/// PartialFact - Compute V!/(V-NumSteps)! -static SCEVHandle PartialFact(SCEVHandle V, unsigned NumSteps, - ScalarEvolution &SE) { +/// BinomialCoefficient - Compute BC(It, K). The result is of the same type as +/// It. Assume, K > 0. +static SCEVHandle BinomialCoefficient(SCEVHandle It, unsigned K, + ScalarEvolution &SE) { + // We are using the following formula for BC(It, K): + // + // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K! + // + // Suppose, W is the bitwidth of It (and of the return value as well). We + // must be prepared for overflow. Hence, we must assure that the result of + // our computation is equal to the accurate one modulo 2^W. Unfortunately, + // division isn't safe in modular arithmetic. This means we must perform the + // whole computation accurately and then truncate the result to W bits. + // + // The dividend of the formula is a multiplication of K integers of bitwidth + // W. K*W bits suffice to compute it accurately. + // + // FIXME? We assume the divisor can be accurately computed using 32-bit + // unsigned integer type. It is true up to K = 12. + // + // It is safe to use unsigned division here: the dividend is nonnegative and + // the divisor is positive. + + // Handle the simplest case efficiently. + if (K == 1) + return It; + + // The final number of bits we need is the maximum of K*W (because of the + // dividend) and 32 (because of the divisor). + const IntegerType *ExTy = + IntegerType::get(std::max(K * It->getBitWidth(), 32U)); + const SCEVHandle ExIt = SE.getZeroExtendExpr(It, ExTy); + + // Compute K! We know K >= 2 here. + uint32_t F = 2; + for (unsigned i = 3; i <= K; ++i) + F *= i; + APInt KFactorial(ExTy->getBitWidth(), F); + // Handle this case efficiently, it is common to have constant iteration // counts while computing loop exit values. - if (SCEVConstant *SC = dyn_cast<SCEVConstant>(V)) { - const APInt& Val = SC->getValue()->getValue(); - APInt Result(Val.getBitWidth(), 1); - for (; NumSteps; --NumSteps) - Result *= Val-(NumSteps-1); - return SE.getConstant(Result); + if (SCEVConstant *SC = dyn_cast<SCEVConstant>(ExIt)) { + const APInt& N = SC->getValue()->getValue(); + APInt Dividend(N.getBitWidth(), 1); + for (; K; --K) + Dividend *= N-(K-1); + return SE.getConstant(Dividend.udiv(KFactorial).trunc(It->getBitWidth())); } - - const Type *Ty = V->getType(); - if (NumSteps == 0) - return SE.getIntegerSCEV(1, Ty); - - SCEVHandle Result = V; - for (unsigned i = 1; i != NumSteps; ++i) - Result = SE.getMulExpr(Result, SE.getMinusSCEV(V, - SE.getIntegerSCEV(i, Ty))); - return Result; + + SCEVHandle Dividend = ExIt; + for (unsigned i = 1; i != K; ++i) + Dividend = SE.getMulExpr(Dividend, + SE.getMinusSCEV(ExIt, SE.getIntegerSCEV(i, ExTy))); + return + SE.getTruncateExpr(SE.getUDivExpr(Dividend, SE.getConstant(KFactorial)), + It->getType()); } - /// evaluateAtIteration - Return the value of this chain of recurrences at /// the specified iteration number. We can evaluate this recurrence by /// multiplying each element in the chain by the binomial coefficient /// corresponding to it. In other words, we can evaluate {A,+,B,+,C,+,D} as: /// -/// A*choose(It, 0) + B*choose(It, 1) + C*choose(It, 2) + D*choose(It, 3) +/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3) /// -/// FIXME/VERIFY: I don't trust that this is correct in the face of overflow. -/// Is the binomial equation safe using modular arithmetic?? +/// where BC(It, k) stands for binomial coefficient. /// SCEVHandle SCEVAddRecExpr::evaluateAtIteration(SCEVHandle It, ScalarEvolution &SE) const { SCEVHandle Result = getStart(); - int Divisor = 1; - const Type *Ty = It->getType(); for (unsigned i = 1, e = getNumOperands(); i != e; ++i) { - SCEVHandle BC = PartialFact(It, i, SE); - Divisor *= i; - SCEVHandle Val = SE.getSDivExpr(SE.getMulExpr(BC, getOperand(i)), - SE.getIntegerSCEV(Divisor,Ty)); + // The computation is correct in the face of overflow provided that the + // multiplication is performed _after_ the evaluation of the binomial + // coefficient. + SCEVHandle Val = SE.getMulExpr(getOperand(i), + BinomialCoefficient(It, i, SE)); Result = SE.getAddExpr(Result, Val); } return Result; } - //===----------------------------------------------------------------------===// // SCEV Expression folder implementations //===----------------------------------------------------------------------===// @@ -1039,24 +1069,22 @@ return Result; } -SCEVHandle ScalarEvolution::getSDivExpr(const SCEVHandle &LHS, const SCEVHandle &RHS) { +SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS, const SCEVHandle &RHS) { if (SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { if (RHSC->getValue()->equalsInt(1)) - return LHS; // X sdiv 1 --> x - if (RHSC->getValue()->isAllOnesValue()) - return getNegativeSCEV(LHS); // X sdiv -1 --> -x + return LHS; // X udiv 1 --> x if (SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) { Constant *LHSCV = LHSC->getValue(); Constant *RHSCV = RHSC->getValue(); - return getUnknown(ConstantExpr::getSDiv(LHSCV, RHSCV)); + return getUnknown(ConstantExpr::getUDiv(LHSCV, RHSCV)); } } // FIXME: implement folding of (X*4)/4 when we know X*4 doesn't overflow. - SCEVSDivExpr *&Result = (*SCEVSDivs)[std::make_pair(LHS, RHS)]; - if (Result == 0) Result = new SCEVSDivExpr(LHS, RHS); + SCEVUDivExpr *&Result = (*SCEVUDivs)[std::make_pair(LHS, RHS)]; + if (Result == 0) Result = new SCEVUDivExpr(LHS, RHS); return Result; } @@ -1555,7 +1583,7 @@ return MinOpRes; } - // SCEVSDivExpr, SCEVUnknown + // SCEVUDivExpr, SCEVUnknown return 0; } @@ -1574,8 +1602,8 @@ case Instruction::Mul: return SE.getMulExpr(getSCEV(I->getOperand(0)), getSCEV(I->getOperand(1))); - case Instruction::SDiv: - return SE.getSDivExpr(getSCEV(I->getOperand(0)), + case Instruction::UDiv: + return SE.getUDivExpr(getSCEV(I->getOperand(0)), getSCEV(I->getOperand(1))); case Instruction::Sub: return SE.getMinusSCEV(getSCEV(I->getOperand(0)), @@ -2264,14 +2292,14 @@ return Comm; } - if (SCEVSDivExpr *Div = dyn_cast<SCEVSDivExpr>(V)) { + if (SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) { SCEVHandle LHS = getSCEVAtScope(Div->getLHS(), L); if (LHS == UnknownValue) return LHS; SCEVHandle RHS = getSCEVAtScope(Div->getRHS(), L); if (RHS == UnknownValue) return RHS; if (LHS == Div->getLHS() && RHS == Div->getRHS()) return Div; // must be loop invariant - return SE.getSDivExpr(LHS, RHS); + return SE.getUDivExpr(LHS, RHS); } // If this is a loop recurrence for a loop that does not contain L, then we
_______________________________________________ llvm-commits mailing list llvm-commits@cs.uiuc.edu http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits