Hi Christian, I'm still unable to get the "glsl1-while-loop with continue" test to pass on R600 with the new structurizer. For this test, the structurizer produces strange code, and I'm not sure whether it is correct. I have attached the full before and after dumps to this email. Here is the suspect block:
ENDIF: ; preds = %LOOP %46 = bitcast float %temp.0 to i32 %47 = add i32 %46, 1 %48 = bitcast i32 %47 to float %49 = bitcast float %48 to i32 %50 = icmp slt i32 5, %49 %51 = sext i1 %50 to i32 %52 = bitcast i32 %51 to float %53 = bitcast float %52 to i32 %54 = icmp ne i32 %53, 0 %55 = xor i1 %54, true %56 = xor i1 %54, true %57 = call { i1, i64 } @llvm.SI.if(i1 %56) %58 = extractvalue { i1, i64 } %57, 0 %59 = extractvalue { i1, i64 } %57, 1 %60 = call i64 @llvm.SI.if.break(i1 %55, i64 %0) br i1 %58, label %ENDIF20, label %Flow4 Notice how @llvm.SI.if and @llvm.SI.if.break use the same condition. Is this correct? -Tom On Mon, Feb 04, 2013 at 03:52:17PM +0100, Christian König wrote: > From: Christian König <christian.koe...@amd.com> > > Otherwise we sometimes produce invalid code. > > Signed-off-by: Christian König <christian.koe...@amd.com> > Tested-by: Michel Dänzer <michel.daen...@amd.com> > --- > lib/Target/R600/AMDGPUStructurizeCFG.cpp | 146 > +++++++++++++++++------------- > 1 file changed, 81 insertions(+), 65 deletions(-) > > diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp > b/lib/Target/R600/AMDGPUStructurizeCFG.cpp > index 22338b5..c6f0a66 100644 > --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp > +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp > @@ -41,6 +41,7 @@ typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap; > typedef DenseMap<BasicBlock *, Value *> BBPredicates; > typedef DenseMap<BasicBlock *, BBPredicates> PredMap; > typedef DenseMap<BasicBlock *, unsigned> VisitedMap; > +typedef DenseMap<BasicBlock *, BBVector> BB2BBVecMap; > > // The name for newly created blocks. 
> > @@ -109,6 +110,7 @@ class AMDGPUStructurizeCFG : public RegionPass { > VisitedMap Visited; > PredMap Predicates; > BBPhiMap DeletedPhis; > + BB2BBVecMap AddedPhis; > BBVector FlowsInserted; > > BasicBlock *LoopStart; > @@ -126,16 +128,18 @@ class AMDGPUStructurizeCFG : public RegionPass { > > void collectInfos(); > > + void delPhiValues(BasicBlock *From, BasicBlock *To); > + > + void addPhiValues(BasicBlock *From, BasicBlock *To); > + > + void setPhiValues(); > + > bool dominatesPredicates(BasicBlock *A, BasicBlock *B); > > void killTerminator(BasicBlock *BB); > > RegionNode *skipChained(RegionNode *Node); > > - void delPhiValues(BasicBlock *From, BasicBlock *To); > - > - void addPhiValues(BasicBlock *From, BasicBlock *To); > - > BasicBlock *getNextFlow(BasicBlock *Prev); > > bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node); > @@ -309,6 +313,76 @@ void AMDGPUStructurizeCFG::collectInfos() { > } > } > > +/// \brief Remove all PHI values coming from "From" into "To" and remember > +/// them in DeletedPhis > +void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { > + PhiMap &Map = DeletedPhis[To]; > + for (BasicBlock::iterator I = To->begin(), E = To->end(); > + I != E && isa<PHINode>(*I);) { > + > + PHINode &Phi = cast<PHINode>(*I++); > + while (Phi.getBasicBlockIndex(From) != -1) { > + Value *Deleted = Phi.removeIncomingValue(From, false); > + Map[&Phi].push_back(std::make_pair(From, Deleted)); > + } > + } > +} > + > +/// \brief Add a dummy PHI value as soon as we knew the new predecessor > +void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { > + for (BasicBlock::iterator I = To->begin(), E = To->end(); > + I != E && isa<PHINode>(*I);) { > + > + PHINode &Phi = cast<PHINode>(*I++); > + Value *Undef = UndefValue::get(Phi.getType()); > + Phi.addIncoming(Undef, From); > + } > + AddedPhis[To].push_back(From); > +} > + > +/// \brief Add the real PHI value as soon as everything is set up > +void 
AMDGPUStructurizeCFG::setPhiValues() { > + > + SSAUpdater Updater; > + for (BB2BBVecMap::iterator AI = AddedPhis.begin(), AE = AddedPhis.end(); > + AI != AE; ++AI) { > + > + BasicBlock *To = AI->first; > + BBVector &From = AI->second; > + > + if (!DeletedPhis.count(To)) > + continue; > + > + PhiMap &Map = DeletedPhis[To]; > + for (PhiMap::iterator PI = Map.begin(), PE = Map.end(); > + PI != PE; ++PI) { > + > + PHINode *Phi = PI->first; > + Value *Undef = UndefValue::get(Phi->getType()); > + Updater.Initialize(Phi->getType(), ""); > + Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); > + Updater.AddAvailableValue(To, Undef); > + > + for (BBValueVector::iterator VI = PI->second.begin(), > + VE = PI->second.end(); VI != VE; ++VI) { > + > + Updater.AddAvailableValue(VI->first, VI->second); > + } > + > + for (BBVector::iterator FI = From.begin(), FE = From.end(); > + FI != FE; ++FI) { > + > + int Idx = Phi->getBasicBlockIndex(*FI); > + assert(Idx != -1); > + Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(*FI)); > + } > + } > + > + DeletedPhis.erase(To); > + } > + assert(DeletedPhis.empty()); > +} > + > /// \brief Does A dominate all the predicates of B ? 
> bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) > { > BBPredicates &Preds = Predicates[B]; > @@ -406,57 +480,6 @@ RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode > *Node) { > return ParentRegion->getNode(wireFlowBlock(BB, Next)); > } > > -/// \brief Remove all PHI values coming from "From" into "To" and remember > -/// them in DeletedPhis > -void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { > - PhiMap &Map = DeletedPhis[To]; > - for (BasicBlock::iterator I = To->begin(), E = To->end(); > - I != E && isa<PHINode>(*I);) { > - > - PHINode &Phi = cast<PHINode>(*I++); > - while (Phi.getBasicBlockIndex(From) != -1) { > - Value *Deleted = Phi.removeIncomingValue(From, false); > - Map[&Phi].push_back(std::make_pair(From, Deleted)); > - } > - } > -} > - > -/// \brief Add the PHI values back once we knew the new predecessor > -void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { > - if (!DeletedPhis.count(To)) > - return; > - > - PhiMap &Map = DeletedPhis[To]; > - SSAUpdater Updater; > - > - for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) { > - > - PHINode *Phi = I->first; > - Updater.Initialize(Phi->getType(), ""); > - BasicBlock *Fallback = To; > - bool HaveFallback = false; > - > - for (BBValueVector::iterator VI = I->second.begin(), VE = > I->second.end(); > - VI != VE; ++VI) { > - > - Updater.AddAvailableValue(VI->first, VI->second); > - BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first); > - if (Dom == VI->first) > - HaveFallback = true; > - else if (Dom != Fallback) > - HaveFallback = false; > - Fallback = Dom; > - } > - if (!HaveFallback) { > - Value *Undef = UndefValue::get(Phi->getType()); > - Updater.AddAvailableValue(Fallback, Undef); > - } > - > - Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From); > - } > - DeletedPhis.erase(To); > -} > - > /// \brief Create a new flow node and update dominator tree and region info > BasicBlock 
*AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) { > LLVMContext &Context = Func->getContext(); > @@ -554,6 +577,7 @@ BasicBlock > *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev, > /// branches only have undefined conditions. > void AMDGPUStructurizeCFG::createFlow() { > DeletedPhis.clear(); > + AddedPhis.clear(); > > BasicBlock *Prev = Order.pop_back_val()->getEntry(); > assert(Prev == ParentRegion->getEntry() && "Incorrect node order!"); > @@ -601,18 +625,8 @@ void AMDGPUStructurizeCFG::createFlow() { > if (DT->dominates(ParentRegion->getEntry(), Exit)) > DT->changeImmediateDominator(Exit, Prev); > > - if (LoopStart && LoopEnd) { > - BBVector::iterator FI = std::find(FlowsInserted.begin(), > - FlowsInserted.end(), > - LoopStart); > - for (; *FI != LoopEnd; ++FI) { > - addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0)); > - } > - } > - > assert(Order.empty()); > assert(Visited.empty()); > - assert(DeletedPhis.empty()); > } > > /// \brief Insert the missing branch conditions > @@ -697,12 +711,14 @@ bool AMDGPUStructurizeCFG::runOnRegion(Region *R, > RGPassManager &RGM) { > collectInfos(); > createFlow(); > insertConditions(); > + setPhiValues(); > rebuildSSA(); > > Order.clear(); > Visited.clear(); > Predicates.clear(); > DeletedPhis.clear(); > + AddedPhis.clear(); > FlowsInserted.clear(); > > return true; > -- > 1.7.9.5 >
*** IR Dump Before Insert stack protectors *** define void @main() { main_body: br label %LOOP.outer LOOP.outer: ; preds = %ENDIF20, %main_body %temp4.0.ph = phi float [ 0.000000e+00, %main_body ], [ %23, %ENDIF20 ] %temp.0.ph = phi float [ 0.000000e+00, %main_body ], [ %16, %ENDIF20 ] br label %LOOP LOOP: ; preds = %LOOP.outer, %ENDIF %temp.0 = phi float [ %16, %ENDIF ], [ %temp.0.ph, %LOOP.outer ] %0 = bitcast float %temp.0 to i32 %1 = icmp sge i32 %0, 20 %2 = sext i1 %1 to i32 %3 = bitcast i32 %2 to float %4 = bitcast float %3 to i32 %5 = icmp ne i32 %4, 0 br i1 %5, label %IF, label %ENDIF IF: ; preds = %LOOP %6 = call float @llvm.AMDIL.clamp.(float %temp4.0.ph, float 0.000000e+00, float 1.000000e+00) %7 = call float @llvm.AMDIL.clamp.(float %temp4.0.ph, float 0.000000e+00, float 1.000000e+00) %8 = call float @llvm.AMDIL.clamp.(float %temp4.0.ph, float 0.000000e+00, float 1.000000e+00) %9 = call float @llvm.AMDIL.clamp.(float %temp4.0.ph, float 0.000000e+00, float 1.000000e+00) %10 = insertelement <4 x float> undef, float %6, i32 0 %11 = insertelement <4 x float> %10, float %7, i32 1 %12 = insertelement <4 x float> %11, float %8, i32 2 %13 = insertelement <4 x float> %12, float %9, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %13, i32 0, i32 0) ret void ENDIF: ; preds = %LOOP %14 = bitcast float %temp.0 to i32 %15 = add i32 %14, 1 %16 = bitcast i32 %15 to float %17 = bitcast float %16 to i32 %18 = icmp slt i32 5, %17 %19 = sext i1 %18 to i32 %20 = bitcast i32 %19 to float %21 = bitcast float %20 to i32 %22 = icmp ne i32 %21, 0 br i1 %22, label %LOOP, label %ENDIF20 ENDIF20: ; preds = %ENDIF %23 = fadd float %temp4.0.ph, 0x3FB99999A0000000 br label %LOOP.outer } *** IR Dump Before Preliminary module verification *** define void @main() { main_body: br label %Flow LOOP.outer: ; preds = %Flow %temp4.0.ph = phi float [ %2, %Flow ] %temp.0.ph = phi float [ %3, %Flow ] br label %Flow1 Flow: ; preds = %Flow2, %main_body %0 = phi i64 [ %44, %Flow2 ], [ 0, 
%main_body ] %1 = phi float [ %10, %Flow2 ], [ undef, %main_body ] %2 = phi float [ %39, %Flow2 ], [ 0.000000e+00, %main_body ] %3 = phi float [ %40, %Flow2 ], [ 0.000000e+00, %main_body ] %4 = phi float [ %41, %Flow2 ], [ undef, %main_body ] %5 = phi i1 [ %42, %Flow2 ], [ false, %main_body ] %6 = phi i1 [ %43, %Flow2 ], [ true, %main_body ] %7 = call { i1, i64 } @llvm.SI.if(i1 %6) %8 = extractvalue { i1, i64 } %7, 0 %9 = extractvalue { i1, i64 } %7, 1 br i1 %8, label %LOOP.outer, label %Flow1 Flow1: ; preds = %LOOP.outer, %Flow %10 = phi float [ %temp4.0.ph, %LOOP.outer ], [ %1, %Flow ] %11 = phi float [ undef, %LOOP.outer ], [ %2, %Flow ] %12 = phi float [ undef, %LOOP.outer ], [ %3, %Flow ] %13 = phi float [ %temp.0.ph, %LOOP.outer ], [ %4, %Flow ] %14 = phi i1 [ true, %LOOP.outer ], [ %5, %Flow ] call void @llvm.SI.end.cf(i64 %9) %15 = call { i1, i64 } @llvm.SI.if(i1 %14) %16 = extractvalue { i1, i64 } %15, 0 %17 = extractvalue { i1, i64 } %15, 1 br i1 %16, label %LOOP, label %Flow2 LOOP: ; preds = %Flow1 %temp.0 = phi float [ %13, %Flow1 ] %18 = bitcast float %temp.0 to i32 %19 = icmp sge i32 %18, 20 %20 = sext i1 %19 to i32 %21 = bitcast i32 %20 to float %22 = bitcast float %21 to i32 %23 = icmp ne i32 %22, 0 %24 = xor i1 %23, true %25 = call { i1, i64 } @llvm.SI.if(i1 %24) %26 = extractvalue { i1, i64 } %25, 0 %27 = extractvalue { i1, i64 } %25, 1 br i1 %26, label %ENDIF, label %Flow3 Flow4: ; preds = %ENDIF20, %ENDIF %28 = phi float [ %68, %ENDIF20 ], [ %11, %ENDIF ] %29 = phi float [ %48, %ENDIF20 ], [ %12, %ENDIF ] call void @llvm.SI.end.cf(i64 %59) br label %Flow3 IF: ; preds = %Flow2 call void @llvm.SI.end.cf(i64 %44) %30 = call float @llvm.AMDIL.clamp.(float %10, float 0.000000e+00, float 1.000000e+00) %31 = call float @llvm.AMDIL.clamp.(float %10, float 0.000000e+00, float 1.000000e+00) %32 = call float @llvm.AMDIL.clamp.(float %10, float 0.000000e+00, float 1.000000e+00) %33 = call float @llvm.AMDIL.clamp.(float %10, float 0.000000e+00, float 
1.000000e+00) %34 = insertelement <4 x float> undef, float %30, i32 0 %35 = insertelement <4 x float> %34, float %31, i32 1 %36 = insertelement <4 x float> %35, float %32, i32 2 %37 = insertelement <4 x float> %36, float %33, i32 3 call void @llvm.R600.store.swizzle(<4 x float> %37, i32 0, i32 0) ret void Flow2: ; preds = %Flow3, %Flow1 %38 = phi i64 [ %67, %Flow3 ], [ %0, %Flow1 ] %39 = phi float [ %62, %Flow3 ], [ %11, %Flow1 ] %40 = phi float [ %63, %Flow3 ], [ %12, %Flow1 ] %41 = phi float [ %64, %Flow3 ], [ %13, %Flow1 ] %42 = phi i1 [ %65, %Flow3 ], [ false, %Flow1 ] %43 = phi i1 [ %66, %Flow3 ], [ false, %Flow1 ] %44 = call i64 @llvm.SI.else.break(i64 %17, i64 %38) call void @llvm.SI.end.cf(i64 %17) %45 = call i1 @llvm.SI.loop(i64 %44) br i1 %45, label %IF, label %Flow ENDIF: ; preds = %LOOP %46 = bitcast float %temp.0 to i32 %47 = add i32 %46, 1 %48 = bitcast i32 %47 to float %49 = bitcast float %48 to i32 %50 = icmp slt i32 5, %49 %51 = sext i1 %50 to i32 %52 = bitcast i32 %51 to float %53 = bitcast float %52 to i32 %54 = icmp ne i32 %53, 0 %55 = xor i1 %54, true %56 = xor i1 %54, true %57 = call { i1, i64 } @llvm.SI.if(i1 %56) %58 = extractvalue { i1, i64 } %57, 0 %59 = extractvalue { i1, i64 } %57, 1 %60 = call i64 @llvm.SI.if.break(i1 %55, i64 %0) br i1 %58, label %ENDIF20, label %Flow4 Flow3: ; preds = %Flow4, %LOOP %61 = phi i64 [ %60, %Flow4 ], [ %0, %LOOP ] %62 = phi float [ %28, %Flow4 ], [ %11, %LOOP ] %63 = phi float [ %29, %Flow4 ], [ %12, %LOOP ] %64 = phi float [ %48, %Flow4 ], [ undef, %LOOP ] %65 = phi i1 [ %54, %Flow4 ], [ false, %LOOP ] %66 = phi i1 [ %55, %Flow4 ], [ false, %LOOP ] %67 = call i64 @llvm.SI.else.break(i64 %27, i64 %61) call void @llvm.SI.end.cf(i64 %27) br label %Flow2 ENDIF20: ; preds = %ENDIF %68 = fadd float %10, 0x3FB99999A0000000 br label %Flow4 }
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev