@@ -2181,8 +2181,8 @@ class LSRInstance {
21812181 SmallSetVector<Instruction *, 4 > InsertedNonLCSSAInsts;
21822182
21832183 void OptimizeShadowIV ();
2184- bool FindIVUserForCond (ICmpInst *Cond, IVStrideUse *&CondUse);
2185- ICmpInst *OptimizeMax (ICmpInst *Cond, IVStrideUse* &CondUse);
2184+ bool FindIVUserForCond (Instruction *Cond, IVStrideUse *&CondUse);
2185+ Instruction *OptimizeMax (ICmpInst *Cond, IVStrideUse * &CondUse);
21862186 void OptimizeLoopTermCond ();
21872187
21882188 void ChainInstruction (Instruction *UserInst, Instruction *IVOper,
@@ -2416,7 +2416,7 @@ void LSRInstance::OptimizeShadowIV() {
24162416
24172417// / If Cond has an operand that is an expression of an IV, set the IV user and
24182418// / stride information and return true, otherwise return false.
2419- bool LSRInstance::FindIVUserForCond (ICmpInst *Cond, IVStrideUse *&CondUse) {
2419+ bool LSRInstance::FindIVUserForCond (Instruction *Cond, IVStrideUse *&CondUse) {
24202420 for (IVStrideUse &U : IU)
24212421 if (U.getUser () == Cond) {
24222422 // NOTE: we could handle setcc instructions with multiple uses here, but
@@ -2476,7 +2476,7 @@ bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
24762476// / This function solves this problem by detecting this type of loop and
24772477// / rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
24782478// / the instructions for the maximum computation.
2479- ICmpInst *LSRInstance::OptimizeMax (ICmpInst *Cond, IVStrideUse* &CondUse) {
2479+ Instruction *LSRInstance::OptimizeMax (ICmpInst *Cond, IVStrideUse * &CondUse) {
24802480 // Check that the loop matches the pattern we're looking for.
24812481 if (Cond->getPredicate () != CmpInst::ICMP_EQ &&
24822482 Cond->getPredicate () != CmpInst::ICMP_NE)
@@ -2620,15 +2620,34 @@ LSRInstance::OptimizeLoopTermCond() {
26202620 // one register value.
26212621
26222622 BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator ());
2623- if (!TermBr)
2623+ if (!TermBr || TermBr-> isUnconditional () )
26242624 continue ;
2625- // FIXME: Overly conservative, termination condition could be an 'or' etc..
2626- if (TermBr->isUnconditional () || !isa<ICmpInst>(TermBr->getCondition ()))
2625+
2626+ Instruction *Cond = dyn_cast<Instruction>(TermBr->getCondition ());
2627+ bool CondImmediatelyBeforeTerm = Cond && Cond->getNextNode () == TermBr;
2628+ // If the argument to TermBr is an extractelement, then the source of that
2629+ // instruction is what's generated the condition.
2630+ auto *Extract = dyn_cast_or_null<ExtractElementInst>(Cond);
2631+ if (Extract) {
2632+ Cond = dyn_cast<Instruction>(Extract->getVectorOperand ());
2633+ if (Cond && CondImmediatelyBeforeTerm)
2634+ CondImmediatelyBeforeTerm = Cond->getNextNode () == Extract;
2635+ }
2636+ // FIXME: We could do more here, like handling logical operations where one
2637+ // side is a cmp that uses an induction variable.
2638+ if (!Cond)
2639+ continue ;
2640+
2641+ // If the condition instruction isn't immediately before TermBr then it has
2642+ // to either be a CmpInst, or be immediately before an extract that's
2643+ // immediately before TermBr, as currently we can only move or clone a
2644+ // CmpInst.
2645+ // FIXME: We should be able to do this when it's safe to do so.
2646+ if ((!isa<CmpInst>(Cond) || Extract) && !CondImmediatelyBeforeTerm)
26272647 continue ;
26282648
26292649 // Search IVUsesByStride to find Cond's IVUse if there is one.
26302650 IVStrideUse *CondUse = nullptr ;
2631- ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition ());
26322651 if (!FindIVUserForCond (Cond, CondUse))
26332652 continue ;
26342653
@@ -2638,7 +2657,8 @@ LSRInstance::OptimizeLoopTermCond() {
26382657 // One consequence of doing this now is that it disrupts the count-down
26392658 // optimization. That's not always a bad thing though, because in such
26402659 // cases it may still be worthwhile to avoid a max.
2641- Cond = OptimizeMax (Cond, CondUse);
2660+ if (auto *Cmp = dyn_cast<ICmpInst>(Cond))
2661+ Cond = OptimizeMax (Cmp, CondUse);
26422662
26432663 // If this exiting block dominates the latch block, it may also use
26442664 // the post-inc value if it won't be shared with other uses.
@@ -2703,13 +2723,13 @@ LSRInstance::OptimizeLoopTermCond() {
27032723 // It's possible for the setcc instruction to be anywhere in the loop, and
27042724 // possible for it to have multiple users. If it is not immediately before
27052725 // the exiting block branch, move it.
2706- if (Cond-> getNextNode () != TermBr ) {
2726+ if (!CondImmediatelyBeforeTerm ) {
27072727 if (Cond->hasOneUse ()) {
27082728 Cond->moveBefore (TermBr->getIterator ());
27092729 } else {
27102730 // Clone the terminating condition and insert into the loopend.
2711- ICmpInst *OldCond = Cond;
2712- Cond = cast<ICmpInst>( Cond->clone () );
2731+ Instruction *OldCond = Cond;
2732+ Cond = Cond->clone ();
27132733 Cond->setName (L->getHeader ()->getName () + " .termcond" );
27142734 Cond->insertInto (ExitingBlock, TermBr->getIterator ());
27152735
0 commit comments