Author: Sushant Gokhale
Date: 2025-01-16T22:38:18-08:00
New Revision: b2a717940822088ee09c017465a041b50b1dde4f
URL: https://github.com/llvm/llvm-project/commit/b2a717940822088ee09c017465a041b50b1dde4f DIFF: https://github.com/llvm/llvm-project/commit/b2a717940822088ee09c017465a041b50b1dde4f.diff LOG: Revert "[InstCombine] Transform high latency, dependent FSQRT/FDIV into FMUL …" This reverts commit 7253c6fde498c4c9470b681df47d46e6930d6a02. Added: Modified: llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp Removed: llvm/test/Transforms/InstCombine/fsqrtdiv-transform.ll ################################################################################ diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index b6acde9bdd1104..d0b2ded127ff73 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -13,7 +13,6 @@ #include "InstCombineInternal.h" #include "llvm/ADT/APInt.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" @@ -658,94 +657,6 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) { return nullptr; } -// If we have the following pattern, -// X = 1.0/sqrt(a) -// R1 = X * X -// R2 = a/sqrt(a) -// then this method collects all the instructions that match R1 and R2. -static bool getFSqrtDivOptPattern(Instruction *Div, - SmallPtrSetImpl<Instruction *> &R1, - SmallPtrSetImpl<Instruction *> &R2) { - Value *A; - if (match(Div, m_FDiv(m_FPOne(), m_Sqrt(m_Value(A)))) || - match(Div, m_FDiv(m_SpecificFP(-1.0), m_Sqrt(m_Value(A))))) { - for (User *U : Div->users()) { - Instruction *I = cast<Instruction>(U); - if (match(I, m_FMul(m_Specific(Div), m_Specific(Div)))) - R1.insert(I); - } - - CallInst *CI = cast<CallInst>(Div->getOperand(1)); - for (User *U : CI->users()) { - Instruction *I = cast<Instruction>(U); - if (match(I, m_FDiv(m_Specific(A), m_Sqrt(m_Specific(A))))) - R2.insert(I); - } - } - return !R1.empty() && !R2.empty(); -} - -// Check legality for transforming -// x = 1.0/sqrt(a) -// r1 = x * x; -// r2 = a/sqrt(a); -// -// TO -// -// r1 = 1/a -// r2 = sqrt(a) -// x = r1 * r2 -// This transform works only when 'a' is known positive. -static bool isFSqrtDivToFMulLegal(Instruction *X, - SmallPtrSetImpl<Instruction *> &R1, - SmallPtrSetImpl<Instruction *> &R2) { - // Check if the required pattern for the transformation exists. - if (!getFSqrtDivOptPattern(X, R1, R2)) - return false; - - BasicBlock *BBx = X->getParent(); - BasicBlock *BBr1 = (*R1.begin())->getParent(); - BasicBlock *BBr2 = (*R2.begin())->getParent(); - - CallInst *FSqrt = cast<CallInst>(X->getOperand(1)); - if (!FSqrt->hasAllowReassoc() || !FSqrt->hasNoNaNs() || - !FSqrt->hasNoSignedZeros() || !FSqrt->hasNoInfs()) - return false; - - // We change x = 1/sqrt(a) to x = sqrt(a) * 1/a . This change isn't allowed - // by recip fp as it is strictly meant to transform ops of type a/b to - // a * 1/b. So, this can be considered as algebraic rewrite and reassoc flag - // has been used(rather abused)in the past for algebraic rewrites. - if (!X->hasAllowReassoc() || !X->hasAllowReciprocal() || !X->hasNoInfs()) - return false; - - // Check the constraints on X, R1 and R2 combined. - // fdiv instruction and one of the multiplications must reside in the same - // block. If not, the optimized code may execute more ops than before and - // this may hamper the performance. 
- if (BBx != BBr1 && BBx != BBr2) - return false; - - // Check the constraints on instructions in R1. - if (any_of(R1, [BBr1](Instruction *I) { - // When you have multiple instructions residing in R1 and R2 - // respectively, it's difficult to generate combinations of (R1,R2) and - // then check if we have the required pattern. So, for now, just be - // conservative. - return (I->getParent() != BBr1 || !I->hasAllowReassoc()); - })) - return false; - - // Check the constraints on instructions in R2. - return all_of(R2, [BBr2](Instruction *I) { - // When you have multiple instructions residing in R1 and R2 - // respectively, it's difficult to generate combination of (R1,R2) and - // then check if we have the required pattern. So, for now, just be - // conservative. - return (I->getParent() == BBr2 && I->hasAllowReassoc()); - }); - } - Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) { Value *Op0 = I.getOperand(0); Value *Op1 = I.getOperand(1); @@ -2002,75 +1913,6 @@ static Instruction *foldFDivSqrtDivisor(BinaryOperator &I, return BinaryOperator::CreateFMulFMF(Op0, NewSqrt, &I); } -// Change -// X = 1/sqrt(a) -// R1 = X * X -// R2 = a * X -// -// TO -// -// FDiv = 1/a -// FSqrt = sqrt(a) -// FMul = FDiv * FSqrt -// Replace Uses Of R1 With FDiv -// Replace Uses Of R2 With FSqrt -// Replace Uses Of X With FMul -static Instruction * -convertFSqrtDivIntoFMul(CallInst *CI, Instruction *X, - const SmallPtrSetImpl<Instruction *> &R1, - const SmallPtrSetImpl<Instruction *> &R2, - InstCombiner::BuilderTy &B, InstCombinerImpl *IC) { - - B.SetInsertPoint(X); - - // Have an instruction that is representative of all of instructions in R1 and - // get the most common fpmath metadata and fast-math flags on it. - Value *SqrtOp = CI->getArgOperand(0); - auto *FDiv = cast<Instruction>( - B.CreateFDiv(ConstantFP::get(X->getType(), 1.0), SqrtOp)); - auto *R1FPMathMDNode = (*R1.begin())->getMetadata(LLVMContext::MD_fpmath); - FastMathFlags R1FMF = (*R1.begin())->getFastMathFlags(); // Common FMF - for (Instruction *I : R1) { - R1FPMathMDNode = MDNode::getMostGenericFPMath( - R1FPMathMDNode, I->getMetadata(LLVMContext::MD_fpmath)); - R1FMF &= I->getFastMathFlags(); - IC->replaceInstUsesWith(*I, FDiv); - IC->eraseInstFromFunction(*I); - } - FDiv->setMetadata(LLVMContext::MD_fpmath, R1FPMathMDNode); - FDiv->copyFastMathFlags(R1FMF); - - // Have a single sqrt call instruction that is representative of all of - // instructions in R2 and get the most common fpmath metadata and fast-math - // flags on it.
- auto *FSqrt = cast<CallInst>(CI->clone()); - FSqrt->insertBefore(CI); - auto *R2FPMathMDNode = (*R2.begin())->getMetadata(LLVMContext::MD_fpmath); - FastMathFlags R2FMF = (*R2.begin())->getFastMathFlags(); // Common FMF - for (Instruction *I : R2) { - R2FPMathMDNode = MDNode::getMostGenericFPMath( - R2FPMathMDNode, I->getMetadata(LLVMContext::MD_fpmath)); - R2FMF &= I->getFastMathFlags(); - IC->replaceInstUsesWith(*I, FSqrt); - IC->eraseInstFromFunction(*I); - } - FSqrt->setMetadata(LLVMContext::MD_fpmath, R2FPMathMDNode); - FSqrt->copyFastMathFlags(R2FMF); - - Instruction *FMul; - // If X = -1/sqrt(a) initially,then FMul = -(FDiv * FSqrt) - if (match(X, m_FDiv(m_SpecificFP(-1.0), m_Specific(CI)))) { - Value *Mul = B.CreateFMul(FDiv, FSqrt); - FMul = cast<Instruction>(B.CreateFNeg(Mul)); - } else - FMul = cast<Instruction>(B.CreateFMul(FDiv, FSqrt)); - FMul->copyMetadata(*X); - FMul->copyFastMathFlags(FastMathFlags::intersectRewrite(R1FMF, R2FMF) | - FastMathFlags::unionValue(R1FMF, R2FMF)); - IC->replaceInstUsesWith(*X, FMul); - return IC->eraseInstFromFunction(*X); -} - Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) { Module *M = I.getModule(); @@ -2095,24 +1937,6 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) { return R; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - // Convert - // x = 1.0/sqrt(a) - // r1 = x * x; - // r2 = a/sqrt(a); - // - // TO - // - // r1 = 1/a - // r2 = sqrt(a) - // x = r1 * r2 - SmallPtrSet<Instruction *, 2> R1, R2; - if (isFSqrtDivToFMulLegal(&I, R1, R2)) { - CallInst *CI = cast<CallInst>(I.getOperand(1)); - if (Instruction *D = convertFSqrtDivIntoFMul(CI, &I, R1, R2, Builder, this)) - return D; - } - if (isa<Constant>(Op0)) if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) if (Instruction *R = FoldOpIntoSelect(I, SI)) diff --git a/llvm/test/Transforms/InstCombine/fsqrtdiv-transform.ll b/llvm/test/Transforms/InstCombine/fsqrtdiv-transform.ll deleted file mode 100644 index 6296954333e8a7..00000000000000 --- a/llvm/test/Transforms/InstCombine/fsqrtdiv-transform.ll +++ /dev/null @@ -1,631 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; RUN: opt -S -passes='instcombine<no-verify-fixpoint>' < %s | FileCheck %s - -@x = global double 0.000000e+00 -@r1 = global double 0.000000e+00 -@r2 = global double 0.000000e+00 -@r3 = global double 0.000000e+00 -@v = global [2 x double] zeroinitializer -@v1 = global [2 x double] zeroinitializer -@v2 = global [2 x double] zeroinitializer - -; div/mul/div1 in the same block. -define void @bb_constraint_case1(double %a) { -; CHECK-LABEL: define void @bb_constraint_case1( -; CHECK-SAME: double [[A:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]) -; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]] -; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[SQRT1]] -; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 -; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8 -; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) - %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt - store double %div, ptr @x - %mul = fmul reassoc double %div, %div - store double %mul, ptr @r1 - %div1 = fdiv reassoc double %a, %sqrt - store double %div1, ptr @r2 - ret void -} - -; div/mul in one block and div1 in other block with conditional guard. 
-define void @bb_constraint_case2(double %a, i32 %d) { -; CHECK-LABEL: define void @bb_constraint_case2( -; CHECK-SAME: double [[A:%.*]], i32 [[D:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]) -; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]] -; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[SQRT1]] -; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 -; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8 -; CHECK-NEXT: [[D_NOT:%.*]] = icmp eq i32 [[D]], 0 -; CHECK-NEXT: br i1 [[D_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) - %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt - store double %div, ptr @x - %mul = fmul reassoc double %div, %div - store double %mul, ptr @r1 - %d.not = icmp eq i32 %d, 0 - br i1 %d.not, label %if.end, label %if.then - -if.then: ; preds = %entry - %div1 = fdiv reassoc double %a, %sqrt - store double %div1, ptr @r2 - br label %if.end - -if.end: ; preds = %if.then, %entry - ret void -} - -; div in one block. mul/div1 in other block and conditionally guarded. Don't optimize. -define void @bb_constraint_case3(double %a, i32 %d) { -; CHECK-LABEL: define void @bb_constraint_case3( -; CHECK-SAME: double [[A:%.*]], i32 [[D:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]]) -; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]] -; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 -; CHECK-NEXT: [[D_NOT:%.*]] = icmp eq i32 [[D]], 0 -; CHECK-NEXT: br i1 [[D_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]] -; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 -; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]] -; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8 -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) - %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt - store double %div, ptr @x - %d.not = icmp eq i32 %d, 0 - br i1 %d.not, label %if.end, label %if.then - -if.then: ; preds = %entry - %mul = fmul reassoc double %div, %div - store double %mul, ptr @r1 - %div1 = fdiv reassoc double %a, %sqrt - store double %div1, ptr @r2 - br label %if.end - -if.end: ; preds = %if.then, %entry - ret void -} - -; div in one block. mul/div1 each in diff erent block and conditionally guarded. Don't optimize. 
-define void @bb_constraint_case4(double %a, i32 %c, i32 %d) { -; CHECK-LABEL: define void @bb_constraint_case4( -; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]]) -; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]] -; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 -; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq i32 [[C]], 0 -; CHECK-NEXT: br i1 [[C_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]] -; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: -; CHECK-NEXT: [[D_NOT:%.*]] = icmp eq i32 [[D]], 0 -; CHECK-NEXT: br i1 [[D_NOT]], label [[IF_END1:%.*]], label [[IF_THEN1:%.*]] -; CHECK: if.then1: -; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]] -; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8 -; CHECK-NEXT: br label [[IF_END1]] -; CHECK: if.end1: -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) - %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt - store double %div, ptr @x - %c.not = icmp eq i32 %c, 0 - br i1 %c.not, label %if.end, label %if.then - -if.then: ; preds = %entry - %mul = fmul reassoc double %div, %div - store double %mul, ptr @r1 - br label %if.end - -if.end: ; preds = %if.then, %entry - %d.not = icmp eq i32 %d, 0 - br i1 %d.not, label %if.end1, label %if.then1 - -if.then1: ; preds = %if.end - %div1 = fdiv reassoc double %a, %sqrt - store double %div1, ptr @r2 - br label %if.end1 - -if.end1: ; preds = %if.then1, %if.end - ret void -} - -; sqrt value comes from diff erent blocks. Don't optimize. 
-define void @bb_constraint_case5(double %a, i32 %c) { -; CHECK-LABEL: define void @bb_constraint_case5( -; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq i32 [[C]], 0 -; CHECK-NEXT: br i1 [[C_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[TMP0:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]]) -; CHECK-NEXT: br label [[IF_END:%.*]] -; CHECK: if.else: -; CHECK-NEXT: [[ADD:%.*]] = fadd double [[A]], 1.000000e+01 -; CHECK-NEXT: [[TMP1:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[ADD]]) -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: -; CHECK-NEXT: [[SQRT:%.*]] = phi double [ [[TMP0]], [[IF_THEN]] ], [ [[TMP1]], [[IF_ELSE]] ] -; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]] -; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]] -; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 -; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]] -; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8 -; CHECK-NEXT: ret void -; -entry: - %c.not = icmp eq i32 %c, 0 - br i1 %c.not, label %if.else, label %if.then - -if.then: ; preds = %entry - %0 = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) - br label %if.end - -if.else: ; preds = %entry - %add = fadd double %a, 1.000000e+01 - %1 = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %add) - br label %if.end - -if.end: ; preds = %if.else, %if.then - %sqrt = phi double[ %0, %if.then], [ %1, %if.else] - %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt - %mul = fmul reassoc double %div, %div - store double %mul, ptr @r1 - %div1 = fdiv reassoc double %a, %sqrt - store double %div1, ptr @r2 - ret void -} - -; div in one block and conditionally guarded. mul/div1 in other block. Don't optimize. 
-define void @bb_constraint_case6(double %a, i32 %d) { -; CHECK-LABEL: define void @bb_constraint_case6( -; CHECK-SAME: double [[A:%.*]], i32 [[D:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]]) -; CHECK-NEXT: [[D_NOT:%.*]] = icmp eq i32 [[D]], 0 -; CHECK-NEXT: br i1 [[D_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.else: -; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr @x, align 8 -; CHECK-NEXT: br label [[IF_END:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]] -; CHECK-NEXT: store double [[TMP1]], ptr @x, align 8 -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: -; CHECK-NEXT: [[DIV:%.*]] = phi double [ [[TMP0]], [[IF_ELSE]] ], [ [[TMP1]], [[IF_THEN]] ] -; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]] -; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 -; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]] -; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8 -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) - %d.not = icmp eq i32 %d, 0 - br i1 %d.not, label %if.else, label %if.then - -if.else: ; preds = %entry - %1 = load double, ptr @x - br label %if.end - -if.then: ; preds = %entry - %2 = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt - store double %2, ptr @x - br label %if.end - -if.end: ; preds = %if.else, %if.then - %div = phi double [ %1, %if.else ], [ %2, %if.then ] - %mul = fmul reassoc double %div, %div - store double %mul, ptr @r1 - %div1 = fdiv reassoc double %a, %sqrt - store double %div1, ptr @r2 - ret void -} - -; value for mul comes from diff erent blocks. Don't optimize. -define void @bb_constraint_case7(double %a, i32 %c, i32 %d) { -; CHECK-LABEL: define void @bb_constraint_case7( -; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]]) -; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]] -; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 -; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq i32 [[C]], 0 -; CHECK-NEXT: br i1 [[C_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[TMP0:%.*]] = fdiv double 3.000000e+00, [[A]] -; CHECK-NEXT: br label [[IF_END:%.*]] -; CHECK: if.else: -; CHECK-NEXT: [[D_NOT:%.*]] = icmp eq i32 [[D]], 0 -; CHECK-NEXT: br i1 [[D_NOT]], label [[IF_ELSE1:%.*]], label [[IF_THEN1:%.*]] -; CHECK: if.then1: -; CHECK-NEXT: [[TMP1:%.*]] = fdiv double 2.000000e+00, [[A]] -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.else1: -; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc double [[DIV]], [[DIV]] -; CHECK-NEXT: br label [[IF_END]] -; CHECK: if.end: -; CHECK-NEXT: [[MUL:%.*]] = phi double [ [[TMP1]], [[IF_THEN1]] ], [ [[TMP2]], [[IF_ELSE1]] ], [ [[TMP0]], [[IF_THEN]] ] -; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 -; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]] -; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8 -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) - %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt - store double %div, ptr @x - %c.not = icmp eq i32 %c, 0 - br i1 %c.not, label %if.else, label %if.then - -if.then: ; preds = %entry - %1 = fdiv double 3.000000e+00, %a - br label %if.end - -if.else: ; preds = %entry - %d.not = icmp eq i32 %d, 0 - br 
i1 %d.not, label %if.else1, label %if.then1 - -if.then1: ; preds = %if.else - %2 = fdiv double 2.000000e+00, %a - br label %if.end - -if.else1: ; preds = %if.else - %3 = fmul reassoc double %div, %div - br label %if.end - -if.end: ; preds = %if.then1, %if.else1, %if.then - %mul = phi double [ %2, %if.then1 ], [ %3, %if.else1 ], [ %1, %if.then ] - store double %mul, ptr @r1 - %div1 = fdiv reassoc double %a, %sqrt - store double %div1, ptr @r2 - ret void -} - -; value of mul comes from two diff erent blocks(as shown by select ins). -define void @bb_constraint_case8(double %a, i32 %c) { -; CHECK-LABEL: define void @bb_constraint_case8( -; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]) -; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]] -; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[SQRT1]] -; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 -; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq i32 [[C]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = fmul double [[A]], [[A]] -; CHECK-NEXT: [[MUL:%.*]] = select i1 [[C_NOT]], double [[TMP1]], double [[TMP0]] -; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 -; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) - %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt - store double %div, ptr @x - %c.not = icmp eq i32 %c, 0 - %1 = fmul double %a, %a - %2 = fmul reassoc double %div, %div - %mul = select i1 %c.not, double %1, double %2 - store double %mul, ptr @r1 - %div1 = fdiv reassoc double %a, %sqrt - store double %div1, ptr @r2 - ret void -} - -; multiple instances of multiply ops to optimize. Optimize all. -define void @mutiple_multiply_instances(double %a, i32 %c) { -; CHECK-LABEL: define void @mutiple_multiply_instances( -; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]) -; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc double 1.000000e+00, [[A]] -; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP1]], [[SQRT1]] -; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 -; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq i32 [[C]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = fmul double [[A]], [[A]] -; CHECK-NEXT: [[TMP3:%.*]] = fmul double [[A]], [[A]] -; CHECK-NEXT: [[MUL1:%.*]] = select i1 [[C_NOT]], double [[TMP2]], double [[TMP1]] -; CHECK-NEXT: [[MUL2:%.*]] = select i1 [[C_NOT]], double [[TMP1]], double [[TMP3]] -; CHECK-NEXT: store double [[MUL1]], ptr @r1, align 8 -; CHECK-NEXT: store double [[MUL2]], ptr @r3, align 8 -; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) - %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt - store double %div, ptr @x - %c.not = icmp eq i32 %c, 0 - %1 = fmul double %a, %a - %2 = fmul double %a, %a - %3 = fmul reassoc double %div, %div - %4 = fmul reassoc double %div, %div - %mul1 = select i1 %c.not, double %1, double %3 - %mul2 = select i1 %c.not, double %4, double %2 - store double %mul1, ptr @r1 - store double %mul2, ptr @r3 - %div1 = fdiv reassoc double %a, %sqrt - store double %div1, ptr @r2 - ret void -} - -; missing flags for optimization. 
-define void @missing_arcp_flag_on_div(double %a) { -; CHECK-LABEL: define void @missing_arcp_flag_on_div( -; CHECK-SAME: double [[A:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]]) -; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf double 1.000000e+00, [[SQRT]] -; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 -; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]] -; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 -; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]] -; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8 -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) - %div = fdiv reassoc ninf double 1.000000e+00, %sqrt - store double %div, ptr @x - %mul = fmul reassoc double %div, %div - store double %mul, ptr @r1 - %div1 = fdiv reassoc double %a, %sqrt - store double %div1, ptr @r2 - ret void -} - -; missing flags for optimization. -define void @missing_reassoc_flag_on_mul(double %a) { -; CHECK-LABEL: define void @missing_reassoc_flag_on_mul( -; CHECK-SAME: double [[A:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]]) -; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]] -; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 -; CHECK-NEXT: [[MUL:%.*]] = fmul double [[DIV]], [[DIV]] -; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 -; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]] -; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8 -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) - %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt - store double %div, ptr @x - %mul = fmul double %div, %div - store double %mul, ptr @r1 - %div1 = fdiv reassoc double %a, %sqrt - store double %div1, ptr @r2 - ret void -} - -; missing flags for optimization. 
-define void @missing_reassoc_flag_on_div1(double %a) { -; CHECK-LABEL: define void @missing_reassoc_flag_on_div1( -; CHECK-SAME: double [[A:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]]) -; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]] -; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 -; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]] -; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 -; CHECK-NEXT: [[DIV1:%.*]] = fdiv double [[A]], [[SQRT]] -; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8 -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) - %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt - store double %div, ptr @x - %mul = fmul reassoc double %div, %div - store double %mul, ptr @r1 - %div1 = fdiv double %a, %sqrt - store double %div1, ptr @r2 - ret void -} - -; div = -1/sqrt(a) -define void @negative_fdiv_val(double %a) { -; CHECK-LABEL: define void @negative_fdiv_val( -; CHECK-SAME: double [[A:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]) -; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]] -; CHECK-NEXT: [[TMP1:%.*]] = fneg reassoc double [[SQRT1]] -; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[TMP1]] -; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 -; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8 -; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) - %div = fdiv reassoc arcp ninf double -1.000000e+00, %sqrt - store double %div, ptr @x - %mul = fmul reassoc double %div, %div - store double %mul, ptr @r1 - %div1 = fdiv reassoc double %a, %sqrt - store double %div1, ptr @r2 - ret void -} - -define void @fpmath_metadata_on_div1(double %a) { -; CHECK-LABEL: define void @fpmath_metadata_on_div1( -; CHECK-SAME: double [[A:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]), !fpmath [[META0:![0-9]+]] -; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]] -; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[SQRT1]] -; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 -; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8 -; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) - %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt - store double %div, ptr @x - %mul = fmul reassoc double %div, %div - store double %mul, ptr @r1 - %div1 = fdiv reassoc double %a, %sqrt, !fpmath !3 - store double %div1, ptr @r2 - ret void -} - -define void @fpmath_metadata_on_mul(double %a) { -; CHECK-LABEL: define void @fpmath_metadata_on_mul( -; CHECK-SAME: double [[A:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]) -; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]], !fpmath [[META1:![0-9]+]] -; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[SQRT1]] -; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 -; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8 -; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) - %div = fdiv 
reassoc arcp ninf double 1.000000e+00, %sqrt - store double %div, ptr @x - %mul = fmul reassoc double %div, %div, !fpmath !2 - store double %mul, ptr @r1 - %div1 = fdiv reassoc double %a, %sqrt - store double %div1, ptr @r2 - ret void -} - -; FIXME: DIV in the result should get the fpmath metadata from %div. -define void @fpmath_metadata_on_div(double %a) { -; CHECK-LABEL: define void @fpmath_metadata_on_div( -; CHECK-SAME: double [[A:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]) -; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]] -; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[SQRT1]], !fpmath [[META2:![0-9]+]] -; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 -; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8 -; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) - %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt, !fpmath !1 - store double %div, ptr @x - %mul = fmul reassoc double %div, %div - store double %mul, ptr @r1 - %div1 = fdiv reassoc double %a, %sqrt - store double %div1, ptr @r2 - ret void -} - -define void @fpmath_metadata_on_all(double %a) { -; CHECK-LABEL: define void @fpmath_metadata_on_all( -; CHECK-SAME: double [[A:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]), !fpmath [[META0]] -; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]], !fpmath [[META1]] -; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[SQRT1]], !fpmath [[META2]] -; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 -; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8 -; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a), !fpmath !0 - %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt, !fpmath !1 - store double %div, ptr @x - %mul = fmul reassoc double %div, %div, !fpmath !2 - store double %mul, ptr @r1 - %div1 = fdiv reassoc double %a, %sqrt, !fpmath !3 - store double %div1, ptr @r2 - ret void -} - -define void @vector_input(<2 x double> %a) { -; CHECK-LABEL: define void @vector_input( -; CHECK-SAME: <2 x double> [[A:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc <2 x double> @llvm.sqrt.v2f64(<2 x double> [[A]]) -; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc <2 x double> splat (double 1.000000e+00), [[A]] -; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc <2 x double> [[TMP0]], [[SQRT1]] -; CHECK-NEXT: store <2 x double> [[DIV]], ptr @v, align 16 -; CHECK-NEXT: store <2 x double> [[TMP0]], ptr @v1, align 16 -; CHECK-NEXT: store <2 x double> [[SQRT1]], ptr @v2, align 16 -; CHECK-NEXT: ret void -; -entry: - %sqrt = call reassoc nnan nsz ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) - %div = fdiv reassoc arcp ninf <2 x double><double 1.000000e+00, double 1.000000e+00>, %sqrt - store <2 x double> %div, ptr @v - %mul = fmul reassoc <2 x double> %div, %div - store <2 x double> %mul, ptr @v1 - %div1 = fdiv reassoc <2 x double> %a, %sqrt - store <2 x double> %div1, ptr @v2 - ret void -} - -define void @strict_fp_metadata(double %a) { -; CHECK-LABEL: define void @strict_fp_metadata( -; CHECK-SAME: double [[A:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[CONV:%.*]] = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 1, metadata !"round.dynamic", metadata 
!"fpexcept.strict") -; CHECK-NEXT: [[CALL:%.*]] = call double @llvm.sqrt.f64(double noundef [[A]]) -; CHECK-NEXT: [[DIV:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[CONV]], double [[CALL]], metadata !"round.dynamic", metadata !"fpexcept.strict") -; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 -; CHECK-NEXT: [[MUL:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[DIV]], double [[DIV]], metadata !"round.dynamic", metadata !"fpexcept.strict") -; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 -; CHECK-NEXT: [[DIV2:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[A]], double [[CALL]], metadata !"round.dynamic", metadata !"fpexcept.strict") -; CHECK-NEXT: store double [[DIV2]], ptr @r2, align 8 -; CHECK-NEXT: ret void -; -entry: - %conv = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 1, metadata !"round.dynamic", metadata !"fpexcept.strict") - %call = call double @llvm.sqrt.f64(double noundef %a) - %div = call double @llvm.experimental.constrained.fdiv.f64(double %conv, double %call, metadata !"round.dynamic", metadata !"fpexcept.strict") - store double %div, ptr @x - %mul = call double @llvm.experimental.constrained.fmul.f64(double %div, double %div, metadata !"round.dynamic", metadata !"fpexcept.strict") - store double %mul, ptr @r1 - %div2 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %call, metadata !"round.dynamic", metadata !"fpexcept.strict") - store double %div2, ptr @r2 - ret void -} - -declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata) -declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata) -declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata) -declare double @llvm.sqrt.f64(double) -declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) - -!0 = !{float 2.5} -!1 = !{float 3.5} -!2 = !{float 4.5} -!3 = !{float 5.5} -; CHECK: [[META0]] = !{float 5.500000e+00} -; CHECK: [[META1]] = !{float 4.500000e+00} -; CHECK: [[META2]] = !{float 3.500000e+00} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits