Author: Sushant Gokhale Date: 2025-01-17T02:04:01-08:00 New Revision: ba06cf81896f7a5ea8d025c1b26af7ea4a47dc53
URL: https://github.com/llvm/llvm-project/commit/ba06cf81896f7a5ea8d025c1b26af7ea4a47dc53 DIFF: https://github.com/llvm/llvm-project/commit/ba06cf81896f7a5ea8d025c1b26af7ea4a47dc53.diff LOG: Revert "Revert "[InstCombine] Transform high latency, dependent FSQRT/FDIV in…" This reverts commit 606d0a7cdc0c551df754eb4494a2c16861b6a9b9. Added: llvm/test/Transforms/InstCombine/fsqrtdiv-transform.ll Modified: llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp Removed: ################################################################################ diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index d0b2ded127ff73..b6acde9bdd1104 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -13,6 +13,7 @@ #include "InstCombineInternal.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ValueTracking.h" @@ -657,6 +658,94 @@ Instruction *InstCombinerImpl::foldPowiReassoc(BinaryOperator &I) { return nullptr; } +// If we have the following pattern, +// X = 1.0/sqrt(a) +// R1 = X * X +// R2 = a/sqrt(a) +// then this method collects all the instructions that match R1 and R2. 
+static bool getFSqrtDivOptPattern(Instruction *Div, + SmallPtrSetImpl<Instruction *> &R1, + SmallPtrSetImpl<Instruction *> &R2) { + Value *A; + if (match(Div, m_FDiv(m_FPOne(), m_Sqrt(m_Value(A)))) || + match(Div, m_FDiv(m_SpecificFP(-1.0), m_Sqrt(m_Value(A))))) { + for (User *U : Div->users()) { + Instruction *I = cast<Instruction>(U); + if (match(I, m_FMul(m_Specific(Div), m_Specific(Div)))) + R1.insert(I); + } + + CallInst *CI = cast<CallInst>(Div->getOperand(1)); + for (User *U : CI->users()) { + Instruction *I = cast<Instruction>(U); + if (match(I, m_FDiv(m_Specific(A), m_Sqrt(m_Specific(A))))) + R2.insert(I); + } + } + return !R1.empty() && !R2.empty(); +} + +// Check legality for transforming +// x = 1.0/sqrt(a) +// r1 = x * x; +// r2 = a/sqrt(a); +// +// TO +// +// r1 = 1/a +// r2 = sqrt(a) +// x = r1 * r2 +// This transform works only when 'a' is known positive. +static bool isFSqrtDivToFMulLegal(Instruction *X, + SmallPtrSetImpl<Instruction *> &R1, + SmallPtrSetImpl<Instruction *> &R2) { + // Check if the required pattern for the transformation exists. + if (!getFSqrtDivOptPattern(X, R1, R2)) + return false; + + BasicBlock *BBx = X->getParent(); + BasicBlock *BBr1 = (*R1.begin())->getParent(); + BasicBlock *BBr2 = (*R2.begin())->getParent(); + + CallInst *FSqrt = cast<CallInst>(X->getOperand(1)); + if (!FSqrt->hasAllowReassoc() || !FSqrt->hasNoNaNs() || + !FSqrt->hasNoSignedZeros() || !FSqrt->hasNoInfs()) + return false; + + // We change x = 1/sqrt(a) to x = sqrt(a) * 1/a . This change isn't allowed + // by recip fp as it is strictly meant to transform ops of type a/b to + // a * 1/b. So, this can be considered as algebraic rewrite and reassoc flag + // has been used(rather abused)in the past for algebraic rewrites. + if (!X->hasAllowReassoc() || !X->hasAllowReciprocal() || !X->hasNoInfs()) + return false; + + // Check the constraints on X, R1 and R2 combined. + // fdiv instruction and one of the multiplications must reside in the same + // block. 
If not, the optimized code may execute more ops than before and + // this may hamper the performance. + if (BBx != BBr1 && BBx != BBr2) + return false; + + // Check the constraints on instructions in R1. + if (any_of(R1, [BBr1](Instruction *I) { + // When you have multiple instructions residing in R1 and R2 + // respectively, it's difficult to generate combinations of (R1,R2) and + // then check if we have the required pattern. So, for now, just be + // conservative. + return (I->getParent() != BBr1 || !I->hasAllowReassoc()); + })) + return false; + + // Check the constraints on instructions in R2. + return all_of(R2, [BBr2](Instruction *I) { + // When you have multiple instructions residing in R1 and R2 + // respectively, it's difficult to generate combination of (R1,R2) and + // then check if we have the required pattern. So, for now, just be + // conservative. + return (I->getParent() == BBr2 && I->hasAllowReassoc()); + }); +} + Instruction *InstCombinerImpl::foldFMulReassoc(BinaryOperator &I) { Value *Op0 = I.getOperand(0); Value *Op1 = I.getOperand(1); @@ -1913,6 +2002,75 @@ static Instruction *foldFDivSqrtDivisor(BinaryOperator &I, return BinaryOperator::CreateFMulFMF(Op0, NewSqrt, &I); } +// Change +// X = 1/sqrt(a) +// R1 = X * X +// R2 = a * X +// +// TO +// +// FDiv = 1/a +// FSqrt = sqrt(a) +// FMul = FDiv * FSqrt +// Replace Uses Of R1 With FDiv +// Replace Uses Of R2 With FSqrt +// Replace Uses Of X With FMul +static Instruction * +convertFSqrtDivIntoFMul(CallInst *CI, Instruction *X, + const SmallPtrSetImpl<Instruction *> &R1, + const SmallPtrSetImpl<Instruction *> &R2, + InstCombiner::BuilderTy &B, InstCombinerImpl *IC) { + + B.SetInsertPoint(X); + + // Have an instruction that is representative of all of instructions in R1 and + // get the most common fpmath metadata and fast-math flags on it. 
+ Value *SqrtOp = CI->getArgOperand(0); + auto *FDiv = cast<Instruction>( + B.CreateFDiv(ConstantFP::get(X->getType(), 1.0), SqrtOp)); + auto *R1FPMathMDNode = (*R1.begin())->getMetadata(LLVMContext::MD_fpmath); + FastMathFlags R1FMF = (*R1.begin())->getFastMathFlags(); // Common FMF + for (Instruction *I : R1) { + R1FPMathMDNode = MDNode::getMostGenericFPMath( + R1FPMathMDNode, I->getMetadata(LLVMContext::MD_fpmath)); + R1FMF &= I->getFastMathFlags(); + IC->replaceInstUsesWith(*I, FDiv); + IC->eraseInstFromFunction(*I); + } + FDiv->setMetadata(LLVMContext::MD_fpmath, R1FPMathMDNode); + FDiv->copyFastMathFlags(R1FMF); + + // Have a single sqrt call instruction that is representative of all of + // instructions in R2 and get the most common fpmath metadata and fast-math + // flags on it. + auto *FSqrt = cast<CallInst>(CI->clone()); + FSqrt->insertBefore(CI); + auto *R2FPMathMDNode = (*R2.begin())->getMetadata(LLVMContext::MD_fpmath); + FastMathFlags R2FMF = (*R2.begin())->getFastMathFlags(); // Common FMF + for (Instruction *I : R2) { + R2FPMathMDNode = MDNode::getMostGenericFPMath( + R2FPMathMDNode, I->getMetadata(LLVMContext::MD_fpmath)); + R2FMF &= I->getFastMathFlags(); + IC->replaceInstUsesWith(*I, FSqrt); + IC->eraseInstFromFunction(*I); + } + FSqrt->setMetadata(LLVMContext::MD_fpmath, R2FPMathMDNode); + FSqrt->copyFastMathFlags(R2FMF); + + Instruction *FMul; + // If X = -1/sqrt(a) initially,then FMul = -(FDiv * FSqrt) + if (match(X, m_FDiv(m_SpecificFP(-1.0), m_Specific(CI)))) { + Value *Mul = B.CreateFMul(FDiv, FSqrt); + FMul = cast<Instruction>(B.CreateFNeg(Mul)); + } else + FMul = cast<Instruction>(B.CreateFMul(FDiv, FSqrt)); + FMul->copyMetadata(*X); + FMul->copyFastMathFlags(FastMathFlags::intersectRewrite(R1FMF, R2FMF) | + FastMathFlags::unionValue(R1FMF, R2FMF)); + IC->replaceInstUsesWith(*X, FMul); + return IC->eraseInstFromFunction(*X); +} + Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) { Module *M = I.getModule(); @@ -1937,6 +2095,24 
@@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) { return R; Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // Convert + // x = 1.0/sqrt(a) + // r1 = x * x; + // r2 = a/sqrt(a); + // + // TO + // + // r1 = 1/a + // r2 = sqrt(a) + // x = r1 * r2 + SmallPtrSet<Instruction *, 2> R1, R2; + if (isFSqrtDivToFMulLegal(&I, R1, R2)) { + CallInst *CI = cast<CallInst>(I.getOperand(1)); + if (Instruction *D = convertFSqrtDivIntoFMul(CI, &I, R1, R2, Builder, this)) + return D; + } + if (isa<Constant>(Op0)) if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) if (Instruction *R = FoldOpIntoSelect(I, SI)) diff --git a/llvm/test/Transforms/InstCombine/fsqrtdiv-transform.ll b/llvm/test/Transforms/InstCombine/fsqrtdiv-transform.ll new file mode 100644 index 00000000000000..6296954333e8a7 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/fsqrtdiv-transform.ll @@ -0,0 +1,631 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -passes='instcombine<no-verify-fixpoint>' < %s | FileCheck %s + +@x = global double 0.000000e+00 +@r1 = global double 0.000000e+00 +@r2 = global double 0.000000e+00 +@r3 = global double 0.000000e+00 +@v = global [2 x double] zeroinitializer +@v1 = global [2 x double] zeroinitializer +@v2 = global [2 x double] zeroinitializer + +; div/mul/div1 in the same block. 
+define void @bb_constraint_case1(double %a) { +; CHECK-LABEL: define void @bb_constraint_case1( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]) +; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]] +; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[SQRT1]] +; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 +; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8 +; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) + %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt + store double %div, ptr @x + %mul = fmul reassoc double %div, %div + store double %mul, ptr @r1 + %div1 = fdiv reassoc double %a, %sqrt + store double %div1, ptr @r2 + ret void +} + +; div/mul in one block and div1 in other block with conditional guard. +define void @bb_constraint_case2(double %a, i32 %d) { +; CHECK-LABEL: define void @bb_constraint_case2( +; CHECK-SAME: double [[A:%.*]], i32 [[D:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]) +; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]] +; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[SQRT1]] +; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 +; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8 +; CHECK-NEXT: [[D_NOT:%.*]] = icmp eq i32 [[D]], 0 +; CHECK-NEXT: br i1 [[D_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) + %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt + store double %div, ptr @x + %mul = fmul reassoc double %div, %div + store double %mul, ptr @r1 + %d.not = icmp eq i32 
%d, 0 + br i1 %d.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %div1 = fdiv reassoc double %a, %sqrt + store double %div1, ptr @r2 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; div in one block. mul/div1 in other block and conditionally guarded. Don't optimize. +define void @bb_constraint_case3(double %a, i32 %d) { +; CHECK-LABEL: define void @bb_constraint_case3( +; CHECK-SAME: double [[A:%.*]], i32 [[D:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]]) +; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]] +; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 +; CHECK-NEXT: [[D_NOT:%.*]] = icmp eq i32 [[D]], 0 +; CHECK-NEXT: br i1 [[D_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]] +; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 +; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]] +; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) + %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt + store double %div, ptr @x + %d.not = icmp eq i32 %d, 0 + br i1 %d.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %mul = fmul reassoc double %div, %div + store double %mul, ptr @r1 + %div1 = fdiv reassoc double %a, %sqrt + store double %div1, ptr @r2 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +; div in one block. mul/div1 each in different block and conditionally guarded. Don't optimize. 
+define void @bb_constraint_case4(double %a, i32 %c, i32 %d) { +; CHECK-LABEL: define void @bb_constraint_case4( +; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]]) +; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]] +; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 +; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq i32 [[C]], 0 +; CHECK-NEXT: br i1 [[C_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]] +; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[D_NOT:%.*]] = icmp eq i32 [[D]], 0 +; CHECK-NEXT: br i1 [[D_NOT]], label [[IF_END1:%.*]], label [[IF_THEN1:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]] +; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8 +; CHECK-NEXT: br label [[IF_END1]] +; CHECK: if.end1: +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) + %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt + store double %div, ptr @x + %c.not = icmp eq i32 %c, 0 + br i1 %c.not, label %if.end, label %if.then + +if.then: ; preds = %entry + %mul = fmul reassoc double %div, %div + store double %mul, ptr @r1 + br label %if.end + +if.end: ; preds = %if.then, %entry + %d.not = icmp eq i32 %d, 0 + br i1 %d.not, label %if.end1, label %if.then1 + +if.then1: ; preds = %if.end + %div1 = fdiv reassoc double %a, %sqrt + store double %div1, ptr @r2 + br label %if.end1 + +if.end1: ; preds = %if.then1, %if.end + ret void +} + +; sqrt value comes from different blocks. Don't optimize. 
+define void @bb_constraint_case5(double %a, i32 %c) { +; CHECK-LABEL: define void @bb_constraint_case5( +; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq i32 [[C]], 0 +; CHECK-NEXT: br i1 [[C_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP0:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]]) +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[ADD:%.*]] = fadd double [[A]], 1.000000e+01 +; CHECK-NEXT: [[TMP1:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[ADD]]) +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[SQRT:%.*]] = phi double [ [[TMP0]], [[IF_THEN]] ], [ [[TMP1]], [[IF_ELSE]] ] +; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]] +; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]] +; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 +; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]] +; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8 +; CHECK-NEXT: ret void +; +entry: + %c.not = icmp eq i32 %c, 0 + br i1 %c.not, label %if.else, label %if.then + +if.then: ; preds = %entry + %0 = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) + br label %if.end + +if.else: ; preds = %entry + %add = fadd double %a, 1.000000e+01 + %1 = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %add) + br label %if.end + +if.end: ; preds = %if.else, %if.then + %sqrt = phi double[ %0, %if.then], [ %1, %if.else] + %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt + %mul = fmul reassoc double %div, %div + store double %mul, ptr @r1 + %div1 = fdiv reassoc double %a, %sqrt + store double %div1, ptr @r2 + ret void +} + +; div in one block and conditionally guarded. mul/div1 in other block. Don't optimize. 
+define void @bb_constraint_case6(double %a, i32 %d) { +; CHECK-LABEL: define void @bb_constraint_case6( +; CHECK-SAME: double [[A:%.*]], i32 [[D:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]]) +; CHECK-NEXT: [[D_NOT:%.*]] = icmp eq i32 [[D]], 0 +; CHECK-NEXT: br i1 [[D_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr @x, align 8 +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]] +; CHECK-NEXT: store double [[TMP1]], ptr @x, align 8 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[DIV:%.*]] = phi double [ [[TMP0]], [[IF_ELSE]] ], [ [[TMP1]], [[IF_THEN]] ] +; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]] +; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 +; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]] +; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8 +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) + %d.not = icmp eq i32 %d, 0 + br i1 %d.not, label %if.else, label %if.then + +if.else: ; preds = %entry + %1 = load double, ptr @x + br label %if.end + +if.then: ; preds = %entry + %2 = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt + store double %2, ptr @x + br label %if.end + +if.end: ; preds = %if.else, %if.then + %div = phi double [ %1, %if.else ], [ %2, %if.then ] + %mul = fmul reassoc double %div, %div + store double %mul, ptr @r1 + %div1 = fdiv reassoc double %a, %sqrt + store double %div1, ptr @r2 + ret void +} + +; value for mul comes from different blocks. Don't optimize. 
+define void @bb_constraint_case7(double %a, i32 %c, i32 %d) { +; CHECK-LABEL: define void @bb_constraint_case7( +; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]], i32 [[D:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]]) +; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]] +; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 +; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq i32 [[C]], 0 +; CHECK-NEXT: br i1 [[C_NOT]], label [[IF_ELSE:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP0:%.*]] = fdiv double 3.000000e+00, [[A]] +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[D_NOT:%.*]] = icmp eq i32 [[D]], 0 +; CHECK-NEXT: br i1 [[D_NOT]], label [[IF_ELSE1:%.*]], label [[IF_THEN1:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: [[TMP1:%.*]] = fdiv double 2.000000e+00, [[A]] +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.else1: +; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc double [[DIV]], [[DIV]] +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[MUL:%.*]] = phi double [ [[TMP1]], [[IF_THEN1]] ], [ [[TMP2]], [[IF_ELSE1]] ], [ [[TMP0]], [[IF_THEN]] ] +; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 +; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]] +; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8 +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) + %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt + store double %div, ptr @x + %c.not = icmp eq i32 %c, 0 + br i1 %c.not, label %if.else, label %if.then + +if.then: ; preds = %entry + %1 = fdiv double 3.000000e+00, %a + br label %if.end + +if.else: ; preds = %entry + %d.not = icmp eq i32 %d, 0 + br i1 %d.not, label %if.else1, label %if.then1 + +if.then1: ; preds = %if.else + %2 = fdiv double 2.000000e+00, %a + br label %if.end + +if.else1: ; preds = %if.else + %3 = fmul reassoc double %div, %div + 
br label %if.end + +if.end: ; preds = %if.then1, %if.else1, %if.then + %mul = phi double [ %2, %if.then1 ], [ %3, %if.else1 ], [ %1, %if.then ] + store double %mul, ptr @r1 + %div1 = fdiv reassoc double %a, %sqrt + store double %div1, ptr @r2 + ret void +} + +; value of mul comes from two different blocks (as shown by select ins). +define void @bb_constraint_case8(double %a, i32 %c) { +; CHECK-LABEL: define void @bb_constraint_case8( +; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]) +; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]] +; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[SQRT1]] +; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 +; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq i32 [[C]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = fmul double [[A]], [[A]] +; CHECK-NEXT: [[MUL:%.*]] = select i1 [[C_NOT]], double [[TMP1]], double [[TMP0]] +; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 +; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) + %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt + store double %div, ptr @x + %c.not = icmp eq i32 %c, 0 + %1 = fmul double %a, %a + %2 = fmul reassoc double %div, %div + %mul = select i1 %c.not, double %1, double %2 + store double %mul, ptr @r1 + %div1 = fdiv reassoc double %a, %sqrt + store double %div1, ptr @r2 + ret void +} + +; multiple instances of multiply ops to optimize. Optimize all. 
+define void @mutiple_multiply_instances(double %a, i32 %c) { +; CHECK-LABEL: define void @mutiple_multiply_instances( +; CHECK-SAME: double [[A:%.*]], i32 [[C:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]) +; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc double 1.000000e+00, [[A]] +; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP1]], [[SQRT1]] +; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 +; CHECK-NEXT: [[C_NOT:%.*]] = icmp eq i32 [[C]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = fmul double [[A]], [[A]] +; CHECK-NEXT: [[TMP3:%.*]] = fmul double [[A]], [[A]] +; CHECK-NEXT: [[MUL1:%.*]] = select i1 [[C_NOT]], double [[TMP2]], double [[TMP1]] +; CHECK-NEXT: [[MUL2:%.*]] = select i1 [[C_NOT]], double [[TMP1]], double [[TMP3]] +; CHECK-NEXT: store double [[MUL1]], ptr @r1, align 8 +; CHECK-NEXT: store double [[MUL2]], ptr @r3, align 8 +; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) + %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt + store double %div, ptr @x + %c.not = icmp eq i32 %c, 0 + %1 = fmul double %a, %a + %2 = fmul double %a, %a + %3 = fmul reassoc double %div, %div + %4 = fmul reassoc double %div, %div + %mul1 = select i1 %c.not, double %1, double %3 + %mul2 = select i1 %c.not, double %4, double %2 + store double %mul1, ptr @r1 + store double %mul2, ptr @r3 + %div1 = fdiv reassoc double %a, %sqrt + store double %div1, ptr @r2 + ret void +} + +; missing flags for optimization. 
+define void @missing_arcp_flag_on_div(double %a) { +; CHECK-LABEL: define void @missing_arcp_flag_on_div( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]]) +; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf double 1.000000e+00, [[SQRT]] +; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 +; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]] +; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 +; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]] +; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8 +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) + %div = fdiv reassoc ninf double 1.000000e+00, %sqrt + store double %div, ptr @x + %mul = fmul reassoc double %div, %div + store double %mul, ptr @r1 + %div1 = fdiv reassoc double %a, %sqrt + store double %div1, ptr @r2 + ret void +} + +; missing flags for optimization. 
+define void @missing_reassoc_flag_on_mul(double %a) { +; CHECK-LABEL: define void @missing_reassoc_flag_on_mul( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]]) +; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]] +; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 +; CHECK-NEXT: [[MUL:%.*]] = fmul double [[DIV]], [[DIV]] +; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 +; CHECK-NEXT: [[DIV1:%.*]] = fdiv reassoc double [[A]], [[SQRT]] +; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8 +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) + %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt + store double %div, ptr @x + %mul = fmul double %div, %div + store double %mul, ptr @r1 + %div1 = fdiv reassoc double %a, %sqrt + store double %div1, ptr @r2 + ret void +} + +; missing flags for optimization. 
+define void @missing_reassoc_flag_on_div1(double %a) { +; CHECK-LABEL: define void @missing_reassoc_flag_on_div1( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT:%.*]] = call reassoc nnan ninf nsz double @llvm.sqrt.f64(double [[A]]) +; CHECK-NEXT: [[DIV:%.*]] = fdiv reassoc ninf arcp double 1.000000e+00, [[SQRT]] +; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 +; CHECK-NEXT: [[MUL:%.*]] = fmul reassoc double [[DIV]], [[DIV]] +; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 +; CHECK-NEXT: [[DIV1:%.*]] = fdiv double [[A]], [[SQRT]] +; CHECK-NEXT: store double [[DIV1]], ptr @r2, align 8 +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) + %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt + store double %div, ptr @x + %mul = fmul reassoc double %div, %div + store double %mul, ptr @r1 + %div1 = fdiv double %a, %sqrt + store double %div1, ptr @r2 + ret void +} + +; div = -1/sqrt(a) +define void @negative_fdiv_val(double %a) { +; CHECK-LABEL: define void @negative_fdiv_val( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]) +; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]] +; CHECK-NEXT: [[TMP1:%.*]] = fneg reassoc double [[SQRT1]] +; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[TMP1]] +; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 +; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8 +; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) + %div = fdiv reassoc arcp ninf double -1.000000e+00, %sqrt + store double %div, ptr @x + %mul = fmul reassoc double %div, %div + store double %mul, ptr @r1 + %div1 = fdiv reassoc double %a, %sqrt + store double %div1, ptr @r2 + ret void +} + +define void @fpmath_metadata_on_div1(double %a) { +; CHECK-LABEL: 
define void @fpmath_metadata_on_div1( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]), !fpmath [[META0:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]] +; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[SQRT1]] +; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 +; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8 +; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) + %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt + store double %div, ptr @x + %mul = fmul reassoc double %div, %div + store double %mul, ptr @r1 + %div1 = fdiv reassoc double %a, %sqrt, !fpmath !3 + store double %div1, ptr @r2 + ret void +} + +define void @fpmath_metadata_on_mul(double %a) { +; CHECK-LABEL: define void @fpmath_metadata_on_mul( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]) +; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]], !fpmath [[META1:![0-9]+]] +; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[SQRT1]] +; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 +; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8 +; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) + %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt + store double %div, ptr @x + %mul = fmul reassoc double %div, %div, !fpmath !2 + store double %mul, ptr @r1 + %div1 = fdiv reassoc double %a, %sqrt + store double %div1, ptr @r2 + ret void +} + +; FIXME: DIV in the result should get the fpmath metadata from %div. 
+define void @fpmath_metadata_on_div(double %a) { +; CHECK-LABEL: define void @fpmath_metadata_on_div( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]) +; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]] +; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[SQRT1]], !fpmath [[META2:![0-9]+]] +; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 +; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8 +; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a) + %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt, !fpmath !1 + store double %div, ptr @x + %mul = fmul reassoc double %div, %div + store double %mul, ptr @r1 + %div1 = fdiv reassoc double %a, %sqrt + store double %div1, ptr @r2 + ret void +} + +define void @fpmath_metadata_on_all(double %a) { +; CHECK-LABEL: define void @fpmath_metadata_on_all( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc double @llvm.sqrt.f64(double [[A]]), !fpmath [[META0]] +; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc double 1.000000e+00, [[A]], !fpmath [[META1]] +; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc double [[TMP0]], [[SQRT1]], !fpmath [[META2]] +; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 +; CHECK-NEXT: store double [[TMP0]], ptr @r1, align 8 +; CHECK-NEXT: store double [[SQRT1]], ptr @r2, align 8 +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf double @llvm.sqrt.f64(double %a), !fpmath !0 + %div = fdiv reassoc arcp ninf double 1.000000e+00, %sqrt, !fpmath !1 + store double %div, ptr @x + %mul = fmul reassoc double %div, %div, !fpmath !2 + store double %mul, ptr @r1 + %div1 = fdiv reassoc double %a, %sqrt, !fpmath !3 + store double %div1, ptr @r2 + ret void +} + +define void @vector_input(<2 x double> %a) { +; CHECK-LABEL: define void 
@vector_input( +; CHECK-SAME: <2 x double> [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SQRT1:%.*]] = call reassoc <2 x double> @llvm.sqrt.v2f64(<2 x double> [[A]]) +; CHECK-NEXT: [[TMP0:%.*]] = fdiv reassoc <2 x double> splat (double 1.000000e+00), [[A]] +; CHECK-NEXT: [[DIV:%.*]] = fmul reassoc <2 x double> [[TMP0]], [[SQRT1]] +; CHECK-NEXT: store <2 x double> [[DIV]], ptr @v, align 16 +; CHECK-NEXT: store <2 x double> [[TMP0]], ptr @v1, align 16 +; CHECK-NEXT: store <2 x double> [[SQRT1]], ptr @v2, align 16 +; CHECK-NEXT: ret void +; +entry: + %sqrt = call reassoc nnan nsz ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> %a) + %div = fdiv reassoc arcp ninf <2 x double><double 1.000000e+00, double 1.000000e+00>, %sqrt + store <2 x double> %div, ptr @v + %mul = fmul reassoc <2 x double> %div, %div + store <2 x double> %mul, ptr @v1 + %div1 = fdiv reassoc <2 x double> %a, %sqrt + store <2 x double> %div1, ptr @v2 + ret void +} + +define void @strict_fp_metadata(double %a) { +; CHECK-LABEL: define void @strict_fp_metadata( +; CHECK-SAME: double [[A:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CONV:%.*]] = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 1, metadata !"round.dynamic", metadata !"fpexcept.strict") +; CHECK-NEXT: [[CALL:%.*]] = call double @llvm.sqrt.f64(double noundef [[A]]) +; CHECK-NEXT: [[DIV:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[CONV]], double [[CALL]], metadata !"round.dynamic", metadata !"fpexcept.strict") +; CHECK-NEXT: store double [[DIV]], ptr @x, align 8 +; CHECK-NEXT: [[MUL:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[DIV]], double [[DIV]], metadata !"round.dynamic", metadata !"fpexcept.strict") +; CHECK-NEXT: store double [[MUL]], ptr @r1, align 8 +; CHECK-NEXT: [[DIV2:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[A]], double [[CALL]], metadata !"round.dynamic", metadata !"fpexcept.strict") +; CHECK-NEXT: store double [[DIV2]], ptr @r2, align 
8 +; CHECK-NEXT: ret void +; +entry: + %conv = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 1, metadata !"round.dynamic", metadata !"fpexcept.strict") + %call = call double @llvm.sqrt.f64(double noundef %a) + %div = call double @llvm.experimental.constrained.fdiv.f64(double %conv, double %call, metadata !"round.dynamic", metadata !"fpexcept.strict") + store double %div, ptr @x + %mul = call double @llvm.experimental.constrained.fmul.f64(double %div, double %div, metadata !"round.dynamic", metadata !"fpexcept.strict") + store double %mul, ptr @r1 + %div2 = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %call, metadata !"round.dynamic", metadata !"fpexcept.strict") + store double %div2, ptr @r2 + ret void +} + +declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata) +declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata) +declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata) +declare double @llvm.sqrt.f64(double) +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) + +!0 = !{float 2.5} +!1 = !{float 3.5} +!2 = !{float 4.5} +!3 = !{float 5.5} +; CHECK: [[META0]] = !{float 5.500000e+00} +; CHECK: [[META1]] = !{float 4.500000e+00} +; CHECK: [[META2]] = !{float 3.500000e+00} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits