wristow created this revision. wristow added reviewers: spatel, mcberg2017. Herald added subscribers: jsji, hiraditya, nemanjai. Herald added a project: LLVM.
Fused Multiply Add (FMA) was not always being disabled when the switch `-ffp-contract=off` was used. More specifically, FMA is enabled when `-ffp-contract=fast` is used, and it is also enabled implicitly with `-ffast-math`. The combination `-ffast-math -ffp-contract=off` is intended to leave most of fast-math enabled (for example, reassociation, reciprocal transformations, etc.), but disable the use of FMA. However, FMA was incorrectly left enabled with the above switch combination. This commit fixes this, allowing users to enable most of the fast-math optimizations while disabling the FMA feature. https://reviews.llvm.org/D72675 Files: clang/lib/Driver/ToolChains/Clang.cpp clang/test/Driver/fast-math.c llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/test/CodeGen/PowerPC/fmf-propagation.ll llvm/test/CodeGen/X86/fp-contract.ll
Index: llvm/test/CodeGen/X86/fp-contract.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/fp-contract.ll @@ -0,0 +1,204 @@ +; Tests for -ffp-contract/-ffast-math interaction. +; Specifically, -ffp-contract=off must suppress the use of FMA. + +; RUN: llc < %s -mcpu=haswell | FileCheck %s --check-prefix=FMA + +; Scalar versions: + +define float @MulAddPlain(float %a, float %b, float %c) { +; FMA-LABEL: MulAddPlain: +; FMA: vmulss +; FMA-NEXT: vaddss +; FMA-NEXT: ret + %mul = fmul float %a, %b + %add = fadd float %mul, %c + ret float %add +} + +define float @MulAddFast(float %a, float %b, float %c) { +; FMA-LABEL: MulAddFast: +; FMA: vfmadd213ss +; FMA-NEXT: ret + %mul = fmul fast float %a, %b + %add = fadd fast float %mul, %c + ret float %add +} + +define float @MulAddContract(float %a, float %b, float %c) { +; FMA-LABEL: MulAddContract: +; FMA: vfmadd213ss +; FMA-NEXT: ret + %mul = fmul contract float %a, %b + %add = fadd contract float %mul, %c + ret float %add +} + +; Enabling all the fast-math-flags except 'contract' does not enable fused operations. 
+define float @MulAddFastNoContract(float %a, float %b, float %c) { +; FMA-LABEL: MulAddFastNoContract: +; FMA: vmulss +; FMA-NEXT: vaddss +; FMA-NEXT: ret + %mul = fmul nnan ninf nsz arcp afn reassoc float %a, %b + %add = fadd nnan ninf nsz arcp afn reassoc float %mul, %c + ret float %add +} + +define float @MulAddReassoc(float %a, float %b, float %c) { +; FMA-LABEL: MulAddReassoc: +; FMA: vmulss +; FMA-NEXT: vaddss +; FMA-NEXT: ret + %mul = fmul reassoc float %a, %b + %add = fadd reassoc float %mul, %c + ret float %add +} + +define float @MulSubPlain(float %a, float %b, float %c) { +; FMA-LABEL: MulSubPlain: +; FMA: vmulss +; FMA-NEXT: vsubss +; FMA-NEXT: ret + %mul = fmul float %a, %b + %sub = fsub float %mul, %c + ret float %sub +} + +define float @MulSubFast(float %a, float %b, float %c) { +; FMA-LABEL: MulSubFast: +; FMA: vfmsub213ss +; FMA-NEXT: ret + %mul = fmul fast float %a, %b + %sub = fsub fast float %mul, %c + ret float %sub +} + +define float @MulSubContract(float %a, float %b, float %c) { +; FMA-LABEL: MulSubContract: +; FMA: vfmsub213ss +; FMA-NEXT: ret + %mul = fmul contract float %a, %b + %sub = fsub contract float %mul, %c + ret float %sub +} + +; Enabling all the fast-math-flags except 'contract' does not enable fused operations. 
+define float @MulSubFastNoContract(float %a, float %b, float %c) { +; FMA-LABEL: MulSubFastNoContract: +; FMA: vmulss +; FMA-NEXT: vsubss +; FMA-NEXT: ret + %mul = fmul nnan ninf nsz arcp afn reassoc float %a, %b + %sub = fsub nnan ninf nsz arcp afn reassoc float %mul, %c + ret float %sub +} + +define float @MulSubReassoc(float %a, float %b, float %c) { +; FMA-LABEL: MulSubReassoc: +; FMA: vmulss +; FMA-NEXT: vsubss +; FMA-NEXT: ret + %mul = fmul reassoc float %a, %b + %sub = fsub reassoc float %mul, %c + ret float %sub +} + +; Vector versions: + +define <4 x float> @VecMulAddPlain(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulAddPlain: +; FMA: vmulps +; FMA-NEXT: vaddps +; FMA-NEXT: ret + %mul = fmul <4 x float> %a, %b + %add = fadd <4 x float> %mul, %c + ret <4 x float> %add +} + +define <4 x float> @VecMulAddFast(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulAddFast: +; FMA: vfmadd213ps +; FMA-NEXT: ret + %mul = fmul fast <4 x float> %a, %b + %add = fadd fast <4 x float> %mul, %c + ret <4 x float> %add +} + +define <4 x float> @VecMulAddContract(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulAddContract: +; FMA: vfmadd213ps +; FMA-NEXT: ret + %mul = fmul contract <4 x float> %a, %b + %add = fadd contract <4 x float> %mul, %c + ret <4 x float> %add +} + +; Enabling all the fast-math-flags except 'contract' does not enable fused operations. 
+define <4 x float> @VecMulAddFastNoContract(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulAddFastNoContract: +; FMA: vmulps +; FMA-NEXT: vaddps +; FMA-NEXT: ret + %mul = fmul nnan ninf nsz arcp afn reassoc <4 x float> %a, %b + %add = fadd nnan ninf nsz arcp afn reassoc <4 x float> %mul, %c + ret <4 x float> %add +} + +define <4 x float> @VecMulAddReassoc(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulAddReassoc: +; FMA: vmulps +; FMA-NEXT: vaddps +; FMA-NEXT: ret + %mul = fmul reassoc <4 x float> %a, %b + %add = fadd reassoc <4 x float> %mul, %c + ret <4 x float> %add +} + +define <4 x float> @VecMulSubPlain(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulSubPlain: +; FMA: vmulps +; FMA-NEXT: vsubps +; FMA-NEXT: ret + %mul = fmul <4 x float> %a, %b + %sub = fsub <4 x float> %mul, %c + ret <4 x float> %sub +} + +define <4 x float> @VecMulSubFast(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulSubFast: +; FMA: vfmsub213ps +; FMA-NEXT: ret + %mul = fmul fast <4 x float> %a, %b + %sub = fsub fast <4 x float> %mul, %c + ret <4 x float> %sub +} + +define <4 x float> @VecMulSubContract(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulSubContract: +; FMA: vfmsub213ps +; FMA-NEXT: ret + %mul = fmul contract <4 x float> %a, %b + %sub = fsub contract <4 x float> %mul, %c + ret <4 x float> %sub +} + +; Enabling all the fast-math-flags except 'contract' does not enable fused operations. 
+define <4 x float> @VecMulSubFastNoContract(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulSubFastNoContract: +; FMA: vmulps +; FMA-NEXT: vsubps +; FMA-NEXT: ret + %mul = fmul nnan ninf nsz arcp afn reassoc <4 x float> %a, %b + %sub = fsub nnan ninf nsz arcp afn reassoc <4 x float> %mul, %c + ret <4 x float> %sub +} + +define <4 x float> @VecMulSubReassoc(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; FMA-LABEL: VecMulSubReassoc: +; FMA: vmulps +; FMA-NEXT: vsubps +; FMA-NEXT: ret + %mul = fmul reassoc <4 x float> %a, %b + %sub = fsub reassoc <4 x float> %mul, %c + ret <4 x float> %sub +} Index: llvm/test/CodeGen/PowerPC/fmf-propagation.ll =================================================================== --- llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -58,17 +58,19 @@ ret float %add } -; Reassociation implies that FMA contraction is allowed. +; On the FMF test, reassociation alone does _not_ imply that FMA contraction is +; allowed (in particular, we need to be able to disable FMA even when +; reassociation is enabled). ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:' -; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}} +; FMFDEBUG: fadd reassoc {{t[0-9]+}}, {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:' define float @fmul_fadd_reassoc1(float %x, float %y, float %z) { ; FMF-LABEL: fmul_fadd_reassoc1: ; FMF: # %bb.0: -; FMF-NEXT: xsmaddasp 3, 1, 2 -; FMF-NEXT: fmr 1, 3 +; FMF-NEXT: xsmulsp 0, 1, 2 +; FMF-NEXT: xsaddsp 1, 0, 3 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fadd_reassoc1: @@ -84,14 +86,14 @@ ; This shouldn't change anything - the intermediate fmul result is now also flagged. 
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:' -; FMFDEBUG: fma reassoc {{t[0-9]+}}, {{t[0-9]+}} +; FMFDEBUG: fadd reassoc {{t[0-9]+}}, {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:' define float @fmul_fadd_reassoc2(float %x, float %y, float %z) { ; FMF-LABEL: fmul_fadd_reassoc2: ; FMF: # %bb.0: -; FMF-NEXT: xsmaddasp 3, 1, 2 -; FMF-NEXT: fmr 1, 3 +; FMF-NEXT: xsmulsp 0, 1, 2 +; FMF-NEXT: xsaddsp 1, 0, 3 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fadd_reassoc2: @@ -104,6 +106,52 @@ ret float %add } +; Reassociation applied with contract enables FMA contraction (of course). + +; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract_reassoc1:' +; FMFDEBUG: fma contract reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}} +; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract_reassoc1:' + +define float @fmul_fadd_contract_reassoc1(float %x, float %y, float %z) { +; FMF-LABEL: fmul_fadd_contract_reassoc1: +; FMF: # %bb.0: +; FMF-NEXT: xsmaddasp 3, 1, 2 +; FMF-NEXT: fmr 1, 3 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: fmul_fadd_contract_reassoc1: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: xsmaddasp 3, 1, 2 +; GLOBAL-NEXT: fmr 1, 3 +; GLOBAL-NEXT: blr + %mul = fmul contract float %x, %y + %add = fadd contract reassoc float %mul, %z + ret float %add +} + +; This shouldn't change anything - the intermediate fmul result is now also flagged. 
+ +; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract_reassoc2:' +; FMFDEBUG: fma contract reassoc {{t[0-9]+}}, {{t[0-9]+}} +; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract_reassoc2:' + +define float @fmul_fadd_contract_reassoc2(float %x, float %y, float %z) { +; FMF-LABEL: fmul_fadd_contract_reassoc2: +; FMF: # %bb.0: +; FMF-NEXT: xsmaddasp 3, 1, 2 +; FMF-NEXT: fmr 1, 3 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: fmul_fadd_contract_reassoc2: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: xsmaddasp 3, 1, 2 +; GLOBAL-NEXT: fmr 1, 3 +; GLOBAL-NEXT: blr + %mul = fmul contract reassoc float %x, %y + %add = fadd contract reassoc float %mul, %z + ret float %add +} + ; The fadd is now fully 'fast'. This implies that contraction is allowed. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:' @@ -122,7 +170,7 @@ ; GLOBAL-NEXT: xsmaddasp 3, 1, 2 ; GLOBAL-NEXT: fmr 1, 3 ; GLOBAL-NEXT: blr - %mul = fmul fast float %x, %y + %mul = fmul float %x, %y %add = fadd fast float %mul, %z ret float %add } @@ -151,10 +199,69 @@ } ; fma(X, 7.0, X * 42.0) --> X * 49.0 -; This is the minimum FMF needed for this transform - the FMA allows reassociation. +; This is the minimum FMF needed for this transform - the 'contract' allows the needed reassociation. 
+ +; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract1:' +; FMFDEBUG: fmul contract {{t[0-9]+}}, +; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_contract1:' + +; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract1:' +; GLOBALDEBUG: fmul contract {{t[0-9]+}} +; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_contract1:' + +define float @fmul_fma_contract1(float %x) { +; FMF-LABEL: fmul_fma_contract1: +; FMF: # %bb.0: +; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha +; FMF-NEXT: lfs 0, .LCPI8_0@toc@l(3) +; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: fmul_fma_contract1: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: addis 3, 2, .LCPI8_0@toc@ha +; GLOBAL-NEXT: lfs 0, .LCPI8_0@toc@l(3) +; GLOBAL-NEXT: xsmulsp 1, 1, 0 +; GLOBAL-NEXT: blr + %mul = fmul float %x, 42.0 + %fma = call contract float @llvm.fma.f32(float %x, float 7.0, float %mul) + ret float %fma +} + +; This shouldn't change anything - the intermediate fmul result is now also flagged. 
+ +; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract2:' +; FMFDEBUG: fmul contract {{t[0-9]+}}, +; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_contract2:' + +; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract2:' +; GLOBALDEBUG: fmul contract {{t[0-9]+}} +; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_contract2:' + +define float @fmul_fma_contract2(float %x) { +; FMF-LABEL: fmul_fma_contract2: +; FMF: # %bb.0: +; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha +; FMF-NEXT: lfs 0, .LCPI9_0@toc@l(3) +; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: fmul_fma_contract2: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: addis 3, 2, .LCPI9_0@toc@ha +; GLOBAL-NEXT: lfs 0, .LCPI9_0@toc@l(3) +; GLOBAL-NEXT: xsmulsp 1, 1, 0 +; GLOBAL-NEXT: blr + %mul = fmul contract float %x, 42.0 + %fma = call contract float @llvm.fma.f32(float %x, float 7.0, float %mul) + ret float %fma +} + +; On the FMF test, reassociation alone does _not_ imply that FMA contraction is allowed. 
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:' -; FMFDEBUG: fmul reassoc {{t[0-9]+}}, +; FMFDEBUG: fmul {{t[0-9]+}}, +; FMFDEBUG: fma reassoc {{t[0-9]+}}, ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:' @@ -164,15 +271,19 @@ define float @fmul_fma_reassoc1(float %x) { ; FMF-LABEL: fmul_fma_reassoc1: ; FMF: # %bb.0: -; FMF-NEXT: addis 3, 2, .LCPI6_0@toc@ha -; FMF-NEXT: lfs 0, .LCPI6_0@toc@l(3) -; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha +; FMF-NEXT: lfs 0, .LCPI10_0@toc@l(3) +; FMF-NEXT: addis 3, 2, .LCPI10_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI10_1@toc@l(3) +; FMF-NEXT: xsmulsp 0, 1, 0 +; FMF-NEXT: xsmaddasp 0, 1, 2 +; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fma_reassoc1: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI6_0@toc@ha -; GLOBAL-NEXT: lfs 0, .LCPI6_0@toc@l(3) +; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha +; GLOBAL-NEXT: lfs 0, .LCPI10_0@toc@l(3) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: blr %mul = fmul float %x, 42.0 @@ -184,6 +295,7 @@ ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:' ; FMFDEBUG: fmul reassoc {{t[0-9]+}} +; FMFDEBUG: fma reassoc {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:' @@ -193,15 +305,19 @@ define float @fmul_fma_reassoc2(float %x) { ; FMF-LABEL: fmul_fma_reassoc2: ; FMF: # %bb.0: -; FMF-NEXT: addis 3, 2, .LCPI7_0@toc@ha -; FMF-NEXT: lfs 0, .LCPI7_0@toc@l(3) -; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha +; FMF-NEXT: lfs 0, .LCPI11_0@toc@l(3) +; FMF-NEXT: addis 3, 2, .LCPI11_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI11_1@toc@l(3) +; FMF-NEXT: xsmulsp 0, 1, 0 +; FMF-NEXT: xsmaddasp 0, 1, 2 +; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fma_reassoc2: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: 
addis 3, 2, .LCPI7_0@toc@ha -; GLOBAL-NEXT: lfs 0, .LCPI7_0@toc@l(3) +; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha +; GLOBAL-NEXT: lfs 0, .LCPI11_0@toc@l(3) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: blr %mul = fmul reassoc float %x, 42.0 @@ -209,6 +325,64 @@ ret float %fma } +; Reassociation applied with contract enables FMA contraction (of course). + +; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract_reassoc1:' +; FMFDEBUG: fmul contract reassoc {{t[0-9]+}}, +; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_contract_reassoc1:' + +; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract_reassoc1:' +; GLOBALDEBUG: fmul contract reassoc {{t[0-9]+}} +; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_contract_reassoc1:' + +define float @fmul_fma_contract_reassoc1(float %x) { +; FMF-LABEL: fmul_fma_contract_reassoc1: +; FMF: # %bb.0: +; FMF-NEXT: addis 3, 2, .LCPI12_0@toc@ha +; FMF-NEXT: lfs 0, .LCPI12_0@toc@l(3) +; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: fmul_fma_contract_reassoc1: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: addis 3, 2, .LCPI12_0@toc@ha +; GLOBAL-NEXT: lfs 0, .LCPI12_0@toc@l(3) +; GLOBAL-NEXT: xsmulsp 1, 1, 0 +; GLOBAL-NEXT: blr + %mul = fmul float %x, 42.0 + %fma = call contract reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul) + ret float %fma +} + +; This shouldn't change anything - the intermediate fmul result is now also flagged. 
+ +; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract_reassoc2:' +; FMFDEBUG: fmul contract reassoc {{t[0-9]+}} +; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_contract_reassoc2:' + +; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract_reassoc2:' +; GLOBALDEBUG: fmul contract reassoc {{t[0-9]+}} +; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_contract_reassoc2:' + +define float @fmul_fma_contract_reassoc2(float %x) { +; FMF-LABEL: fmul_fma_contract_reassoc2: +; FMF: # %bb.0: +; FMF-NEXT: addis 3, 2, .LCPI13_0@toc@ha +; FMF-NEXT: lfs 0, .LCPI13_0@toc@l(3) +; FMF-NEXT: xsmulsp 1, 1, 0 +; FMF-NEXT: blr +; +; GLOBAL-LABEL: fmul_fma_contract_reassoc2: +; GLOBAL: # %bb.0: +; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha +; GLOBAL-NEXT: lfs 0, .LCPI13_0@toc@l(3) +; GLOBAL-NEXT: xsmulsp 1, 1, 0 +; GLOBAL-NEXT: blr + %mul = fmul contract reassoc float %x, 42.0 + %fma = call contract reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul) + ret float %fma +} + ; The FMA is now fully 'fast'. This implies that reassociation is allowed. 
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:' @@ -222,15 +396,15 @@ define float @fmul_fma_fast1(float %x) { ; FMF-LABEL: fmul_fma_fast1: ; FMF: # %bb.0: -; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha -; FMF-NEXT: lfs 0, .LCPI8_0@toc@l(3) +; FMF-NEXT: addis 3, 2, .LCPI14_0@toc@ha +; FMF-NEXT: lfs 0, .LCPI14_0@toc@l(3) ; FMF-NEXT: xsmulsp 1, 1, 0 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fma_fast1: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI8_0@toc@ha -; GLOBAL-NEXT: lfs 0, .LCPI8_0@toc@l(3) +; GLOBAL-NEXT: addis 3, 2, .LCPI14_0@toc@ha +; GLOBAL-NEXT: lfs 0, .LCPI14_0@toc@l(3) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: blr %mul = fmul float %x, 42.0 @@ -251,15 +425,15 @@ define float @fmul_fma_fast2(float %x) { ; FMF-LABEL: fmul_fma_fast2: ; FMF: # %bb.0: -; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha -; FMF-NEXT: lfs 0, .LCPI9_0@toc@l(3) +; FMF-NEXT: addis 3, 2, .LCPI15_0@toc@ha +; FMF-NEXT: lfs 0, .LCPI15_0@toc@l(3) ; FMF-NEXT: xsmulsp 1, 1, 0 ; FMF-NEXT: blr ; ; GLOBAL-LABEL: fmul_fma_fast2: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI9_0@toc@ha -; GLOBAL-NEXT: lfs 0, .LCPI9_0@toc@l(3) +; GLOBAL-NEXT: addis 3, 2, .LCPI15_0@toc@ha +; GLOBAL-NEXT: lfs 0, .LCPI15_0@toc@l(3) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: blr %mul = fmul fast float %x, 42.0 @@ -282,19 +456,19 @@ ; FMF: # %bb.0: ; FMF-NEXT: xxlxor 0, 0, 0 ; FMF-NEXT: fcmpu 0, 1, 0 -; FMF-NEXT: beq 0, .LBB10_2 +; FMF-NEXT: beq 0, .LBB16_2 ; FMF-NEXT: # %bb.1: ; FMF-NEXT: xsrsqrtesp 0, 1 -; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha -; FMF-NEXT: addis 4, 2, .LCPI10_1@toc@ha -; FMF-NEXT: lfs 2, .LCPI10_0@toc@l(3) -; FMF-NEXT: lfs 3, .LCPI10_1@toc@l(4) +; FMF-NEXT: addis 3, 2, .LCPI16_0@toc@ha +; FMF-NEXT: addis 4, 2, .LCPI16_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI16_0@toc@l(3) +; FMF-NEXT: lfs 3, .LCPI16_1@toc@l(4) ; FMF-NEXT: xsmulsp 1, 1, 0 ; FMF-NEXT: xsmulsp 0, 1, 0 ; FMF-NEXT: xsmulsp 1, 1, 2 ; FMF-NEXT: xsaddsp 0, 0, 3 ; FMF-NEXT: xsmulsp 0, 1, 0 -; FMF-NEXT: .LBB10_2: +; 
FMF-NEXT: .LBB16_2: ; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr ; @@ -302,18 +476,18 @@ ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: xxlxor 0, 0, 0 ; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB10_2 +; GLOBAL-NEXT: beq 0, .LBB16_2 ; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 -; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha -; GLOBAL-NEXT: addis 4, 2, .LCPI10_1@toc@ha -; GLOBAL-NEXT: lfs 2, .LCPI10_0@toc@l(3) -; GLOBAL-NEXT: lfs 3, .LCPI10_1@toc@l(4) +; GLOBAL-NEXT: addis 3, 2, .LCPI16_0@toc@ha +; GLOBAL-NEXT: addis 4, 2, .LCPI16_1@toc@ha +; GLOBAL-NEXT: lfs 2, .LCPI16_0@toc@l(3) +; GLOBAL-NEXT: lfs 3, .LCPI16_1@toc@l(4) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: xsmaddasp 2, 1, 0 ; GLOBAL-NEXT: xsmulsp 0, 1, 3 ; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB10_2: +; GLOBAL-NEXT: .LBB16_2: ; GLOBAL-NEXT: fmr 1, 0 ; GLOBAL-NEXT: blr %rt = call afn float @llvm.sqrt.f32(float %x) @@ -335,18 +509,18 @@ ; FMF: # %bb.0: ; FMF-NEXT: xxlxor 0, 0, 0 ; FMF-NEXT: fcmpu 0, 1, 0 -; FMF-NEXT: beq 0, .LBB11_2 +; FMF-NEXT: beq 0, .LBB17_2 ; FMF-NEXT: # %bb.1: ; FMF-NEXT: xsrsqrtesp 0, 1 -; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; FMF-NEXT: addis 4, 2, .LCPI11_1@toc@ha -; FMF-NEXT: lfs 2, .LCPI11_0@toc@l(3) -; FMF-NEXT: lfs 3, .LCPI11_1@toc@l(4) +; FMF-NEXT: addis 3, 2, .LCPI17_0@toc@ha +; FMF-NEXT: addis 4, 2, .LCPI17_1@toc@ha +; FMF-NEXT: lfs 2, .LCPI17_0@toc@l(3) +; FMF-NEXT: lfs 3, .LCPI17_1@toc@l(4) ; FMF-NEXT: xsmulsp 1, 1, 0 ; FMF-NEXT: xsmaddasp 2, 1, 0 ; FMF-NEXT: xsmulsp 0, 1, 3 ; FMF-NEXT: xsmulsp 0, 0, 2 -; FMF-NEXT: .LBB11_2: +; FMF-NEXT: .LBB17_2: ; FMF-NEXT: fmr 1, 0 ; FMF-NEXT: blr ; @@ -354,18 +528,18 @@ ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: xxlxor 0, 0, 0 ; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB11_2 +; GLOBAL-NEXT: beq 0, .LBB17_2 ; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 -; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; GLOBAL-NEXT: addis 4, 2, .LCPI11_1@toc@ha -; GLOBAL-NEXT: lfs 2, .LCPI11_0@toc@l(3) -; GLOBAL-NEXT: lfs 3, 
.LCPI11_1@toc@l(4) +; GLOBAL-NEXT: addis 3, 2, .LCPI17_0@toc@ha +; GLOBAL-NEXT: addis 4, 2, .LCPI17_1@toc@ha +; GLOBAL-NEXT: lfs 2, .LCPI17_0@toc@l(3) +; GLOBAL-NEXT: lfs 3, .LCPI17_1@toc@l(4) ; GLOBAL-NEXT: xsmulsp 1, 1, 0 ; GLOBAL-NEXT: xsmaddasp 2, 1, 0 ; GLOBAL-NEXT: xsmulsp 0, 1, 3 ; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB11_2: +; GLOBAL-NEXT: .LBB17_2: ; GLOBAL-NEXT: fmr 1, 0 ; GLOBAL-NEXT: blr %rt = call fast float @llvm.sqrt.f32(float %x) @@ -387,10 +561,10 @@ ; FMF: # %bb.0: ; FMF-NEXT: xxlxor 0, 0, 0 ; FMF-NEXT: xscmpudp 0, 1, 0 -; FMF-NEXT: blt 0, .LBB12_2 +; FMF-NEXT: blt 0, .LBB18_2 ; FMF-NEXT: # %bb.1: ; FMF-NEXT: fmr 3, 2 -; FMF-NEXT: .LBB12_2: +; FMF-NEXT: .LBB18_2: ; FMF-NEXT: fmr 1, 3 ; FMF-NEXT: blr ; @@ -398,10 +572,10 @@ ; GLOBAL: # %bb.0: ; GLOBAL-NEXT: xxlxor 0, 0, 0 ; GLOBAL-NEXT: xscmpudp 0, 1, 0 -; GLOBAL-NEXT: blt 0, .LBB12_2 +; GLOBAL-NEXT: blt 0, .LBB18_2 ; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: fmr 3, 2 -; GLOBAL-NEXT: .LBB12_2: +; GLOBAL-NEXT: .LBB18_2: ; GLOBAL-NEXT: fmr 1, 3 ; GLOBAL-NEXT: blr %cmp = fcmp nnan ult double %a, 0.0 Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11506,7 +11506,7 @@ static bool isContractable(SDNode *N) { SDNodeFlags F = N->getFlags(); - return F.hasAllowContract() || F.hasAllowReassociation(); + return F.hasAllowContract(); } /// Try to perform FMA combining on a given FADD node. Index: clang/test/Driver/fast-math.c =================================================================== --- clang/test/Driver/fast-math.c +++ clang/test/Driver/fast-math.c @@ -180,6 +180,13 @@ // CHECK-FAST-MATH: "-ffast-math" // CHECK-FAST-MATH: "-ffinite-math-only" // +// -ffp-contract=off must disable the fast-math umbrella, and the unsafe-fp-math +// umbrella. 
+// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s +// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \ +// RUN: | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s +// // RUN: %clang -### -ffast-math -fno-fast-math -c %s 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s // RUN: %clang -### -ffast-math -fno-finite-math-only -c %s 2>&1 \ Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -2700,6 +2700,14 @@ A->claim(); } + // If -ffp-contract=off has been specified on the command line, then we must + // suppress the emission of -ffast-math and -menable-unsafe-fp-math to cc1. + bool FPContractDisabled = false; + if (!FPContract.empty()) { + CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + FPContract)); + FPContractDisabled = FPContract.equals("off"); + } + if (!HonorINFs) CmdArgs.push_back("-menable-no-infs"); @@ -2710,7 +2718,7 @@ CmdArgs.push_back("-fmath-errno"); if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros && - !TrappingMath) + !TrappingMath && !FPContractDisabled) CmdArgs.push_back("-menable-unsafe-fp-math"); if (!SignedZeros) @@ -2753,7 +2761,8 @@ // that's consistent with gcc's behaviour. if (!HonorINFs && !HonorNaNs && !MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros && !TrappingMath && !RoundingFPMath) { - CmdArgs.push_back("-ffast-math"); + if (!FPContractDisabled) + CmdArgs.push_back("-ffast-math"); if (FPModel.equals("fast")) { if (FPContract.equals("fast")) // All set, do nothing.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits