[PATCH] D72675: Fix -ffast-math/-ffp-contract interaction

Warren Ristow via Phabricator via cfe-commits Tue, 14 Jan 2020 16:56:34 -0800

wristow updated this revision to Diff 238143.
wristow retitled this revision from "ix -ffast-math/-ffp-contract interaction" 
to "Fix -ffast-math/-ffp-contract interaction".
wristow added a comment.


Addressed comments from @hfinkel.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72675/new/

https://reviews.llvm.org/D72675

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/fast-math.c
  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
  llvm/test/CodeGen/PowerPC/fmf-propagation.ll
  llvm/test/CodeGen/X86/fp-contract.ll

Index: llvm/test/CodeGen/X86/fp-contract.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/fp-contract.ll
@@ -0,0 +1,204 @@
+; Tests for -ffp-contract/-ffast-math interaction.
+; Specifically, -ffp-contract=off must suppress the use of FMA.
+
+; RUN: llc < %s -mcpu=haswell | FileCheck %s --check-prefix=FMA
+
+; Scalar versions:
+
+define float @MulAddPlain(float %a, float %b, float %c) {
+; FMA-LABEL: MulAddPlain:
+; FMA:       vmulss
+; FMA-NEXT:  vaddss
+; FMA-NEXT:  ret
+  %mul = fmul float %a, %b
+  %add = fadd float %mul, %c
+  ret float %add
+}
+
+define float @MulAddFast(float %a, float %b, float %c) {
+; FMA-LABEL: MulAddFast:
+; FMA:       vfmadd213ss
+; FMA-NEXT:  ret
+  %mul = fmul fast float %a, %b
+  %add = fadd fast float %mul, %c
+  ret float %add
+}
+
+define float @MulAddContract(float %a, float %b, float %c) {
+; FMA-LABEL: MulAddContract:
+; FMA:       vfmadd213ss
+; FMA-NEXT:  ret
+  %mul = fmul contract float %a, %b
+  %add = fadd contract float %mul, %c
+  ret float %add
+}
+
+; Enabling all the fast-math-flags except 'contract' does not enable fused operations.
+define float @MulAddFastNoContract(float %a, float %b, float %c) {
+; FMA-LABEL: MulAddFastNoContract:
+; FMA:       vmulss
+; FMA-NEXT:  vaddss
+; FMA-NEXT:  ret
+  %mul = fmul nnan ninf nsz arcp afn reassoc float %a, %b
+  %add = fadd nnan ninf nsz arcp afn reassoc float %mul, %c
+  ret float %add
+}
+
+define float @MulAddReassoc(float %a, float %b, float %c) {
+; FMA-LABEL: MulAddReassoc:
+; FMA:       vmulss
+; FMA-NEXT:  vaddss
+; FMA-NEXT:  ret
+  %mul = fmul reassoc float %a, %b
+  %add = fadd reassoc float %mul, %c
+  ret float %add
+}
+
+define float @MulSubPlain(float %a, float %b, float %c) {
+; FMA-LABEL: MulSubPlain:
+; FMA:       vmulss
+; FMA-NEXT:  vsubss
+; FMA-NEXT:  ret
+  %mul = fmul float %a, %b
+  %sub = fsub float %mul, %c
+  ret float %sub
+}
+
+define float @MulSubFast(float %a, float %b, float %c) {
+; FMA-LABEL: MulSubFast:
+; FMA:       vfmsub213ss
+; FMA-NEXT:  ret
+  %mul = fmul fast float %a, %b
+  %sub = fsub fast float %mul, %c
+  ret float %sub
+}
+
+define float @MulSubContract(float %a, float %b, float %c) {
+; FMA-LABEL: MulSubContract:
+; FMA:       vfmsub213ss
+; FMA-NEXT:  ret
+  %mul = fmul contract float %a, %b
+  %sub = fsub contract float %mul, %c
+  ret float %sub
+}
+
+; Enabling all the fast-math-flags except 'contract' does not enable fused operations.
+define float @MulSubFastNoContract(float %a, float %b, float %c) {
+; FMA-LABEL: MulSubFastNoContract:
+; FMA:       vmulss
+; FMA-NEXT:  vsubss
+; FMA-NEXT:  ret
+  %mul = fmul nnan ninf nsz arcp afn reassoc float %a, %b
+  %sub = fsub nnan ninf nsz arcp afn reassoc float %mul, %c
+  ret float %sub
+}
+
+define float @MulSubReassoc(float %a, float %b, float %c) {
+; FMA-LABEL: MulSubReassoc:
+; FMA:       vmulss
+; FMA-NEXT:  vsubss
+; FMA-NEXT:  ret
+  %mul = fmul reassoc float %a, %b
+  %sub = fsub reassoc float %mul, %c
+  ret float %sub
+}
+
+; Vector versions:
+
+define <4 x float> @VecMulAddPlain(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulAddPlain:
+; FMA:       vmulps
+; FMA-NEXT:  vaddps
+; FMA-NEXT:  ret
+  %mul = fmul <4 x float> %a, %b
+  %add = fadd <4 x float> %mul, %c
+  ret <4 x float> %add
+}
+
+define <4 x float> @VecMulAddFast(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulAddFast:
+; FMA:       vfmadd213ps
+; FMA-NEXT:  ret
+  %mul = fmul fast <4 x float> %a, %b
+  %add = fadd fast <4 x float> %mul, %c
+  ret <4 x float> %add
+}
+
+define <4 x float> @VecMulAddContract(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulAddContract:
+; FMA:       vfmadd213ps
+; FMA-NEXT:  ret
+  %mul = fmul contract <4 x float> %a, %b
+  %add = fadd contract <4 x float> %mul, %c
+  ret <4 x float> %add
+}
+
+; Enabling all the fast-math-flags except 'contract' does not enable fused operations.
+define <4 x float> @VecMulAddFastNoContract(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulAddFastNoContract:
+; FMA:       vmulps
+; FMA-NEXT:  vaddps
+; FMA-NEXT:  ret
+  %mul = fmul nnan ninf nsz arcp afn reassoc <4 x float> %a, %b
+  %add = fadd nnan ninf nsz arcp afn reassoc <4 x float> %mul, %c
+  ret <4 x float> %add
+}
+
+define <4 x float> @VecMulAddReassoc(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulAddReassoc:
+; FMA:       vmulps
+; FMA-NEXT:  vaddps
+; FMA-NEXT:  ret
+  %mul = fmul reassoc <4 x float> %a, %b
+  %add = fadd reassoc <4 x float> %mul, %c
+  ret <4 x float> %add
+}
+
+define <4 x float> @VecMulSubPlain(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulSubPlain:
+; FMA:       vmulps
+; FMA-NEXT:  vsubps
+; FMA-NEXT:  ret
+  %mul = fmul <4 x float> %a, %b
+  %sub = fsub <4 x float> %mul, %c
+  ret <4 x float> %sub
+}
+
+define <4 x float> @VecMulSubFast(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulSubFast:
+; FMA:       vfmsub213ps
+; FMA-NEXT:  ret
+  %mul = fmul fast <4 x float> %a, %b
+  %sub = fsub fast <4 x float> %mul, %c
+  ret <4 x float> %sub
+}
+
+define <4 x float> @VecMulSubContract(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulSubContract:
+; FMA:       vfmsub213ps
+; FMA-NEXT:  ret
+  %mul = fmul contract <4 x float> %a, %b
+  %sub = fsub contract <4 x float> %mul, %c
+  ret <4 x float> %sub
+}
+
+; Enabling all the fast-math-flags except 'contract' does not enable fused operations.
+define <4 x float> @VecMulSubFastNoContract(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulSubFastNoContract:
+; FMA:       vmulps
+; FMA-NEXT:  vsubps
+; FMA-NEXT:  ret
+  %mul = fmul nnan ninf nsz arcp afn reassoc <4 x float> %a, %b
+  %sub = fsub nnan ninf nsz arcp afn reassoc <4 x float> %mul, %c
+  ret <4 x float> %sub
+}
+
+define <4 x float> @VecMulSubReassoc(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; FMA-LABEL: VecMulSubReassoc:
+; FMA:       vmulps
+; FMA-NEXT:  vsubps
+; FMA-NEXT:  ret
+  %mul = fmul reassoc <4 x float> %a, %b
+  %sub = fsub reassoc <4 x float> %mul, %c
+  ret <4 x float> %sub
+}
Index: llvm/test/CodeGen/PowerPC/fmf-propagation.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/fmf-propagation.ll
+++ llvm/test/CodeGen/PowerPC/fmf-propagation.ll
@@ -58,17 +58,19 @@
   ret float %add
 }
 
-; Reassociation implies that FMA contraction is allowed.
+; On the FMF test, reassociation alone does _not_ imply that FMA contraction is
+; allowed (in particular, we need to be able to disable FMA even when
+; reassociation is enabled).
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
-; FMFDEBUG:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         fadd reassoc {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'
 
 define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
 ; FMF-LABEL: fmul_fadd_reassoc1:
 ; FMF:       # %bb.0:
-; FMF-NEXT:    xsmaddasp 3, 1, 2
-; FMF-NEXT:    fmr 1, 3
+; FMF-NEXT:    xsmulsp 0, 1, 2
+; FMF-NEXT:    xsaddsp 1, 0, 3
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fadd_reassoc1:
@@ -84,14 +86,14 @@
 ; This shouldn't change anything - the intermediate fmul result is now also flagged.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:'
-; FMFDEBUG:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:         fadd reassoc {{t[0-9]+}}, {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:'
 
 define float @fmul_fadd_reassoc2(float %x, float %y, float %z) {
 ; FMF-LABEL: fmul_fadd_reassoc2:
 ; FMF:       # %bb.0:
-; FMF-NEXT:    xsmaddasp 3, 1, 2
-; FMF-NEXT:    fmr 1, 3
+; FMF-NEXT:    xsmulsp 0, 1, 2
+; FMF-NEXT:    xsaddsp 1, 0, 3
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fadd_reassoc2:
@@ -104,6 +106,52 @@
   ret float %add
 }
 
+; Reassociation applied with contract enables FMA contraction (of course).
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract_reassoc1:'
+; FMFDEBUG:         fma contract reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract_reassoc1:'
+
+define float @fmul_fadd_contract_reassoc1(float %x, float %y, float %z) {
+; FMF-LABEL: fmul_fadd_contract_reassoc1:
+; FMF:       # %bb.0:
+; FMF-NEXT:    xsmaddasp 3, 1, 2
+; FMF-NEXT:    fmr 1, 3
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: fmul_fadd_contract_reassoc1:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
+; GLOBAL-NEXT:    fmr 1, 3
+; GLOBAL-NEXT:    blr
+  %mul = fmul contract float %x, %y
+  %add = fadd contract reassoc float %mul, %z
+  ret float %add
+}
+
+; This shouldn't change anything - the intermediate fmul result is now also flagged.
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract_reassoc2:'
+; FMFDEBUG:         fma contract reassoc {{t[0-9]+}}, {{t[0-9]+}}
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract_reassoc2:'
+
+define float @fmul_fadd_contract_reassoc2(float %x, float %y, float %z) {
+; FMF-LABEL: fmul_fadd_contract_reassoc2:
+; FMF:       # %bb.0:
+; FMF-NEXT:    xsmaddasp 3, 1, 2
+; FMF-NEXT:    fmr 1, 3
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: fmul_fadd_contract_reassoc2:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
+; GLOBAL-NEXT:    fmr 1, 3
+; GLOBAL-NEXT:    blr
+  %mul = fmul contract reassoc float %x, %y
+  %add = fadd contract reassoc float %mul, %z
+  ret float %add
+}
+
 ; The fadd is now fully 'fast'. This implies that contraction is allowed.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:'
@@ -122,7 +170,7 @@
 ; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
 ; GLOBAL-NEXT:    fmr 1, 3
 ; GLOBAL-NEXT:    blr
-  %mul = fmul fast float %x, %y
+  %mul = fmul float %x, %y
   %add = fadd fast float %mul, %z
   ret float %add
 }
@@ -151,10 +199,69 @@
 }
 
 ; fma(X, 7.0, X * 42.0) --> X * 49.0
-; This is the minimum FMF needed for this transform - the FMA allows reassociation.
+; This is the minimum FMF needed for this transform - the 'contract' allows the needed reassociation.
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract1:'
+; FMFDEBUG:         fmul contract {{t[0-9]+}},
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_contract1:'
+
+; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract1:'
+; GLOBALDEBUG:         fmul contract {{t[0-9]+}}
+; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_contract1:'
+
+define float @fmul_fma_contract1(float %x) {
+; FMF-LABEL: fmul_fma_contract1:
+; FMF:       # %bb.0:
+; FMF-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
+; FMF-NEXT:    lfs 0, .LCPI8_0@toc@l(3)
+; FMF-NEXT:    xsmulsp 1, 1, 0
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: fmul_fma_contract1:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
+; GLOBAL-NEXT:    lfs 0, .LCPI8_0@toc@l(3)
+; GLOBAL-NEXT:    xsmulsp 1, 1, 0
+; GLOBAL-NEXT:    blr
+  %mul = fmul float %x, 42.0
+  %fma = call contract float @llvm.fma.f32(float %x, float 7.0, float %mul)
+  ret float %fma
+}
+
+; This shouldn't change anything - the intermediate fmul result is now also flagged.
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract2:'
+; FMFDEBUG:         fmul contract {{t[0-9]+}},
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_contract2:'
+
+; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract2:'
+; GLOBALDEBUG:         fmul contract {{t[0-9]+}}
+; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_contract2:'
+
+define float @fmul_fma_contract2(float %x) {
+; FMF-LABEL: fmul_fma_contract2:
+; FMF:       # %bb.0:
+; FMF-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; FMF-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
+; FMF-NEXT:    xsmulsp 1, 1, 0
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: fmul_fma_contract2:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; GLOBAL-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
+; GLOBAL-NEXT:    xsmulsp 1, 1, 0
+; GLOBAL-NEXT:    blr
+  %mul = fmul contract float %x, 42.0
+  %fma = call contract float @llvm.fma.f32(float %x, float 7.0, float %mul)
+  ret float %fma
+}
+
+; On the FMF test, reassociation alone does _not_ imply that FMA contraction is allowed.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
-; FMFDEBUG:         fmul reassoc {{t[0-9]+}},
+; FMFDEBUG:         fmul {{t[0-9]+}},
+; FMFDEBUG:         fma reassoc {{t[0-9]+}},
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
@@ -164,15 +271,19 @@
 define float @fmul_fma_reassoc1(float %x) {
 ; FMF-LABEL: fmul_fma_reassoc1:
 ; FMF:       # %bb.0:
-; FMF-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
-; FMF-NEXT:    lfs 0, .LCPI6_0@toc@l(3)
-; FMF-NEXT:    xsmulsp 1, 1, 0
+; FMF-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
+; FMF-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
+; FMF-NEXT:    addis 3, 2, .LCPI10_1@toc@ha
+; FMF-NEXT:    lfs 2, .LCPI10_1@toc@l(3)
+; FMF-NEXT:    xsmulsp 0, 1, 0
+; FMF-NEXT:    xsmaddasp 0, 1, 2
+; FMF-NEXT:    fmr 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fma_reassoc1:
 ; GLOBAL:       # %bb.0:
-; GLOBAL-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
-; GLOBAL-NEXT:    lfs 0, .LCPI6_0@toc@l(3)
+; GLOBAL-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
+; GLOBAL-NEXT:    lfs 0, .LCPI10_0@toc@l(3)
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    blr
   %mul = fmul float %x, 42.0
@@ -184,6 +295,7 @@
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
 ; FMFDEBUG:         fmul reassoc {{t[0-9]+}}
+; FMFDEBUG:         fma reassoc {{t[0-9]+}}
 ; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
 
 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
@@ -193,15 +305,19 @@
 define float @fmul_fma_reassoc2(float %x) {
 ; FMF-LABEL: fmul_fma_reassoc2:
 ; FMF:       # %bb.0:
-; FMF-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
-; FMF-NEXT:    lfs 0, .LCPI7_0@toc@l(3)
-; FMF-NEXT:    xsmulsp 1, 1, 0
+; FMF-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
+; FMF-NEXT:    lfs 0, .LCPI11_0@toc@l(3)
+; FMF-NEXT:    addis 3, 2, .LCPI11_1@toc@ha
+; FMF-NEXT:    lfs 2, .LCPI11_1@toc@l(3)
+; FMF-NEXT:    xsmulsp 0, 1, 0
+; FMF-NEXT:    xsmaddasp 0, 1, 2
+; FMF-NEXT:    fmr 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fma_reassoc2:
 ; GLOBAL:       # %bb.0:
-; GLOBAL-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
-; GLOBAL-NEXT:    lfs 0, .LCPI7_0@toc@l(3)
+; GLOBAL-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
+; GLOBAL-NEXT:    lfs 0, .LCPI11_0@toc@l(3)
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    blr
   %mul = fmul reassoc float %x, 42.0
@@ -209,6 +325,64 @@
   ret float %fma
 }
 
+; Reassociation applied with contract enables FMA contraction (of course).
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract_reassoc1:'
+; FMFDEBUG:         fmul contract reassoc {{t[0-9]+}},
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_contract_reassoc1:'
+
+; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract_reassoc1:'
+; GLOBALDEBUG:         fmul contract reassoc {{t[0-9]+}}
+; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_contract_reassoc1:'
+
+define float @fmul_fma_contract_reassoc1(float %x) {
+; FMF-LABEL: fmul_fma_contract_reassoc1:
+; FMF:       # %bb.0:
+; FMF-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
+; FMF-NEXT:    lfs 0, .LCPI12_0@toc@l(3)
+; FMF-NEXT:    xsmulsp 1, 1, 0
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: fmul_fma_contract_reassoc1:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
+; GLOBAL-NEXT:    lfs 0, .LCPI12_0@toc@l(3)
+; GLOBAL-NEXT:    xsmulsp 1, 1, 0
+; GLOBAL-NEXT:    blr
+  %mul = fmul float %x, 42.0
+  %fma = call contract reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
+  ret float %fma
+}
+
+; This shouldn't change anything - the intermediate fmul result is now also flagged.
+
+; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract_reassoc2:'
+; FMFDEBUG:         fmul contract reassoc {{t[0-9]+}}
+; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_contract_reassoc2:'
+
+; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_contract_reassoc2:'
+; GLOBALDEBUG:         fmul contract reassoc {{t[0-9]+}}
+; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_contract_reassoc2:'
+
+define float @fmul_fma_contract_reassoc2(float %x) {
+; FMF-LABEL: fmul_fma_contract_reassoc2:
+; FMF:       # %bb.0:
+; FMF-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
+; FMF-NEXT:    lfs 0, .LCPI13_0@toc@l(3)
+; FMF-NEXT:    xsmulsp 1, 1, 0
+; FMF-NEXT:    blr
+;
+; GLOBAL-LABEL: fmul_fma_contract_reassoc2:
+; GLOBAL:       # %bb.0:
+; GLOBAL-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
+; GLOBAL-NEXT:    lfs 0, .LCPI13_0@toc@l(3)
+; GLOBAL-NEXT:    xsmulsp 1, 1, 0
+; GLOBAL-NEXT:    blr
+  %mul = fmul contract reassoc float %x, 42.0
+  %fma = call contract reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
+  ret float %fma
+}
+
 ; The FMA is now fully 'fast'. This implies that reassociation is allowed.
 
 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
@@ -222,15 +396,15 @@
 define float @fmul_fma_fast1(float %x) {
 ; FMF-LABEL: fmul_fma_fast1:
 ; FMF:       # %bb.0:
-; FMF-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
-; FMF-NEXT:    lfs 0, .LCPI8_0@toc@l(3)
+; FMF-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
+; FMF-NEXT:    lfs 0, .LCPI14_0@toc@l(3)
 ; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fma_fast1:
 ; GLOBAL:       # %bb.0:
-; GLOBAL-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
-; GLOBAL-NEXT:    lfs 0, .LCPI8_0@toc@l(3)
+; GLOBAL-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
+; GLOBAL-NEXT:    lfs 0, .LCPI14_0@toc@l(3)
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    blr
   %mul = fmul float %x, 42.0
@@ -251,15 +425,15 @@
 define float @fmul_fma_fast2(float %x) {
 ; FMF-LABEL: fmul_fma_fast2:
 ; FMF:       # %bb.0:
-; FMF-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
-; FMF-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
+; FMF-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
+; FMF-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
 ; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    blr
 ;
 ; GLOBAL-LABEL: fmul_fma_fast2:
 ; GLOBAL:       # %bb.0:
-; GLOBAL-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
-; GLOBAL-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
+; GLOBAL-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
+; GLOBAL-NEXT:    lfs 0, .LCPI15_0@toc@l(3)
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    blr
   %mul = fmul fast float %x, 42.0
@@ -282,19 +456,19 @@
 ; FMF:       # %bb.0:
 ; FMF-NEXT:    xxlxor 0, 0, 0
 ; FMF-NEXT:    fcmpu 0, 1, 0
-; FMF-NEXT:    beq 0, .LBB10_2
+; FMF-NEXT:    beq 0, .LBB16_2
 ; FMF-NEXT:  # %bb.1:
 ; FMF-NEXT:    xsrsqrtesp 0, 1
-; FMF-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
-; FMF-NEXT:    addis 4, 2, .LCPI10_1@toc@ha
-; FMF-NEXT:    lfs 2, .LCPI10_0@toc@l(3)
-; FMF-NEXT:    lfs 3, .LCPI10_1@toc@l(4)
+; FMF-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
+; FMF-NEXT:    addis 4, 2, .LCPI16_1@toc@ha
+; FMF-NEXT:    lfs 2, .LCPI16_0@toc@l(3)
+; FMF-NEXT:    lfs 3, .LCPI16_1@toc@l(4)
 ; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    xsmulsp 0, 1, 0
 ; FMF-NEXT:    xsmulsp 1, 1, 2
 ; FMF-NEXT:    xsaddsp 0, 0, 3
 ; FMF-NEXT:    xsmulsp 0, 1, 0
-; FMF-NEXT:  .LBB10_2:
+; FMF-NEXT:  .LBB16_2:
 ; FMF-NEXT:    fmr 1, 0
 ; FMF-NEXT:    blr
 ;
@@ -302,18 +476,18 @@
 ; GLOBAL:       # %bb.0:
 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
 ; GLOBAL-NEXT:    fcmpu 0, 1, 0
-; GLOBAL-NEXT:    beq 0, .LBB10_2
+; GLOBAL-NEXT:    beq 0, .LBB16_2
 ; GLOBAL-NEXT:  # %bb.1:
 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
-; GLOBAL-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
-; GLOBAL-NEXT:    addis 4, 2, .LCPI10_1@toc@ha
-; GLOBAL-NEXT:    lfs 2, .LCPI10_0@toc@l(3)
-; GLOBAL-NEXT:    lfs 3, .LCPI10_1@toc@l(4)
+; GLOBAL-NEXT:    addis 3, 2, .LCPI16_0@toc@ha
+; GLOBAL-NEXT:    addis 4, 2, .LCPI16_1@toc@ha
+; GLOBAL-NEXT:    lfs 2, .LCPI16_0@toc@l(3)
+; GLOBAL-NEXT:    lfs 3, .LCPI16_1@toc@l(4)
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
 ; GLOBAL-NEXT:    xsmulsp 0, 1, 3
 ; GLOBAL-NEXT:    xsmulsp 0, 0, 2
-; GLOBAL-NEXT:  .LBB10_2:
+; GLOBAL-NEXT:  .LBB16_2:
 ; GLOBAL-NEXT:    fmr 1, 0
 ; GLOBAL-NEXT:    blr
   %rt = call afn float @llvm.sqrt.f32(float %x)
@@ -335,18 +509,18 @@
 ; FMF:       # %bb.0:
 ; FMF-NEXT:    xxlxor 0, 0, 0
 ; FMF-NEXT:    fcmpu 0, 1, 0
-; FMF-NEXT:    beq 0, .LBB11_2
+; FMF-NEXT:    beq 0, .LBB17_2
 ; FMF-NEXT:  # %bb.1:
 ; FMF-NEXT:    xsrsqrtesp 0, 1
-; FMF-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
-; FMF-NEXT:    addis 4, 2, .LCPI11_1@toc@ha
-; FMF-NEXT:    lfs 2, .LCPI11_0@toc@l(3)
-; FMF-NEXT:    lfs 3, .LCPI11_1@toc@l(4)
+; FMF-NEXT:    addis 3, 2, .LCPI17_0@toc@ha
+; FMF-NEXT:    addis 4, 2, .LCPI17_1@toc@ha
+; FMF-NEXT:    lfs 2, .LCPI17_0@toc@l(3)
+; FMF-NEXT:    lfs 3, .LCPI17_1@toc@l(4)
 ; FMF-NEXT:    xsmulsp 1, 1, 0
 ; FMF-NEXT:    xsmaddasp 2, 1, 0
 ; FMF-NEXT:    xsmulsp 0, 1, 3
 ; FMF-NEXT:    xsmulsp 0, 0, 2
-; FMF-NEXT:  .LBB11_2:
+; FMF-NEXT:  .LBB17_2:
 ; FMF-NEXT:    fmr 1, 0
 ; FMF-NEXT:    blr
 ;
@@ -354,18 +528,18 @@
 ; GLOBAL:       # %bb.0:
 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
 ; GLOBAL-NEXT:    fcmpu 0, 1, 0
-; GLOBAL-NEXT:    beq 0, .LBB11_2
+; GLOBAL-NEXT:    beq 0, .LBB17_2
 ; GLOBAL-NEXT:  # %bb.1:
 ; GLOBAL-NEXT:    xsrsqrtesp 0, 1
-; GLOBAL-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
-; GLOBAL-NEXT:    addis 4, 2, .LCPI11_1@toc@ha
-; GLOBAL-NEXT:    lfs 2, .LCPI11_0@toc@l(3)
-; GLOBAL-NEXT:    lfs 3, .LCPI11_1@toc@l(4)
+; GLOBAL-NEXT:    addis 3, 2, .LCPI17_0@toc@ha
+; GLOBAL-NEXT:    addis 4, 2, .LCPI17_1@toc@ha
+; GLOBAL-NEXT:    lfs 2, .LCPI17_0@toc@l(3)
+; GLOBAL-NEXT:    lfs 3, .LCPI17_1@toc@l(4)
 ; GLOBAL-NEXT:    xsmulsp 1, 1, 0
 ; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
 ; GLOBAL-NEXT:    xsmulsp 0, 1, 3
 ; GLOBAL-NEXT:    xsmulsp 0, 0, 2
-; GLOBAL-NEXT:  .LBB11_2:
+; GLOBAL-NEXT:  .LBB17_2:
 ; GLOBAL-NEXT:    fmr 1, 0
 ; GLOBAL-NEXT:    blr
   %rt = call fast float @llvm.sqrt.f32(float %x)
@@ -387,10 +561,10 @@
 ; FMF:       # %bb.0:
 ; FMF-NEXT:    xxlxor 0, 0, 0
 ; FMF-NEXT:    xscmpudp 0, 1, 0
-; FMF-NEXT:    blt 0, .LBB12_2
+; FMF-NEXT:    blt 0, .LBB18_2
 ; FMF-NEXT:  # %bb.1:
 ; FMF-NEXT:    fmr 3, 2
-; FMF-NEXT:  .LBB12_2:
+; FMF-NEXT:  .LBB18_2:
 ; FMF-NEXT:    fmr 1, 3
 ; FMF-NEXT:    blr
 ;
@@ -398,10 +572,10 @@
 ; GLOBAL:       # %bb.0:
 ; GLOBAL-NEXT:    xxlxor 0, 0, 0
 ; GLOBAL-NEXT:    xscmpudp 0, 1, 0
-; GLOBAL-NEXT:    blt 0, .LBB12_2
+; GLOBAL-NEXT:    blt 0, .LBB18_2
 ; GLOBAL-NEXT:  # %bb.1:
 ; GLOBAL-NEXT:    fmr 3, 2
-; GLOBAL-NEXT:  .LBB12_2:
+; GLOBAL-NEXT:  .LBB18_2:
 ; GLOBAL-NEXT:    fmr 1, 3
 ; GLOBAL-NEXT:    blr
   %cmp = fcmp nnan ult double %a, 0.0
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11506,7 +11506,7 @@
 
 static bool isContractable(SDNode *N) {
   SDNodeFlags F = N->getFlags();
-  return F.hasAllowContract() || F.hasAllowReassociation();
+  return F.hasAllowContract();
 }
 
 /// Try to perform FMA combining on a given FADD node.
Index: clang/test/Driver/fast-math.c
===================================================================
--- clang/test/Driver/fast-math.c
+++ clang/test/Driver/fast-math.c
@@ -180,6 +180,13 @@
 // CHECK-FAST-MATH: "-ffast-math"
 // CHECK-FAST-MATH: "-ffinite-math-only"
 //
+// -ffp-contract=off must disable the fast-math umbrella, and the unsafe-fp-math
+// umbrella.
+// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
+// RUN: %clang -### -ffast-math -ffp-contract=off -c %s 2>&1 \
+// RUN:   | FileCheck --check-prefix=CHECK-NO-UNSAFE-MATH %s
+//
 // RUN: %clang -### -ffast-math -fno-fast-math -c %s 2>&1 \
 // RUN:   | FileCheck --check-prefix=CHECK-NO-FAST-MATH %s
 // RUN: %clang -### -ffast-math -fno-finite-math-only -c %s 2>&1 \
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -2709,8 +2709,10 @@
   if (MathErrno)
     CmdArgs.push_back("-fmath-errno");
 
+  // If -ffp-contract=off (or =on) was given on the command line, we must
+  // suppress the emission of -ffast-math and -menable-unsafe-fp-math to cc1.
   if (!MathErrno && AssociativeMath && ReciprocalMath && !SignedZeros &&
-      !TrappingMath)
+      !TrappingMath && !(FPContract.equals("off") || FPContract.equals("on")))
     CmdArgs.push_back("-menable-unsafe-fp-math");
 
   if (!SignedZeros)
@@ -2753,7 +2755,8 @@
   // that's consistent with gcc's behaviour.
   if (!HonorINFs && !HonorNaNs && !MathErrno && AssociativeMath &&
       ReciprocalMath && !SignedZeros && !TrappingMath && !RoundingFPMath) {
-    CmdArgs.push_back("-ffast-math");
+    if (!(FPContract.equals("off") || FPContract.equals("on")))
+      CmdArgs.push_back("-ffast-math");
     if (FPModel.equals("fast")) {
       if (FPContract.equals("fast"))
         // All set, do nothing.

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D72675: Fix -ffast-math/-ffp-contract interaction

Reply via email to