https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/67302
>From a1567f579531c3abbd1f4e9b7c7edd2f95ead42c Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Mon, 25 Sep 2023 17:10:51 +0800 Subject: [PATCH 1/4] [PowerPC] Implement llvm.set.rounding intrinsic According to LangRef, llvm.set.rounding sets the rounding mode according to its integer argument: 0 - toward zero 1 - to nearest, ties to even 2 - toward positive infinity 3 - toward negative infinity 4 - to nearest, ties away from zero whereas the PowerPC ISA encodes the rounding mode as: 0 - to nearest 1 - toward zero 2 - toward positive infinity 3 - toward negative infinity This patch maps the argument to the PowerPC encoding and writes it into the last two bits of FPSCR (rounding mode). --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 80 ++++++++ llvm/lib/Target/PowerPC/PPCISelLowering.h | 1 + llvm/test/CodeGen/PowerPC/frounds.ll | 194 +++++++++++++++++++- 3 files changed, 274 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index f4e3531980d165f..4e5ff0cb7169662 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -426,6 +426,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom); + setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); // If we're enabling GP optimizations, use hardware square root if (!Subtarget.hasFSQRT() && @@ -8898,6 +8899,83 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, return FP; } +SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op, + SelectionDAG &DAG) const { + SDLoc Dl(Op); + MachineFunction &MF = DAG.getMachineFunction(); + EVT PtrVT = getPointerTy(MF.getDataLayout()); + SDValue Chain = Op.getOperand(0); + + // If requested mode is constant, just use simpler mtfsb. 
+ if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + uint64_t Mode = CVal->getZExtValue(); + if (Mode >= 4) + llvm_unreachable("Unsupported rounding mode!"); + unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1); + SDNode *SetHi = DAG.getMachineNode( + (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other, + {DAG.getConstant(30, Dl, MVT::i32, true), Chain}); + SDNode *SetLo = DAG.getMachineNode( + (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other, + {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)}); + return SDValue(SetLo, 0); + } + + // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format. + SDValue One = DAG.getConstant(1, Dl, MVT::i32); + SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1), + DAG.getConstant(3, Dl, MVT::i32)); + SDValue DstFlag = DAG.getNode( + ISD::XOR, Dl, MVT::i32, SrcFlag, + DAG.getNode(ISD::AND, Dl, MVT::i32, + DAG.getNOT(Dl, + DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One), + MVT::i32), + One)); + SDValue MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain); + Chain = MFFS.getValue(1); + SDValue NewFPSCR; + if (isTypeLegal(MVT::i64)) { + // Set the last two bits (rounding mode) of bitcasted FPSCR. + NewFPSCR = DAG.getNode( + ISD::OR, Dl, MVT::i64, + DAG.getNode(ISD::AND, Dl, MVT::i64, + DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS), + DAG.getNOT(Dl, DAG.getConstant(3, Dl, MVT::i64), MVT::i64)), + DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag)); + NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR); + } else { + // In 32-bit mode, store f64, load and update the lower half. 
+ int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false); + SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); + Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo()); + SDValue Addr; + if (Subtarget.isLittleEndian()) + Addr = StackSlot; + else + Addr = DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot, + DAG.getConstant(4, Dl, PtrVT)); + SDValue Tmp = DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo()); + Chain = Tmp.getValue(1); + + Tmp = DAG.getNode( + ISD::OR, Dl, MVT::i32, + DAG.getNode(ISD::AND, Dl, MVT::i32, Tmp, + DAG.getNOT(Dl, DAG.getConstant(3, Dl, MVT::i32), MVT::i32)), + DstFlag); + + Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo()); + NewFPSCR = + DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo()); + Chain = NewFPSCR.getValue(1); + } + SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true); + SDNode *MTFSF = DAG.getMachineNode( + PPC::MTFSF, Dl, MVT::Other, + {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain}); + return SDValue(MTFSF, 0); +} + SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -11647,6 +11725,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG); + case ISD::SET_ROUNDING: + return LowerSET_ROUNDING(Op, DAG); // Lower 64-bit shifts. 
case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 7c62e370f1536a4..6c197327593f0f8 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1286,6 +1286,7 @@ namespace llvm { const SDLoc &dl) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/PowerPC/frounds.ll b/llvm/test/CodeGen/PowerPC/frounds.ll index c1f7181b30f3f6e..1944af1687d0ef7 100644 --- a/llvm/test/CodeGen/PowerPC/frounds.ll +++ b/llvm/test/CodeGen/PowerPC/frounds.ll @@ -77,4 +77,196 @@ return: ; preds = %entry ret i32 %retval3 } -declare i32 @llvm.get.rounding() nounwind +define void @setrnd_tozero() { +; PPC32-LABEL: setrnd_tozero: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: mtfsb0 30 +; PPC32-NEXT: mtfsb1 31 +; PPC32-NEXT: blr +; +; PPC64-LABEL: setrnd_tozero: +; PPC64: # %bb.0: # %entry +; PPC64-NEXT: mtfsb0 30 +; PPC64-NEXT: mtfsb1 31 +; PPC64-NEXT: blr +; +; PPC64LE-LABEL: setrnd_tozero: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mtfsb0 30 +; PPC64LE-NEXT: mtfsb1 31 +; PPC64LE-NEXT: blr +; +; DM-LABEL: setrnd_tozero: +; DM: # %bb.0: # %entry +; DM-NEXT: mtfsb0 30 +; DM-NEXT: mtfsb1 31 +; DM-NEXT: blr +entry: + call void @llvm.set.rounding(i32 0) + ret void +} + +define void @setrnd_tonearest_tieeven() { +; PPC32-LABEL: setrnd_tonearest_tieeven: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: mtfsb0 30 +; PPC32-NEXT: mtfsb0 31 +; PPC32-NEXT: blr +; +; PPC64-LABEL: setrnd_tonearest_tieeven: +; PPC64: # %bb.0: # %entry +; PPC64-NEXT: mtfsb0 30 +; PPC64-NEXT: mtfsb0 31 +; PPC64-NEXT: blr +; +; 
PPC64LE-LABEL: setrnd_tonearest_tieeven: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mtfsb0 30 +; PPC64LE-NEXT: mtfsb0 31 +; PPC64LE-NEXT: blr +; +; DM-LABEL: setrnd_tonearest_tieeven: +; DM: # %bb.0: # %entry +; DM-NEXT: mtfsb0 30 +; DM-NEXT: mtfsb0 31 +; DM-NEXT: blr +entry: + call void @llvm.set.rounding(i32 1) + ret void +} + +define void @setrnd_toposinf() { +; PPC32-LABEL: setrnd_toposinf: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: mtfsb1 30 +; PPC32-NEXT: mtfsb0 31 +; PPC32-NEXT: blr +; +; PPC64-LABEL: setrnd_toposinf: +; PPC64: # %bb.0: # %entry +; PPC64-NEXT: mtfsb1 30 +; PPC64-NEXT: mtfsb0 31 +; PPC64-NEXT: blr +; +; PPC64LE-LABEL: setrnd_toposinf: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mtfsb1 30 +; PPC64LE-NEXT: mtfsb0 31 +; PPC64LE-NEXT: blr +; +; DM-LABEL: setrnd_toposinf: +; DM: # %bb.0: # %entry +; DM-NEXT: mtfsb1 30 +; DM-NEXT: mtfsb0 31 +; DM-NEXT: blr +entry: + call void @llvm.set.rounding(i32 2) + ret void +} + +define void @setrnd_toneginf() { +; PPC32-LABEL: setrnd_toneginf: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: mtfsb1 30 +; PPC32-NEXT: mtfsb1 31 +; PPC32-NEXT: blr +; +; PPC64-LABEL: setrnd_toneginf: +; PPC64: # %bb.0: # %entry +; PPC64-NEXT: mtfsb1 30 +; PPC64-NEXT: mtfsb1 31 +; PPC64-NEXT: blr +; +; PPC64LE-LABEL: setrnd_toneginf: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mtfsb1 30 +; PPC64LE-NEXT: mtfsb1 31 +; PPC64LE-NEXT: blr +; +; DM-LABEL: setrnd_toneginf: +; DM: # %bb.0: # %entry +; DM-NEXT: mtfsb1 30 +; DM-NEXT: mtfsb1 31 +; DM-NEXT: blr +entry: + call void @llvm.set.rounding(i32 3) + ret void +} + +define void @setrnd_var(i32 %x) { +; PPC32-LABEL: setrnd_var: +; PPC32: # %bb.0: # %entry +; PPC32-NEXT: stwu 1, -16(1) +; PPC32-NEXT: .cfi_def_cfa_offset 16 +; PPC32-NEXT: mffs 0 +; PPC32-NEXT: stfd 0, 8(1) +; PPC32-NEXT: clrlwi 4, 3, 30 +; PPC32-NEXT: lwz 5, 12(1) +; PPC32-NEXT: rlwinm 3, 3, 31, 31, 31 +; PPC32-NEXT: xor 3, 3, 4 +; PPC32-NEXT: xori 3, 3, 1 +; PPC32-NEXT: rlwinm 4, 5, 0, 0, 29 +; PPC32-NEXT: rlwimi 4, 
3, 0, 30, 31 +; PPC32-NEXT: stw 4, 12(1) +; PPC32-NEXT: lfd 0, 8(1) +; PPC32-NEXT: mtfsf 255, 0 +; PPC32-NEXT: addi 1, 1, 16 +; PPC32-NEXT: blr +; +; PPC64-LABEL: setrnd_var: +; PPC64: # %bb.0: # %entry +; PPC64-NEXT: mffs 0 +; PPC64-NEXT: stfd 0, -16(1) +; PPC64-NEXT: ld 5, -16(1) +; PPC64-NEXT: clrlwi 4, 3, 30 +; PPC64-NEXT: rlwinm 3, 3, 31, 31, 31 +; PPC64-NEXT: xor 3, 3, 4 +; PPC64-NEXT: xori 3, 3, 1 +; PPC64-NEXT: clrldi 3, 3, 32 +; PPC64-NEXT: rldicr 4, 5, 0, 61 +; PPC64-NEXT: or 3, 4, 3 +; PPC64-NEXT: std 3, -8(1) +; PPC64-NEXT: lfd 0, -8(1) +; PPC64-NEXT: mtfsf 255, 0 +; PPC64-NEXT: blr +; +; PPC64LE-LABEL: setrnd_var: +; PPC64LE: # %bb.0: # %entry +; PPC64LE-NEXT: mffs 0 +; PPC64LE-NEXT: stfd 0, -16(1) +; PPC64LE-NEXT: clrlwi 4, 3, 30 +; PPC64LE-NEXT: rlwinm 3, 3, 31, 31, 31 +; PPC64LE-NEXT: xor 3, 3, 4 +; PPC64LE-NEXT: ld 4, -16(1) +; PPC64LE-NEXT: xori 3, 3, 1 +; PPC64LE-NEXT: clrldi 3, 3, 32 +; PPC64LE-NEXT: rldicr 4, 4, 0, 61 +; PPC64LE-NEXT: or 3, 4, 3 +; PPC64LE-NEXT: std 3, -8(1) +; PPC64LE-NEXT: lfd 0, -8(1) +; PPC64LE-NEXT: mtfsf 255, 0 +; PPC64LE-NEXT: blr +; +; DM-LABEL: setrnd_var: +; DM: # %bb.0: # %entry +; DM-NEXT: clrlwi 4, 3, 30 +; DM-NEXT: rlwinm 3, 3, 31, 31, 31 +; DM-NEXT: xor 3, 3, 4 +; DM-NEXT: xori 3, 3, 1 +; DM-NEXT: clrldi 3, 3, 32 +; DM-NEXT: mffs 0 +; DM-NEXT: mffprd 4, 0 +; DM-NEXT: rldicr 4, 4, 0, 61 +; DM-NEXT: or 3, 4, 3 +; DM-NEXT: mtfprd 0, 3 +; DM-NEXT: mtfsf 255, 0 +; DM-NEXT: blr +entry: + call void @llvm.set.rounding(i32 %x) + ret void +} + +declare i32 @llvm.get.rounding() #0 +declare void @llvm.set.rounding(i32) #0 + +attributes #0 = { nounwind } >From f1c1a5c14147c69a5e2731e8ebe0febad6a12c4a Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Thu, 16 Nov 2023 15:25:08 +0800 Subject: [PATCH 2/4] Exclude SPE --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 5 ++--- llvm/test/CodeGen/PowerPC/frounds.ll | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git 
a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 5efc9ba487a710a..3be34f89c51d041 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -420,14 +420,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, } else { setOperationAction(ISD::FMA , MVT::f64, Legal); setOperationAction(ISD::FMA , MVT::f32, Legal); + setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom); + setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); } if (Subtarget.hasSPE()) setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); - setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom); - setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom); - // If we're enabling GP optimizations, use hardware square root if (!Subtarget.hasFSQRT() && !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() && diff --git a/llvm/test/CodeGen/PowerPC/frounds.ll b/llvm/test/CodeGen/PowerPC/frounds.ll index 1944af1687d0ef7..538de2938aba817 100644 --- a/llvm/test/CodeGen/PowerPC/frounds.ll +++ b/llvm/test/CodeGen/PowerPC/frounds.ll @@ -233,14 +233,14 @@ define void @setrnd_var(i32 %x) { ; PPC64LE-LABEL: setrnd_var: ; PPC64LE: # %bb.0: # %entry ; PPC64LE-NEXT: mffs 0 -; PPC64LE-NEXT: stfd 0, -16(1) ; PPC64LE-NEXT: clrlwi 4, 3, 30 ; PPC64LE-NEXT: rlwinm 3, 3, 31, 31, 31 +; PPC64LE-NEXT: stfd 0, -16(1) ; PPC64LE-NEXT: xor 3, 3, 4 ; PPC64LE-NEXT: ld 4, -16(1) ; PPC64LE-NEXT: xori 3, 3, 1 -; PPC64LE-NEXT: clrldi 3, 3, 32 ; PPC64LE-NEXT: rldicr 4, 4, 0, 61 +; PPC64LE-NEXT: clrldi 3, 3, 32 ; PPC64LE-NEXT: or 3, 4, 3 ; PPC64LE-NEXT: std 3, -8(1) ; PPC64LE-NEXT: lfd 0, -8(1) >From a2c14908060849a83b0ac000c96fa6a9251e811b Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Mon, 27 Nov 2023 17:42:07 +0800 Subject: [PATCH 3/4] Use assert instead of unreachable --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git 
a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 8cc794ad375de60..32faa60970605ae 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8910,8 +8910,7 @@ SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op, // If requested mode is constant, just use simpler mtfsb. if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { uint64_t Mode = CVal->getZExtValue(); - if (Mode >= 4) - llvm_unreachable("Unsupported rounding mode!"); + assert(Model < 4 && "Unsupported rounding mode!"); unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1); SDNode *SetHi = DAG.getMachineNode( (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other, >From 00a5ae8fac889f14ead068f0259540788bd125fb Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Mon, 27 Nov 2023 17:49:49 +0800 Subject: [PATCH 4/4] Fixup --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 32faa60970605ae..c56d8d65c75bdd6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8910,7 +8910,7 @@ SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op, // If requested mode is constant, just use simpler mtfsb. if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { uint64_t Mode = CVal->getZExtValue(); - assert(Model < 4 && "Unsupported rounding mode!"); + assert(Mode < 4 && "Unsupported rounding mode!"); unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1); SDNode *SetHi = DAG.getMachineNode( (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other, _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits