https://github.com/tru updated https://github.com/llvm/llvm-project/pull/109125
>From a7554dfc222b13624426ebd6ef46e122b9c16ee7 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng <dtcxzyw2...@gmail.com> Date: Tue, 10 Sep 2024 09:19:39 +0800 Subject: [PATCH 1/2] [LoongArch][ISel] Check the number of sign bits in `PatGprGpr_32` (#107432) After https://github.com/llvm/llvm-project/pull/92205, LoongArch ISel selects `div.w` for `trunc i64 (sdiv i64 3202030857, (sext i32 X to i64)) to i32`. It is incorrect since `3202030857` is not a signed 32-bit constant. It will produce wrong result when `X == 2`: https://alive2.llvm.org/ce/z/pzfGZZ This patch adds additional `sexti32` checks to operands of `PatGprGpr_32`. Alive2 proof: https://alive2.llvm.org/ce/z/AkH5Mp Fix #107414. (cherry picked from commit a111f9119a5ec77c19a514ec09454218f739454f) --- .../Target/LoongArch/LoongArchInstrInfo.td | 5 +- .../ir-instruction/sdiv-udiv-srem-urem.ll | 67 ++++++++++++++++++- 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index ef647a42778737..339d50bd819217 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -1065,10 +1065,13 @@ def RDTIME_D : RDTIME_2R<0x00006800>; /// Generic pattern classes +def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{ + return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLE(MVT::i32); +}]>; class PatGprGpr<SDPatternOperator OpNode, LAInst Inst> : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>; class PatGprGpr_32<SDPatternOperator OpNode, LAInst Inst> - : Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, GPR:$rk)>; + : Pat<(sext_inreg (OpNode (assertsexti32 GPR:$rj), (assertsexti32 GPR:$rk)), i32), (Inst GPR:$rj, GPR:$rk)>; class PatGpr<SDPatternOperator OpNode, LAInst Inst> : Pat<(OpNode GPR:$rj), (Inst GPR:$rj)>; diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll index ab3eec240db3c1..c22acdb4969071 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll @@ -191,7 +191,8 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) { ; LA64: # %bb.0: # %entry ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: addi.w $a0, $a0, 0 -; LA64-NEXT: div.w $a0, $a0, $a1 +; LA64-NEXT: div.d $a0, $a0, $a1 +; LA64-NEXT: addi.w $a0, $a0, 0 ; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32: @@ -207,11 +208,12 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) { ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: addi.w $a1, $a1, 0 ; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 -; LA64-TRAP-NEXT: div.w $a0, $a0, $a1 +; LA64-TRAP-NEXT: div.d $a0, $a0, $a1 ; LA64-TRAP-NEXT: bnez $a1, .LBB5_2 ; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 ; LA64-TRAP-NEXT: .LBB5_2: # %entry +; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 ; LA64-TRAP-NEXT: ret entry: %r = sdiv i32 %a, %b @@ -1151,3 +1153,64 @@ entry: %r = urem i64 %a, %b ret i64 %r } + +define signext i32 @pr107414(i32 signext %x) { +; LA32-LABEL: pr107414: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: move $a2, $a0 +; LA32-NEXT: srai.w $a3, $a0, 31 +; LA32-NEXT: lu12i.w $a0, -266831 +; LA32-NEXT: ori $a0, $a0, 3337 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: bl %plt(__divdi3) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: pr107414: +; LA64: # %bb.0: # %entry +; LA64-NEXT: lu12i.w $a1, -266831 +; LA64-NEXT: ori $a1, $a1, 3337 +; LA64-NEXT: lu32i.d $a1, 0 +; LA64-NEXT: div.d $a0, $a1, $a0 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: ret +; +; LA32-TRAP-LABEL: pr107414: +; LA32-TRAP: # %bb.0: # %entry +; LA32-TRAP-NEXT: addi.w $sp, $sp, -16 +; LA32-TRAP-NEXT: .cfi_def_cfa_offset 16 +; LA32-TRAP-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-TRAP-NEXT: .cfi_offset 1, -4 +; LA32-TRAP-NEXT: move $a2, $a0 +; LA32-TRAP-NEXT: srai.w $a3, $a0, 31 +; LA32-TRAP-NEXT: lu12i.w $a0, -266831 +; LA32-TRAP-NEXT: ori $a0, $a0, 3337 +; LA32-TRAP-NEXT: move $a1, $zero +; LA32-TRAP-NEXT: bl %plt(__divdi3) +; LA32-TRAP-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-TRAP-NEXT: addi.w $sp, $sp, 16 +; LA32-TRAP-NEXT: ret +; +; LA64-TRAP-LABEL: pr107414: +; LA64-TRAP: # %bb.0: # %entry +; LA64-TRAP-NEXT: lu12i.w $a1, -266831 +; LA64-TRAP-NEXT: ori $a1, $a1, 3337 +; LA64-TRAP-NEXT: lu32i.d $a1, 0 +; LA64-TRAP-NEXT: div.d $a1, $a1, $a0 +; LA64-TRAP-NEXT: bnez $a0, .LBB32_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry +; LA64-TRAP-NEXT: break 7 +; LA64-TRAP-NEXT: .LBB32_2: # %entry +; LA64-TRAP-NEXT: addi.w $a0, $a1, 0 +; LA64-TRAP-NEXT: ret +entry: + %conv = sext i32 %x to i64 + %div = sdiv i64 3202030857, %conv + %conv1 = trunc i64 %div to i32 + ret i32 %conv1 +} >From 99058521d4c80635f60b2c1442b683395e0ee818 Mon Sep 17 00:00:00 2001 From: hev <wang...@loongson.cn> Date: Tue, 10 Sep 2024 16:52:21 +0800 Subject: [PATCH 2/2] [LoongArch] Eliminate the redundant sign extension of division (#107971) If all incoming values of `div.d` are sign-extended and all users only use the lower 32 bits, then convert them to W versions. Fixes: #107946 (cherry picked from commit 0f47e3aebdd2a4a938468a272ea4224552dbf176) --- llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp | 15 +++++++++++++++ .../ir-instruction/sdiv-udiv-srem-urem.ll | 6 ++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp index abac69054f3b91..ab90409fdf47d0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp @@ -637,6 +637,19 @@ static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST, break; } return false; + // If all incoming values are sign-extended and all users only use + // the lower 32 bits, then convert them to W versions. + case LoongArch::DIV_D: { + if (!AddRegToWorkList(MI->getOperand(1).getReg())) + return false; + if (!AddRegToWorkList(MI->getOperand(2).getReg())) + return false; + if (hasAllWUsers(*MI, ST, MRI)) { + FixableDef.insert(MI); + break; + } + return false; + } } } @@ -651,6 +664,8 @@ static unsigned getWOp(unsigned Opcode) { return LoongArch::ADDI_W; case LoongArch::ADD_D: return LoongArch::ADD_W; + case LoongArch::DIV_D: + return LoongArch::DIV_W; case LoongArch::LD_D: case LoongArch::LD_WU: return LoongArch::LD_W; diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll index c22acdb4969071..c5af79157eaadc 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll @@ -191,8 +191,7 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) { ; LA64: # %bb.0: # %entry ; LA64-NEXT: addi.w $a1, $a1, 0 ; LA64-NEXT: addi.w $a0, $a0, 0 -; LA64-NEXT: div.d $a0, $a0, $a1 -; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: div.w $a0, $a0, $a1 ; LA64-NEXT: ret ; ; LA32-TRAP-LABEL: sdiv_si32_ui32_ui32: @@ -208,12 +207,11 @@ define signext i32 @sdiv_si32_ui32_ui32(i32 %a, i32 %b) { ; LA64-TRAP: # %bb.0: # %entry ; LA64-TRAP-NEXT: addi.w $a1, $a1, 0 ; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 -; LA64-TRAP-NEXT: div.d $a0, $a0, $a1 +; LA64-TRAP-NEXT: div.w $a0, $a0, $a1 ; LA64-TRAP-NEXT: bnez $a1, .LBB5_2 ; LA64-TRAP-NEXT: # %bb.1: # %entry ; LA64-TRAP-NEXT: break 7 ; LA64-TRAP-NEXT: .LBB5_2: # %entry -; LA64-TRAP-NEXT: addi.w $a0, $a0, 0 ; LA64-TRAP-NEXT: ret entry: %r = sdiv i32 %a, %b _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits