https://github.com/AtariDreams created https://github.com/llvm/llvm-project/pull/86728
When folding (ashr (shl, x, c1), c2) we need to treat c1 and c2 as unsigned to find out if the combined shift should be a left or right shift. Also do an early out during pre-legalization in case c1 and c2 has differet types, as that otherwise complicated the comparison of c1 and c2 a bit. (cherry picked from commit 3e6e54eb795ce7a1ccd47df8c22fc08125a88886) >From 9b30b09c19df92572120136368d4435f9e4b77cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Pettersson?= <bjorn.a.petters...@ericsson.com> Date: Tue, 26 Mar 2024 20:53:34 +0100 Subject: [PATCH] [X86] Fix miscompile in combineShiftRightArithmetic (#86597) When folding (ashr (shl, x, c1), c2) we need to treat c1 and c2 as unsigned to find out if the combined shift should be a left or right shift. Also do an early out during pre-legalization in case c1 and c2 has differet types, as that otherwise complicated the comparison of c1 and c2 a bit. (cherry picked from commit 3e6e54eb795ce7a1ccd47df8c22fc08125a88886) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 29 +++++++++-------- llvm/test/CodeGen/X86/sar_fold.ll | 41 +++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9e64726fb6fff7..71fc6b5047eaa9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -47035,10 +47035,13 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget)) return V; - // fold (ashr (shl, a, [56,48,32,24,16]), SarConst) - // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or - // into (lshr, (sext (a), SarConst - [56,48,32,24,16])) - // depending on sign of (SarConst - [56,48,32,24,16]) + // fold (SRA (SHL X, ShlConst), SraConst) + // into (SHL (sext_in_reg X), ShlConst - SraConst) + // or (sext_in_reg X) + // or (SRA (sext_in_reg X), SraConst - ShlConst) + // depending on relation between SraConst and ShlConst. + // We only do this if (Size - ShlConst) is equal to 8, 16 or 32. That allows + // us to do the sext_in_reg from corresponding bit. // sexts in X86 are MOVs. The MOVs have the same code size // as above SHIFTs (only SHIFT on 1 has lower code size). @@ -47054,29 +47057,29 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); APInt ShlConst = N01->getAsAPIntVal(); - APInt SarConst = N1->getAsAPIntVal(); + APInt SraConst = N1->getAsAPIntVal(); EVT CVT = N1.getValueType(); - if (SarConst.isNegative()) + if (CVT != N01.getValueType()) + return SDValue(); + if (SraConst.isNegative()) return SDValue(); for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) { unsigned ShiftSize = SVT.getSizeInBits(); - // skipping types without corresponding sext/zext and - // ShlConst that is not one of [56,48,32,24,16] + // Only deal with (Size - ShlConst) being equal to 8, 16 or 32. if (ShiftSize >= Size || ShlConst != Size - ShiftSize) continue; SDLoc DL(N); SDValue NN = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT)); - SarConst = SarConst - (Size - ShiftSize); - if (SarConst == 0) + if (SraConst.eq(ShlConst)) return NN; - if (SarConst.isNegative()) + if (SraConst.ult(ShlConst)) return DAG.getNode(ISD::SHL, DL, VT, NN, - DAG.getConstant(-SarConst, DL, CVT)); + DAG.getConstant(ShlConst - SraConst, DL, CVT)); return DAG.getNode(ISD::SRA, DL, VT, NN, - DAG.getConstant(SarConst, DL, CVT)); + DAG.getConstant(SraConst - ShlConst, DL, CVT)); } return SDValue(); } diff --git a/llvm/test/CodeGen/X86/sar_fold.ll b/llvm/test/CodeGen/X86/sar_fold.ll index 21655e19440afe..0f1396954b03a1 100644 --- a/llvm/test/CodeGen/X86/sar_fold.ll +++ b/llvm/test/CodeGen/X86/sar_fold.ll @@ -44,3 +44,44 @@ define i32 @shl24sar25(i32 %a) #0 { %2 = ashr exact i32 %1, 25 ret i32 %2 } + +define void @shl144sar48(ptr %p) #0 { +; CHECK-LABEL: shl144sar48: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movswl (%eax), %ecx +; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: sarl $31, %edx +; CHECK-NEXT: shldl $2, %ecx, %edx +; CHECK-NEXT: shll $2, %ecx +; CHECK-NEXT: movl %ecx, 12(%eax) +; CHECK-NEXT: movl %edx, 16(%eax) +; CHECK-NEXT: movl $0, 8(%eax) +; CHECK-NEXT: movl $0, 4(%eax) +; CHECK-NEXT: movl $0, (%eax) +; CHECK-NEXT: retl + %a = load i160, ptr %p + %1 = shl i160 %a, 144 + %2 = ashr exact i160 %1, 46 + store i160 %2, ptr %p + ret void +} + +define void @shl144sar2(ptr %p) #0 { +; CHECK-LABEL: shl144sar2: +; CHECK: # %bb.0: +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movswl (%eax), %ecx +; CHECK-NEXT: shll $14, %ecx +; CHECK-NEXT: movl %ecx, 16(%eax) +; CHECK-NEXT: movl $0, 8(%eax) +; CHECK-NEXT: movl $0, 12(%eax) +; CHECK-NEXT: movl $0, 4(%eax) +; CHECK-NEXT: movl $0, (%eax) +; CHECK-NEXT: retl + %a = load i160, ptr %p + %1 = shl i160 %a, 144 + %2 = ashr exact i160 %1, 2 + store i160 %2, ptr %p + ret void +} _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits