Author: Sanjay Patel Date: 2021-12-14T16:00:42-05:00 New Revision: 1a60ae02c65d26981017f59bc5918d3c2e363bfd
URL: https://github.com/llvm/llvm-project/commit/1a60ae02c65d26981017f59bc5918d3c2e363bfd DIFF: https://github.com/llvm/llvm-project/commit/1a60ae02c65d26981017f59bc5918d3c2e363bfd.diff LOG: [InstCombine] fold mask-with-signbit-splat to icmp+select ~(iN X s>> (N-1)) & Y --> (X s< 0) ? 0 : Y https://alive2.llvm.org/ce/z/JKlQ9x This is similar to D111410 / 727e642e970d028049d, but it includes a 'not' of the signbit and so it saves an instruction in the basic pattern. DAGCombiner or target-specific folds can expand this back into bit-hacks. The diffs in the logical-select tests are not true regressions - running early-cse and another round of instcombine is expected in a normal opt pipeline, and that reduces back to a minimal form as shown in the duplicated PhaseOrdering test. I have no understanding of the SystemZ diffs, so I made the minimal edits suggested by FileCheck to make that test pass again. That whole test file is wrong though. It is running the entire optimizer (-O2) to check IR, and then topping that by even running codegen and checking asm. It needs to be split up. 
Fixes #52631 Added: Modified: clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp llvm/test/Transforms/InstCombine/and.ll llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll llvm/test/Transforms/InstCombine/logical-select.ll llvm/test/Transforms/InstCombine/vec_sext.ll llvm/test/Transforms/PhaseOrdering/vector-select.ll Removed: ################################################################################ diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c index 7cd4a951741f0..38f0c2908825a 100644 --- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c +++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c @@ -3289,13 +3289,13 @@ void test_integer(void) { // CHECK-ASM: vsrlb vsc = vec_abs(vsc); - // CHECK-ASM: vlpb + // CHECK-ASM: vlcb vss = vec_abs(vss); - // CHECK-ASM: vlph + // CHECK-ASM: vlch vsi = vec_abs(vsi); - // CHECK-ASM: vlpf + // CHECK-ASM: vlcf vsl = vec_abs(vsl); - // CHECK-ASM: vlpg + // CHECK-ASM: vlcg vsc = vec_max(vsc, vsc); // CHECK-ASM: vmxb diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 9023619b14280..08cd1a7f97e60 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -2133,6 +2133,15 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg"); return SelectInst::Create(Cmp, Y, Zero); } + // If there's a 'not' of the shifted value, swap the select operands: + // ~(iN X s>> (N-1)) & Y --> (X s< 0) ? 
0 : Y + if (match(&I, m_c_And(m_OneUse(m_Not( + m_AShr(m_Value(X), m_SpecificInt(FullShift)))), + m_Value(Y)))) { + Constant *Zero = ConstantInt::getNullValue(Ty); + Value *Cmp = Builder.CreateICmpSLT(X, Zero, "isneg"); + return SelectInst::Create(Cmp, Zero, Y); + } // (~x) & y --> ~(x | (~y)) iff that gets rid of inversions if (sinkNotIntoOtherHandOfAndOrOr(I)) diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll index edaef78b631d8..53c7f09189ff5 100644 --- a/llvm/test/Transforms/InstCombine/and.ll +++ b/llvm/test/Transforms/InstCombine/and.ll @@ -1463,9 +1463,8 @@ define i8 @lshr_bitwidth_mask(i8 %x, i8 %y) { define i8 @not_ashr_bitwidth_mask(i8 %x, i8 %y) { ; CHECK-LABEL: @not_ashr_bitwidth_mask( -; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7 -; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SIGN]], -1 -; CHECK-NEXT: [[POS_OR_ZERO:%.*]] = and i8 [[NOT]], [[Y:%.*]] +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i8 [[X:%.*]], 0 +; CHECK-NEXT: [[POS_OR_ZERO:%.*]] = select i1 [[ISNEG]], i8 0, i8 [[Y:%.*]] ; CHECK-NEXT: ret i8 [[POS_OR_ZERO]] ; %sign = ashr i8 %x, 7 @@ -1477,9 +1476,8 @@ define i8 @not_ashr_bitwidth_mask(i8 %x, i8 %y) { define <2 x i8> @not_ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) { ; CHECK-LABEL: @not_ashr_bitwidth_mask_vec_commute( ; CHECK-NEXT: [[Y:%.*]] = mul <2 x i8> [[PY:%.*]], <i8 42, i8 2> -; CHECK-NEXT: [[SIGN:%.*]] = ashr <2 x i8> [[X:%.*]], <i8 7, i8 7> -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i8> [[SIGN]], <i8 -1, i8 -1> -; CHECK-NEXT: [[POS_OR_ZERO:%.*]] = and <2 x i8> [[Y]], [[NOT]] +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <2 x i8> [[X:%.*]], zeroinitializer +; CHECK-NEXT: [[POS_OR_ZERO:%.*]] = select <2 x i1> [[ISNEG]], <2 x i8> zeroinitializer, <2 x i8> [[Y]] ; CHECK-NEXT: ret <2 x i8> [[POS_OR_ZERO]] ; %y = mul <2 x i8> %py, <i8 42, i8 2> ; thwart complexity-based ordering @@ -1489,12 +1487,14 @@ define <2 x i8> @not_ashr_bitwidth_mask_vec_commute(<2 x i8> %x, <2 x i8> %py) { ret <2 x i8> 
%pos_or_zero } +; extra use of shift is ok + define i8 @not_ashr_bitwidth_mask_use1(i8 %x, i8 %y) { ; CHECK-LABEL: @not_ashr_bitwidth_mask_use1( ; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7 ; CHECK-NEXT: call void @use8(i8 [[SIGN]]) -; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[SIGN]], -1 -; CHECK-NEXT: [[R:%.*]] = and i8 [[NOT]], [[Y:%.*]] +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i8 [[X]], 0 +; CHECK-NEXT: [[R:%.*]] = select i1 [[ISNEG]], i8 0, i8 [[Y:%.*]] ; CHECK-NEXT: ret i8 [[R]] ; %sign = ashr i8 %x, 7 @@ -1504,6 +1504,8 @@ define i8 @not_ashr_bitwidth_mask_use1(i8 %x, i8 %y) { ret i8 %r } +; negative test - extra use + define i8 @not_ashr_bitwidth_mask_use2(i8 %x, i8 %y) { ; CHECK-LABEL: @not_ashr_bitwidth_mask_use2( ; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 7 @@ -1519,6 +1521,8 @@ define i8 @not_ashr_bitwidth_mask_use2(i8 %x, i8 %y) { ret i8 %r } +; negative test - wrong shift amount + define i8 @not_ashr_not_bitwidth_mask(i8 %x, i8 %y) { ; CHECK-LABEL: @not_ashr_not_bitwidth_mask( ; CHECK-NEXT: [[SIGN:%.*]] = ashr i8 [[X:%.*]], 6 @@ -1532,6 +1536,8 @@ define i8 @not_ashr_not_bitwidth_mask(i8 %x, i8 %y) { ret i8 %r } +; negative test - wrong shift opcode + define i8 @not_lshr_bitwidth_mask(i8 %x, i8 %y) { ; CHECK-LABEL: @not_lshr_bitwidth_mask( ; CHECK-NEXT: [[SIGN:%.*]] = lshr i8 [[X:%.*]], 7 diff --git a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll index 43e97cca05bab..2d05c264db825 100644 --- a/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll +++ b/llvm/test/Transforms/InstCombine/logical-select-inseltpoison.ll @@ -582,9 +582,12 @@ define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) { ; CHECK-LABEL: @allSignBits( -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt i32 [[COND:%.*]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = select i1 [[DOTNOT]], i32 [[FVAL:%.*]], i32 [[TVAL:%.*]] -; 
CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt i32 [[COND:%.*]], 0 +; CHECK-NEXT: [[A1:%.*]] = select i1 [[ISNEG1]], i32 [[TVAL:%.*]], i32 0 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[COND]], 0 +; CHECK-NEXT: [[A2:%.*]] = select i1 [[ISNEG]], i32 0, i32 [[FVAL:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = or i32 [[A1]], [[A2]] +; CHECK-NEXT: ret i32 [[SEL]] ; %bitmask = ashr i32 %cond, 31 %not_bitmask = xor i32 %bitmask, -1 @@ -596,9 +599,12 @@ define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) { define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) { ; CHECK-LABEL: @allSignBits_vec( -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1> -; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]] -; CHECK-NEXT: ret <4 x i8> [[TMP1]] +; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt <4 x i8> [[COND:%.*]], zeroinitializer +; CHECK-NEXT: [[A1:%.*]] = select <4 x i1> [[ISNEG1]], <4 x i8> [[TVAL:%.*]], <4 x i8> zeroinitializer +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <4 x i8> [[COND]], zeroinitializer +; CHECK-NEXT: [[A2:%.*]] = select <4 x i1> [[ISNEG]], <4 x i8> zeroinitializer, <4 x i8> [[FVAL:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = or <4 x i8> [[A2]], [[A1]] +; CHECK-NEXT: ret <4 x i8> [[SEL]] ; %bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7> %not_bitmask = xor <4 x i8> %bitmask, <i8 -1, i8 -1, i8 -1, i8 -1> diff --git a/llvm/test/Transforms/InstCombine/logical-select.ll b/llvm/test/Transforms/InstCombine/logical-select.ll index 5093f20cba0c4..c23f851bf8da7 100644 --- a/llvm/test/Transforms/InstCombine/logical-select.ll +++ b/llvm/test/Transforms/InstCombine/logical-select.ll @@ -583,9 +583,12 @@ define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) { ; CHECK-LABEL: @allSignBits( -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt i32 [[COND:%.*]], -1 -; CHECK-NEXT: 
[[TMP1:%.*]] = select i1 [[DOTNOT]], i32 [[FVAL:%.*]], i32 [[TVAL:%.*]] -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt i32 [[COND:%.*]], 0 +; CHECK-NEXT: [[A1:%.*]] = select i1 [[ISNEG1]], i32 [[TVAL:%.*]], i32 0 +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt i32 [[COND]], 0 +; CHECK-NEXT: [[A2:%.*]] = select i1 [[ISNEG]], i32 0, i32 [[FVAL:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = or i32 [[A1]], [[A2]] +; CHECK-NEXT: ret i32 [[SEL]] ; %bitmask = ashr i32 %cond, 31 %not_bitmask = xor i32 %bitmask, -1 @@ -597,9 +600,12 @@ define i32 @allSignBits(i32 %cond, i32 %tval, i32 %fval) { define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) { ; CHECK-LABEL: @allSignBits_vec( -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1> -; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]] -; CHECK-NEXT: ret <4 x i8> [[TMP1]] +; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt <4 x i8> [[COND:%.*]], zeroinitializer +; CHECK-NEXT: [[A1:%.*]] = select <4 x i1> [[ISNEG1]], <4 x i8> [[TVAL:%.*]], <4 x i8> zeroinitializer +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <4 x i8> [[COND]], zeroinitializer +; CHECK-NEXT: [[A2:%.*]] = select <4 x i1> [[ISNEG]], <4 x i8> zeroinitializer, <4 x i8> [[FVAL:%.*]] +; CHECK-NEXT: [[SEL:%.*]] = or <4 x i8> [[A2]], [[A1]] +; CHECK-NEXT: ret <4 x i8> [[SEL]] ; %bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7> %not_bitmask = xor <4 x i8> %bitmask, <i8 -1, i8 -1, i8 -1, i8 -1> diff --git a/llvm/test/Transforms/InstCombine/vec_sext.ll b/llvm/test/Transforms/InstCombine/vec_sext.ll index 39bd40874160b..93107e38365ee 100644 --- a/llvm/test/Transforms/InstCombine/vec_sext.ll +++ b/llvm/test/Transforms/InstCombine/vec_sext.ll @@ -4,9 +4,12 @@ define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @vec_select( ; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x 
i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1> -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[SUB]] -; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer +; CHECK-NEXT: [[T2:%.*]] = select <4 x i1> [[ISNEG]], <4 x i32> zeroinitializer, <4 x i32> [[A]] +; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt <4 x i32> [[B]], zeroinitializer +; CHECK-NEXT: [[T3:%.*]] = select <4 x i1> [[ISNEG1]], <4 x i32> [[SUB]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[COND:%.*]] = or <4 x i32> [[T2]], [[T3]] +; CHECK-NEXT: ret <4 x i32> [[COND]] ; %cmp = icmp slt <4 x i32> %b, zeroinitializer %sext = sext <4 x i1> %cmp to <4 x i32> @@ -23,9 +26,12 @@ define <4 x i32> @vec_select(<4 x i32> %a, <4 x i32> %b) { define <4 x i32> @vec_select_alternate_sign_bit_test(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: @vec_select_alternate_sign_bit_test( ; CHECK-NEXT: [[SUB:%.*]] = sub nsw <4 x i32> zeroinitializer, [[A:%.*]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> [[B:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1> -; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[SUB]], <4 x i32> [[A]] -; CHECK-NEXT: ret <4 x i32> [[TMP2]] +; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt <4 x i32> [[B:%.*]], zeroinitializer +; CHECK-NEXT: [[T2:%.*]] = select <4 x i1> [[ISNEG1]], <4 x i32> [[A]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <4 x i32> [[B]], zeroinitializer +; CHECK-NEXT: [[T3:%.*]] = select <4 x i1> [[ISNEG]], <4 x i32> zeroinitializer, <4 x i32> [[SUB]] +; CHECK-NEXT: [[COND:%.*]] = or <4 x i32> [[T2]], [[T3]] +; CHECK-NEXT: ret <4 x i32> [[COND]] ; %cmp = icmp sgt <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1> %sext = sext <4 x i1> %cmp to <4 x i32> diff --git a/llvm/test/Transforms/PhaseOrdering/vector-select.ll b/llvm/test/Transforms/PhaseOrdering/vector-select.ll index 3533c9e846a94..84876b24c5d9b 100644 --- a/llvm/test/Transforms/PhaseOrdering/vector-select.ll +++ 
b/llvm/test/Transforms/PhaseOrdering/vector-select.ll @@ -3,16 +3,9 @@ define <3 x float> @PR52631(<3 x float> %a, <3 x float> %b, <3 x i32> %c) { ; CHECK-LABEL: @PR52631( -; CHECK-NEXT: [[ASTYPE:%.*]] = bitcast <3 x float> [[B:%.*]] to <3 x i32> -; CHECK-NEXT: [[ISNEG:%.*]] = icmp slt <3 x i32> [[C:%.*]], zeroinitializer -; CHECK-NEXT: [[AND:%.*]] = select <3 x i1> [[ISNEG]], <3 x i32> [[ASTYPE]], <3 x i32> zeroinitializer -; CHECK-NEXT: [[C_LOBIT2:%.*]] = ashr <3 x i32> [[C]], <i32 31, i32 31, i32 31> -; CHECK-NEXT: [[C_LOBIT2_NOT:%.*]] = xor <3 x i32> [[C_LOBIT2]], <i32 -1, i32 -1, i32 -1> -; CHECK-NEXT: [[ASTYPE28:%.*]] = bitcast <3 x float> [[A:%.*]] to <3 x i32> -; CHECK-NEXT: [[AND29:%.*]] = and <3 x i32> [[C_LOBIT2_NOT]], [[ASTYPE28]] -; CHECK-NEXT: [[OR:%.*]] = or <3 x i32> [[AND29]], [[AND]] -; CHECK-NEXT: [[ASTYPE33:%.*]] = bitcast <3 x i32> [[OR]] to <3 x float> -; CHECK-NEXT: ret <3 x float> [[ASTYPE33]] +; CHECK-NEXT: [[ISNEG3:%.*]] = icmp slt <3 x i32> [[C:%.*]], zeroinitializer +; CHECK-NEXT: [[OR_V:%.*]] = select <3 x i1> [[ISNEG3]], <3 x float> [[B:%.*]], <3 x float> [[A:%.*]] +; CHECK-NEXT: ret <3 x float> [[OR_V]] ; %a.addr = alloca <3 x float>, align 16 %b.addr = alloca <3 x float>, align 16 @@ -85,9 +78,9 @@ define <3 x float> @PR52631(<3 x float> %a, <3 x float> %b, <3 x i32> %c) { define <4 x i8> @allSignBits_vec(<4 x i8> %cond, <4 x i8> %tval, <4 x i8> %fval) { ; CHECK-LABEL: @allSignBits_vec( -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp sgt <4 x i8> [[COND:%.*]], <i8 -1, i8 -1, i8 -1, i8 -1> -; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> [[DOTNOT]], <4 x i8> [[FVAL:%.*]], <4 x i8> [[TVAL:%.*]] -; CHECK-NEXT: ret <4 x i8> [[TMP1]] +; CHECK-NEXT: [[ISNEG1:%.*]] = icmp slt <4 x i8> [[COND:%.*]], zeroinitializer +; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[ISNEG1]], <4 x i8> [[TVAL:%.*]], <4 x i8> [[FVAL:%.*]] +; CHECK-NEXT: ret <4 x i8> [[SEL]] ; %bitmask = ashr <4 x i8> %cond, <i8 7, i8 7, i8 7, i8 7> %not_bitmask = xor <4 x i8> %bitmask, <i8 -1, 
i8 -1, i8 -1, i8 -1> _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits