llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-aarch64 Author: Amara Emerson (aemerson) <details> <summary>Changes</summary> This is essentially a port of TargetLowering::scalarizeVectorStore(), which is used for the case where we have something like a store of <8 x s8> truncating to <8 x s1> in memory. The naive lowering extracts each vector element, truncates it to the memory element type, and then shifts and ORs the results together into a single integer scalar, which is what gets stored. AArch64's SelectionDAG implementation has additional optimizations that improve on this; those can be ported to GlobalISel later. --- Patch is 62.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121169.diff 5 Files Affected: - (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+33-2) - (modified) llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp (+2-1) - (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir (+54-5) - (modified) llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir (+61-7) - (modified) llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll (+928-300) ``````````diff diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 6e87acef6ca7d3..7ffd00bf4cd689 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -4142,9 +4142,40 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) { } if (MemTy.isVector()) { - // TODO: Handle vector trunc stores - if (MemTy != SrcTy) + LLT MemScalarTy = MemTy.getElementType(); + if (MemTy != SrcTy) { + if (!MemScalarTy.isByteSized()) { + // We need to build an integer scalar of the vector bit pattern. + // It's not legal for us to add padding when storing a vector. 
+ unsigned NumBits = MemTy.getSizeInBits(); + LLT IntTy = LLT::scalar(NumBits); + auto CurrVal = MIRBuilder.buildConstant(IntTy, 0); + LLT IdxTy = getLLTForMVT(TLI.getVectorIdxTy(MF.getDataLayout())); + + for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) { + auto Elt = MIRBuilder.buildExtractVectorElement( + SrcTy.getElementType(), SrcReg, + MIRBuilder.buildConstant(IdxTy, I)); + auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt); + auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc); + unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian() + ? (MemTy.getNumElements() - 1) - I + : I; + auto ShiftAmt = MIRBuilder.buildConstant( + IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits()); + auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt); + CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted); + } + auto PtrInfo = MMO.getPointerInfo(); + auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy); + MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO); + StoreMI.eraseFromParent(); + return Legalized; + } + + // FIXME: implement simple scalarization. return UnableToLegalize; + } // TODO: We can do better than scalarizing the vector and at least split it // in half. 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 16b0587e799c7b..062e7ace5e724d 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -474,7 +474,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) }) .customIf(IsPtrVecPred) .scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0) - .scalarizeIf(scalarOrEltWiderThan(0, 64), 0); + .scalarizeIf(scalarOrEltWiderThan(0, 64), 0) + .lower(); getActionDefinitionsBuilder(G_INDEXED_STORE) // Idx 0 == Ptr, Idx 1 == Val diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir index be5699dab5b6de..e8e1cd351c56e3 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -# RUN: llc -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=2 %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s --- name: scalar_to_oversize_vector tracksRegLiveness: true @@ -48,17 +48,66 @@ body: | G_BR %bb.2 ... -# This test currently is expected to fall back after reaching truncstore of <8 x s8> as <8 x s1>. 
--- name: boolean_vector_to_scalar tracksRegLiveness: true body: | bb.1: ; CHECK-LABEL: name: boolean_vector_to_scalar - ; CHECK: %vec:_(<8 x s1>) = G_IMPLICIT_DEF + ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s8>) = G_IMPLICIT_DEF ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s8>) = G_ANYEXT %vec(<8 x s1>) - ; CHECK-NEXT: G_STORE [[ANYEXT]](<8 x s8>), [[FRAME_INDEX]](p0) :: (store (<8 x s1>) into %stack.0) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C]](s64) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C3]](s64) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC1]](s8) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s64) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C4]](s64) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC2]](s8) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C4]](s64) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C5]](s64) + ; CHECK-NEXT: 
[[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC3]](s8) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C1]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s64) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C6]](s64) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC4]](s8) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C1]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C6]](s64) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 + ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C7]](s64) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC5]](s8) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C1]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s64) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C8]](s64) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC6]](s8) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C1]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C8]](s64) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 + ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C9]](s64) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC7]](s8) + ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C1]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s64) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC 
[[OR7]](s32) + ; CHECK-NEXT: G_STORE [[TRUNC]](s8), [[FRAME_INDEX]](p0) :: (store (s8) into %stack.0) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s8) from %stack.0) ; CHECK-NEXT: %bc:_(s8) = COPY [[LOAD]](s8) ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %bc(s8) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir index de70f89461780b..1df6297e363833 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 -# RUN: llc -O0 -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=2 %s -o - | FileCheck %s -# This test currently is expected to fall back after reaching truncstore of <8 x s8> as <8 x s1>. +# RUN: llc -O0 -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s --- name: store_8xs1 tracksRegLiveness: true @@ -13,12 +12,67 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1 ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 - ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(slt), [[CONCAT_VECTORS]](<8 x s32>), [[BUILD_VECTOR]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s8>) = G_ANYEXT [[ICMP]](<8 x s1>) - ; CHECK-NEXT: G_STORE [[ANYEXT]](<8 x s8>), %ptr(p0) :: (store (<8 x s1>)) + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) + ; 
CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(slt), [[COPY]](<4 x s32>), [[BUILD_VECTOR]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(slt), [[COPY1]](<4 x s32>), [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>) + ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>) + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C1]](s64) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]] + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C3]](s64) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC1]](s8) + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]] + ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s64) + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 + ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C4]](s64) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC2]](s8) + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]] + ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], 
[[C4]](s64) + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]] + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C5]](s64) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC3]](s8) + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]] + ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s64) + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]] + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C6]](s64) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC4]](s8) + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C2]] + ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C6]](s64) + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]] + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 + ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C7]](s64) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC5]](s8) + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C2]] + ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s64) + ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]] + ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6 + ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C8]](s64) + ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC6]](s8) + ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C2]] + ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C8]](s64) + ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]] + ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7 + ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C9]](s64) + ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC7]](s8) + ; 
CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C2]] + ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s64) + ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]] + ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR7]](s32) + ; CHECK-NEXT: G_STORE [[TRUNC3]](s8), %ptr(p0) :: (store (s8)) ; CHECK-NEXT: RET_ReallyLR %1:_(<4 x s32>) = COPY $q0 %2:_(<4 x s32>) = COPY $q1 diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll index 7f2eefe5ed72f6..496f7ebf300e50 100644 --- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll +++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll @@ -1,26 +1,100 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,SDAG +; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -global-isel -global-isel-abort=2 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,GISEL ; Basic tests from input vector to bitmask ; IR generated from clang for: ; __builtin_convertvector + reinterpret_cast<uint16&> +; GISEL: warning: Instruction selection used fallback path for convert_to_bitmask4 +; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask2 +; GISEL-NEXT: warning: Instruction selection used fallback path for clang_builtins_undef_concat_convert_to_bitmask4 +; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_no_compare +; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_with_compare_chain +; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_with_trunc_in_chain +; GISEL-NEXT: warning: Instruction 
selection used fallback path for convert_to_bitmask_with_unknown_type_in_long_chain +; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_with_different_types_in_chain +; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_2xi32 +; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_4xi8 +; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_8xi2 +; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_float +; GISEL-NEXT: warning: Instruction selection used fallback path for convert_legalized_illegal_element_size +; GISEL-NEXT: warning: Instruction selection used fallback path for no_direct_convert_for_bad_concat +; GISEL-NEXT: warning: Instruction selection used fallback path for no_combine_illegal_num_elements + define i16 @convert_to_bitmask16(<16 x i8> %vec) { ; Bits used in mask -; CHECK-LABEL: convert_to_bitmask16: -; CHECK: ; %bb.0: -; CHECK-NEXT: Lloh0: -; CHECK-NEXT: adrp x8, lCPI0_0@PAGE -; CHECK-NEXT: cmeq.16b v0, v0, #0 -; CHECK-NEXT: Lloh1: -; CHECK-NEXT: ldr q1, [x8, lCPI0_0@PAGEOFF] -; CHECK-NEXT: bic.16b v0, v1, v0 -; CHECK-NEXT: ext.16b v1, v0, v0, #8 -; CHECK-NEXT: zip1.16b v0, v0, v1 -; CHECK-NEXT: addv.8h h0, v0 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret -; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1 +; SDAG-LABEL: convert_to_bitmask16: +; SDAG: ; %bb.0: +; SDAG-NEXT: Lloh0: +; SDAG-NEXT: adrp x8, lCPI0_0@PAGE +; SDAG-NEXT: cmeq.16b v0, v0, #0 +; SDAG-NEXT: Lloh1: +; SDAG-NEXT: ldr q1, [x8, lCPI0_0@PAGEOFF] +; SDAG-NEXT: bic.16b v0, v1, v0 +; SDAG-NEXT: ext.16b v1, v0, v0, #8 +; SDAG-NEXT: zip1.16b v0, v0, v1 +; SDAG-NEXT: addv.8h h0, v0 +; SDAG-NEXT: fmov w0, s0 +; SDAG-NEXT: ret +; SDAG-NEXT: .loh AdrpLdr Lloh0, Lloh1 +; +; GISEL-LABEL: convert_to_bitmask16: +; GISEL: ; %bb.0: +; GISEL-NEXT: sub sp, sp, #16 +; GISEL-NEXT: .cfi_def_cfa_offset 16 +; GISEL-NEXT: cmeq.16b v0, v0, #0 +; 
GISEL-NEXT: mvn.16b v0, v0 +; GISEL-NEXT: umov.b w8, v0[1] +; GISEL-NEXT: umov.b w9, v0[0] +; GISEL-NEXT: umov.b w10, v0[2] +; GISEL-NEXT: umov.b w11, v0[3] +; GISEL-NEXT: and w8, w8, #0x1 +; GISEL-NEXT: bfi w9, w8, #1, #31 +; GISEL-NEXT: and w8, w10, #0x1 +; GISEL-NEXT: umov.b w10, v0[4] +; GISEL-NEXT: orr w8, w9, w8, lsl #2 +; GISEL-NEXT: and w9, w11, #0x1 +; GISEL-NEXT: umov.b w11, v0[5] +; GISEL-NEXT: orr w8, w8, w9, lsl #3 +; GISEL-NEXT: and w9, w10, #0x1 +; GISEL-NEXT: umov.b w10, v0[6] +; GISEL-NEXT: orr w8, w8, w9, lsl #4 +; GISEL-NEXT: and w9, w11, #0x1 +; GISEL-NEXT: umov.b w11, v0[7] +; GISEL-NEXT: orr w8, w8, w9, lsl #5 +; GISEL-NEXT: and w9, w10, #0x1 +; GISEL-NEXT: umov.b w10, v0[8] +; GISEL-NEXT: orr w8, w8, w9, lsl #6 +; GISEL-NEXT: and w9, w11, #0x1 +; GISEL-NEXT: umov.b w11, v0[9] +; GISEL-NEXT: orr w8, w8, w9, lsl #7 +; GISEL-NEXT: and w9, w10, #0x1 +; GISEL-NEXT: umov.b w10, v0[10] +; GISEL-NEXT: orr w8, w8, w9, lsl #8 +; GISEL-NEXT: and w9, w11, #0x1 +; GISEL-NEXT: umov.b w11, v0[11] +; GISEL-NEXT: orr w8, w8, w9, lsl #9 +; GISEL-NEXT: and w9, w10, #0x1 +; GISEL-NEXT: umov.b w10, v0[12] +; GISEL-NEXT: orr w8, w8, w9, lsl #10 +; GISEL-NEXT: and w9, w11, #0x1 +; GISEL-NEXT: umov.b w11, v0[13] +; GISEL-NEXT: orr w8, w8, w9, lsl #11 +; GISEL-NEXT: and w9, w10, #0x1 +; GISEL-NEXT: umov.b w10, v0[14] +; GISEL-NEXT: orr w8, w8, w9, lsl #12 +; GISEL-NEXT: and w9, w11, #0x1 +; GISEL-NEXT: umov.b w11, v0[15] +; GISEL-NEXT: orr w8, w8, w9, lsl #13 +; GISEL-NEXT: and w9, w10, #0x1 +; GISEL-NEXT: orr w8, w8, w9, lsl #14 +; GISEL-NEXT: and w9, w11, #0x1 +; GISEL-NEXT: orr w8, w8, w9, lsl #15 +; GISEL-NEXT: strh w8, [sp, #14] +; GISEL-NEXT: and w0, w8, #0xffff +; GISEL-NEXT: add sp, sp, #16 +; GISEL-NEXT: ret ; Actual conversion @@ -30,19 +104,53 @@ define i16 @convert_to_bitmask16(<16 x i8> %vec) { } define i16 @convert_to_bitmask8(<8 x i16> %vec) { -; CHECK-LABEL: convert_to_bitmask8: -; CHECK: ; %bb.0: -; CHECK-NEXT: Lloh2: -; CHECK-NEXT: adrp x8, 
lCPI1_0@PAGE -; CHECK-NEXT: cme... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/121169 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits