Author: Bradley Smith Date: 2020-12-16T13:39:04Z New Revision: e0b9c5df260c9d778be3b6ee56e6d0ffd7af9be7
URL: https://github.com/llvm/llvm-project/commit/e0b9c5df260c9d778be3b6ee56e6d0ffd7af9be7 DIFF: https://github.com/llvm/llvm-project/commit/e0b9c5df260c9d778be3b6ee56e6d0ffd7af9be7.diff LOG: [CostModel] Add costs for llvm.experimental.vector.{extract,insert} intrinsics Adds cost model support for the new llvm.experimental.vector.{extract,insert} intrinsics, using the existing getExtractSubvectorOverhead and getInsertSubvectorOverhead functions for shuffles. Previously this case would trigger an assertion failure. Differential Revision: https://reviews.llvm.org/D93043 Added: llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll Modified: llvm/include/llvm/CodeGen/BasicTTIImpl.h Removed: ################################################################################ diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 7dca7cd291c9..02f1b73226fc 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -114,12 +114,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { /// Estimate a cost of subvector extraction as a sequence of extract and /// insert operations. - unsigned getExtractSubvectorOverhead(FixedVectorType *VTy, int Index, + unsigned getExtractSubvectorOverhead(VectorType *VTy, int Index, FixedVectorType *SubVTy) { assert(VTy && SubVTy && "Can only extract subvectors from vectors"); int NumSubElts = SubVTy->getNumElements(); - assert((Index + NumSubElts) <= (int)VTy->getNumElements() && + assert((!isa<FixedVectorType>(VTy) || + (Index + NumSubElts) <= + (int)cast<FixedVectorType>(VTy)->getNumElements()) && "SK_ExtractSubvector index out of range"); unsigned Cost = 0; @@ -137,12 +139,14 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { /// Estimate a cost of subvector insertion as a sequence of extract and /// insert operations. 
- unsigned getInsertSubvectorOverhead(FixedVectorType *VTy, int Index, + unsigned getInsertSubvectorOverhead(VectorType *VTy, int Index, FixedVectorType *SubVTy) { assert(VTy && SubVTy && "Can only insert subvectors into vectors"); int NumSubElts = SubVTy->getNumElements(); - assert((Index + NumSubElts) <= (int)VTy->getNumElements() && + assert((!isa<FixedVectorType>(VTy) || + (Index + NumSubElts) <= + (int)cast<FixedVectorType>(VTy)->getNumElements()) && "SK_InsertSubvector index out of range"); unsigned Cost = 0; @@ -723,10 +727,10 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { case TTI::SK_PermuteTwoSrc: return getPermuteShuffleOverhead(cast<FixedVectorType>(Tp)); case TTI::SK_ExtractSubvector: - return getExtractSubvectorOverhead(cast<FixedVectorType>(Tp), Index, + return getExtractSubvectorOverhead(Tp, Index, cast<FixedVectorType>(SubTp)); case TTI::SK_InsertSubvector: - return getInsertSubvectorOverhead(cast<FixedVectorType>(Tp), Index, + return getInsertSubvectorOverhead(Tp, Index, cast<FixedVectorType>(SubTp)); } llvm_unreachable("Unknown TTI::ShuffleKind"); @@ -1255,6 +1259,26 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0], VarMask, Alignment, CostKind, I); } + case Intrinsic::experimental_vector_extract: { + // FIXME: Handle case where a scalable vector is extracted from a scalable + // vector + if (isa<ScalableVectorType>(RetTy)) + return BaseT::getIntrinsicInstrCost(ICA, CostKind); + unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue(); + return thisT()->getShuffleCost(TTI::SK_ExtractSubvector, + cast<VectorType>(Args[0]->getType()), + Index, cast<VectorType>(RetTy)); + } + case Intrinsic::experimental_vector_insert: { + // FIXME: Handle case where a scalable vector is inserted into a scalable + // vector + if (isa<ScalableVectorType>(Args[1]->getType())) + return BaseT::getIntrinsicInstrCost(ICA, CostKind); + unsigned Index 
= cast<ConstantInt>(Args[2])->getZExtValue(); + return thisT()->getShuffleCost( + TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), Index, + cast<VectorType>(Args[1]->getType())); + } case Intrinsic::vector_reduce_add: case Intrinsic::vector_reduce_mul: case Intrinsic::vector_reduce_and: diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll new file mode 100644 index 000000000000..9523e17cb5de --- /dev/null +++ b/llvm/test/Analysis/CostModel/AArch64/sve-getIntrinsicInstrCost-vec-insert-extract.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -cost-model -analyze -mtriple=aarch64-linux-gnu -mattr=+sve | FileCheck %s + +define <16 x i32> @extract_cost(<vscale x 4 x i32> %vec) { +; CHECK-LABEL: 'extract_cost' +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ret = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %ret + + %ret = call <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32> %vec, i64 0) + ret <16 x i32> %ret +} + +define <vscale x 4 x i32> @insert_cost(<vscale x 4 x i32> %vec, <16 x i32> %subVec) { +; CHECK-LABEL: 'insert_cost' +; CHECK-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %ret = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %vec, <16 x i32> %subVec, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 4 x i32> %ret + + %ret = call <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32> %vec, <16 x i32> %subVec, i64 0) + ret <vscale x 4 x i32> %ret +} + +define <vscale x 4 x i32> @extract_cost_scalable(<vscale x 16 x i32> %vec) { +; CHECK-LABEL: 'extract_cost_scalable' +; CHECK-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %ret = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 4 x i32> %ret + + %ret = call <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %vec, i64 0) + ret <vscale x 4 x i32> %ret +} + +define <vscale x 16 x i32> @insert_cost_scalable(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec) { +; CHECK-LABEL: 'insert_cost_scalable' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ret = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec, i64 0) +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <vscale x 16 x i32> %ret + + %ret = call <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> %vec, <vscale x 4 x i32> %subVec, i64 0) + ret <vscale x 16 x i32> %ret +} + +declare <16 x i32> @llvm.experimental.vector.extract.v16i32.nxv4i32(<vscale x 4 x i32>, i64) +declare <vscale x 4 x i32> @llvm.experimental.vector.insert.nxv4i32.v16i32(<vscale x 4 x i32>, <16 x i32>, i64) +declare <vscale x 4 x i32> @llvm.experimental.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32>, i64) +declare <vscale x 16 x i32> @llvm.experimental.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32>, <vscale x 4 x i32>, i64) _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits