https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/102641
Backport 0b745a10843fc85e579bbf459f78b3f43e7ab309 Requested by: @davemgreen >From f83753954d8f94dc7573e210e7fb2300803c0012 Mon Sep 17 00:00:00 2001 From: David Green <david.gr...@arm.com> Date: Fri, 9 Aug 2024 14:25:07 +0100 Subject: [PATCH] [AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations. (#102105) The code-generator is currently not able to handle scalable vectors of <vscale x 1 x eltty>. The usual "fix" for this until it is supported is to mark the costs of loads/stores with an invalid cost, preventing the vectorizer from vectorizing at those factors. But on rare occasions loops do not contain load/stores, only reductions. So whilst this is still unsupported return an invalid cost to avoid selecting vscale x 1 VFs. The cost of a reduction is not currently used by the vectorizer so this adds the cost to the add/mul/and/or/xor or min/max that should feed the reduction. It includes reduction costs too, for completeness. This change will be removed when code-generation for these types is sufficiently reliable. Fixes #99760 (cherry picked from commit 0b745a10843fc85e579bbf459f78b3f43e7ab309) --- .../AArch64/AArch64TargetTransformInfo.cpp | 32 +++++++++++++++++ .../CostModel/AArch64/arith-fp-sve.ll | 4 +++ .../Analysis/CostModel/AArch64/cttz_elts.ll | 2 ++ .../Analysis/CostModel/AArch64/sve-arith.ll | 21 +++++++++++ .../CostModel/AArch64/sve-intrinsics.ll | 36 +++++++++++++++++++ .../Analysis/CostModel/AArch64/sve-min-max.ll | 12 +++++++ 6 files changed, 107 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 45148449dfb821..6a3efd587ac3f3 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -540,7 +540,15 @@ static InstructionCost getHistogramCost(const IntrinsicCostAttributes &ICA) { InstructionCost AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. auto *RetTy = ICA.getReturnType(); + if (auto *VTy = dyn_cast<ScalableVectorType>(RetTy)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + switch (ICA.getID()) { case Intrinsic::experimental_vector_histogram_add: if (!ST->hasSVE2()) @@ -3018,6 +3026,14 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost( ArrayRef<const Value *> Args, const Instruction *CxtI) { + // The code-generator is currently not able to handle scalable vectors + // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast<ScalableVectorType>(Ty)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + // TODO: Handle more cost kinds. if (CostKind != TTI::TCK_RecipThroughput) return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, @@ -3792,6 +3808,14 @@ InstructionCost AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast<ScalableVectorType>(Ty)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty); if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16()) @@ -3836,6 +3860,14 @@ InstructionCost AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, std::optional<FastMathFlags> FMF, TTI::TargetCostKind CostKind) { + // The code-generator is currently not able to handle scalable vectors + // of <vscale x 1 x eltty> yet, so return an invalid cost to avoid selecting + // it. This change will be removed when code-generation for these types is + // sufficiently reliable. + if (auto *VTy = dyn_cast<ScalableVectorType>(ValTy)) + if (VTy->getElementCount() == ElementCount::getScalable(1)) + return InstructionCost::getInvalid(); + if (TTI::requiresOrderedReduction(FMF)) { if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) { InstructionCost BaseCost = diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll index 18a1c31c03f748..770d3087b07522 100644 --- a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll @@ -8,6 +8,7 @@ define void @fadd() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fadd <vscale x 4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fadd <vscale x 8 x half> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fadd <vscale x 16 x half> undef, undef +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1F32 = fadd <vscale x 1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fadd <vscale x 2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fadd <vscale x 4 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fadd <vscale x 8 x float> undef, undef @@ -19,6 +20,7 @@ define void @fadd() { %V8F16 = fadd <vscale x 8 x half> undef, undef %V16F16 = fadd <vscale x 16 x half> undef, undef + %V1F32 = fadd <vscale x 1 x float> undef, undef %V2F32 = fadd <vscale x 2 x float> undef, undef %V4F32 = fadd <vscale x 4 x float> undef, undef %V8F32 = fadd <vscale x 8 x float> undef, undef @@ -34,6 +36,7 @@ define void @fsub() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F16 = fsub <vscale x 4 x half> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F16 = fsub <vscale x 8 x half> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F16 = fsub <vscale x 16 x half> undef, undef +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1F32 = fsub <vscale x 1 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fsub <vscale x 2 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fsub <vscale x 4 x float> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fsub <vscale x 8 x float> undef, undef @@ -45,6 +48,7 @@ define void @fsub() { %V8F16 = fsub <vscale x 8 x half> undef, undef %V16F16 = fsub <vscale x 16 x half> undef, undef + %V1F32 = fsub <vscale x 1 x float> undef, undef %V2F32 = fsub <vscale x 2 x float> undef, undef %V4F32 = fsub <vscale x 4 x float> undef, undef %V8F32 = fsub <vscale x 8 x float> undef, undef diff --git a/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll b/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll index e1a9ee114d2613..98d5bd5bd13f43 100644 --- a/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll +++ b/llvm/test/Analysis/CostModel/AArch64/cttz_elts.ll @@ -3,6 +3,7 @@ define void @foo_no_vscale_range() { ; CHECK-LABEL: 'foo_no_vscale_range' +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %res.i64.nxv1i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv1i1(<vscale x 1 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 true) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 true) @@ -45,6 +46,7 @@ define void @foo_no_vscale_range() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res.i32.v32i1.nzip = call i32 @llvm.experimental.cttz.elts.i32.v32i1(<32 x i1> undef, i1 false) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + %res.i64.nxv1i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv1i1(<vscale x 1 x i1> undef, i1 true) %res.i64.nxv2i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1(<vscale x 2 x i1> undef, i1 true) %res.i64.nxv4i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> undef, i1 true) %res.i64.nxv8i1.zip = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> undef, i1 true) diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-arith.ll b/llvm/test/Analysis/CostModel/AArch64/sve-arith.ll index f4dfea4cce349d..46450e68f40e23 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-arith.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-arith.ll @@ -43,6 +43,7 @@ define void @scalable_mul() #0 { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv8i16 = mul <vscale x 8 x i16> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv4i32 = mul <vscale x 4 x i32> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mul_nxv2i64 = mul <vscale x 2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv1i64 = mul <vscale x 1 x i64> undef, undef ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; entry: @@ -50,6 +51,26 @@ entry: %mul_nxv8i16 = mul <vscale x 8 x i16> undef, undef %mul_nxv4i32 = mul <vscale x 4 x i32> undef, undef %mul_nxv2i64 = mul <vscale x 2 x i64> undef, undef + %mul_nxv1i64 = mul <vscale x 1 x i64> undef, undef + + ret void +} + +define void @scalable_add() #0 { +; CHECK-LABEL: 'scalable_add' +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_nxv16i8 = add <vscale x 16 x i8> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_nxv8i16 = add <vscale x 8 x i16> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_nxv4i32 = add <vscale x 4 x i32> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %add_nxv2i64 = add <vscale x 2 x i64> undef, undef +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %add_nxv1i64 = add <vscale x 1 x i64> undef, undef +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void +; +entry: + %add_nxv16i8 = add <vscale x 16 x i8> undef, undef + %add_nxv8i16 = add <vscale x 8 x i16> undef, undef + %add_nxv4i32 = add <vscale x 4 x i32> undef, undef + %add_nxv2i64 = add <vscale x 2 x i64> undef, undef + %add_nxv1i64 = add <vscale x 1 x i64> undef, undef ret void } diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll index 1993023c91e261..a3e260d211c431 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll @@ -116,82 +116,118 @@ declare <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float>, i64 define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale x 4 x float> %v2, <vscale x 4 x double> %v3) { ; CHECK-LABEL: 'reductions' +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2) ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; TYPE_BASED_ONLY-LABEL: 'reductions' +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3) +; TYPE_BASED_ONLY-NEXT: Cost Model: Invalid cost for instruction: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef) %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0) %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1) + %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef) %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0) %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1) + %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef) %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0) %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1) + %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> undef) %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0) %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1) + %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> undef) %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0) %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1) + %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef) %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0) %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1) + %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> undef) %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0) %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1) + %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> undef) %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0) %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1) + %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef) %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0) %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1) + %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> undef) %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> %v2) %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> %v3) + %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef) %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2) %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3) + %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef) %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2) %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3) diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll index 7a801db1f35faa..777f4a2c4a64ed 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll @@ -14,6 +14,7 @@ define void @umin() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <vscale x 4 x i16> @llvm.umin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <vscale x 8 x i16> @llvm.umin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <vscale x 16 x i16> @llvm.umin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1i32 = call <vscale x 1 x i32> @llvm.umin.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <vscale x 4 x i32> @llvm.umin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <vscale x 8 x i32> @llvm.umin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef) @@ -30,6 +31,7 @@ define void @umin() { %V4i16 = call <vscale x 4 x i16> @llvm.umin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef) %V8i16 = call <vscale x 8 x i16> @llvm.umin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef) %V16i16 = call <vscale x 16 x i16> @llvm.umin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef) + %V1i32 = call <vscale x 1 x i32> @llvm.umin.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef) %V2i32 = call <vscale x 2 x i32> @llvm.umin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef) %V4i32 = call <vscale x 4 x i32> @llvm.umin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef) %V8i32 = call <vscale x 8 x i32> @llvm.umin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef) @@ -49,6 +51,7 @@ define void @umax() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <vscale x 4 x i16> @llvm.umax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <vscale x 8 x i16> @llvm.umax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <vscale x 16 x i16> @llvm.umax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1i32 = call <vscale x 1 x i32> @llvm.umax.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <vscale x 2 x i32> @llvm.umax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <vscale x 8 x i32> @llvm.umax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef) @@ -65,6 +68,7 @@ define void @umax() { %V4i16 = call <vscale x 4 x i16> @llvm.umax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef) %V8i16 = call <vscale x 8 x i16> @llvm.umax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef) %V16i16 = call <vscale x 16 x i16> @llvm.umax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef) + %V1i32 = call <vscale x 1 x i32> @llvm.umax.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef) %V2i32 = call <vscale x 2 x i32> @llvm.umax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef) %V4i32 = call <vscale x 4 x i32> @llvm.umax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef) %V8i32 = call <vscale x 8 x i32> @llvm.umax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef) @@ -84,6 +88,7 @@ define void @smin() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <vscale x 4 x i16> @llvm.smin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <vscale x 8 x i16> @llvm.smin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <vscale x 16 x i16> @llvm.smin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1i32 = call <vscale x 1 x i32> @llvm.smin.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <vscale x 2 x i32> @llvm.smin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <vscale x 4 x i32> @llvm.smin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <vscale x 8 x i32> @llvm.smin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef) @@ -100,6 +105,7 @@ define void @smin() { %V4i16 = call <vscale x 4 x i16> @llvm.smin.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef) %V8i16 = call <vscale x 8 x i16> @llvm.smin.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef) %V16i16 = call <vscale x 16 x i16> @llvm.smin.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef) + %V1i32 = call <vscale x 1 x i32> @llvm.smin.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef) %V2i32 = call <vscale x 2 x i32> @llvm.smin.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef) %V4i32 = call <vscale x 4 x i32> @llvm.smin.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef) %V8i32 = call <vscale x 8 x i32> @llvm.smin.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef) @@ -119,6 +125,7 @@ define void @smax() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i16 = call <vscale x 4 x i16> @llvm.smax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8i16 = call <vscale x 8 x i16> @llvm.smax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16i16 = call <vscale x 16 x i16> @llvm.smax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef) +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1i32 = call <vscale x 1 x i32> @llvm.smax.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2i32 = call <vscale x 2 x i32> @llvm.smax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4i32 = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8i32 = call <vscale x 8 x i32> @llvm.smax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef) @@ -135,6 +142,7 @@ define void @smax() { %V4i16 = call <vscale x 4 x i16> @llvm.smax.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef) %V8i16 = call <vscale x 8 x i16> @llvm.smax.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef) %V16i16 = call <vscale x 16 x i16> @llvm.smax.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef) + %V1i32 = call <vscale x 1 x i32> @llvm.smax.nxv1i32(<vscale x 1 x i32> undef, <vscale x 1 x i32> undef) %V2i32 = call <vscale x 2 x i32> @llvm.smax.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef) %V4i32 = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef) %V8i32 = call <vscale x 8 x i32> @llvm.smax.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef) @@ -145,6 +153,7 @@ define void @smax() { define void @minnum() { ; CHECK-LABEL: 'minnum' +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1f32 = call <vscale x 1 x float> @llvm.minnum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f32 = call <vscale x 2 x float> @llvm.minnum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4f32 = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call <vscale x 8 x float> @llvm.minnum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef) @@ -156,6 +165,7 @@ define void @minnum() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call <vscale x 16 x half> @llvm.minnum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + %V1f32 = call <vscale x 1 x float> @llvm.minnum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef) %V2f32 = call <vscale x 2 x float> @llvm.minnum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef) %V4f32 = call <vscale x 4 x float> @llvm.minnum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef) %V8f32 = call <vscale x 8 x float> @llvm.minnum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef) @@ -170,6 +180,7 @@ define void @minnum() { define void @maxnum() { ; CHECK-LABEL: 'maxnum' +; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V1f32 = call <vscale x 1 x float> @llvm.maxnum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2f32 = call <vscale x 2 x float> @llvm.maxnum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4f32 = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8f32 = call <vscale x 8 x float> @llvm.maxnum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef) @@ -181,6 +192,7 @@ define void @maxnum() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16f16 = call <vscale x 16 x half> @llvm.maxnum.nxv16f16(<vscale x 16 x half> undef, <vscale x 16 x half> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; + %V1f32 = call <vscale x 1 x float> @llvm.maxnum.nxv1f32(<vscale x 1 x float> undef, <vscale x 1 x float> undef) %V2f32 = call <vscale x 2 x float> @llvm.maxnum.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef) %V4f32 = call <vscale x 4 x float> @llvm.maxnum.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef) %V8f32 = call <vscale x 8 x float> @llvm.maxnum.nxv8f32(<vscale x 8 x float> undef, <vscale x 8 x float> undef) _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits