Author: Monk Chiang
Date: 2020-12-26T21:42:30+08:00
New Revision: 622ea9cf74bce89560139ef50fc2f393572f676c
URL: https://github.com/llvm/llvm-project/commit/622ea9cf74bce89560139ef50fc2f393572f676c DIFF: https://github.com/llvm/llvm-project/commit/622ea9cf74bce89560139ef50fc2f393572f676c.diff LOG: [RISCV] Define vector widening reduction intrinsic. Define vwredsumu/vwredsum/vfwredosum/vfwredsum We work with @rogfer01 from BSC to come out this patch. Authored-by: Roger Ferrer Ibanez <rofir...@gmail.com> Co-Authored-by: Zakk Chen <zakk.c...@sifive.com> Differential Revision: https://reviews.llvm.org/D93807 Added: llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll llvm/test/CodeGen/RISCV/rvv/vfwredsum-rv32.ll llvm/test/CodeGen/RISCV/rvv/vfwredsum-rv64.ll Modified: llvm/include/llvm/IR/IntrinsicsRISCV.td llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td Removed: ################################################################################ diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index 5e222e7474d2..4660de58d25b 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -663,11 +663,17 @@ let TargetPrefix = "riscv" in { defm vredmaxu : RISCVReduction; defm vredmax : RISCVReduction; + defm vwredsumu : RISCVReduction; + defm vwredsum : RISCVReduction; + defm vfredosum : RISCVReduction; defm vfredsum : RISCVReduction; defm vfredmin : RISCVReduction; defm vfredmax : RISCVReduction; + defm vfwredsum : RISCVReduction; + defm vfwredosum : RISCVReduction; + def int_riscv_vmand: RISCVBinaryAAANoMask; def int_riscv_vmnand: RISCVBinaryAAANoMask; def int_riscv_vmandnot: RISCVBinaryAAANoMask; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index fd4fb7c3e219..e064b1e811f7 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -1799,6 +1799,22 @@ multiclass VPatReductionV_VS<string intrinsic, string instruction, bit IsFloat = } } +multiclass VPatReductionW_VS<string intrinsic, string instruction, bit IsFloat = 0> { + foreach vti = !if(IsFloat, AllFloatVectors, AllIntegerVectors) in + { + defvar wtiSEW = !mul(vti.SEW, 2); + if !le(wtiSEW, 64) then { + defvar wtiM1 = !cast<VTypeInfo>(!if(IsFloat, "VF", "VI") # wtiSEW # "M1"); + defm : VPatTernary<intrinsic, instruction, "VS", + wtiM1.Vector, vti.Vector, + wtiM1.Vector, vti.Mask, + vti.SEW, vti.LMul, + wtiM1.RegClass, vti.RegClass, + wtiM1.RegClass>; + } + } +} + //===----------------------------------------------------------------------===// // Pseudo instructions and patterns. //===----------------------------------------------------------------------===// @@ -2138,6 +2154,12 @@ defm PseudoVREDMINU : VPseudoReductionV_VS; defm PseudoVREDMIN : VPseudoReductionV_VS; defm PseudoVREDMAXU : VPseudoReductionV_VS; defm PseudoVREDMAX : VPseudoReductionV_VS; + +//===----------------------------------------------------------------------===// +// 15.2. 
Vector Widening Integer Reduction Instructions +//===----------------------------------------------------------------------===// +defm PseudoVWREDSUMU : VPseudoReductionV_VS; +defm PseudoVWREDSUM : VPseudoReductionV_VS; } // Predicates = [HasStdExtV] let Predicates = [HasStdExtV, HasStdExtF] in { @@ -2148,6 +2170,12 @@ defm PseudoVFREDOSUM : VPseudoReductionV_VS; defm PseudoVFREDSUM : VPseudoReductionV_VS; defm PseudoVFREDMIN : VPseudoReductionV_VS; defm PseudoVFREDMAX : VPseudoReductionV_VS; + +//===----------------------------------------------------------------------===// +// 15.4. Vector Widening Floating-Point Reduction Instructions +//===----------------------------------------------------------------------===// +defm PseudoVFWREDSUM : VPseudoReductionV_VS; +defm PseudoVFWREDOSUM : VPseudoReductionV_VS; } // Predicates = [HasStdExtV, HasStdExtF] //===----------------------------------------------------------------------===// @@ -2630,6 +2658,12 @@ defm "" : VPatReductionV_VS<"int_riscv_vredminu", "PseudoVREDMINU">; defm "" : VPatReductionV_VS<"int_riscv_vredmin", "PseudoVREDMIN">; defm "" : VPatReductionV_VS<"int_riscv_vredmaxu", "PseudoVREDMAXU">; defm "" : VPatReductionV_VS<"int_riscv_vredmax", "PseudoVREDMAX">; + +//===----------------------------------------------------------------------===// +// 15.2. Vector Widening Integer Reduction Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatReductionW_VS<"int_riscv_vwredsumu", "PseudoVWREDSUMU">; +defm "" : VPatReductionW_VS<"int_riscv_vwredsum", "PseudoVWREDSUM">; } // Predicates = [HasStdExtV] let Predicates = [HasStdExtV, HasStdExtF] in { @@ -2640,6 +2674,12 @@ defm "" : VPatReductionV_VS<"int_riscv_vfredosum", "PseudoVFREDOSUM", /*IsFloat= defm "" : VPatReductionV_VS<"int_riscv_vfredsum", "PseudoVFREDSUM", /*IsFloat=*/1>; defm "" : VPatReductionV_VS<"int_riscv_vfredmin", "PseudoVFREDMIN", /*IsFloat=*/1>; defm "" : VPatReductionV_VS<"int_riscv_vfredmax", "PseudoVFREDMAX", /*IsFloat=*/1>; + +//===----------------------------------------------------------------------===// +// 15.4. 
Vector Widening Floating-Point Reduction Instructions +//===----------------------------------------------------------------------===// +defm "" : VPatReductionW_VS<"int_riscv_vfwredsum", "PseudoVFWREDSUM", /*IsFloat=*/1>; +defm "" : VPatReductionW_VS<"int_riscv_vfwredosum", "PseudoVFWREDOSUM", /*IsFloat=*/1>; } // Predicates = [HasStdExtV, HasStdExtF] //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll new file mode 100644 index 000000000000..91b86cf4058b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll @@ -0,0 +1,43 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv32f16( + <vscale x 2 x float>, + <vscale x 32 x half>, + <vscale x 2 x float>, + i32); + +define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv32f16( + <vscale x 2 x float> %0, + <vscale x 32 x half> %1, + <vscale x 2 x float> %2, + i32 %3) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16.nxv32i1( + <vscale x 2 x float>, + <vscale x 32 x half>, + <vscale x 2 x float>, + <vscale x 32 x i1>, + i32); + +define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16.nxv32i1( + <vscale x 2 x float> %0, + <vscale x 32 x half> %1, + <vscale x 2 x float> %2, + <vscale x 32 x i1> %3, + i32 %4) + + ret <vscale x 2 x float> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll new file mode 100644 index 000000000000..1a9b1cde8226 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll @@ -0,0 +1,85 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv32f16( + <vscale x 2 x float>, + <vscale x 32 x half>, + <vscale x 2 x float>, + i64); + +define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv32f16( + <vscale x 2 x float> %0, + <vscale x 32 x half> %1, + <vscale x 2 x float> %2, + i64 %3) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> 
@llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16.nxv32i1( + <vscale x 2 x float>, + <vscale x 32 x half>, + <vscale x 2 x float>, + <vscale x 32 x i1>, + i64); + +define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16.nxv32i1( + <vscale x 2 x float> %0, + <vscale x 32 x half> %1, + <vscale x 2 x float> %2, + <vscale x 32 x i1> %3, + i64 %4) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv16f32( + <vscale x 1 x double>, + <vscale x 16 x float>, + <vscale x 1 x double>, + i64); + +define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv16f32_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv16f32( + <vscale x 1 x double> %0, + <vscale x 16 x float> %1, + <vscale x 1 x double> %2, + i64 %3) + + ret <vscale x 1 x double> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv16f32.nxv16i1( + <vscale x 1 x double>, + <vscale x 16 x float>, + <vscale x 1 x double>, + <vscale x 16 x i1>, + i64); + +define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, <vscale x 16 x i1> %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv16f32_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfwredosum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv16f32.nxv16i1( + <vscale x 1 x double> %0, + <vscale x 16 x float> %1, + <vscale x 1 x double> %2, + <vscale x 16 x i1> %3, + i64 %4) + + ret <vscale x 1 x double> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredsum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredsum-rv32.ll new file mode 100644 index 000000000000..dcc16b1b07ea --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwredsum-rv32.ll @@ -0,0 +1,43 @@ +; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare <vscale x 2 x float> @llvm.riscv.vfwredsum.nxv2f32.nxv32f16( + <vscale x 2 x float>, + <vscale x 32 x half>, + <vscale x 2 x float>, + i32); + +define <vscale x 2 x float> @intrinsic_vfwredsum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfwredsum_vs_nxv2f32_nxv32f16_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call <vscale x 2 x float> @llvm.riscv.vfwredsum.nxv2f32.nxv32f16( + <vscale x 2 x float> %0, + <vscale x 32 x half> %1, + <vscale x 2 x float> %2, + i32 %3) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwredsum.mask.nxv2f32.nxv32f16.nxv32i1( + <vscale 
x 2 x float>, + <vscale x 32 x half>, + <vscale x 2 x float>, + <vscale x 32 x i1>, + i32); + +define <vscale x 2 x float> @intrinsic_vfwredsum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, i32 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfwredsum_mask_vs_nxv2f32_nxv32f16_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call <vscale x 2 x float> @llvm.riscv.vfwredsum.mask.nxv2f32.nxv32f16.nxv32i1( + <vscale x 2 x float> %0, + <vscale x 32 x half> %1, + <vscale x 2 x float> %2, + <vscale x 32 x i1> %3, + i32 %4) + + ret <vscale x 2 x float> %a +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredsum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredsum-rv64.ll new file mode 100644 index 000000000000..ce2eb047c1b0 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vfwredsum-rv64.ll @@ -0,0 +1,85 @@ +; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \ +; RUN: --riscv-no-aliases < %s | FileCheck %s +declare <vscale x 2 x float> @llvm.riscv.vfwredsum.nxv2f32.nxv32f16( + <vscale x 2 x float>, + <vscale x 32 x half>, + <vscale x 2 x float>, + i64); + +define <vscale x 2 x float> @intrinsic_vfwredsum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfwredsum_vs_nxv2f32_nxv32f16_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call <vscale x 2 x float> @llvm.riscv.vfwredsum.nxv2f32.nxv32f16( + <vscale x 2 x float> %0, + <vscale x 32 x half> %1, + <vscale x 2 x float> %2, + i64 %3) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 2 x float> @llvm.riscv.vfwredsum.mask.nxv2f32.nxv32f16.nxv32i1( + <vscale x 2 x float>, + <vscale x 32 x half>, + <vscale x 2 x float>, + <vscale x 32 x i1>, + i64); + +define <vscale x 2 x float> @intrinsic_vfwredsum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfwredsum_mask_vs_nxv2f32_nxv32f16_nxv2f32 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e16,m8,ta,mu +; CHECK: vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call <vscale x 2 x float> @llvm.riscv.vfwredsum.mask.nxv2f32.nxv32f16.nxv32i1( + <vscale x 2 x float> %0, + <vscale x 32 x half> %1, + <vscale x 2 x float> %2, + <vscale x 32 x i1> %3, + i64 %4) + + ret <vscale x 2 x float> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vfwredsum.nxv1f64.nxv16f32( + <vscale x 1 x double>, + <vscale x 16 x float>, + <vscale x 1 x double>, + i64); + +define <vscale x 1 x double> @intrinsic_vfwredsum_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, i64 %3) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfwredsum_vs_nxv1f64_nxv16f32_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} + %a = call <vscale x 1 x double> @llvm.riscv.vfwredsum.nxv1f64.nxv16f32( + <vscale x 1 x double> %0, + <vscale x 16 x float> %1, + <vscale x 1 x double> %2, + i64 %3) + + ret <vscale x 1 x double> %a +} + +declare <vscale x 1 x double> @llvm.riscv.vfwredsum.mask.nxv1f64.nxv16f32.nxv16i1( + <vscale x 1 x double>, + <vscale x 16 x float>, + <vscale x 1 x double>, + <vscale x 16 x i1>, + 
i64); + +define <vscale x 1 x double> @intrinsic_vfwredsum_mask_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, <vscale x 16 x i1> %3, i64 %4) nounwind { +entry: +; CHECK-LABEL: intrinsic_vfwredsum_mask_vs_nxv1f64_nxv16f32_nxv1f64 +; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e32,m8,ta,mu +; CHECK: vfwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, v0.t + %a = call <vscale x 1 x double> @llvm.riscv.vfwredsum.mask.nxv1f64.nxv16f32.nxv16i1( + <vscale x 1 x double> %0, + <vscale x 16 x float> %1, + <vscale x 1 x double> %2, + <vscale x 16 x i1> %3, + i64 %4) + + ret <vscale x 1 x double> %a +}
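
For reference, the new widening reductions take the same operand layout as the floating-point tests above: an LMUL=1 accumulator whose element width is twice the source SEW, the source vector, a scalar operand held in element 0 of an LMUL=1 vector, and the vector length. Since the result element width is 2*SEW, VPatReductionW_VS only instantiates patterns while the widened SEW still fits in 64 bits (the !le(wtiSEW, 64) guard), so i64 and double sources get no pattern. The sketch below is illustrative only and not part of this commit; the nxv4i16/nxv64i8 type combination and the expected CHECK lines are assumptions that follow the pattern of the float tests.

; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
; RUN:   --riscv-no-aliases < %s | FileCheck %s
declare <vscale x 4 x i16> @llvm.riscv.vwredsum.nxv4i16.nxv64i8(
  <vscale x 4 x i16>,   ; vd passthru: i16 accumulator at LMUL=1 (2*SEW of the i8 source)
  <vscale x 64 x i8>,   ; vs2: i8 source vector at LMUL=8
  <vscale x 4 x i16>,   ; vs1: scalar operand in element 0 of an LMUL=1 vector
  i64);                 ; vl

define <vscale x 4 x i16> @intrinsic_vwredsum_vs_nxv4i16_nxv64i8_nxv4i16(<vscale x 4 x i16> %0, <vscale x 64 x i8> %1, <vscale x 4 x i16> %2, i64 %3) nounwind {
entry:
; CHECK-LABEL: intrinsic_vwredsum_vs_nxv4i16_nxv64i8_nxv4i16
; CHECK: vsetvli {{.*}}, {{a[0-9]+}}, e8,m8,ta,mu
; CHECK: vwredsum.vs {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
  %a = call <vscale x 4 x i16> @llvm.riscv.vwredsum.nxv4i16.nxv64i8(
    <vscale x 4 x i16> %0,
    <vscale x 64 x i8> %1,
    <vscale x 4 x i16> %2,
    i64 %3)

  ret <vscale x 4 x i16> %a
}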