https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/99042
>From aa74d04751558f3ab47d566c91fb8ad178df0dce Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Tue, 16 Jul 2024 13:37:34 +0100 Subject: [PATCH 1/3] [AArch64] Implement intrinsics for SVE FAMIN/FAMAX This patch implements the following intrinsics: * Floating-point absolute maximum (predicated) svfloat16_t svamax[_f16]_m(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svamax[_f16]_x(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svamax[_f16]_z(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svamax[_n_f16]_m(svbool_t, svfloat16_t, float16_t); svfloat16_t svamax[_n_f16]_x(svbool_t, svfloat16_t, float16_t); svfloat16_t svamax[_n_f16]_z(svbool_t, svfloat16_t, float16_t); * Floating-point absolute minimum (predicated) svfloat16_t svmin[_f16]_m(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svmin[_f16]_x(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svmin[_f16]_z(svbool_t, svfloat16_t, svfloat16_t); svfloat16_t svmin[_n_f16]_m(svbool_t, svfloat16_t, float16_t); svfloat16_t svmin[_n_f16]_x(svbool_t, svfloat16_t, float16_t); svfloat16_t svmin[_n_f16]_z(svbool_t, svfloat16_t, float16_t); All the intrinsics have also variants for `f32` and `f64`, and have the `__arm_streaming` attribute. (cf. https://github.com/ARM-software/acle/pull/324) --- clang/include/clang/Basic/arm_sve.td | 5 + .../acle_sve2_faminmax.c | 775 ++++++++++++++++++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 7 + .../Target/AArch64/AArch64ISelLowering.cpp | 8 + llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 + .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 14 +- .../AArch64/sve2-intrinsics-faminmax.ll | 266 ++++++ 7 files changed, 1075 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c create mode 100644 llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 078373823a3b6f..b40ce9b4d11b56 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2401,3 +2401,8 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; } + +let SVETargetGuard = "sve2,faminmax", SMETargetGuard = "sme2,faminmax" in { + defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", "aarch64_sve_famin_u">; + defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", "aarch64_sve_famax_u">; +} diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c new file mode 100644 index 00000000000000..3cf7d99d606f32 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c @@ -0,0 +1,775 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CPP +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CPP +// RUN: %clang_cc1 -x c++ -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CPP + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +// REQUIRES: aarch64-registered-target + +#ifdef __ARM_FEATURE_SME +#include "arm_sme.h" +#else +#include "arm_sve.h" +#endif + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +#ifdef __ARM_FEATURE_SME +#define STREAMING __arm_streaming +#else +#define STREAMING +#endif + +// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_f16_m( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famin_f16_mu10__SVBool_tu13__SVFloat16_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +svfloat16_t test_famin_f16_m(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _f16, _m)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_f16_x( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famin_f16_xu10__SVBool_tu13__SVFloat16_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +svfloat16_t test_famin_f16_x(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _f16, _x)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_f16_z( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famin_f16_zu10__SVBool_tu13__SVFloat16_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer +// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP2]] +// +svfloat16_t test_famin_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _f16, _z)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_n_f16_m( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famin_n_f16_mu10__SVBool_tu13__SVFloat16_tDh( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +svfloat16_t test_famin_n_f16_m(svbool_t pg, svfloat16_t a, float16_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _n_f16, _m)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_n_f16_x( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famin_n_f16_xu10__SVBool_tu13__SVFloat16_tDh( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +svfloat16_t test_famin_n_f16_x(svbool_t pg, svfloat16_t a, float16_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _n_f16, _x)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_n_f16_z( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famin_n_f16_zu10__SVBool_tu13__SVFloat16_tDh( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer +// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP2]] +// +svfloat16_t test_famin_n_f16_z(svbool_t pg, svfloat16_t a, float16_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _n_f16, _z)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_f32_m( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famin_f32_mu10__SVBool_tu13__SVFloat32_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +svfloat32_t test_famin_f32_m(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _f32, _m)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_f32_x( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famin_f32_xu10__SVBool_tu13__SVFloat32_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +svfloat32_t test_famin_f32_x(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _f32, _x)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_f32_z( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famin_f32_zu10__SVBool_tu13__SVFloat32_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer +// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP2]] +// +svfloat32_t test_famin_f32_z(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _f32, _z)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_n_f32_m( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famin_n_f32_mu10__SVBool_tu13__SVFloat32_tf( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +svfloat32_t test_famin_n_f32_m(svbool_t pg, svfloat32_t a, float32_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _n_f32, _m)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_n_f32_x( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famin_n_f32_xu10__SVBool_tu13__SVFloat32_tf( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +svfloat32_t test_famin_n_f32_x(svbool_t pg, svfloat32_t a, float32_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _n_f32, _x)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_n_f32_z( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famin_n_f32_zu10__SVBool_tu13__SVFloat32_tf( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer +// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP2]] +// +svfloat32_t test_famin_n_f32_z(svbool_t pg, svfloat32_t a, float32_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _n_f32, _z)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_f64_m( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famin_f64_mu10__SVBool_tu13__SVFloat64_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +svfloat64_t test_famin_f64_m(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _f64, _m)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_f64_x( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famin_f64_xu10__SVBool_tu13__SVFloat64_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +svfloat64_t test_famin_f64_x(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _f64, _x)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_f64_z( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[B]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famin_f64_zu10__SVBool_tu13__SVFloat64_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer +// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP2]] +// +svfloat64_t test_famin_f64_z(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _f64, _z)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_n_f64_m( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famin_n_f64_mu10__SVBool_tu13__SVFloat64_td( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +svfloat64_t test_famin_n_f64_m(svbool_t pg, svfloat64_t a, float64_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _n_f64, _m)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_n_f64_x( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famin_n_f64_xu10__SVBool_tu13__SVFloat64_td( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +svfloat64_t test_famin_n_f64_x(svbool_t pg, svfloat64_t a, float64_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _n_f64, _x)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_n_f64_z( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famin_n_f64_zu10__SVBool_tu13__SVFloat64_td( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer +// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP2]] +// +svfloat64_t test_famin_n_f64_z(svbool_t pg, svfloat64_t a, float64_t b) STREAMING { + return SVE_ACLE_FUNC(svamin, _n_f64, _z)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_f16_m( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famax_f16_mu10__SVBool_tu13__SVFloat16_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +svfloat16_t test_famax_f16_m(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _f16, _m)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_f16_x( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famax_f16_xu10__SVBool_tu13__SVFloat16_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +svfloat16_t test_famax_f16_x(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _f16, _x)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_f16_z( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famax_f16_zu10__SVBool_tu13__SVFloat16_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer +// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP2]] +// +svfloat16_t test_famax_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _f16, _z)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_n_f16_m( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famax_n_f16_mu10__SVBool_tu13__SVFloat16_tDh( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +svfloat16_t test_famax_n_f16_m(svbool_t pg, svfloat16_t a, float16_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _n_f16, _m)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_n_f16_x( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famax_n_f16_xu10__SVBool_tu13__SVFloat16_tDh( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]] +// +svfloat16_t test_famax_n_f16_x(svbool_t pg, svfloat16_t a, float16_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _n_f16, _x)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_n_f16_z( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famax_n_f16_zu10__SVBool_tu13__SVFloat16_tDh( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer +// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP2]] +// +svfloat16_t test_famax_n_f16_z(svbool_t pg, svfloat16_t a, float16_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _n_f16, _z)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_f32_m( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famax_f32_mu10__SVBool_tu13__SVFloat32_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +svfloat32_t test_famax_f32_m(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _f32, _m)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_f32_x( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famax_f32_xu10__SVBool_tu13__SVFloat32_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +svfloat32_t test_famax_f32_x(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _f32, _x)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_f32_z( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famax_f32_zu10__SVBool_tu13__SVFloat32_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer +// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP2]] +// +svfloat32_t test_famax_f32_z(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _f32, _z)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_n_f32_m( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famax_n_f32_mu10__SVBool_tu13__SVFloat32_tf( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +svfloat32_t test_famax_n_f32_m(svbool_t pg, svfloat32_t a, float32_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _n_f32, _m)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_n_f32_x( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famax_n_f32_xu10__SVBool_tu13__SVFloat32_tf( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]] +// +svfloat32_t test_famax_n_f32_x(svbool_t pg, svfloat32_t a, float32_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _n_f32, _x)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_n_f32_z( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famax_n_f32_zu10__SVBool_tu13__SVFloat32_tf( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer +// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP2]] +// +svfloat32_t test_famax_n_f32_z(svbool_t pg, svfloat32_t a, float32_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _n_f32, _z)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_f64_m( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famax_f64_mu10__SVBool_tu13__SVFloat64_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +svfloat64_t test_famax_f64_m(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _f64, _m)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_f64_x( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famax_f64_xu10__SVBool_tu13__SVFloat64_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +svfloat64_t test_famax_f64_x(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _f64, _x)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_f64_z( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[B]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famax_f64_zu10__SVBool_tu13__SVFloat64_tS0_( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer +// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[B]]) +// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP2]] +// +svfloat64_t test_famax_f64_z(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _f64, _z)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_n_f64_m( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famax_n_f64_mu10__SVBool_tu13__SVFloat64_td( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +svfloat64_t test_famax_n_f64_m(svbool_t pg, svfloat64_t a, float64_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _n_f64, _m)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_n_f64_x( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famax_n_f64_xu10__SVBool_tu13__SVFloat64_td( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP1]] +// +svfloat64_t test_famax_n_f64_x(svbool_t pg, svfloat64_t a, float64_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _n_f64, _x)(pg, a, b); +} + +// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_n_f64_z( +// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0 +// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer +// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer +// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]]) +// CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]] +// +// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famax_n_f64_zu10__SVBool_tu13__SVFloat64_td( +// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-CPP-NEXT: [[ENTRY:.*:]] +// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]]) +// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0 +// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer +// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer +// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]]) +// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP2]] +// +svfloat64_t test_famax_n_f64_z(svbool_t pg, svfloat64_t a, float64_t b) STREAMING { + return SVE_ACLE_FUNC(svamax, _n_f64, _z)(pg, a, b); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 3735bf5222fce3..ca4af6c2603ea6 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3730,3 +3730,10 @@ def int_aarch64_sve_pmov_to_vector_lane_zeroing : SVE2_Pred_1VectorArg_Intrinsic def int_aarch64_sme_mopa_nonwide : SME_OuterProduct_Intrinsic; def int_aarch64_sme_mops_nonwide : SME_OuterProduct_Intrinsic; +// SVE2/SME2 - Floating point absolute maximum and minimum + +def int_aarch64_sve_famax : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_famax_u : AdvSIMD_Pred2VectorArg_Intrinsic; + +def int_aarch64_sve_famin : AdvSIMD_Pred2VectorArg_Intrinsic; +def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 215f30128e7038..b1aa3c65745161 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2726,6 +2726,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::FMINNMV_PRED) MAKE_CASE(AArch64ISD::FMUL_PRED) MAKE_CASE(AArch64ISD::FSUB_PRED) + MAKE_CASE(AArch64ISD::FAMAX_PRED) + MAKE_CASE(AArch64ISD::FAMIN_PRED) MAKE_CASE(AArch64ISD::RDSVL) MAKE_CASE(AArch64ISD::BIC) MAKE_CASE(AArch64ISD::CBZ) @@ -22056,6 +22058,12 @@ static SDValue performIntrinsicCombine(SDNode *N, AArch64CC::LAST_ACTIVE); case Intrinsic::aarch64_sve_whilelo: return tryCombineWhileLo(N, DCI, Subtarget); + case Intrinsic::aarch64_sve_famax_u: + return DAG.getNode(AArch64ISD::FAMAX_PRED, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), N->getOperand(3)); + case Intrinsic::aarch64_sve_famin_u: + return DAG.getNode(AArch64ISD::FAMIN_PRED, SDLoc(N), N->getValueType(0), + N->getOperand(1), N->getOperand(2), N->getOperand(3)); } return SDValue(); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 39d5df0de0eec7..cdeb988c3966e5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -135,6 +135,8 @@ enum NodeType : unsigned { UDIV_PRED, UMAX_PRED, UMIN_PRED, + FAMAX_PRED, + FAMIN_PRED, // Unpredicated vector instructions BIC, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 694b7fb2068a29..e733b4c02f7d47 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -218,6 +218,9 @@ def AArch64fsub_p_contract : PatFrag<(ops node:$op1, node:$op2, node:$op3), return N->getFlags().hasAllowContract(); }]>; +def AArch64famax_p : SDNode<"AArch64ISD::FAMAX_PRED", SDT_AArch64Arith>; +def AArch64famin_p : SDNode<"AArch64ISD::FAMIN_PRED", SDT_AArch64Arith>; + def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>, SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2> @@ -483,6 +486,8 @@ def AArch64fminnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fminnm, def AArch64fmaxnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmaxnm, AArch64fmaxnm_p>; def AArch64fmin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmin, AArch64fmin_p>; def AArch64fmax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmax, AArch64fmax_p>; +def AArch64famax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_famax, AArch64famax_p>; +def AArch64famin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_famin, AArch64famin_p>; def AArch64fadd : PatFrags<(ops node:$op1, node:$op2), [(fadd node:$op1, node:$op2), @@ -717,6 +722,11 @@ let Predicates = [HasSVEorSME] in { defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>; } // End HasSVEorSME +let Predicates = [HasSVE2orSME2, HasFAMINMAX] in { + defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<AArch64famax_p>; + defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64famin_p>; +} + let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in { defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>; defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>; @@ -4168,8 +4178,8 @@ defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>; let Predicates = [HasSVE2orSME2, HasFAMINMAX] in { // FP8 Arithmetic - Predicated Group -defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "", null_frag, DestructiveOther>; -defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "", null_frag, DestructiveOther>; +defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "FAMIN_ZPZZ", AArch64famin_m1, DestructiveBinaryComm>; +defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "FAMAX_ZPZZ", AArch64famax_m1, DestructiveBinaryComm>; } // End HasSVE2orSME2, HasFAMINMAX let Predicates = [HasSSVE_FP8FMA] in { diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll new file mode 100644 index 00000000000000..123756e88f6347 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll @@ -0,0 +1,266 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mattr=+sve2 < %s | FileCheck %s +; RUN: llc -mattr=+sme2 -force-streaming < %s | FileCheck %s + +target triple = "aarch64-linux" + +define <vscale x 8 x half> @famin_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { +; CHECK-LABEL: famin_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %r = call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) + ret <vscale x 8 x half> %r +} + +define <vscale x 4 x float> @famin_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { +; CHECK-LABEL: famin_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %r = call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) + ret <vscale x 4 x float> %r +} + +define <vscale x 2 x double> @famin_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { +; CHECK-LABEL: famin_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %r = call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) + ret <vscale x 2 x double> %r +} + +define <vscale x 8 x half> @famin_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { +; CHECK-LABEL: famin_u_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %r = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) + ret <vscale x 8 x half> %r +} + +define <vscale x 4 x float> @famin_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { +; CHECK-LABEL: famin_u_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %r = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) + ret <vscale x 4 x float> %r +} + +define <vscale x 2 x double> @famin_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { +; CHECK-LABEL: famin_u_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %r = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) + ret <vscale x 2 x double> %r +} + +define <vscale x 8 x half> @famax_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { +; CHECK-LABEL: famax_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %r = call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) + ret <vscale x 8 x half> %r +} + +define <vscale x 4 x float> @famax_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { +; CHECK-LABEL: famax_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %r = call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) + ret <vscale x 4 x float> %r +} + +define <vscale x 2 x double> @famax_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { +; CHECK-LABEL: famax_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %r = call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) + ret <vscale x 2 x double> %r +} + +define <vscale x 8 x half> @famax_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { +; CHECK-LABEL: famax_u_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %r = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) + ret <vscale x 8 x half> %r +} + +define <vscale x 4 x float> @famax_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { +; CHECK-LABEL: famax_u_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %r = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) + ret <vscale x 4 x float> %r +} + +define <vscale x 2 x double> @famax_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { +; CHECK-LABEL: famax_u_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %r = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) + ret <vscale x 2 x double> %r +} + +define <vscale x 8 x half> @select_famin_f16a(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { +; CHECK-LABEL: select_famin_f16a: +; CHECK: // %bb.0: +; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %m = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %a, <vscale x 8 x half> %b) + %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a + ret <vscale x 8 x half> %r +} + +define <vscale x 8 x half> @select_famin_f16b(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { +; CHECK-LABEL: select_famin_f16b: +; CHECK: // %bb.0: +; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %m = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %b, <vscale x 8 x half> %a) + %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a + ret <vscale x 8 x half> %r +} + +define <vscale x 4 x float> @select_famin_f32a(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { +; CHECK-LABEL: select_famin_f32a: +; CHECK: // %bb.0: +; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %m = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %a, <vscale x 4 x float> %b) + %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a + ret <vscale x 4 x float> %r +} + +define <vscale x 4 x float> @select_famin_f32b(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { +; CHECK-LABEL: select_famin_f32b: +; CHECK: // %bb.0: +; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %m = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %b, <vscale x 4 x float> %a) + %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a + ret <vscale x 4 x float> %r +} + +define <vscale x 2 x double> @select_famin_f64a(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { +; CHECK-LABEL: select_famin_f64a: +; CHECK: // %bb.0: +; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %m = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %a, <vscale x 2 x double> %b) + %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a + ret <vscale x 2 x double> %r +} + +define <vscale x 2 x double> @select_famin_f64b(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { +; CHECK-LABEL: select_famin_f64b: +; CHECK: // %bb.0: +; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %m = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %b, <vscale x 2 x double> %a) + %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a + ret <vscale x 2 x double> %r +} + + +define <vscale x 8 x half> @select_famax_f16a(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { +; CHECK-LABEL: select_famax_f16a: +; CHECK: // %bb.0: +; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %m = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %a, <vscale x 8 x half> %b) + %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a + ret <vscale x 8 x half> %r +} + +define <vscale x 8 x half> @select_famax_f16b(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { +; CHECK-LABEL: select_famax_f16b: +; CHECK: // %bb.0: +; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) + %m = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %b, <vscale x 8 x half> %a) + %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a + ret <vscale x 8 x half> %r +} + +define <vscale x 4 x float> @select_famax_f32a(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { +; CHECK-LABEL: select_famax_f32a: +; CHECK: // %bb.0: +; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %m = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %a, <vscale x 4 x float> %b) + %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a + ret <vscale x 4 x float> %r +} + +define <vscale x 4 x float> @select_famax_f32b(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { +; CHECK-LABEL: select_famax_f32b: +; CHECK: // %bb.0: +; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) + %m = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %b, <vscale x 4 x float> %a) + %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a + ret <vscale x 4 x float> %r +} + +define <vscale x 2 x double> @select_famax_f64a(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { +; CHECK-LABEL: select_famax_f64a: +; CHECK: // %bb.0: +; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %m = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %a, <vscale x 2 x double> %b) + %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a + ret <vscale x 2 x double> %r +} + +define <vscale x 2 x double> @select_famax_f64b(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { +; CHECK-LABEL: select_famax_f64b: +; CHECK: // %bb.0: +; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) + %m = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %b, <vscale x 2 x double> %a) + %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a + ret <vscale x 2 x double> %r +} + +declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32) +declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32) +declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32) + +declare <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) +declare <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) + +declare <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) +declare <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) +declare <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) +declare <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) + +attributes #0 = { nounwind "target-features" = "+faminmax" } >From 7dcd5a7cb5558226f41acabc8d84b166dee74f64 Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Thu, 29 Aug 2024 10:41:44 +0100 Subject: [PATCH 2/3] [fixup] Remove AArch64ISD::FA{MIN,MAX}_PRED --- .../Target/AArch64/AArch64ISelLowering.cpp | 8 - llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 - .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 13 +- .../AArch64/sve2-intrinsics-faminmax.ll | 151 ------------------ 4 files changed, 4 insertions(+), 170 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index b1aa3c65745161..215f30128e7038 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2726,8 +2726,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::FMINNMV_PRED) MAKE_CASE(AArch64ISD::FMUL_PRED) MAKE_CASE(AArch64ISD::FSUB_PRED) - MAKE_CASE(AArch64ISD::FAMAX_PRED) - MAKE_CASE(AArch64ISD::FAMIN_PRED) MAKE_CASE(AArch64ISD::RDSVL) MAKE_CASE(AArch64ISD::BIC) MAKE_CASE(AArch64ISD::CBZ) @@ -22058,12 +22056,6 @@ static SDValue performIntrinsicCombine(SDNode *N, AArch64CC::LAST_ACTIVE); case Intrinsic::aarch64_sve_whilelo: return tryCombineWhileLo(N, DCI, Subtarget); - case Intrinsic::aarch64_sve_famax_u: - return DAG.getNode(AArch64ISD::FAMAX_PRED, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3)); - case Intrinsic::aarch64_sve_famin_u: - return DAG.getNode(AArch64ISD::FAMIN_PRED, SDLoc(N), N->getValueType(0), - N->getOperand(1), N->getOperand(2), N->getOperand(3)); } return SDValue(); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index cdeb988c3966e5..39d5df0de0eec7 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -135,8 +135,6 @@ enum NodeType : unsigned { UDIV_PRED, UMAX_PRED, UMIN_PRED, - FAMAX_PRED, - FAMIN_PRED, // Unpredicated vector instructions BIC, diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index e733b4c02f7d47..0573bd717caa5b 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -218,9 +218,6 @@ def AArch64fsub_p_contract : PatFrag<(ops node:$op1, node:$op2, node:$op3), return N->getFlags().hasAllowContract(); }]>; -def AArch64famax_p : SDNode<"AArch64ISD::FAMAX_PRED", SDT_AArch64Arith>; -def AArch64famin_p : SDNode<"AArch64ISD::FAMIN_PRED", SDT_AArch64Arith>; - def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>, SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2> @@ -486,8 +483,6 @@ def AArch64fminnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fminnm, def AArch64fmaxnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmaxnm, AArch64fmaxnm_p>; def AArch64fmin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmin, AArch64fmin_p>; def AArch64fmax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmax, AArch64fmax_p>; -def AArch64famax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_famax, AArch64famax_p>; -def AArch64famin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_famin, AArch64famin_p>; def AArch64fadd : PatFrags<(ops node:$op1, node:$op2), [(fadd node:$op1, node:$op2), @@ -723,8 +718,8 @@ let Predicates = [HasSVEorSME] in { } // End HasSVEorSME let Predicates = [HasSVE2orSME2, HasFAMINMAX] in { - defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<AArch64famax_p>; - defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64famin_p>; + defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famax_u>; + defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famin_u>; } let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in { @@ -4178,8 +4173,8 @@ defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>; let Predicates = [HasSVE2orSME2, HasFAMINMAX] in { // FP8 Arithmetic - Predicated Group -defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "FAMIN_ZPZZ", AArch64famin_m1, DestructiveBinaryComm>; -defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "FAMAX_ZPZZ", AArch64famax_m1, DestructiveBinaryComm>; +defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "FAMIN_ZPZZ", int_aarch64_sve_famin, DestructiveBinaryComm>; +defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "FAMAX_ZPZZ", int_aarch64_sve_famax, DestructiveBinaryComm>; } // End HasSVE2orSME2, HasFAMINMAX let Predicates = [HasSSVE_FP8FMA] in { diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll index 123756e88f6347..0d247328a9475d 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll @@ -112,155 +112,4 @@ define <vscale x 2 x double> @famax_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x d ret <vscale x 2 x double> %r } -define <vscale x 8 x half> @select_famin_f16a(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { -; CHECK-LABEL: select_famin_f16a: -; CHECK: // %bb.0: -; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret - %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %m = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %a, <vscale x 8 x half> %b) - %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a - ret <vscale x 8 x half> %r -} - -define <vscale x 8 x half> @select_famin_f16b(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { -; CHECK-LABEL: select_famin_f16b: -; CHECK: // %bb.0: -; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret - %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %m = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %b, <vscale x 8 x half> %a) - %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a - ret <vscale x 8 x half> %r -} - -define <vscale x 4 x float> @select_famin_f32a(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { -; CHECK-LABEL: select_famin_f32a: -; CHECK: // %bb.0: -; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret - %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %m = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %a, <vscale x 4 x float> %b) - %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a - ret <vscale x 4 x float> %r -} - -define <vscale x 4 x float> @select_famin_f32b(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { -; CHECK-LABEL: select_famin_f32b: -; CHECK: // %bb.0: -; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret - %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %m = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %b, <vscale x 4 x float> %a) - %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a - ret <vscale x 4 x float> %r -} - -define <vscale x 2 x double> @select_famin_f64a(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { -; CHECK-LABEL: select_famin_f64a: -; CHECK: // %bb.0: -; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %m = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %a, <vscale x 2 x double> %b) - %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a - ret <vscale x 2 x double> %r -} - -define <vscale x 2 x double> @select_famin_f64b(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { -; CHECK-LABEL: select_famin_f64b: -; CHECK: // %bb.0: -; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %m = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %b, <vscale x 2 x double> %a) - %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a - ret <vscale x 2 x double> %r -} - - -define <vscale x 8 x half> @select_famax_f16a(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { -; CHECK-LABEL: select_famax_f16a: -; CHECK: // %bb.0: -; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret - %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %m = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %a, <vscale x 8 x half> %b) - %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a - ret <vscale x 8 x half> %r -} - -define <vscale x 8 x half> @select_famax_f16b(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 { -; CHECK-LABEL: select_famax_f16b: -; CHECK: // %bb.0: -; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h -; CHECK-NEXT: ret - %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) - %m = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %b, <vscale x 8 x half> %a) - %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a - ret <vscale x 8 x half> %r -} - -define <vscale x 4 x float> @select_famax_f32a(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { -; CHECK-LABEL: select_famax_f32a: -; CHECK: // %bb.0: -; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret - %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %m = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %a, <vscale x 4 x float> %b) - %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a - ret <vscale x 4 x float> %r -} - -define <vscale x 4 x float> @select_famax_f32b(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 { -; CHECK-LABEL: select_famax_f32b: -; CHECK: // %bb.0: -; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: ret - %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) - %m = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %b, <vscale x 4 x float> %a) - %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a - ret <vscale x 4 x float> %r -} - -define <vscale x 2 x double> @select_famax_f64a(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { -; CHECK-LABEL: select_famax_f64a: -; CHECK: // %bb.0: -; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %m = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %a, <vscale x 2 x double> %b) - %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a - ret <vscale x 2 x double> %r -} - -define <vscale x 2 x double> @select_famax_f64b(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 { -; CHECK-LABEL: select_famax_f64b: -; CHECK: // %bb.0: -; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d -; CHECK-NEXT: ret - %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) - %m = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %b, <vscale x 2 x double> %a) - %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a - ret <vscale x 2 x double> %r -} - -declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32) -declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32) -declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32) - -declare <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) -declare <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) -declare <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) -declare <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) -declare <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) -declare <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) - -declare <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) -declare <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) -declare <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) -declare <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>) -declare <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>) -declare <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>) - attributes #0 = { nounwind "target-features" = "+faminmax" } >From 261e75c84200467514473f64b213ba31b761dec9 Mon Sep 17 00:00:00 2001 From: Momchil Velikov <momchil.veli...@arm.com> Date: Thu, 29 Aug 2024 17:29:42 +0100 Subject: [PATCH 3/3] [fixup] Stuff (NFC) --- llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 9 +++------ .../test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll | 12 ++++++------ 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 0573bd717caa5b..ee2948c9b9f106 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -717,11 +717,6 @@ let Predicates = [HasSVEorSME] in { defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>; } // End HasSVEorSME -let Predicates = [HasSVE2orSME2, HasFAMINMAX] in { - defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famax_u>; - defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famin_u>; -} - let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in { defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>; defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>; @@ -4172,9 +4167,11 @@ defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>; } // End HasSVE2orSME2, HasFP8 let Predicates = [HasSVE2orSME2, HasFAMINMAX] in { -// FP8 Arithmetic - Predicated Group defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "FAMIN_ZPZZ", int_aarch64_sve_famin, DestructiveBinaryComm>; defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "FAMAX_ZPZZ", int_aarch64_sve_famax, DestructiveBinaryComm>; + +defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famax_u>; +defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famin_u>; } // End HasSVE2orSME2, HasFAMINMAX let Predicates = [HasSSVE_FP8FMA] in { diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll index 0d247328a9475d..7d16f8383d9682 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll @@ -36,7 +36,7 @@ define <vscale x 8 x half> @famin_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x hal ; CHECK: // %bb.0: ; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret - %r = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) + %r = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %b, <vscale x 8 x half> %a) ret <vscale x 8 x half> %r } @@ -45,7 +45,7 @@ define <vscale x 4 x float> @famin_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x fl ; CHECK: // %bb.0: ; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret - %r = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) + %r = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %b, <vscale x 4 x float> %a) ret <vscale x 4 x float> %r } @@ -54,7 +54,7 @@ define <vscale x 2 x double> @famin_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x d ; CHECK: // %bb.0: ; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret - %r = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) + %r = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %b, <vscale x 2 x double> %a) ret <vscale x 2 x double> %r } @@ -90,7 +90,7 @@ define <vscale x 8 x half> @famax_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x hal ; CHECK: // %bb.0: ; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret - %r = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) + %r = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %b, <vscale x 8 x half> %a) ret <vscale x 8 x half> %r } @@ -99,7 +99,7 @@ define <vscale x 4 x float> @famax_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x fl ; CHECK: // %bb.0: ; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret - %r = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) + %r = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %b, <vscale x 4 x float> %a) ret <vscale x 4 x float> %r } @@ -108,7 +108,7 @@ define <vscale x 2 x double> @famax_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x d ; CHECK: // %bb.0: ; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret - %r = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) + %r = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %b, <vscale x 2 x double> %a) ret <vscale x 2 x double> %r } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits