llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-clang Author: Virginia Cangelosi (virginia-cangelosi) <details> <summary>Changes</summary> Implement all {BF/F/S/U/SU/US}MOP4{A/S} instructions in clang and llvm following the acle in https://github.com/ARM-software/acle/pull/381/files --- Patch is 59.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/127797.diff 6 Files Affected: - (modified) clang/include/clang/Basic/arm_sme.td (+54) - (added) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c (+465) - (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+12-1) - (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+34-34) - (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+69-18) - (added) llvm/test/CodeGen/AArch64/sme2-intrinsics-mop4a_1x1.ll (+247) ``````````diff diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 288a8c04c217f..2af29ad6699b6 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -376,6 +376,19 @@ let SMETargetGuard = "sme2" in { // Outer product and accumulate/subtract // +multiclass MOP4SingleSingle<string name, string n, string t, string i, string wide> { + def NAME : Inst<"svmop4" # name # "_1x1_" # n # "[_{d}_{d}]", "vidd", t, MergeNone, i # wide # "_1x1", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; +} + +multiclass MOP4MixedSignsSingleSingle<string n_suffix1, string n_suffix2, string za, string t> { + def NAME : SInst<"sv" # n_suffix2 # "_1x1_" # za # "[_{2}_{3}]", + "vid" # !cond(!eq(n_suffix1, "su") : "u", true: "x"), + !cond(!eq(n_suffix1, "su") : "", true: "U") # t, + MergeNone, "aarch64_sme_" # n_suffix2 # "_wide_1x1", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_3>]>; +} + let SMETargetGuard = "sme2" in { def SVSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "s", MergeNone, "aarch64_sme_smopa_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; def SVUSMOPA : Inst<"svmopa_za32[_{d}]_m", "viPPdd", "Us", MergeNone, "aarch64_sme_umopa_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; @@ -387,6 +400,29 @@ let SMETargetGuard = "sme2" in { def SVBMOPS : Inst<"svbmops_za32[_{d}]_m", "viPPdd", "iUi", MergeNone, "aarch64_sme_bmops_za32", [IsInOutZA, IsStreaming], [ImmCheck<0, ImmCheck0_3>]>; + defm SVSMOP4A_MZZ_HtoS : MOP4SingleSingle<"a", "za32", "s", "aarch64_sme_mop4a", "_wide">; + defm SVSMOP4S_MZZ_HtoS : MOP4SingleSingle<"s", "za32", "s", "aarch64_sme_mop4s", "_wide">; + defm SVSMOP4A_MZZ_BToS : MOP4SingleSingle<"a", "za32", "c", "aarch64_sme_mop4a", "_wide">; + defm SVSMOP4S_MZZ_BToS : MOP4SingleSingle<"s", "za32", "c", "aarch64_sme_mop4s", "_wide">; + + defm SVUMOP4A_MZZ_HtoS : MOP4SingleSingle<"a", "za32", "Us", "aarch64_sme_mop4a", "_wide">; + defm SVUMOP4S_MZZ_HtoS : MOP4SingleSingle<"s", "za32", "Us", "aarch64_sme_mop4s", "_wide">; + defm SVUMOP4A_MZZ_BToS : MOP4SingleSingle<"a", "za32", "Uc", "aarch64_sme_mop4a", "_wide">; + defm SVUMOP4S_MZZ_BToS : MOP4SingleSingle<"s", "za32", "Uc", "aarch64_sme_mop4s", "_wide">; + + defm SVFMOP4A_MZZ_HtoS : MOP4SingleSingle<"a", "za32", "h", "aarch64_sme_mop4a", "_wide">; + defm SVFMOP4S_MZZ_HtoS : MOP4SingleSingle<"s", "za32", "h", "aarch64_sme_mop4s", "_wide">; + defm SVFMOP4A_MZZ_S : MOP4SingleSingle<"a", "za32", "f", "aarch64_sme_mop4a", "">; + defm SVFMOP4S_MZZ_S : MOP4SingleSingle<"s", "za32", "f", "aarch64_sme_mop4s", "">; + + defm SVBMOP4A_MZZ_S : MOP4SingleSingle<"a", "za32", "b", "aarch64_sme_mop4a", "_wide">; + defm SVBMOP4S_MZZ_S : MOP4SingleSingle<"s", "za32", "b", "aarch64_sme_mop4s", "_wide">; + + defm SVSUMOP4A_MZZ_BtoS : MOP4MixedSignsSingleSingle<"su", "mop4a", "za32", "c">; + defm SVUSMOP4A_MZZ_BtoS : MOP4MixedSignsSingleSingle<"us", "mop4a", "za32", "c">; + defm SVSUMOP4S_MZZ_BtoS : MOP4MixedSignsSingleSingle<"su", "mop4s", "za32", "c">; + defm SVUSMOP4S_MZZ_BtoS : MOP4MixedSignsSingleSingle<"us", "mop4s", "za32", "c">; + // VERTICAL DOT-PRODUCT def SVVDOT_LANE_ZA32_VG1x2_S : Inst<"svvdot_lane_za32[_{d}]_vg1x2", "vm2di", "s", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; def SVVDOT_LANE_ZA32_VG1x4_S : Inst<"svvdot_lane_za32[_{d}]_vg1x4", "vm4di", "c", MergeNone, "aarch64_sme_svdot_lane_za32_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_3>]>; @@ -437,6 +473,15 @@ let SMETargetGuard = "sme2" in { } let SMETargetGuard = "sme2,sme-i16i64" in { + defm SVSMOP4A_MZZ_HtoD : MOP4SingleSingle<"a", "za64", "s", "aarch64_sme_mop4a", "_wide">; + defm SVSMOP4S_MZZ_HtoD : MOP4SingleSingle<"s", "za64", "s", "aarch64_sme_mop4s", "_wide">; + defm SVUMOP4A_MZZ_HtoD : MOP4SingleSingle<"a", "za64", "Us", "aarch64_sme_mop4a", "_wide">; + defm SVUMOP4S_MZZ_HtoD : MOP4SingleSingle<"s", "za64", "Us", "aarch64_sme_mop4s", "_wide">; + defm SVSUMOP4A_MZZ_HtoD : MOP4MixedSignsSingleSingle<"su", "mop4a", "za64", "s">; + defm SVUSMOP4A_MZZ_HtoD : MOP4MixedSignsSingleSingle<"us", "mop4a", "za64", "s">; + defm SVSUMOP4S_MZZ_HtoD : MOP4MixedSignsSingleSingle<"su", "mop4s", "za64", "s">; + defm SVUSMOP4S_MZZ_HtoD : MOP4MixedSignsSingleSingle<"us", "mop4s", "za64", "s">; + def SVVDOT_LANE_ZA64_VG1x4_S : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "s", MergeNone, "aarch64_sme_svdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>; def SVVDOT_LANE_ZA64_VG1x4_U : Inst<"svvdot_lane_za64[_{d}]_vg1x4", "vm4di", "Us", MergeNone, "aarch64_sme_uvdot_lane_za64_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>; @@ -473,6 +518,9 @@ let SMETargetGuard = "sme2" in { } let SMETargetGuard = "sme2,sme-f64f64" in { + defm SVFMOP4A_MZZ_D : MOP4SingleSingle<"a", "za64", "d", "aarch64_sme_mop4a", "">; + defm SVFMOP4S_MZZ_D : MOP4SingleSingle<"s", "za64", "d", "aarch64_sme_mop4s", "">; + def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vm44", "d", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; def SVMLS_MULTI_VG1x2_F64 : Inst<"svmls_za64[_{d}]_vg1x2", "vm22", "d", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>; @@ -490,6 +538,9 @@ let SMETargetGuard = "sme2,sme-f64f64" in { } let SMETargetGuard = "sme-f16f16" in { + defm SVFMOP4A_MZZ_H : MOP4SingleSingle<"a", "za16", "h", "aarch64_sme_mop4a", "">; + defm SVFMOP4S_MZZ_H : MOP4SingleSingle<"s", "za16", "h", "aarch64_sme_mop4s", "">; + def SVMLA_MULTI_VG1x2_F16 : Inst<"svmla_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; def SVMLA_MULTI_VG1x4_F16 : Inst<"svmla_za16[_f16]_vg1x4", "vm44", "h", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; def SVMLS_MULTI_VG1x2_F16 : Inst<"svmls_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>; @@ -507,6 +558,9 @@ let SMETargetGuard = "sme-f16f16" in { } let SMETargetGuard = "sme-b16b16" in { + defm SVBMOP4A_MZZ_H : MOP4SingleSingle<"a", "za16", "bf", "aarch64_sme_mop4a", "">; + defm SVBMOP4S_MZZ_H : MOP4SingleSingle<"s", "za16", "bf", "aarch64_sme_mop4s", "">; + def SVMLA_MULTI_VG1x2_BF16 : Inst<"svmla_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; def SVMLA_MULTI_VG1x4_BF16 : Inst<"svmla_za16[_bf16]_vg1x4", "vm44", "b", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; def SVMLS_MULTI_VG1x2_BF16 : Inst<"svmls_za16[_bf16]_vg1x2", "vm22", "b", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>; diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c new file mode 100644 index 0000000000000..37238053009fd --- /dev/null +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_mop4_1x1.c @@ -0,0 +1,465 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64 -target-feature +bf16 -target-feature +sme-mop4 -target-feature +sme-f16f16 -target-feature +sme-i16i64 -target-feature +sme-b16b16 -target-feature +sme-f64f64 -target-feature +sme -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + + +#include <arm_sme.h> + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: @test_svmop4a_1x1_za32_s8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_s8_s8u10__SVInt8_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_s8_s8,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_s8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_s8_s8u10__SVInt8_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_s8_s8(svint8_t zn, svint8_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_s8_s8,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za32_u8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4a_1x1_za32_u8_u8u11__SVUint8_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_u8_u8,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_u8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svmop4s_1x1_za32_u8_u8u11__SVUint8_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv16i8(i32 3, <vscale x 16 x i8> [[ZN:%.*]], <vscale x 16 x i8> [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_u8_u8(svuint8_t zn, svuint8_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_u8_u8,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za32_s16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za32_s16_s16u11__SVInt16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_s16_s16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_s16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za32_s16_s16u11__SVInt16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_s16_s16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za32_u16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za32_u16_u16u12__SVUint16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_u16_u16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_u16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za32_u16_u16u12__SVUint16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_u16_u16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za32_f16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8f16(i32 3, <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za32_f16_f16u13__SVFloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8f16(i32 3, <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_f16_f16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_f16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8f16(i32 3, <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za32_f16_f16u13__SVFloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8f16(i32 3, <vscale x 8 x half> [[ZN:%.*]], <vscale x 8 x half> [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_f16_f16(svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_f16_f16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za32_bf16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8bf16(i32 3, <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svmop4a_1x1_za32_bf16_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8bf16(i32 3, <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za32_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_bf16_bf16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za32_bf16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8bf16(i32 3, <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z31test_svmop4s_1x1_za32_bf16_bf16u14__SVBfloat16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8bf16(i32 3, <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za32_bf16_bf16(svbfloat16_t zn, svbfloat16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_bf16_bf16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za64_s16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_s16_s16u11__SVInt16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za32,_s16_s16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za64_s16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_s16_s16u11__SVInt16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4s_1x1_za64_s16_s16(svint16_t zn, svint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4s_1x1_za32,_s16_s16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4a_1x1_za64_u16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4a_1x1_za64_u16_u16u12__SVUint16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4a.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmop4a_1x1_za64_u16_u16(svuint16_t zn, svuint16_t zm) __arm_streaming __arm_inout("za") { + SME_ACLE_FUNC(svmop4a_1x1_za64,_u16_u16,)(3, zn, zm); +} + +// CHECK-LABEL: @test_svmop4s_1x1_za64_u16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svmop4s_1x1_za64_u16_u16u12__SVUint16_tS_( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mop4s.wide.1x1.nxv8i16(i32 3, <vscale x 8 x i16> [[ZN:%.*]], <vscale x 8 x i16> [[ZM:%.*]]) +// CPP-CHECK... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/127797 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits