llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Jonathan Thackray (jthackray) <details> <summary>Changes</summary> Add support for the following SME 8 bit floating-point dot-product intrinsics: * svdot_single_za16_mf8_vg1x2_fpm(uint32_t slice, svmfloat8x2_t f8x2, svmfloat8_t f8, fpm_t fpm); * svdot_single_za16_mf8_vg1x4_fpm(uint32_t slice, svmfloat8x2_t f8x2, svmfloat8_t f8, fpm_t fpm); * svdot_single_za32_mf8_vg1x2_fpm(uint32_t slice, svmfloat8x2_t f8x2, svmfloat8_t f8, fpm_t fpm); * svdot_single_za32_mf8_vg1x4_fpm(uint32_t slice, svmfloat8x2_t f8x2, svmfloat8_t f8, fpm_t fpm); * svdot_za16_mf8_vg1x2_fpm(uint32_t slice, svmfloat8x2_t f8x2, svmfloat8_t f8, fpm_t fpm); * svdot_za16_mf8_vg1x4_fpm(uint32_t slice, svmfloat8x2_t f8x2, svmfloat8_t f8, fpm_t fpm); * svdot_za32_mf8_vg1x2_fpm(uint32_t slice, svmfloat8x2_t f8x2, svmfloat8_t f8, fpm_t fpm); * svdot_za32_mf8_vg1x4_fpm(uint32_t slice, svmfloat8x2_t f8x2, svmfloat8_t f8, fpm_t fpm); --- Patch is 38.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/119845.diff 7 Files Affected: - (modified) clang/include/clang/Basic/arm_sme.td (+12) - (modified) clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c (+167-7) - (modified) clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_fp8_fdot.c (+16) - (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+36) - (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+8-9) - (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+64) - (modified) llvm/test/CodeGen/AArch64/sme2-intrinsics-fp8-fdot.ll (+112) ``````````diff diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 0fae70866cd55e..e7625d8d3e0b8a 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -748,11 +748,23 @@ let SMETargetGuard = "sme2" in { let SMETargetGuard = "sme-f8f32" in { def SVDOT_LANE_FP8_ZA32_VG1x2 : Inst<"svdot_lane_za32[_mf8]_vg1x2_fpm", "vm2di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>; def SVDOT_LANE_FP8_ZA32_VG1x4 : Inst<"svdot_lane_za32[_mf8]_vg1x4_fpm", "vm4di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za32_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>; + + def SVDOT_SINGLE_FP8_ZA32_VG1x2 : Inst<"svdot[_single]_za32[_mf8]_vg1x2_fpm", "vm2d>", "m", MergeNone, "aarch64_sme_fp8_fdot_single_za32_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], []>; + def SVDOT_SINGLE_FP8_ZA32_VG1x4 : Inst<"svdot[_single]_za32[_mf8]_vg1x4_fpm", "vm4d>", "m", MergeNone, "aarch64_sme_fp8_fdot_single_za32_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], []>; + + def SVDOT_MULTI_FP8_ZA32_VG1x2 : Inst<"svdot_za32[_mf8]_vg1x2_fpm", "vm22>", "m", MergeNone, "aarch64_sme_fp8_fdot_multi_za32_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], []>; + def SVDOT_MULTI_FP8_ZA32_VG1x4 : Inst<"svdot_za32[_mf8]_vg1x4_fpm", "vm44>", "m", MergeNone, "aarch64_sme_fp8_fdot_multi_za32_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], []>; } let SMETargetGuard = "sme-f8f16" in { def SVDOT_LANE_FP8_ZA16_VG1x2 : Inst<"svdot_lane_za16[_mf8]_vg1x2_fpm", "vm2di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za16_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; def SVDOT_LANE_FP8_ZA16_VG1x4 : Inst<"svdot_lane_za16[_mf8]_vg1x4_fpm", "vm4di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za16_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; + + def SVDOT_SINGLE_FP8_ZA16_VG1x2 : Inst<"svdot[_single]_za16[_mf8]_vg1x2_fpm", "vm2d>", "m", MergeNone, "aarch64_sme_fp8_fdot_single_za16_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], []>; + def SVDOT_SINGLE_FP8_ZA16_VG1x4 : Inst<"svdot[_single]_za16[_mf8]_vg1x4_fpm", "vm4d>", "m", MergeNone, "aarch64_sme_fp8_fdot_single_za16_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], []>; + + def SVDOT_MULTI_FP8_ZA16_VG1x2 : Inst<"svdot_za16[_mf8]_vg1x2_fpm", "vm22>", "m", MergeNone, "aarch64_sme_fp8_fdot_multi_za16_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], []>; + def SVDOT_MULTI_FP8_ZA16_VG1x4 : Inst<"svdot_za16[_mf8]_vg1x4_fpm", "vm44>", "m", MergeNone, "aarch64_sme_fp8_fdot_multi_za16_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], []>; } //////////////////////////////////////////////////////////////////////////////// diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c index 74d18c32d5b3ab..a151d162e01085 100644 --- a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c +++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fdot.c @@ -1,18 +1,18 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 // REQUIRES: aarch64-registered-target -#include <arm_sme.h> // RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s // RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK // RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + #include <arm_sme.h> #ifdef SVE_OVERLOADED_FORMS -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5 #else -#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5 #endif // CHECK-LABEL: define dso_local void @test_svdot_lane_za32_f8_vg1x2( @@ -32,7 +32,7 @@ void test_svdot_lane_za32_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { - SVE_ACLE_FUNC(svdot_lane_za32,_mf8,_vg1x2_fpm)(slice, zn, zm, 3, fpmr); + SVE_ACLE_FUNC(svdot_lane_za32,_mf8,_vg1x2_fpm,,)(slice, zn, zm, 3, fpmr); } // CHECK-LABEL: define dso_local void @test_svdot_lane_za32_f8_vg1x4( @@ -52,7 +52,7 @@ void test_svdot_lane_za32_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn, void test_svdot_lane_za32_f8_vg1x4(uint32_t slice, svmfloat8x4_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { - SVE_ACLE_FUNC(svdot_lane_za32,_mf8,_vg1x4_fpm)(slice, zn, zm, 3, fpmr); + SVE_ACLE_FUNC(svdot_lane_za32,_mf8,_vg1x4_fpm,,)(slice, zn, zm, 3, fpmr); } // CHECK-LABEL: define dso_local void @test_svdot_lane_za16_f8_vg1x2( @@ -72,7 +72,7 @@ void test_svdot_lane_za32_f8_vg1x4(uint32_t slice, svmfloat8x4_t zn, void test_svdot_lane_za16_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { - SVE_ACLE_FUNC(svdot_lane_za16,_mf8,_vg1x2_fpm)(slice, zn, zm, 3, fpmr); + SVE_ACLE_FUNC(svdot_lane_za16,_mf8,_vg1x2_fpm,,)(slice, zn, zm, 3, fpmr); } // CHECK-LABEL: define dso_local void @test_svdot_lane_za16_f8_vg1x4( @@ -92,5 +92,165 @@ void test_svdot_lane_za16_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn, void test_svdot_lane_za16_f8_vg1x4(uint32_t slice, svmfloat8x4_t zn, svmfloat8_t zm, fpm_t fpmr) __arm_streaming __arm_inout("za") { - SVE_ACLE_FUNC(svdot_lane_za16,_mf8,_vg1x4_fpm)(slice, zn, zm, 3, fpmr); + SVE_ACLE_FUNC(svdot_lane_za16,_mf8,_vg1x4_fpm,,)(slice, zn, zm, 3, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svdot_single_za32_f8_vg1x2( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.single.za32.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z31test_svdot_single_za32_f8_vg1x2j13svmfloat8x2_tu13__SVMfloat8_tm( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.single.za32.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) +// CPP-CHECK-NEXT: ret void +// +void test_svdot_single_za32_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn, + svmfloat8_t zm, fpm_t fpmr) + __arm_streaming __arm_inout("za") { + SVE_ACLE_FUNC(svdot,_single,_za32,_mf8,_vg1x2_fpm)(slice, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svdot_single_za32_f8_vg1x4( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.single.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z31test_svdot_single_za32_f8_vg1x4j13svmfloat8x4_tu13__SVMfloat8_tm( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.single.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]]) +// CPP-CHECK-NEXT: ret void +// +void test_svdot_single_za32_f8_vg1x4(uint32_t slice, svmfloat8x4_t zn, + svmfloat8_t zm, fpm_t fpmr) + __arm_streaming __arm_inout("za") { + SVE_ACLE_FUNC(svdot,_single,_za32,_mf8,_vg1x4_fpm)(slice, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svdot_multi_za32_f8_vg1x2( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.multi.za32.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z30test_svdot_multi_za32_f8_vg1x2j13svmfloat8x2_tS_m( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.multi.za32.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svdot_multi_za32_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn, + svmfloat8x2_t zm, fpm_t fpmr) + __arm_streaming __arm_inout("za") { + SVE_ACLE_FUNC(svdot,,_za32,_mf8,_vg1x2_fpm) (slice, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svdot_multi_za32_f8_vg1x4( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.multi.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE2]], <vscale x 16 x i8> [[ZM_COERCE3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z30test_svdot_multi_za32_f8_vg1x4j13svmfloat8x4_tS_m( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.multi.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE2]], <vscale x 16 x i8> [[ZM_COERCE3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svdot_multi_za32_f8_vg1x4(uint32_t slice, svmfloat8x4_t zn, + svmfloat8x4_t zm, fpm_t fpmr) + __arm_streaming __arm_inout("za") { + SVE_ACLE_FUNC(svdot,,_za32,_mf8,_vg1x4_fpm)(slice, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svdot_single_za16_f8_vg1x2( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.single.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z31test_svdot_single_za16_f8_vg1x2j13svmfloat8x2_tu13__SVMfloat8_tm( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.single.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]]) +// CPP-CHECK-NEXT: ret void +// +void test_svdot_single_za16_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn, + svmfloat8_t zm, fpm_t fpmr) + __arm_streaming __arm_inout("za") { + SVE_ACLE_FUNC(svdot,_single,_za16,_mf8,_vg1x2_fpm)(slice, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svdot_single_za16_f8_vg1x4( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.single.za16.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z31test_svdot_single_za16_f8_vg1x4j13svmfloat8x4_tu13__SVMfloat8_tm( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.single.za16.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM]]) +// CPP-CHECK-NEXT: ret void +// +void test_svdot_single_za16_f8_vg1x4(uint32_t slice, svmfloat8x4_t zn, + svmfloat8_t zm, fpm_t fpmr) + __arm_streaming __arm_inout("za") { + SVE_ACLE_FUNC(svdot,_single,_za16,_mf8,_vg1x4_fpm)(slice, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svdot_multi_za16_f8_vg1x2( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.multi.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z30test_svdot_multi_za16_f8_vg1x2j13svmfloat8x2_tS_m( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CPP-CHECK-NEXT: [[ENTRY:.*:]] +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.multi.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svdot_multi_za16_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn, + svmfloat8x2_t zm, fpm_t fpmr) + __arm_streaming __arm_inout("za") { + SVE_ACLE_FUNC(svdot,,_za16,_mf8,_vg1x2_fpm) (slice, zn, zm, fpmr); +} + +// CHECK-LABEL: define dso_local void @test_svdot_multi_za16_f8_vg1x4( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.*]], <vscale x 16 x i8> [[ZM_COERCE3:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fdot.multi.za16.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZN_COERCE2]], <vscale x 16 x i8> [[ZN_COERCE3]], <vscale x 16 x i8> [[ZM_COERCE0]], <vscale x 16 x i8> [[ZM_COERCE1]], <vscale x 16 x i8> [[ZM_COERCE2]], <vscale x 16 x i8> [[ZM_COERCE3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: define dso_local void @_Z30test_svdot_multi_za16_f8_vg1x4j13svmfloat8x4_tS_m( +// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZN_COERCE2:%.*]], <vscale x 16 x i8> [[ZN_COERCE3:%.*]], <vscale x 16 x i8> [[ZM_COERCE0:%.*]], <vscale x 16 x i8> [[ZM_COERCE1:%.*]], <vscale x 16 x i8> [[ZM_COERCE2:%.... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/119845 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits