https://github.com/SpencerAbson created https://github.com/llvm/llvm-project/pull/118025
This patch implements the following intrinsics: Convert to packed 8-bit floating-point format. ``` c // Variants are also available for: _mf8[_bf16_x2] and _mf8[_f32_x4] svmfloat8_t svcvt_mf8[_f16_x2]_fpm(svfloat16x2_t zn, fpm_t fpm) __arm_streaming; ``` Convert to interleaved 8-bit floating-point format. ``` c svmfloat8_t svcvtn_mf8[_f32_x4]_fpm(svfloat32x4_t zn, fpm_t fpm) __arm_streaming; ``` In accordance with https://github.com/ARM-software/acle/pull/323. Co-authored-by: Marin Lukac marian.lu...@arm.com Co-authored-by: Caroline Concatto caroline.conca...@arm.com >From 762f05d275a4b368f5f107cc50c6b5635c4b328a Mon Sep 17 00:00:00 2001 From: Spencer Abson <spencer.ab...@arm.com> Date: Sun, 24 Nov 2024 18:38:36 +0000 Subject: [PATCH 1/2] [AArch64] Implement intrinsics for FP8 FCVT/FCVTN/BFCVT --- clang/include/clang/Basic/arm_sve.td | 6 ++ .../fp8-intrinsics/acle_sme2_fp8_cvt.c | 64 +++++++++++++++++++ .../acle_sme2_fp8_cvt.c | 12 +++- llvm/include/llvm/IR/IntrinsicsAArch64.td | 16 +++++ .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 8 +-- llvm/lib/Target/AArch64/SMEInstrFormats.td | 15 ++++- .../AArch64/sme2-fp8-intrinsics-cvt.ll | 52 +++++++++++++++ 7 files changed, 165 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index b36e592042da0b..1d991095d5bf74 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2432,6 +2432,12 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in { // Convert from FP8 to deinterleaved half-precision/BFloat16 multi-vector def SVF1CVTL : Inst<"svcvtl1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl1_x2", [IsStreaming, SetsFPMR], []>; def SVF2CVTL : Inst<"svcvtl2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl2_x2", [IsStreaming, SetsFPMR], []>; + + // Convert from single/half/bfloat multivector to FP8 + def SVFCVT_X2 : Inst<"svcvt_mf8[_{d}_x2]_fpm", "~2>", "bh", MergeNone, "aarch64_sve_fp8_cvt_x2", [IsStreaming, SetsFPMR], []>; + def SVFCVT_X4 : Inst<"svcvt_mf8[_{d}_x4]_fpm", "~4>", "f", MergeNone, "aarch64_sve_fp8_cvt_x4", [IsOverloadNone, IsStreaming, SetsFPMR], []>; + // interleaved + def SVFCVTN_X4 : Inst<"svcvtn_mf8[_{d}_x4]_fpm", "~4>", "f", MergeNone, "aarch64_sve_fp8_cvtn_x4", [IsOverloadNone, IsStreaming, SetsFPMR], []>; } let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c index 5ba76671ff5d5b..16d3b551577d68 100644 --- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c @@ -16,6 +16,70 @@ #define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 #endif +// CHECK-LABEL: @test_cvt_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x2.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z15test_cvt_f16_x213svfloat16x2_tm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x2.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_cvt_f16_x2(svfloat16x2_t zn, fpm_t fpmr) __arm_streaming { + return SVE_ACLE_FUNC(svcvt_mf8,_f16_x2,_fpm)(zn, fpmr); +} + +// CHECK-LABEL: @test_cvt_f32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x4(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z15test_cvt_f32_x413svfloat32x4_tm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x4(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_cvt_f32_x4(svfloat32x4_t zn, fpm_t fpmr) __arm_streaming { + return SVE_ACLE_FUNC(svcvt_mf8,_f32_x4,_fpm)(zn, fpmr); +} + +// CHECK-LABEL: @test_cvtn_f32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.x4(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_cvtn_f32_x413svfloat32x4_tm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.x4(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]], <vscale x 4 x float> [[ZN_COERCE2:%.*]], <vscale x 4 x float> [[ZN_COERCE3:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_cvtn_f32_x4(svfloat32x4_t zn, fpm_t fpmr) __arm_streaming { + return SVE_ACLE_FUNC(svcvtn_mf8,_f32_x4,_fpm)(zn, fpmr); +} + +// CHECK-LABEL: @test_cvt_bf16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x2.nxv8bf16(<vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]]) +// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_cvt_bf16_x214svbfloat16x2_tm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x2.nxv8bf16(<vscale x 8 x bfloat> [[ZN_COERCE0:%.*]], <vscale x 8 x bfloat> [[ZN_COERCE1:%.*]]) +// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] +// +svmfloat8_t test_cvt_bf16_x2(svbfloat16x2_t zn, fpm_t fpmr) __arm_streaming { + return SVE_ACLE_FUNC(svcvt_mf8,_bf16_x2,_fpm)(zn, fpmr); +} + // CHECK-LABEL: @test_cvtl1_f16_x2( // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]]) diff --git a/clang/test/Sema/aarch64-fp8-intrinsics/acle_sme2_fp8_cvt.c b/clang/test/Sema/aarch64-fp8-intrinsics/acle_sme2_fp8_cvt.c index 09a80c9dff03ea..80fe8a0143dc84 100644 --- a/clang/test/Sema/aarch64-fp8-intrinsics/acle_sme2_fp8_cvt.c +++ b/clang/test/Sema/aarch64-fp8-intrinsics/acle_sme2_fp8_cvt.c @@ -5,7 +5,8 @@ #include <arm_sve.h> -void test_features_sme2_fp8(svmfloat8_t zn, fpm_t fpmr) __arm_streaming { +void test_features_sme2_fp8(svmfloat8_t zn, svfloat16x2_t znf16, svbfloat16x2_t znbf16, + svfloat32x4_t znf32, fpm_t fpmr) __arm_streaming { // expected-error@+1 {{'svcvtl1_f16_mf8_x2_fpm' needs target feature sme,sme2,fp8}} svcvtl1_f16_mf8_x2_fpm(zn, fpmr); // expected-error@+1 {{'svcvtl2_f16_mf8_x2_fpm' needs target feature sme,sme2,fp8}} @@ -14,4 +15,13 @@ void test_features_sme2_fp8(svmfloat8_t zn, fpm_t fpmr) __arm_streaming { svcvtl1_bf16_mf8_x2_fpm(zn, fpmr); // expected-error@+1 {{'svcvtl2_bf16_mf8_x2_fpm' needs target feature sme,sme2,fp8}} svcvtl2_bf16_mf8_x2_fpm(zn, fpmr); + + // expected-error@+1 {{'svcvt_mf8_f16_x2_fpm' needs target feature sme,sme2,fp8}} + svcvt_mf8_f16_x2_fpm(znf16, fpmr); + // expected-error@+1 {{'svcvt_mf8_bf16_x2_fpm' needs target feature sme,sme2,fp8}} + svcvt_mf8_bf16_x2_fpm(znbf16, fpmr); + // expected-error@+1 {{'svcvt_mf8_f32_x4_fpm' needs target feature sme,sme2,fp8}} + svcvt_mf8_f32_x4_fpm(znf32, fpmr); + // expected-error@+1 {{'svcvtn_mf8_f32_x4_fpm' needs target feature sme,sme2,fp8}} + svcvtn_mf8_f32_x4_fpm(znf32, fpmr); } \ No newline at end of file diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index a91616b9556828..992ce495b25ae2 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3817,11 +3817,27 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_nxv16i8_ty], [IntrReadMem, IntrInaccessibleMemOnly]>; + + class SME2_FP8_CVT_Single_X4_Intrinsic + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], + [llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty], + [IntrReadMem, IntrInaccessibleMemOnly]>; // // CVT from FP8 to deinterleaved half-precision/BFloat16 multi-vector // def int_aarch64_sve_fp8_cvtl1_x2 : SME2_FP8_CVT_X2_Single_Intrinsic; def int_aarch64_sve_fp8_cvtl2_x2 : SME2_FP8_CVT_X2_Single_Intrinsic; + + // + // CVT to FP8 from half-precision/BFloat16/single-precision multi-vector + // + def int_aarch64_sve_fp8_cvt_x2 + : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], + [llvm_anyvector_ty, LLVMMatchType<0>], + [IntrReadMem, IntrInaccessibleMemOnly]>; + + def int_aarch64_sve_fp8_cvt_x4 : SME2_FP8_CVT_Single_X4_Intrinsic; + def int_aarch64_sve_fp8_cvtn_x4 : SME2_FP8_CVT_Single_X4_Intrinsic; } // SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2 diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td index 37ac915d1d8808..f254b7d28d0600 100644 --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -954,10 +954,10 @@ defm F2CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"f2cvtl", 0b10, 0b1>; defm BF2CVT_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvt", 0b11, 0b0>; defm BF2CVTL_2ZZ_BtoH : sme2p1_fp8_cvt_vector_vg2_single<"bf2cvtl", 0b11, 0b1>; -defm FCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"fcvt", 0b0>; -defm BFCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"bfcvt", 0b1>; -defm FCVT_Z4Z_StoB : sme2_fp8_cvt_vg4_single<"fcvt", 0b0>; -defm FCVTN_Z4Z_StoB : sme2_fp8_cvt_vg4_single<"fcvtn", 0b1>; +defm FCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"fcvt", 0b0, nxv8f16, int_aarch64_sve_fp8_cvt_x2>; +defm BFCVT_Z2Z_HtoB : sme2_fp8_cvt_vg2_single<"bfcvt", 0b1, nxv8bf16, int_aarch64_sve_fp8_cvt_x2>; +defm FCVT_Z4Z_StoB : sme2_fp8_cvt_vg4_single<"fcvt", 0b0, int_aarch64_sve_fp8_cvt_x4>; +defm FCVTN_Z4Z_StoB : sme2_fp8_cvt_vg4_single<"fcvtn", 0b1, int_aarch64_sve_fp8_cvtn_x4>; defm FSCALE_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fscale", 0b0011000>; defm FSCALE_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fscale", 0b0011000>; diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 776472e72af05a..56155216d3b902 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -2376,10 +2376,14 @@ multiclass sme2_cvt_vg2_single<string mnemonic, bits<5> op, ValueType out_vt, } // SME2 multi-vec FP8 down convert two registers -multiclass sme2_fp8_cvt_vg2_single<string mnemonic, bit op> { +multiclass sme2_fp8_cvt_vg2_single<string mnemonic, bit op, ValueType in_vt, SDPatternOperator intrinsic> { def NAME : sme2_cvt_vg2_single<mnemonic, {op, 0b1000}, ZPR8, ZZ_h_mul_r>{ + let mayLoad = 1; + let mayStore = 0; let Uses = [FPMR, FPCR]; } + def : Pat<(nxv16i8 (intrinsic in_vt:$Zn1, in_vt:$Zn2)), + (!cast<Instruction>(NAME) (REG_SEQUENCE ZPR2Mul2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1))>; } class sme2_cvt_unpk_vector_vg2<bits<2>sz, bits<3> op, bit u, RegisterOperand first_ty, @@ -2445,8 +2449,13 @@ multiclass sme2_int_cvt_vg4_single<string mnemonic, bits<3> op, SDPatternOperato } //SME2 multi-vec FP8 down convert four registers -multiclass sme2_fp8_cvt_vg4_single<string mnemonic, bit N> { - def _NAME : sme2_cvt_vg4_single<0b0, {0b00, N}, 0b0100, ZPR8, ZZZZ_s_mul_r, mnemonic>; +multiclass sme2_fp8_cvt_vg4_single<string mnemonic, bit N, SDPatternOperator intrinsic> { + def NAME : sme2_cvt_vg4_single<0b0, {0b00, N}, 0b0100, ZPR8, ZZZZ_s_mul_r, mnemonic> { + let mayLoad = 1; + let mayStore = 0; + let Uses = [FPMR, FPCR]; + } + def : SME2_Cvt_VG4_Pat<NAME, intrinsic, nxv16i8, nxv4f32>; } class sme2_unpk_vector_vg4<bits<2>sz, bit u, RegisterOperand first_ty, diff --git a/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll b/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll index 076a3ad34eac3c..576588d7f36c19 100644 --- a/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll +++ b/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll @@ -1,6 +1,58 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+fp8 -verify-machineinstrs -force-streaming < %s | FileCheck %s +; FCVT / FCVTN / BFCVT + +define <vscale x 16 x i8> @fcvt_x2(<vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1) { +; CHECK-LABEL: fcvt_x2: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fcvt z0.b, { z0.h, z1.h } +; CHECK-NEXT: ret + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x2.nxv8f16(<vscale x 8 x half> %zn0, <vscale x 8 x half> %zn1) + ret <vscale x 16 x i8> %res +} + +define <vscale x 16 x i8> @fcvt_x4(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) { +; CHECK-LABEL: fcvt_x4: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fcvt z0.b, { z0.s - z3.s } +; CHECK-NEXT: ret + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x4(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, + <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) + ret <vscale x 16 x i8> %res +} + +define <vscale x 16 x i8> @fcvtn(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) { +; CHECK-LABEL: fcvtn: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fcvtn z0.b, { z0.s - z3.s } +; CHECK-NEXT: ret + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvtn.x4(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, + <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) + ret <vscale x 16 x i8> %res +} + +define <vscale x 16 x i8> @bfcvt(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1) { +; CHECK-LABEL: bfcvt: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: bfcvt z0.b, { z0.h, z1.h } +; CHECK-NEXT: ret + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x2.nxv8f16(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1) + ret <vscale x 16 x i8> %res +} + ; F1CVTL / F2CVTL define { <vscale x 8 x half>, <vscale x 8 x half> } @f1cvtl(<vscale x 16 x i8> %zm) { >From 643602e315fd77c3f9c8c3cabcde596342793444 Mon Sep 17 00:00:00 2001 From: Spencer Abson <spencer.ab...@arm.com> Date: Thu, 28 Nov 2024 17:13:28 +0000 Subject: [PATCH 2/2] [NFC] Fixup llc test --- llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll b/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll index 576588d7f36c19..26394a81acbd42 100644 --- a/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll +++ b/llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll @@ -49,7 +49,7 @@ define <vscale x 16 x i8> @bfcvt(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloa ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfcvt z0.b, { z0.h, z1.h } ; CHECK-NEXT: ret - %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x2.nxv8f16(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1) + %res = call <vscale x 16 x i8> @llvm.aarch64.sve.fp8.cvt.x2.nxv8bf16(<vscale x 8 x bfloat> %zn0, <vscale x 8 x bfloat> %zn1) ret <vscale x 16 x i8> %res } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits