llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-aarch64 Author: Jonathan Thackray (jthackray) <details> <summary>Changes</summary> Add support for the following new AArch64 Neon intrinsics: ``` float16x8_t vmmlaq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t, fpm_t); float32x4_t vmmlaq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t, fpm_t); ``` --- Full diff: https://github.com/llvm/llvm-project/pull/165431.diff 4 Files Affected: - (modified) clang/include/clang/Basic/arm_neon.td (+8) - (modified) clang/lib/CodeGen/TargetBuiltins/ARM.cpp (+8) - (modified) clang/test/CodeGen/AArch64/v8.6a-neon-intrinsics.c (+26-1) - (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+6) ``````````diff diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index ef196103035e8..8e2174c880ed8 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -1896,6 +1896,14 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "f def VFMLSL_LANEQ_HIGH : SOpInst<"vfmlsl_laneq_high", "(F>)(F>)F(FQ)I", "hQh", OP_FMLSL_LN_Hi>; } +let ArchGuard = "defined(__aarch64__)", TargetGuard = "f8f16mm,neon" in { + def VMMLA_F16_MF8 : VInst<"vmmla_f16_mf8_fpm", "(>F)(>F)..V", "Qm">; +} + +let ArchGuard = "defined(__aarch64__)", TargetGuard = "f8f32mm,neon" in { + def VMMLA_F32_MF8 : VInst<"vmmla_f32_mf8_fpm", "(>>F)(>>F)..V", "Qm">; +} + let TargetGuard = "i8mm,neon" in { def VMMLA : SInst<"vmmla", "..(<<)(<<)", "QUiQi">; def VUSMMLA : SInst<"vusmmla", "..(<<U)(<<)", "Qi">; diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index 60f9b86333670..4075f56e6a032 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -7793,6 +7793,14 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_vluti4q_laneq_x2; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vluti4q_laneq_x2"); } + case NEON::BI__builtin_neon_vmmlaq_f16_mf8_fpm: + return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla, + {llvm::FixedVectorType::get(HalfTy, 8)}, Ops, E, + "fmmla"); + case NEON::BI__builtin_neon_vmmlaq_f32_mf8_fpm: + return EmitFP8NeonCall(Intrinsic::aarch64_neon_fmmla, + {llvm::FixedVectorType::get(FloatTy, 4)}, Ops, E, + "fmmla"); case NEON::BI__builtin_neon_vcvt1_low_bf16_mf8_fpm: ExtractLow = true; [[fallthrough]]; diff --git a/clang/test/CodeGen/AArch64/v8.6a-neon-intrinsics.c b/clang/test/CodeGen/AArch64/v8.6a-neon-intrinsics.c index 6fffcb6c6b391..0d592af59f85c 100644 --- a/clang/test/CodeGen/AArch64/v8.6a-neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/v8.6a-neon-intrinsics.c @@ -1,5 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.6a -target-feature +i8mm \ +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.6a -target-feature +i8mm -target-feature +f8f16mm -target-feature +f8f32mm -target-feature +fp8 \ // RUN: -disable-O0-optnone -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg,sroa \ // RUN: | FileCheck %s @@ -32,6 +32,31 @@ uint32x4_t test_vmmlaq_u32(uint32x4_t r, uint8x16_t a, uint8x16_t b) { return vmmlaq_u32(r, a, b); } +// CHECK-LABEL: define dso_local <8 x half> @test_vmmlaq_f16_mf8( +// CHECK-SAME: <8 x half> noundef [[P0:%.*]], <16 x i8> [[P1:%.*]], <16 x i8> [[P2:%.*]], i64 noundef [[P3:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[P0]] to <8 x i16> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8> +// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[P3]]) +// CHECK-NEXT: [[FMMLA_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> +// CHECK-NEXT: [[FMMLA1_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmmla.v8f16(<8 x half> [[FMMLA_I]], <16 x i8> [[P1]], <16 x i8> [[P2]]) +// CHECK-NEXT: ret <8 x half> [[FMMLA1_I]] +// +float16x8_t test_vmmlaq_f16_mf8(float16x8_t p0, mfloat8x16_t p1, mfloat8x16_t p2, fpm_t p3) { + return vmmlaq_f16_mf8_fpm(p0, p1, p2, p3); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_vmmlaq_f32_mf8( +// CHECK-SAME: <4 x float> noundef [[P0:%.*]], <16 x i8> [[P1:%.*]], <16 x i8> [[P2:%.*]], i64 noundef [[P3:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: call void @llvm.aarch64.set.fpmr(i64 [[P3]]) +// CHECK-NEXT: [[FMMLA_I:%.*]] = call <4 x float> @llvm.aarch64.neon.fmmla.v4f32(<4 x float> [[P0]], <16 x i8> [[P1]], <16 x i8> [[P2]]) +// CHECK-NEXT: ret <4 x float> [[FMMLA_I]] +// +float32x4_t test_vmmlaq_f32_mf8(float32x4_t p0, mfloat8x16_t p1, mfloat8x16_t p2, fpm_t p3) { + return vmmlaq_f32_mf8_fpm(p0, p1, p2, p3); +} + // CHECK-LABEL: define dso_local <4 x i32> @test_vusmmlaq_s32( // CHECK-SAME: <4 x i32> noundef [[R:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index b81edc385cd43..78a60e839775e 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -217,6 +217,11 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.". : DefaultAttrsIntrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v8bf16_ty, llvm_v8bf16_ty], [IntrNoMem]>; + + class AdvSIMD_MatMul_fpm_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; } // Arithmetic ops @@ -499,6 +504,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_neon_ummla : AdvSIMD_MatMul_Intrinsic; def int_aarch64_neon_smmla : AdvSIMD_MatMul_Intrinsic; def int_aarch64_neon_usmmla : AdvSIMD_MatMul_Intrinsic; + def int_aarch64_neon_fmmla : AdvSIMD_MatMul_fpm_Intrinsic; def int_aarch64_neon_usdot : AdvSIMD_Dot_Intrinsic; def int_aarch64_neon_bfdot : AdvSIMD_Dot_Intrinsic; def int_aarch64_neon_bfmmla `````````` </details> https://github.com/llvm/llvm-project/pull/165431 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
