https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/142557
Now that #141786 handles scalar and neon types, this adds MVE definitions and legalization for llvm.roundeven intrinsics. The existing llvm.arm.mve.vrintn intrinsics are auto-upgraded to llvm.roundeven like other vrint instructions, so they should continue to work. From b187e7baca5ba141f3524b11533a6f201b05b5bc Mon Sep 17 00:00:00 2001 From: David Green <david.gr...@arm.com> Date: Tue, 3 Jun 2025 08:18:51 +0100 Subject: [PATCH] [ARM] Handle roundeven for MVE. Now that #141786 handles scalar and neon types, this adds MVE definitions and legalization for llvm.roundeven intrinsics. The existing llvm.arm.mve.vrintn are auto-upgraded to llvm.roundeven like other vrint instructions, so should continue to work. --- clang/include/clang/Basic/arm_mve.td | 2 +- clang/test/CodeGen/arm-mve-intrinsics/vrnd.c | 4 +- llvm/include/llvm/IR/IntrinsicsARM.td | 2 - llvm/lib/IR/AutoUpgrade.cpp | 6 ++ llvm/lib/Target/ARM/ARMInstrMVE.td | 2 +- llvm/test/CodeGen/Thumb2/mve-frint.ll | 89 +++++++++++++++---- .../CodeGen/Thumb2/mve-intrinsics/vrintn.ll | 2 + 7 files changed, 83 insertions(+), 24 deletions(-) diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td index e33c065059c44..412ef9abac1bc 100644 --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -710,7 +710,7 @@ defm vrndmq: vrnd<IRIntBase<"floor", [Vector]>, "m">; defm vrndpq: vrnd<IRIntBase<"ceil", [Vector]>, "p">; defm vrndaq: vrnd<IRIntBase<"round", [Vector]>, "a">; defm vrndxq: vrnd<IRIntBase<"rint", [Vector]>, "x">; -defm vrndnq: vrnd<IRInt<"vrintn", [Vector]>, "n">; +defm vrndnq: vrnd<IRIntBase<"roundeven", [Vector]>, "n">; multiclass compare_with_pred<string condname, dag arguments, dag cmp, string suffix> { diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c index 3e625c739bde9..4888bc8c5e98f 100644 --- a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c @@ -148,7 +148,7 
@@ float32x4_t test_vrndxq_f32(float32x4_t a) // CHECK-LABEL: @test_vrndnq_f16( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half> [[A:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.roundeven.v8f16(<8 x half> [[A:%.*]]) // CHECK-NEXT: ret <8 x half> [[TMP0]] // float16x8_t test_vrndnq_f16(float16x8_t a) @@ -162,7 +162,7 @@ float16x8_t test_vrndnq_f16(float16x8_t a) // CHECK-LABEL: @test_vrndnq_f32( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float> [[A:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = call <4 x float> @llvm.roundeven.v4f32(<4 x float> [[A:%.*]]) // CHECK-NEXT: ret <4 x float> [[TMP0]] // float32x4_t test_vrndnq_f32(float32x4_t a) diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 9b7dd8099368d..3ee69b72cc5cd 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -1306,8 +1306,6 @@ foreach suffix = ["a","n","p","m"] in { [llvm_anyvector_ty /* input */], LLVMMatchType<0>, llvm_anyvector_ty>; } -def int_arm_mve_vrintn: DefaultAttrsIntrinsic< - [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>; def int_arm_mve_vcls: DefaultAttrsIntrinsic< [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>; diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 7ba6d411bc7b5..814c00c669cb3 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -767,6 +767,12 @@ static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, return false; // Not 'arm.mve.vctp64'. } + if (Name.starts_with("vrintn.v")) { + NewFn = Intrinsic::getOrInsertDeclaration( + F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType()); + return true; + } + // These too are changed to accept a v2i1 instead of the old v4i1. if (Name.consume_back(".v4i1")) { // 'arm.mve.*.v4i1'. 
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 6dd8a374a92af..9dffd945d5baa 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -3527,7 +3527,7 @@ multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode, } multiclass MVE_VRINT_ops<MVEVectorVTInfo VTI> { - defm N : MVE_VRINT_m<VTI, "n", 0b000, int_arm_mve_vrintn>; + defm N : MVE_VRINT_m<VTI, "n", 0b000, froundeven>; defm X : MVE_VRINT_m<VTI, "x", 0b001, frint>; defm A : MVE_VRINT_m<VTI, "a", 0b010, fround>; defm Z : MVE_VRINT_m<VTI, "z", 0b011, ftrunc>; diff --git a/llvm/test/CodeGen/Thumb2/mve-frint.ll b/llvm/test/CodeGen/Thumb2/mve-frint.ll index 1d7dcc8bf8440..6946ec37ddf33 100644 --- a/llvm/test/CodeGen/Thumb2/mve-frint.ll +++ b/llvm/test/CodeGen/Thumb2/mve-frint.ll @@ -424,21 +424,74 @@ entry: ret <2 x double> %0 } -declare <4 x float> @llvm.ceil.v4f32(<4 x float>) -declare <4 x float> @llvm.trunc.v4f32(<4 x float>) -declare <4 x float> @llvm.rint.v4f32(<4 x float>) -declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) -declare <4 x float> @llvm.floor.v4f32(<4 x float>) -declare <4 x float> @llvm.round.v4f32(<4 x float>) -declare <8 x half> @llvm.ceil.v8f16(<8 x half>) -declare <8 x half> @llvm.trunc.v8f16(<8 x half>) -declare <8 x half> @llvm.rint.v8f16(<8 x half>) -declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>) -declare <8 x half> @llvm.floor.v8f16(<8 x half>) -declare <8 x half> @llvm.round.v8f16(<8 x half>) -declare <2 x double> @llvm.ceil.v2f64(<2 x double>) -declare <2 x double> @llvm.trunc.v2f64(<2 x double>) -declare <2 x double> @llvm.rint.v2f64(<2 x double>) -declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) -declare <2 x double> @llvm.floor.v2f64(<2 x double>) -declare <2 x double> @llvm.round.v2f64(<2 x double>) +define arm_aapcs_vfpcc <4 x float> @froundeven_float32_t(<4 x float> %src) { +; CHECK-MVE-LABEL: froundeven_float32_t: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: 
vrintn.f32 s3, s3 +; CHECK-MVE-NEXT: vrintn.f32 s2, s2 +; CHECK-MVE-NEXT: vrintn.f32 s1, s1 +; CHECK-MVE-NEXT: vrintn.f32 s0, s0 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: froundeven_float32_t: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vrintn.f32 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %0 = call fast <4 x float> @llvm.roundeven.v4f32(<4 x float> %src) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @froundeven_float16_t(<8 x half> %src) { +; CHECK-MVE-LABEL: froundeven_float16_t: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vmovx.f16 s4, s0 +; CHECK-MVE-NEXT: vrintn.f16 s0, s0 +; CHECK-MVE-NEXT: vrintn.f16 s4, s4 +; CHECK-MVE-NEXT: vins.f16 s0, s4 +; CHECK-MVE-NEXT: vmovx.f16 s4, s1 +; CHECK-MVE-NEXT: vrintn.f16 s4, s4 +; CHECK-MVE-NEXT: vrintn.f16 s1, s1 +; CHECK-MVE-NEXT: vins.f16 s1, s4 +; CHECK-MVE-NEXT: vmovx.f16 s4, s2 +; CHECK-MVE-NEXT: vrintn.f16 s4, s4 +; CHECK-MVE-NEXT: vrintn.f16 s2, s2 +; CHECK-MVE-NEXT: vins.f16 s2, s4 +; CHECK-MVE-NEXT: vmovx.f16 s4, s3 +; CHECK-MVE-NEXT: vrintn.f16 s4, s4 +; CHECK-MVE-NEXT: vrintn.f16 s3, s3 +; CHECK-MVE-NEXT: vins.f16 s3, s4 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: froundeven_float16_t: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vrintn.f16 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %0 = call fast <8 x half> @llvm.roundeven.v8f16(<8 x half> %src) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <2 x double> @froundeven_float64_t(<2 x double> %src) { +; CHECK-LABEL: froundeven_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl roundeven +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl roundeven +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; 
CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.roundeven.v2f64(<2 x double> %src) + ret <2 x double> %0 +} diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll index a70975e1e7318..b30bb2e3ad3ff 100644 --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s +; The llvm.arm.mve.vrintn should auto-upgrade to llvm.roundeven, which are selected to vrintn. + define arm_aapcs_vfpcc <8 x half> @test_vrndnq_f16(<8 x half> %a) { ; CHECK-LABEL: test_vrndnq_f16: ; CHECK: @ %bb.0: @ %entry _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits