https://github.com/simpal01 updated https://github.com/llvm/llvm-project/pull/71545
>From c7c02e2bea3cb6fb674a5e568b7d14998776c00f Mon Sep 17 00:00:00 2001 From: Simi Pallipurath <simi.pallipurath.com> Date: Tue, 7 Nov 2023 13:05:08 +0000 Subject: [PATCH 1/2] [ARM] .fpu equals fpv5-d16 disables floating point MVE which leads to unsupported MVE instructions for cortex M85/M55. The floating-point and MVE features together specify the MVE functionality that is supported on the Cortex-M85 processor. But the FPU extension for the underlying architecture (armv8.1-m.main) is FPV5, which does not include MVE-F. So when we explicitly specify -mfpu=fpv5-d16, or when the assembly comes from the compiler's -S output or `-save-temps=obj`, the MVE feature is lost, which leads to an assembler error. What is happening here is that the .fpu directive overrides any features previously set by the .cpu directive. Since the corresponding .fpu generated (.fpu fpv5-d16) does not include MVE-F, it overrides those features even though they are supported and set by the .cpu directive. It looks like .fpu is supposed to do this. In this case, there should be an .arch_extension directive re-enabling the relevant extensions after .fpu if the goal is to keep these extensions enabled. GCC also does the same. 
So this patch enables the MVE features by: .fpu fpv5-d16 .arch_extension mve.fp --- clang/test/CodeGen/arm-v8.1m-check-mve.ll | 56 +++++++++++++++++++ .../lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 3 + .../ARM/MCTargetDesc/ARMTargetStreamer.cpp | 16 ++++-- 3 files changed, 69 insertions(+), 6 deletions(-) create mode 100644 clang/test/CodeGen/arm-v8.1m-check-mve.ll diff --git a/clang/test/CodeGen/arm-v8.1m-check-mve.ll b/clang/test/CodeGen/arm-v8.1m-check-mve.ll new file mode 100644 index 000000000000000..cfcb0223961e31e --- /dev/null +++ b/clang/test/CodeGen/arm-v8.1m-check-mve.ll @@ -0,0 +1,56 @@ +; REQUIRES: arm-registered-target +; RUN: %clang --target=arm-none-eabi -mcpu=cortex-m85 -mfloat-abi=hard -save-temps=obj -S -o - %s | FileCheck %s +; RUN: %clang --target=arm-none-eabi -mcpu=cortex-m55 -mfloat-abi=hard -save-temps=obj -S -o - %s | FileCheck %s +; RUN: %clang --target=arm-none-eabi -mcpu=cortex-m85 -mfloat-abi=hard -O2 -c -mthumb -save-temps=obj %s +; RUN: %clang --target=arm-none-eabi -mcpu=cortex-m55 -mfloat-abi=hard -O2 -c -mthumb -save-temps=obj %s +; CHECK: .fpu fpv5-d16 +; CHECK .arch_extension mve.fp +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-none-unknown-eabihf" + +%struct.dummy_t = type { float, float, float, float } + +define dso_local signext i8 @foo(ptr noundef %handle) #0 { +entry: + %handle.addr = alloca ptr, align 4 + store ptr %handle, ptr %handle.addr, align 4 + %0 = load ptr, ptr %handle.addr, align 4 + %a = getelementptr inbounds %struct.dummy_t, ptr %0, i32 0, i32 0 + %1 = load float, ptr %a, align 4 + %sub = fsub float 0x3F5439DE40000000, %1 + %2 = load ptr, ptr %handle.addr, align 4 + %a1 = getelementptr inbounds %struct.dummy_t, ptr %2, i32 0, i32 0 + %3 = load float, ptr %a1, align 4 + %4 = call float @llvm.fmuladd.f32(float 0x3F847AE140000000, float %sub, float %3) + store float %4, ptr %a1, align 4 + %5 = load ptr, ptr %handle.addr, align 4 + %b = getelementptr 
inbounds %struct.dummy_t, ptr %5, i32 0, i32 1 + %6 = load float, ptr %b, align 4 + %sub2 = fsub float 0x3F5439DE40000000, %6 + %7 = load ptr, ptr %handle.addr, align 4 + %b3 = getelementptr inbounds %struct.dummy_t, ptr %7, i32 0, i32 1 + %8 = load float, ptr %b3, align 4 + %9 = call float @llvm.fmuladd.f32(float 0x3F947AE140000000, float %sub2, float %8) + store float %9, ptr %b3, align 4 + %10 = load ptr, ptr %handle.addr, align 4 + %c = getelementptr inbounds %struct.dummy_t, ptr %10, i32 0, i32 2 + %11 = load float, ptr %c, align 4 + %sub4 = fsub float 0x3F5439DE40000000, %11 + %12 = load ptr, ptr %handle.addr, align 4 + %c5 = getelementptr inbounds %struct.dummy_t, ptr %12, i32 0, i32 2 + %13 = load float, ptr %c5, align 4 + %14 = call float @llvm.fmuladd.f32(float 0x3F9EB851E0000000, float %sub4, float %13) + store float %14, ptr %c5, align 4 + %15 = load ptr, ptr %handle.addr, align 4 + %d = getelementptr inbounds %struct.dummy_t, ptr %15, i32 0, i32 3 + %16 = load float, ptr %d, align 4 + %sub6 = fsub float 0x3F5439DE40000000, %16 + %17 = load ptr, ptr %handle.addr, align 4 + %d7 = getelementptr inbounds %struct.dummy_t, ptr %17, i32 0, i32 3 + %18 = load float, ptr %d7, align 4 + %19 = call float @llvm.fmuladd.f32(float 0x3FA47AE140000000, float %sub6, float %18) + store float %19, ptr %d7, align 4 + ret i8 0 +} + +declare float @llvm.fmuladd.f32(float, float, float) #1 diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 373d5b59bca6640..20b52ebc544a1ed 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -12648,6 +12648,9 @@ bool ARMAsmParser::enableArchExtFeature(StringRef Name, SMLoc &ExtLoc) { {ARM::AEK_CRYPTO, {Feature_HasV8Bit}, {ARM::FeatureCrypto, ARM::FeatureNEON, ARM::FeatureFPARMv8}}, + {(ARM::AEK_DSP | ARM::AEK_SIMD | ARM::AEK_FP), + {Feature_HasV8_1MMainlineBit}, + {ARM::HasMVEFloatOps}}, {ARM::AEK_FP, 
{Feature_HasV8Bit}, {ARM::FeatureVFP2_SP, ARM::FeatureFPARMv8}}, diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index b65d1b24e63d39b..e84b597e4382edc 100644 --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -238,14 +238,18 @@ void ARMTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { ? ARMBuildAttrs::AllowNeonARMv8_1a : ARMBuildAttrs::AllowNeonARMv8); } else { - if (STI.hasFeature(ARM::FeatureFPARMv8_D16_SP)) + if (STI.hasFeature(ARM::FeatureFPARMv8_D16_SP)) { // FPv5 and FP-ARMv8 have the same instructions, so are modeled as one // FPU, but there are two different names for it depending on the CPU. - emitFPU(STI.hasFeature(ARM::FeatureD32) - ? ARM::FK_FP_ARMV8 - : (STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_FPV5_D16 - : ARM::FK_FPV5_SP_D16)); - else if (STI.hasFeature(ARM::FeatureVFP4_D16_SP)) + if (STI.hasFeature(ARM::FeatureD32)) + emitFPU(ARM::FK_FP_ARMV8); + else { + emitFPU(STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_FPV5_D16 + : ARM::FK_FPV5_SP_D16); + if (STI.hasFeature(ARM::HasMVEFloatOps)) + emitArchExtension(ARM::AEK_SIMD | ARM::AEK_DSP | ARM::AEK_FP); + } + } else if (STI.hasFeature(ARM::FeatureVFP4_D16_SP)) emitFPU(STI.hasFeature(ARM::FeatureD32) ? ARM::FK_VFPV4 : (STI.hasFeature(ARM::FeatureFP64) ? ARM::FK_VFPV4_D16 >From 6c386635cbdd432d627f03fc0f544cf31cfdb883 Mon Sep 17 00:00:00 2001 From: Simi Pallipurath <simi.pallipurath.com> Date: Thu, 9 Nov 2023 16:25:25 +0000 Subject: [PATCH 2/2] fixup! [llvm][ARM] Emit MVE .arch_extension after .fpu directive if it does not include MVE features. 1. .arch_extension will always be on the very next line. CHECK-NEXT would be bit more robust. 
--- clang/test/CodeGen/arm-v8.1m-check-mve.ll | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clang/test/CodeGen/arm-v8.1m-check-mve.ll b/clang/test/CodeGen/arm-v8.1m-check-mve.ll index cfcb0223961e31e..6949f5529aeb653 100644 --- a/clang/test/CodeGen/arm-v8.1m-check-mve.ll +++ b/clang/test/CodeGen/arm-v8.1m-check-mve.ll @@ -4,9 +4,7 @@ ; RUN: %clang --target=arm-none-eabi -mcpu=cortex-m85 -mfloat-abi=hard -O2 -c -mthumb -save-temps=obj %s ; RUN: %clang --target=arm-none-eabi -mcpu=cortex-m55 -mfloat-abi=hard -O2 -c -mthumb -save-temps=obj %s ; CHECK: .fpu fpv5-d16 -; CHECK .arch_extension mve.fp -target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" -target triple = "thumbv8.1m.main-none-unknown-eabihf" +; CHECK-NEXT .arch_extension mve.fp %struct.dummy_t = type { float, float, float, float } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits