https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/100546
Backport 6da6772bf0a33131aa8540c9d4f60d5db75c32b5 Requested by: @kmclaughlin-arm >From 6271d3d03846216502487711f330227a4a6d3022 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin <kerry.mclaugh...@arm.com> Date: Wed, 24 Jul 2024 14:30:25 +0100 Subject: [PATCH] [AArch64][SME] Rewrite __arm_get_current_vg to preserve required registers (#100143) The documentation for the __arm_get_current_vg support routine specifies that the following registers are call-preserved: - X1-X15, X19-X29 and SP - Z0-Z31 - P0-P15 This patch rewrites the implementation of this routine in compiler-rt, as the current version does not guarantee that these registers will be preserved. (cherry picked from commit 6da6772bf0a33131aa8540c9d4f60d5db75c32b5) --- compiler-rt/lib/builtins/aarch64/sme-abi-vg.c | 28 ------------ compiler-rt/lib/builtins/aarch64/sme-abi.S | 44 +++++++++++++++++++ 2 files changed, 44 insertions(+), 28 deletions(-) diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c index 062cf80fc6848..20061012e16c6 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c +++ b/compiler-rt/lib/builtins/aarch64/sme-abi-vg.c @@ -10,15 +10,6 @@ struct FEATURES { extern struct FEATURES __aarch64_cpu_features; -struct SME_STATE { - long PSTATE; - long TPIDR2_EL0; -}; - -extern struct SME_STATE __arm_sme_state(void) __arm_streaming_compatible; - -extern bool __aarch64_has_sme_and_tpidr2_el0; - #if __GNUC__ >= 9 #pragma GCC diagnostic ignored "-Wprio-ctor-dtor" #endif @@ -28,22 +19,3 @@ __attribute__((constructor(90))) static void get_aarch64_cpu_features(void) { __init_cpu_features(); } - -__attribute__((target("sve"))) long -__arm_get_current_vg(void) __arm_streaming_compatible { - struct SME_STATE State = __arm_sme_state(); - unsigned long long features = - __atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED); - bool HasSVE = features & (1ULL << FEAT_SVE); - - if (!HasSVE && !__aarch64_has_sme_and_tpidr2_el0) - return 0; - - if (HasSVE || (State.PSTATE & 1)) { - long vl; - __asm__ __volatile__("cntd %0" : "=r"(vl)); - return vl; - } - - return 0; -} diff --git a/compiler-rt/lib/builtins/aarch64/sme-abi.S b/compiler-rt/lib/builtins/aarch64/sme-abi.S index 4c0ff66931db7..cd8153f60670f 100644 --- a/compiler-rt/lib/builtins/aarch64/sme-abi.S +++ b/compiler-rt/lib/builtins/aarch64/sme-abi.S @@ -12,11 +12,15 @@ #if !defined(__APPLE__) #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0) #define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0) +#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features) +#define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features) #else // MachO requires @page/@pageoff directives because the global is defined // in a different file. Otherwise this file may fail to build. #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page #define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff +#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page +#define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff #endif .arch armv9-a+sme @@ -180,6 +184,46 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable) ret END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable) +DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg) + .variant_pcs __arm_get_current_vg + BTI_C + + stp x29, x30, [sp, #-16]! + .cfi_def_cfa_offset 16 + mov x29, sp + .cfi_def_cfa w29, 16 + .cfi_offset w30, -8 + .cfi_offset w29, -16 + adrp x17, CPU_FEATS_SYMBOL + ldr w17, [x17, CPU_FEATS_SYMBOL_OFFSET] + tbnz w17, #30, 0f + adrp x16, TPIDR2_SYMBOL + ldrb w16, [x16, TPIDR2_SYMBOL_OFFSET] + cbz w16, 1f +0: + mov x18, x1 + bl __arm_sme_state + mov x1, x18 + and x17, x17, #0x40000000 + bfxil x17, x0, #0, #1 + cbz x17, 1f + cntd x0 + .cfi_def_cfa wsp, 16 + ldp x29, x30, [sp], #16 + .cfi_def_cfa_offset 0 + .cfi_restore w30 + .cfi_restore w29 + ret +1: + mov x0, xzr + .cfi_def_cfa wsp, 16 + ldp x29, x30, [sp], #16 + .cfi_def_cfa_offset 0 + .cfi_restore w30 + .cfi_restore w29 + ret +END_COMPILERRT_OUTLINE_FUNCTION(__arm_get_current_vg) + NO_EXEC_STACK_DIRECTIVE // GNU property note for BTI and PAC _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits