This is a step towards enabling LTO on some files where it is currently disabled, and/or allowing those files to be built in Thumb mode.
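The pattern applied throughout the series, shown here as a minimal sketch only (not part of the diff below; _foo_cp15_op and the .S file name are made-up stand-ins for the real helpers such as _get_cr_hyp() or _invalidate_icache_all()): an inline CP15 access like

	asm volatile("mrc p15, 0, %0, c1, c0, 0" : "=r" (val));

keeps the containing C file out of LTO and Thumb builds, so the access is moved into a small routine that is always assembled as ARM code,

	ENTRY(_foo_cp15_op)
		mrc	p15, 0, r0, c1, c0, 0	@ result returned in r0
		bx	lr
	ENDPROC(_foo_cp15_op)

and the C file only keeps a declaration and an ordinary call, which LTO and Thumb handle fine:

	unsigned int _foo_cp15_op(void);	/* implemented in the new .S file */
	val = _foo_cp15_op();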
Signed-off-by: Jerome Forissier <jerome.foriss...@linaro.org>
---
 arch/arm/cpu/arm926ejs/Makefile           |  4 +-
 arch/arm/cpu/arm926ejs/cache.c            | 32 +++-----
 arch/arm/cpu/arm926ejs/cp15.S             | 46 ++++++++++++
 arch/arm/cpu/arm926ejs/cpu.c              | 10 +--
 arch/arm/include/asm/system.h             | 22 +++---
 arch/arm/lib/Makefile                     |  9 ++-
 arch/arm/lib/cache-cp15.c                 | 62 ++++++---------
 arch/arm/lib/cache.c                      |  6 +-
 arch/arm/lib/cp15.S                       | 92 +++++++++++++++++++++++
 arch/arm/mach-kirkwood/Makefile           |  5 +-
 arch/arm/mach-kirkwood/cp15.S             | 13 ++++
 arch/arm/mach-kirkwood/include/mach/cpu.h | 12 +--
 12 files changed, 216 insertions(+), 97 deletions(-)
 create mode 100644 arch/arm/cpu/arm926ejs/cp15.S
 create mode 100644 arch/arm/lib/cp15.S
 create mode 100644 arch/arm/mach-kirkwood/cp15.S

diff --git a/arch/arm/cpu/arm926ejs/Makefile b/arch/arm/cpu/arm926ejs/Makefile
index 41d8af506d8..1b40d901413 100644
--- a/arch/arm/cpu/arm926ejs/Makefile
+++ b/arch/arm/cpu/arm926ejs/Makefile
@@ -4,7 +4,7 @@
 # Wolfgang Denk, DENX Software Engineering, w...@denx.de.
 
 extra-y	= start.o
-obj-y	= cpu.o cache.o
+obj-y	= cpu.o cache.o cp15.o
 
 ifdef CONFIG_XPL_BUILD
 ifdef CONFIG_SPL_NO_CPU_SUPPORT
@@ -25,5 +25,7 @@ CFLAGS_cache.o := -marm
 CFLAGS_REMOVE_cpu.o := $(LTO_CFLAGS)
 CFLAGS_REMOVE_cache.o := $(LTO_CFLAGS)
 
+AFLAGS_REMOVE_cp15.o := -mthumb -mthumb-interwork
+
 endif
 endif
diff --git a/arch/arm/cpu/arm926ejs/cache.c b/arch/arm/cpu/arm926ejs/cache.c
index 71b8ad0f71d..3524379d335 100644
--- a/arch/arm/cpu/arm926ejs/cache.c
+++ b/arch/arm/cpu/arm926ejs/cache.c
@@ -9,44 +9,33 @@
 #include <linux/types.h>
 
 #if !CONFIG_IS_ENABLED(SYS_DCACHE_OFF)
+void _invalidate_dcache_all(void);
 void invalidate_dcache_all(void)
 {
-	asm volatile("mcr p15, 0, %0, c7, c6, 0\n" : : "r"(0));
+	_invalidate_dcache_all();
 }
 
+void _flush_dcache_all(void);
 void flush_dcache_all(void)
 {
-	asm volatile(
-		"0:"
-		"mrc p15, 0, r15, c7, c14, 3\n"
-		"bne 0b\n"
-		"mcr p15, 0, %0, c7, c10, 4\n"
-		: : "r"(0) : "memory"
-	);
+	_flush_dcache_all();
 }
 
+void _invalidate_dcache_range(unsigned long start, unsigned long stop);
 void invalidate_dcache_range(unsigned long start, unsigned long stop)
 {
 	if (!check_cache_range(start, stop))
 		return;
-
-	while (start < stop) {
-		asm volatile("mcr p15, 0, %0, c7, c6, 1\n" : : "r"(start));
-		start += CONFIG_SYS_CACHELINE_SIZE;
-	}
+	_invalidate_dcache_range(start, stop);
 }
 
+void _flush_dcache_range(unsigned long start, unsigned long stop);
 void flush_dcache_range(unsigned long start, unsigned long stop)
 {
 	if (!check_cache_range(start, stop))
 		return;
 
-	while (start < stop) {
-		asm volatile("mcr p15, 0, %0, c7, c14, 1\n" : : "r"(start));
-		start += CONFIG_SYS_CACHELINE_SIZE;
-	}
-
-	asm volatile("mcr p15, 0, %0, c7, c10, 4\n" : : "r"(0));
+	_flush_dcache_range(start, stop);
 }
 #else /* #if !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) */
 void invalidate_dcache_all(void)
@@ -70,11 +59,10 @@ __weak void invalidate_l2_cache(void) {}
 
 #if !CONFIG_IS_ENABLED(SYS_ICACHE_OFF)
 /* Invalidate entire I-cache and branch predictor array */
+void _invalidate_icache_all(void);
 void invalidate_icache_all(void)
 {
-	unsigned long i = 0;
-
-	asm ("mcr p15, 0, %0, c7, c5, 0" : : "r" (i));
+	_invalidate_icache_all();
 }
 #else
 void invalidate_icache_all(void) {}
diff --git a/arch/arm/cpu/arm926ejs/cp15.S b/arch/arm/cpu/arm926ejs/cp15.S
new file mode 100644
index 00000000000..2d7626bc858
--- /dev/null
+++ b/arch/arm/cpu/arm926ejs/cp15.S
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <config.h>
+#include <linux/linkage.h>
+
+ENTRY(_cache_flush)
+	mcr p15, 0, r0, c7, c7
+	bx lr
+ENDPROC(_cache_flush)
+
+ENTRY(_invalidate_dcache_all)
+	mcr p15, 0, r0, c7, c6, 0
+	bx lr
+ENDPROC(_invalidate_dcache_all)
+
+ENTRY(_flush_dcache_all)
+0:
+	mrc p15, 0, r15, c7, c14, 3
+	bne 0b
+	mcr p15, 0, r0, c7, c10, 4
+	bx lr
+ENDPROC(_flush_dcache_all)
+
+ENTRY(_invalidate_dcache_range)
+0:
+	mcr p15, 0, r0, c7, c6, 1
+	add r0, r0, #CONFIG_SYS_CACHELINE_SIZE
+	cmp r0, r1
+	blt 0b
+	bx lr
+ENDPROC(_invalidate_dcache_range)
+
+ENTRY(_flush_dcache_range)
+0:
+	mcr p15, 0, r0, c7, c14, 1
+	add r0, r0, #CONFIG_SYS_CACHELINE_SIZE
+	cmp r0, r1
+	blt 0b
+	mcr p15, 0, r0, c7, c10, 4
+	bx lr
+ENDPROC(_flush_dcache_range)
+
+ENTRY(_invalidate_icache_all)
+	mcr p15, 0, r0, c7, c5, 0
+	bx lr
+ENDPROC(_invalidate_icache_all)
diff --git a/arch/arm/cpu/arm926ejs/cpu.c b/arch/arm/cpu/arm926ejs/cpu.c
index 0e100e6f13d..9e59ff6c66f 100644
--- a/arch/arm/cpu/arm926ejs/cpu.c
+++ b/arch/arm/cpu/arm926ejs/cpu.c
@@ -55,12 +55,12 @@ int cleanup_before_linux (void)
 	return 0;
 }
 
+void _cache_flush(void);
+
 /* flush I/D-cache */
 static void cache_flush (void)
 {
-#if !(CONFIG_IS_ENABLED(SYS_ICACHE_OFF) && CONFIG_IS_ENABLED(SYS_DCACHE_OFF))
-	unsigned long i = 0;
-
-	asm ("mcr p15, 0, %0, c7, c7, 0": :"r" (i));
-#endif
+	if (!(CONFIG_IS_ENABLED(SYS_ICACHE_OFF) &&
+	      CONFIG_IS_ENABLED(SYS_DCACHE_OFF)))
+		_cache_flush();
 }
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 849b3d0efb7..e559a48a6c7 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -456,31 +456,29 @@ static inline int is_hyp(void)
 #endif
 }
 
+unsigned int _get_cr_hyp(void);
+unsigned int _get_cr_nohyp(void);
+
 static inline unsigned int get_cr(void)
 {
 	unsigned int val;
 
 	if (is_hyp())
-		asm volatile("mrc p15, 4, %0, c1, c0, 0	@ get CR" : "=r" (val)
-						  :
-						  : "cc");
+		val = _get_cr_hyp();
 	else
-		asm volatile("mrc p15, 0, %0, c1, c0, 0	@ get CR" : "=r" (val)
-						  :
-						  : "cc");
+		val = _get_cr_nohyp();
 	return val;
 }
 
+unsigned int _set_cr_hyp(unsigned int val);
+unsigned int _set_cr_nohyp(unsigned int val);
+
 static inline void set_cr(unsigned int val)
 {
 	if (is_hyp())
-		asm volatile("mcr p15, 4, %0, c1, c0, 0	@ set CR" :
-						  : "r" (val)
-						  : "cc");
+		_set_cr_hyp(val);
 	else
-		asm volatile("mcr p15, 0, %0, c1, c0, 0	@ set CR" :
-						  : "r" (val)
-						  : "cc");
+		_set_cr_nohyp(val);
 	isb();
 }
 
diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile
index ade42d0ca43..23f73dbba32 100644
--- a/arch/arm/lib/Makefile
+++ b/arch/arm/lib/Makefile
@@ -81,6 +81,10 @@ endif
 
 obj-y	+= cache.o
 obj-$(CONFIG_SYS_ARM_CACHE_CP15) += cache-cp15.o
 CFLAGS_REMOVE_cache-cp15.o := $(LTO_CFLAGS)
+# Low-level CP15 instructions (mrc p15...) cause problems with LTO
+# when they are coded as inline assembly. They are implemented
+# in their own .S file instead.
+obj-$(CONFIG_SYS_ARM_CACHE_CP15) += cp15.o
 
 obj-y	+= psci-dt.o
@@ -111,13 +115,14 @@ endif
 
 # For .S, drop -mthumb* and other thumb-related options.
 # CFLAGS_REMOVE_* would not have an effet, so AFLAGS_REMOVE_*
 # was implemented and is used here.
-# Also, define ${target}_NO_THUMB_BUILD for these two targets
-# so that the code knows it should not use Thumb.
+# Also, define ${target}_NO_THUMB_BUILD for the targets that
+# need to know they should not use Thumb.
 AFLAGS_REMOVE_memset.o := -mthumb -mthumb-interwork
 AFLAGS_REMOVE_memcpy.o := -mthumb -mthumb-interwork
 AFLAGS_memset.o := -DMEMSET_NO_THUMB_BUILD
 AFLAGS_memcpy.o := -DMEMCPY_NO_THUMB_BUILD
+AFLAGS_REMOVE_cp15.o := -mthumb -mthumb-interwork
 
 # This is only necessary to force ARM mode on THUMB1 targets.
 ifneq ($(CONFIG_SYS_ARM_ARCH),4)
diff --git a/arch/arm/lib/cache-cp15.c b/arch/arm/lib/cache-cp15.c
index 947012f2996..46eded590eb 100644
--- a/arch/arm/lib/cache-cp15.c
+++ b/arch/arm/lib/cache-cp15.c
@@ -109,6 +109,15 @@ __weak void dram_bank_mmu_setup(int bank)
 		set_section_dcache(i, DCACHE_DEFAULT_OPTION);
 }
 
+void _mmu_helper_lpae_hyp(unsigned long reg, unsigned long addr,
+			  unsigned long attr);
+void _mmu_helper_lpae_nohyp(unsigned long reg, unsigned long addr,
+			    unsigned long attr);
+void _mmu_helper_nolpae_hyp(unsigned long reg);
+void _mmu_helper_nolpae_nohyp(unsigned long reg);
+void _mmu_helper_pt(unsigned long addr);
+void _set_dacr(unsigned long val);
+
 /* to activate the MMU we need to set up virtual memory: use 1M areas */
 static inline void mmu_setup(void)
 {
@@ -141,42 +150,13 @@ static inline void mmu_setup(void)
 	reg |= TTBCR_ORGN0_WBNWA | TTBCR_IRGN0_WBNWA;
 #endif
-	if (is_hyp()) {
-		/* Set HTCR to enable LPAE */
-		asm volatile("mcr p15, 4, %0, c2, c0, 2"
-			: : "r" (reg) : "memory");
-		/* Set HTTBR0 */
-		asm volatile("mcrr p15, 4, %0, %1, c2"
-			:
-			: "r"(gd->arch.tlb_addr + (4096 * 4)), "r"(0)
-			: "memory");
-		/* Set HMAIR */
-		asm volatile("mcr p15, 4, %0, c10, c2, 0"
-			: : "r" (MEMORY_ATTRIBUTES) : "memory");
-	} else {
-		/* Set TTBCR to enable LPAE */
-		asm volatile("mcr p15, 0, %0, c2, c0, 2"
-			: : "r" (reg) : "memory");
-		/* Set 64-bit TTBR0 */
-		asm volatile("mcrr p15, 0, %0, %1, c2"
-			:
-			: "r"(gd->arch.tlb_addr + (4096 * 4)), "r"(0)
-			: "memory");
-		/* Set MAIR */
-		asm volatile("mcr p15, 0, %0, c10, c2, 0"
-			: : "r" (MEMORY_ATTRIBUTES) : "memory");
-	}
+	if (is_hyp())
+		_mmu_helper_lpae_hyp(reg, gd->arch.tlb_addr + (4096 * 4),
+				     MEMORY_ATTRIBUTES);
+	else
+		_mmu_helper_lpae_nohyp(reg, gd->arch.tlb_addr + (4096 * 4),
+				       MEMORY_ATTRIBUTES);
 #elif defined(CONFIG_CPU_V7A)
-	if (is_hyp()) {
-		/* Set HTCR to disable LPAE */
-		asm volatile("mcr p15, 4, %0, c2, c0, 2"
-			: : "r" (0) : "memory");
-	} else {
-		/* Set TTBCR to disable LPAE */
-		asm volatile("mcr p15, 0, %0, c2, c0, 2"
-			: : "r" (0) : "memory");
-	}
-
 	/* Set TTBR0 */
 	reg = gd->arch.tlb_addr & TTBR0_BASE_ADDR_MASK;
 #if defined(CONFIG_SYS_ARM_CACHE_WRITETHROUGH)
 	reg |= TTBR0_RGN_WT | TTBR0_IRGN_WT;
@@ -185,19 +165,19 @@ static inline void mmu_setup(void)
 #else
 	reg |= TTBR0_RGN_WB | TTBR0_IRGN_WB;
 #endif
-	asm volatile("mcr p15, 0, %0, c2, c0, 0"
-		     : : "r" (reg) : "memory");
+	if (is_hyp())
+		_mmu_helper_nolpae_hyp(reg);
+	else
+		_mmu_helper_nolpae_nohyp(reg);
 #else
 	/* Copy the page table address to cp15 */
-	asm volatile("mcr p15, 0, %0, c2, c0, 0"
-		     : : "r" (gd->arch.tlb_addr) : "memory");
+	_mmu_helper_pt(gd->arch.tlb_addr);
 #endif
 	/*
 	 * initial value of Domain Access Control Register (DACR)
 	 * Set the access control to client (1U) for each of the 16 domains
 	 */
-	asm volatile("mcr p15, 0, %0, c3, c0, 0"
-		     : : "r" (0x55555555));
+	_set_dacr(0x55555555);
 
 	/* and enable the mmu */
 	reg = get_cr();	/* get control reg. */
diff --git a/arch/arm/lib/cache.c b/arch/arm/lib/cache.c
index dd19bd3e4fb..be3a0b42bff 100644
--- a/arch/arm/lib/cache.c
+++ b/arch/arm/lib/cache.c
@@ -122,12 +122,10 @@ phys_addr_t noncached_alloc(size_t size, size_t align)
 #endif /* CONFIG_SYS_NONCACHED_MEMORY */
 
 #if CONFIG_IS_ENABLED(SYS_THUMB_BUILD)
+void _invalidate_l2_cache(void);
 void invalidate_l2_cache(void)
 {
-	unsigned int val = 0;
-
-	asm volatile("mcr p15, 1, %0, c15, c11, 0 @ invl l2 cache"
-		     : : "r" (val) : "cc");
+	_invalidate_l2_cache();
 	isb();
 }
 #endif
diff --git a/arch/arm/lib/cp15.S b/arch/arm/lib/cp15.S
new file mode 100644
index 00000000000..c402d998d64
--- /dev/null
+++ b/arch/arm/lib/cp15.S
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <config.h>
+#include <linux/linkage.h>
+
+ENTRY(_get_cr_hyp)
+	mrc p15, 4, r0, c1, c0, 0	@ get CR
+	bx lr
+ENDPROC(_get_cr_hyp)
+
+ENTRY(_get_cr_nohyp)
+	mrc p15, 0, r0, c1, c0, 0	@ get CR
+	bx lr
+ENDPROC(_get_cr_nohyp)
+
+ENTRY(_set_cr_hyp)
+	mcr p15, 4, r0, c1, c0, 0	@ set CR
+	bx lr
+ENDPROC(_set_cr_hyp)
+
+ENTRY(_set_cr_nohyp)
+	mcr p15, 0, r0, c1, c0, 0	@ set CR
+	bx lr
+ENDPROC(_set_cr_nohyp)
+
+ENTRY(_invalidate_l2_cache)
+	mcr p15, 1, r0, c15, c11, 0	@ invl l2 cache
+	bx lr
+ENDPROC(_invalidate_l2_cache)
+
+#if defined(CONFIG_ARMV7_LPAE) && __LINUX_ARM_ARCH__ != 4
+
+ENTRY(_mmu_helper_lpae_hyp)
+	/* Set HTCR to enable LPAE */
+	mcr p15, 4, r0, c2, c0, 2
+	/* Set HTTBR0 */
+	mov r4, #0
+	mcrr p15, 4, r1, r4, c2
+	/* Set HMAIR */
+	mcr p15, 4, r3, c10, c2, 0
+	bx lr
+ENDPROC(_mmu_helper_lpae_hyp)
+
+ENTRY(_mmu_helper_lpae_nohyp)
+	/* Set TTBCR to enable LPAE */
+	mcr p15, 0, r0, c2, c0, 2
+	/* Set 64-bit TTBR0 */
+	mov r4, #0
+	mcrr p15, 0, r1, r4, c2
+	/* Set MAIR */
+	mcr p15, 0, r3, c10, c2, 0
+	bx lr
+ENDPROC(_mmu_helper_lpae_nohyp)
+
+#elif defined(CONFIG_CPU_V7A)
+
+ENTRY(_mmu_helper_nolpae_hyp)
+	/* Set HTCR to disable LPAE */
+	mov r1, #0
+	mcr p15, 4, r1, c2, c0, 2
+	/* Set TTBR0 */
+	mcr p15, 0, r0, c2, c0, 0
+	bx lr
+ENDPROC(_mmu_helper_nolpae_hyp)
+
+ENTRY(_mmu_helper_nolpae_nohyp)
+	/* Set TTBCR to disable LPAE */
+	mov r1, #0
+	mcr p15, 0, r1, c2, c0, 2
+	/* Set TTBR0 */
+	mcr p15, 0, r0, c2, c0, 0
+	bx lr
+ENDPROC(_mmu_helper_nolpae_nohyp)
+
+#else
+
+ENTRY(_mmu_helper_pt)
+	mcr p15, 0, r0, c2, c0, 0
+	bx lr
+ENDPROC(_mmu_helper_pt)
+
+#endif
+
+#if !(defined(CONFIG_SYS_ICACHE_OFF) && defined(CONFIG_SYS_DCACHE_OFF)) && \
+	defined(CONFIG_SYS_ARM_MMU)
+
+ENTRY(_set_dacr)
+	mcr p15, 0, r0, c3, c0, 0
+	bx lr
+ENDPROC(_set_dacr)
+
+#endif
diff --git a/arch/arm/mach-kirkwood/Makefile b/arch/arm/mach-kirkwood/Makefile
index 0fb5a2326f5..9581c315af8 100644
--- a/arch/arm/mach-kirkwood/Makefile
+++ b/arch/arm/mach-kirkwood/Makefile
@@ -3,14 +3,17 @@
 # (C) Copyright 2009
 # Marvell Semiconductor <www.marvell.com>
 # Written-by: Prafulla Wadaskar <prafu...@marvell.com>
+# Copyright (C) 2025 Linaro Ltd.
 
 obj-y	= cpu.o
+obj-y	+= cp15.o
 obj-y	+= cache.o
 obj-y	+= lowlevel.o
 obj-y	+= mpp.o
 
-# cpu.o and cache.o contain CP15 instructions which cannot be run in
+# cpu.o, cpu_asm.o and cache.o contain CP15 instructions which cannot be run in
 # Thumb state, so build them for ARM state even with CONFIG_SYS_THUMB_BUILD
 CFLAGS_cpu.o := -marm
+CFLAGS_cpu_asm.o := -marm
 CFLAGS_cache.o := -marm
diff --git a/arch/arm/mach-kirkwood/cp15.S b/arch/arm/mach-kirkwood/cp15.S
new file mode 100644
index 00000000000..088db9895f7
--- /dev/null
+++ b/arch/arm/mach-kirkwood/cp15.S
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#include <linux/linkage.h>
+
+ENTRY(readfr_extra_feature_reg)
+	mrc p15, 1, r0, c15, c1, 0	@ readfr exfr
+	bx lr
+ENDPROC(readfr_extra_feature_reg)
+
+ENTRY(_writefr_extra_feature_reg)
+	mcr p15, 1, r0, c15, c1, 0	@ writefr exfr
+	bx lr
+ENDPROC(_writefr_extra_feature_reg)
diff --git a/arch/arm/mach-kirkwood/include/mach/cpu.h b/arch/arm/mach-kirkwood/include/mach/cpu.h
index 9eec786fe8f..54487d2af85 100644
--- a/arch/arm/mach-kirkwood/include/mach/cpu.h
+++ b/arch/arm/mach-kirkwood/include/mach/cpu.h
@@ -82,22 +82,16 @@ struct mbus_win {
  * read feroceon/sheeva core extra feature register
  * using co-proc instruction
  */
-static inline unsigned int readfr_extra_feature_reg(void)
-{
-	unsigned int val;
-	asm volatile ("mrc p15, 1, %0, c15, c1, 0 @ readfr exfr":"=r"
-			(val)::"cc");
-	return val;
-}
+unsigned int readfr_extra_feature_reg(void);
 
 /*
  * write feroceon/sheeva core extra feature register
  * using co-proc instruction
  */
+void _writefr_extra_feature_reg(unsigned int val);
 static inline void writefr_extra_feature_reg(unsigned int val)
 {
-	asm volatile ("mcr p15, 1, %0, c15, c1, 0 @ writefr exfr"::"r"
-			(val):"cc");
+	_writefr_extra_feature_reg(val);
 	isb();
 }
-- 
2.43.0