[PATCH v12 05/28] riscv: usercfi state for task and save/restore of CSR_SSP on trap entry/exit
Carves out space in the arch-specific thread struct for cfi status and shadow stack in usermode on riscv. This patch does the following: - defines a new structure cfi_status with a status bit for the cfi feature - defines shadow stack pointer, base and size in the cfi_status structure - defines offsets to the new member fields in thread in asm-offsets.c - saves and restores the shadow stack pointer on trap entry (U --> S) and exit (S --> U). Shadow stack save/restore is gated on feature availability and implemented using an alternative. The CSR could be context switched in `switch_to` as well, but as soon as kernel shadow stack support gets rolled in, the shadow stack pointer will need to be switched at the trap entry/exit point (much like `sp`). It can be argued that a deployment in which the kernel uses a shadow stack may not be as prevalent as user mode using this feature. But even a minimal deployment of kernel shadow stack means that it needs to be supported; hence the shadow stack pointer is saved/restored in entry.S instead of in `switch_to.h`. 
Reviewed-by: Charlie Jenkins Reviewed-by: Zong Li Signed-off-by: Deepak Gupta --- arch/riscv/include/asm/processor.h | 1 + arch/riscv/include/asm/thread_info.h | 3 +++ arch/riscv/include/asm/usercfi.h | 24 arch/riscv/kernel/asm-offsets.c | 4 arch/riscv/kernel/entry.S| 26 ++ 5 files changed, 58 insertions(+) diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index e3aba3336e63..d851bb5c6da0 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -14,6 +14,7 @@ #include #include +#include #define arch_get_mmap_end(addr, len, flags)\ ({ \ diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h index f5916a70879a..a0cfe00c2ca6 100644 --- a/arch/riscv/include/asm/thread_info.h +++ b/arch/riscv/include/asm/thread_info.h @@ -62,6 +62,9 @@ struct thread_info { longuser_sp;/* User stack pointer */ int cpu; unsigned long syscall_work; /* SYSCALL_WORK_ flags */ +#ifdef CONFIG_RISCV_USER_CFI + struct cfi_status user_cfi_state; +#endif #ifdef CONFIG_SHADOW_CALL_STACK void*scs_base; void*scs_sp; diff --git a/arch/riscv/include/asm/usercfi.h b/arch/riscv/include/asm/usercfi.h new file mode 100644 index ..5f2027c51917 --- /dev/null +++ b/arch/riscv/include/asm/usercfi.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (C) 2024 Rivos, Inc. + * Deepak Gupta + */ +#ifndef _ASM_RISCV_USERCFI_H +#define _ASM_RISCV_USERCFI_H + +#ifndef __ASSEMBLY__ +#include + +#ifdef CONFIG_RISCV_USER_CFI +struct cfi_status { + unsigned long ubcfi_en : 1; /* Enable for backward cfi. 
*/ + unsigned long rsvd : ((sizeof(unsigned long) * 8) - 1); + unsigned long user_shdw_stk; /* Current user shadow stack pointer */ + unsigned long shdw_stk_base; /* Base address of shadow stack */ + unsigned long shdw_stk_size; /* size of shadow stack */ +}; + +#endif /* CONFIG_RISCV_USER_CFI */ + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_RISCV_USERCFI_H */ diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index e89455a6a0e5..0c188aaf3925 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -50,6 +50,10 @@ void asm_offsets(void) #endif OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu); +#ifdef CONFIG_RISCV_USER_CFI + OFFSET(TASK_TI_CFI_STATUS, task_struct, thread_info.user_cfi_state); + OFFSET(TASK_TI_USER_SSP, task_struct, thread_info.user_cfi_state.user_shdw_stk); +#endif OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]); OFFSET(TASK_THREAD_F1, task_struct, thread.fstate.f[1]); OFFSET(TASK_THREAD_F2, task_struct, thread.fstate.f[2]); diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 33a5a9f2a0d4..68c99124ea55 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -147,6 +147,20 @@ SYM_CODE_START(handle_exception) REG_L s0, TASK_TI_USER_SP(tp) csrrc s1, CSR_STATUS, t0 + /* +* If previous mode was U, capture shadow stack pointer and save it away +* Zero CSR_SSP at the same time for sanitization. +*/ + ALTERNATIVE("nop; nop; nop; nop", + __stringify(\ + andi s2, s1, SR_SPP;\ + bnez s2, skip_ssp_save; \ + csrrw s2, CSR_SSP, x0; \ + REG_S s2, TASK_TI_USER_SSP(tp); \ + skip_ssp_save:), + 0, +
[PATCH v12 07/28] riscv mm: manufacture shadow stack pte
This patch implements creating a shadow stack pte (on riscv). Creating a shadow stack PTE on riscv means clearing RWX and then setting W=1. Reviewed-by: Alexandre Ghiti Reviewed-by: Zong Li Signed-off-by: Deepak Gupta --- arch/riscv/include/asm/pgtable.h | 10 ++ 1 file changed, 10 insertions(+) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 8c528cd7347a..ede43185ffdf 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -421,6 +421,11 @@ static inline pte_t pte_mkwrite_novma(pte_t pte) return __pte(pte_val(pte) | _PAGE_WRITE); } +static inline pte_t pte_mkwrite_shstk(pte_t pte) +{ + return __pte((pte_val(pte) & ~(_PAGE_LEAF)) | _PAGE_WRITE); +} + /* static inline pte_t pte_mkexec(pte_t pte) */ static inline pte_t pte_mkdirty(pte_t pte) @@ -749,6 +754,11 @@ static inline pmd_t pmd_mkwrite_novma(pmd_t pmd) return pte_pmd(pte_mkwrite_novma(pmd_pte(pmd))); } +static inline pmd_t pmd_mkwrite_shstk(pmd_t pte) +{ + return __pmd((pmd_val(pte) & ~(_PAGE_LEAF)) | _PAGE_WRITE); +} + static inline pmd_t pmd_wrprotect(pmd_t pmd) { return pte_pmd(pte_wrprotect(pmd_pte(pmd))); -- 2.34.1
Re: [PATCH v3 08/17] riscv: misaligned: add a function to check misalign trap delegability
On 13/03/2025 14:19, Andrew Jones wrote: > On Mon, Mar 10, 2025 at 04:12:15PM +0100, Clément Léger wrote: >> Checking for the delegability of the misaligned access trap is needed >> for the KVM FWFT extension implementation. Add a function to get the >> delegability of the misaligned trap exception. >> >> Signed-off-by: Clément Léger >> --- >> arch/riscv/include/asm/cpufeature.h | 5 + >> arch/riscv/kernel/traps_misaligned.c | 17 +++-- >> 2 files changed, 20 insertions(+), 2 deletions(-) >> >> diff --git a/arch/riscv/include/asm/cpufeature.h >> b/arch/riscv/include/asm/cpufeature.h >> index ad7d26788e6a..8b97cba99fc3 100644 >> --- a/arch/riscv/include/asm/cpufeature.h >> +++ b/arch/riscv/include/asm/cpufeature.h >> @@ -69,12 +69,17 @@ int cpu_online_unaligned_access_init(unsigned int cpu); >> #if defined(CONFIG_RISCV_SCALAR_MISALIGNED) >> void unaligned_emulation_finish(void); >> bool unaligned_ctl_available(void); >> +bool misaligned_traps_can_delegate(void); >> DECLARE_PER_CPU(long, misaligned_access_speed); >> #else >> static inline bool unaligned_ctl_available(void) >> { >> return false; >> } >> +static inline bool misaligned_traps_can_delegate(void) >> +{ >> +return false; >> +} >> #endif >> >> bool check_vector_unaligned_access_emulated_all_cpus(void); >> diff --git a/arch/riscv/kernel/traps_misaligned.c >> b/arch/riscv/kernel/traps_misaligned.c >> index db31966a834e..a67a6e709a06 100644 >> --- a/arch/riscv/kernel/traps_misaligned.c >> +++ b/arch/riscv/kernel/traps_misaligned.c >> @@ -716,10 +716,10 @@ static int >> cpu_online_check_unaligned_access_emulated(unsigned int cpu) >> } >> #endif >> >> -#ifdef CONFIG_RISCV_SBI >> - >> static bool misaligned_traps_delegated; >> >> +#ifdef CONFIG_RISCV_SBI >> + >> static int cpu_online_sbi_unaligned_setup(unsigned int cpu) >> { >> if (sbi_fwft_set(SBI_FWFT_MISALIGNED_EXC_DELEG, 1, 0) && >> @@ -761,6 +761,7 @@ static int cpu_online_sbi_unaligned_setup(unsigned int >> cpu __always_unused) >> { >> return 0; >> } >> + >> 
#endif >> >> int cpu_online_unaligned_access_init(unsigned int cpu) >> @@ -773,3 +774,15 @@ int cpu_online_unaligned_access_init(unsigned int cpu) >> >> return cpu_online_check_unaligned_access_emulated(cpu); >> } >> + >> +bool misaligned_traps_can_delegate(void) >> +{ >> +/* >> + * Either we successfully requested misaligned traps delegation for all >> + * CPUS or the SBI does not implemented FWFT extension but delegated the >> + * exception by default. >> + */ >> +return misaligned_traps_delegated || >> + all_cpus_unaligned_scalar_access_emulated(); >> +} >> +EXPORT_SYMBOL_GPL(misaligned_traps_can_delegate); >> \ No newline at end of file > > Check your editor settings. I just enabled EditorConfig as well as clang-format so hopefully that will be ok in the next series. Thanks, Clément > >> -- >> 2.47.2 > > Reviewed-by: Andrew Jones
Re: [PATCH v3 03/17] riscv: sbi: add SBI FWFT extension calls
On 13/03/2025 13:44, Andrew Jones wrote: > On Mon, Mar 10, 2025 at 04:12:10PM +0100, Clément Léger wrote: >> Add FWFT extension calls. This will be ratified in SBI V3.0 hence, it is >> provided as a separate commit that can be left out if needed. >> >> Signed-off-by: Clément Léger >> --- >> arch/riscv/kernel/sbi.c | 30 -- >> 1 file changed, 28 insertions(+), 2 deletions(-) >> >> diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c >> index 256910db1307..af8e2199e32d 100644 >> --- a/arch/riscv/kernel/sbi.c >> +++ b/arch/riscv/kernel/sbi.c >> @@ -299,9 +299,19 @@ static int __sbi_rfence_v02(int fid, const struct >> cpumask *cpu_mask, >> return 0; >> } >> >> +static bool sbi_fwft_supported; >> + >> int sbi_fwft_get(u32 feature, unsigned long *value) >> { >> -return -EOPNOTSUPP; >> +struct sbiret ret; >> + >> +if (!sbi_fwft_supported) >> +return -EOPNOTSUPP; >> + >> +ret = sbi_ecall(SBI_EXT_FWFT, SBI_EXT_FWFT_GET, >> +feature, 0, 0, 0, 0, 0); >> + >> +return sbi_err_map_linux_errno(ret.error); >> } >> >> /** >> @@ -314,7 +324,15 @@ int sbi_fwft_get(u32 feature, unsigned long *value) >> */ >> int sbi_fwft_set(u32 feature, unsigned long value, unsigned long flags) >> { >> -return -EOPNOTSUPP; >> +struct sbiret ret; >> + >> +if (!sbi_fwft_supported) >> +return -EOPNOTSUPP; >> + >> +ret = sbi_ecall(SBI_EXT_FWFT, SBI_EXT_FWFT_SET, >> +feature, value, flags, 0, 0, 0); >> + >> +return sbi_err_map_linux_errno(ret.error); > > sbi_err_map_linux_errno() doesn't know about SBI_ERR_DENIED_LOCKED. Not only it doesn't knows about DENIED_LOCKED but also another bunch of errors. I'll add them in a separate commit. 
> >> } >> >> struct fwft_set_req { >> @@ -389,6 +407,9 @@ static int sbi_fwft_feature_local_set(u32 feature, >> unsigned long value, >> int sbi_fwft_all_cpus_set(u32 feature, unsigned long value, unsigned long >> flags, >>bool revert_on_fail) >> { >> +if (!sbi_fwft_supported) >> +return -EOPNOTSUPP; >> + >> if (feature & SBI_FWFT_GLOBAL_FEATURE_BIT) >> return sbi_fwft_set(feature, value, flags); >> >> @@ -719,6 +740,11 @@ void __init sbi_init(void) >> pr_info("SBI DBCN extension detected\n"); >> sbi_debug_console_available = true; >> } >> +if ((sbi_spec_version >= sbi_mk_version(2, 0)) && > > Should check sbi_mk_version(3, 0) Oh yes that was for testing purpose and I incorrectly squashed it. > >> +(sbi_probe_extension(SBI_EXT_FWFT) > 0)) { >> +pr_info("SBI FWFT extension detected\n"); >> +sbi_fwft_supported = true; >> +} >> } else { >> __sbi_set_timer = __sbi_set_timer_v01; >> __sbi_send_ipi = __sbi_send_ipi_v01; >> -- >> 2.47.2 >> > Thanks, Clément > Thanks, > drew
Re: [PATCH v3 02/17] riscv: sbi: add FWFT extension interface
On 14/03/2025 13:02, Andrew Jones wrote: > On Fri, Mar 14, 2025 at 12:33:55PM +0100, Clément Léger wrote: >> >> >> On 13/03/2025 13:39, Andrew Jones wrote: >>> On Mon, Mar 10, 2025 at 04:12:09PM +0100, Clément Léger wrote: This SBI extensions enables supervisor mode to control feature that are under M-mode control (For instance, Svadu menvcfg ADUE bit, Ssdbltrp DTE, etc). Signed-off-by: Clément Léger --- arch/riscv/include/asm/sbi.h | 5 ++ arch/riscv/kernel/sbi.c | 97 2 files changed, 102 insertions(+) diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index bb077d0c912f..fc87c609c11a 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -503,6 +503,11 @@ int sbi_remote_hfence_vvma_asid(const struct cpumask *cpu_mask, unsigned long asid); long sbi_probe_extension(int ext); +int sbi_fwft_all_cpus_set(u32 feature, unsigned long value, unsigned long flags, +bool revert_on_failure); +int sbi_fwft_get(u32 feature, unsigned long *value); +int sbi_fwft_set(u32 feature, unsigned long value, unsigned long flags); + /* Check if current SBI specification version is 0.1 or not */ static inline int sbi_spec_is_0_1(void) { diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index 1989b8cade1b..256910db1307 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -299,6 +299,103 @@ static int __sbi_rfence_v02(int fid, const struct cpumask *cpu_mask, return 0; } +int sbi_fwft_get(u32 feature, unsigned long *value) +{ + return -EOPNOTSUPP; +} + +/** + * sbi_fwft_set() - Set a feature on all online cpus >>> >>> copy+paste of description from sbi_fwft_all_cpus_set(). This function >>> only sets the feature on the calling hart. >>> + * @feature: The feature to be set + * @value: The feature value to be set + * @flags: FWFT feature set flags + * + * Return: 0 on success, appropriate linux error code otherwise. 
+ */ +int sbi_fwft_set(u32 feature, unsigned long value, unsigned long flags) +{ + return -EOPNOTSUPP; +} + +struct fwft_set_req { + u32 feature; + unsigned long value; + unsigned long flags; + cpumask_t mask; +}; + +static void cpu_sbi_fwft_set(void *arg) +{ + struct fwft_set_req *req = arg; + + if (sbi_fwft_set(req->feature, req->value, req->flags)) + cpumask_clear_cpu(smp_processor_id(), &req->mask); +} + +static int sbi_fwft_feature_local_set(u32 feature, unsigned long value, +unsigned long flags, +bool revert_on_fail) +{ + int ret; + unsigned long prev_value; + cpumask_t tmp; + struct fwft_set_req req = { + .feature = feature, + .value = value, + .flags = flags, + }; + + cpumask_copy(&req.mask, cpu_online_mask); + + /* We can not revert if features are locked */ + if (revert_on_fail && flags & SBI_FWFT_SET_FLAG_LOCK) >>> >>> Should use () around the flags &. I thought checkpatch complained about >>> that? >>> + return -EINVAL; + + /* Reset value is the same for all cpus, read it once. */ >>> >>> How do we know we're reading the reset value? sbi_fwft_all_cpus_set() may >>> be called multiple times on the same feature. And harts may have had >>> sbi_fwft_set() called on them independently. I think we should drop the >>> whole prev_value optimization. >> >> That's actually used for revert_on_failure as well not only the >> optimization. > > At least the comment should drop the word 'Reset' and if there's a chance > that not all harts having the same value then we should call get on all > of them. (We'll probably want SBI FWFT functions which operate on > hartmasks eventually.) Ok, then I can pass a cpu_mask as well so that caller just have to pass online_cpus() if they want it on all cpus. 
> >> >>> + ret = sbi_fwft_get(feature, &prev_value); + if (ret) + return ret; + + /* Feature might already be set to the value we want */ + if (prev_value == value) + return 0; + + on_each_cpu_mask(&req.mask, cpu_sbi_fwft_set, &req, 1); + if (cpumask_equal(&req.mask, cpu_online_mask)) + return 0; + + pr_err("Failed to set feature %x for all online cpus, reverting\n", + feature); >>> >>> nit: I'd let the above line stick out. We have 100 chars. >>> + + req.value = prev_value; + cpumask_copy(&tmp, &req.mask); + on_each_cpu_mask(&req.mask, cpu_sbi_fwft_se
[PATCH v12 02/28] dt-bindings: riscv: zicfilp and zicfiss in dt-bindings (extensions.yaml)
Make an entry for cfi extensions in extensions.yaml. Signed-off-by: Deepak Gupta Acked-by: Rob Herring (Arm) --- Documentation/devicetree/bindings/riscv/extensions.yaml | 14 ++ 1 file changed, 14 insertions(+) diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index a63b994e0763..9b9024dbc8d2 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -426,6 +426,20 @@ properties: The standard Zicboz extension for cache-block zeroing as ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs. +- const: zicfilp + description: | +The standard Zicfilp extension for enforcing forward edge +control-flow integrity as ratified in commit 3f8e450 ("merge +pull request #227 from ved-rivos/0709") of riscv-cfi +github repo. + +- const: zicfiss + description: | +The standard Zicfiss extension for enforcing backward edge +control-flow integrity as ratified in commit 3f8e450 ("merge +pull request #227 from ved-rivos/0709") of riscv-cfi +github repo. + - const: zicntr description: The standard Zicntr extension for base counters and timers, as -- 2.34.1
[PATCH v12 11/28] riscv/shstk: If needed allocate a new shadow stack on clone
Userspace specifies CLONE_VM to share the address space and spawn a new thread. `clone` allows userspace to specify a new stack for the new thread. However, there is no way to specify a new shadow stack base address without changing the API. This patch allocates a new shadow stack whenever CLONE_VM is given. In case of CLONE_VFORK, the parent is suspended until the child finishes, and thus the child can use the parent's shadow stack. In case of !CLONE_VM, COW kicks in because the entire address space is copied from parent to child. `clone3` is extensible and can provide a mechanism by which a shadow stack can be passed as an input parameter. This is not settled yet and is being extensively discussed on the mailing list. Once that's settled, this commit will adapt to that. Reviewed-by: Zong Li Signed-off-by: Deepak Gupta --- arch/riscv/include/asm/mmu_context.h | 7 ++ arch/riscv/include/asm/usercfi.h | 25 arch/riscv/kernel/process.c | 9 +++ arch/riscv/kernel/usercfi.c | 120 +++ 4 files changed, 161 insertions(+) diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h index 8c4bc49a3a0f..dbf27a78df6c 100644 --- a/arch/riscv/include/asm/mmu_context.h +++ b/arch/riscv/include/asm/mmu_context.h @@ -48,6 +48,13 @@ static inline unsigned long mm_untag_mask(struct mm_struct *mm) } #endif +#define deactivate_mm deactivate_mm +static inline void deactivate_mm(struct task_struct *tsk, +struct mm_struct *mm) +{ + shstk_release(tsk); +} + #include #endif /* _ASM_RISCV_MMU_CONTEXT_H */ diff --git a/arch/riscv/include/asm/usercfi.h b/arch/riscv/include/asm/usercfi.h index 5f2027c51917..82d28ac98d76 100644 --- a/arch/riscv/include/asm/usercfi.h +++ b/arch/riscv/include/asm/usercfi.h @@ -8,6 +8,9 @@ #ifndef __ASSEMBLY__ #include +struct task_struct; +struct kernel_clone_args; + #ifdef CONFIG_RISCV_USER_CFI struct cfi_status { unsigned long ubcfi_en : 1; /* Enable for backward cfi. 
*/ @@ -17,6 +20,28 @@ struct cfi_status { unsigned long shdw_stk_size; /* size of shadow stack */ }; +unsigned long shstk_alloc_thread_stack(struct task_struct *tsk, + const struct kernel_clone_args *args); +void shstk_release(struct task_struct *tsk); +void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size); +unsigned long get_shstk_base(struct task_struct *task, unsigned long *size); +void set_active_shstk(struct task_struct *task, unsigned long shstk_addr); +bool is_shstk_enabled(struct task_struct *task); + +#else + +#define shstk_alloc_thread_stack(tsk, args) 0 + +#define shstk_release(tsk) + +#define get_shstk_base(task, size) 0UL + +#define set_shstk_base(task, shstk_addr, size) + +#define set_active_shstk(task, shstk_addr) + +#define is_shstk_enabled(task) false + #endif /* CONFIG_RISCV_USER_CFI */ #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 7c244de77180..99acb6342a37 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -29,6 +29,7 @@ #include #include #include +#include #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include @@ -211,6 +212,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) unsigned long clone_flags = args->flags; unsigned long usp = args->stack; unsigned long tls = args->tls; + unsigned long ssp = 0; struct pt_regs *childregs = task_pt_regs(p); /* Ensure all threads in this mm have the same pointer masking mode. */ @@ -229,11 +231,18 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.s[0] = (unsigned long)args->fn; p->thread.s[1] = (unsigned long)args->fn_arg; } else { + /* allocate new shadow stack if needed. 
In case of CLONE_VM we have to */ + ssp = shstk_alloc_thread_stack(p, args); + if (IS_ERR_VALUE(ssp)) + return PTR_ERR((void *)ssp); + *childregs = *(current_pt_regs()); /* Turn off status.VS */ riscv_v_vstate_off(childregs); if (usp) /* User fork */ childregs->sp = usp; + /* if needed, set new ssp */ + ssp ? set_active_shstk(p, ssp) : 0; if (clone_flags & CLONE_SETTLS) childregs->tp = tls; childregs->a0 = 0; /* Return value of fork() */ diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c index 24022809a7b5..73cf87dab186 100644 --- a/arch/riscv/kernel/usercfi.c +++ b/arch/riscv/kernel/usercfi.c @@ -19,6 +19,41 @@ #define SHSTK_ENTRY_SIZE sizeof(void *) +bool is_shstk_enabled(struct task_struct *task) +{ + return task->thread_info.user_cfi_state.ubcfi_en ?
[PATCH v12 10/28] riscv/mm: Implement map_shadow_stack() syscall
As discussed extensively in the changelog for the addition of this syscall on x86 ("x86/shstk: Introduce map_shadow_stack syscall") the existing mmap() and madvise() syscalls do not map entirely well onto the security requirements for shadow stack memory since they lead to windows where memory is allocated but not yet protected or stacks which are not properly and safely initialised. Instead a new syscall map_shadow_stack() has been defined which allocates and initialises a shadow stack page. This patch implements this syscall for riscv. riscv doesn't require the token to be set up by the kernel because user mode can do that by itself. However, to provide compatibility and portability with other architectures, user mode can specify the token set flag. Reviewed-by: Zong Li Signed-off-by: Deepak Gupta --- arch/riscv/kernel/Makefile | 1 + arch/riscv/kernel/usercfi.c | 144 2 files changed, 145 insertions(+) diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 8d186bfced45..3a861d320654 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -125,3 +125,4 @@ obj-$(CONFIG_ACPI) += acpi.o obj-$(CONFIG_ACPI_NUMA)+= acpi_numa.o obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += bugs.o +obj-$(CONFIG_RISCV_USER_CFI) += usercfi.o diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c new file mode 100644 index ..24022809a7b5 --- /dev/null +++ b/arch/riscv/kernel/usercfi.c @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Rivos, Inc. + * Deepak Gupta + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SHSTK_ENTRY_SIZE sizeof(void *) + +/* + * Writes on shadow stack can either be `sspush` or `ssamoswap`. `sspush` can happen + * implicitly on current shadow stack pointed to by CSR_SSP. `ssamoswap` takes pointer to + * shadow stack. To keep it simple, we plan to use `ssamoswap` to perform writes on shadow + * stack. 
+ */ +static noinline unsigned long amo_user_shstk(unsigned long *addr, unsigned long val) +{ + /* +* Never expect -1 on shadow stack. Expect return addresses and zero +*/ + unsigned long swap = -1; + + __enable_user_access(); + asm goto( + ".option push\n" + ".option arch, +zicfiss\n" + "1: ssamoswap.d %[swap], %[val], %[addr]\n" + _ASM_EXTABLE(1b, %l[fault]) + RISCV_ACQUIRE_BARRIER + ".option pop\n" + : [swap] "=r" (swap), [addr] "+A" (*addr) + : [val] "r" (val) + : "memory" + : fault + ); + __disable_user_access(); + return swap; +fault: + __disable_user_access(); + return -1; +} + +/* + * Create a restore token on the shadow stack. A token is always XLEN wide + * and aligned to XLEN. + */ +static int create_rstor_token(unsigned long ssp, unsigned long *token_addr) +{ + unsigned long addr; + + /* Token must be aligned */ + if (!IS_ALIGNED(ssp, SHSTK_ENTRY_SIZE)) + return -EINVAL; + + /* On RISC-V we're constructing token to be function of address itself */ + addr = ssp - SHSTK_ENTRY_SIZE; + + if (amo_user_shstk((unsigned long __user *)addr, (unsigned long)ssp) == -1) + return -EFAULT; + + if (token_addr) + *token_addr = addr; + + return 0; +} + +static unsigned long allocate_shadow_stack(unsigned long addr, unsigned long size, + unsigned long token_offset, bool set_tok) +{ + int flags = MAP_ANONYMOUS | MAP_PRIVATE; + struct mm_struct *mm = current->mm; + unsigned long populate, tok_loc = 0; + + if (addr) + flags |= MAP_FIXED_NOREPLACE; + + mmap_write_lock(mm); + addr = do_mmap(NULL, addr, size, PROT_READ, flags, + VM_SHADOW_STACK | VM_WRITE, 0, &populate, NULL); + mmap_write_unlock(mm); + + if (!set_tok || IS_ERR_VALUE(addr)) + goto out; + + if (create_rstor_token(addr + token_offset, &tok_loc)) { + vm_munmap(addr, size); + return -EINVAL; + } + + addr = tok_loc; + +out: + return addr; +} + +SYSCALL_DEFINE3(map_shadow_stack, unsigned long, addr, unsigned long, size, unsigned int, flags) +{ + bool set_tok = flags & SHADOW_STACK_SET_TOKEN; + unsigned long 
aligned_size = 0; + + if (!cpu_supports_shadow_stack()) + return -EOPNOTSUPP; + + /* Anything other than set token should result in invalid param */ + if (flags & ~SHADOW_STACK_SET_TOKEN) + return -EINVAL; + + /* +* Unlike other architectures, on RISC-V, SSP pointer is held in CSR_SSP and is available +* CSR in all modes. CSR ac
[PATCH v12 08/28] riscv mmu: teach pte_mkwrite to manufacture shadow stack PTEs
pte_mkwrite creates PTEs with WRITE encodings for underlying arch. Underlying arch can have two types of writeable mappings. One that can be written using regular store instructions. Another one that can only be written using specialized store instructions (like shadow stack stores). pte_mkwrite can select write PTE encoding based on VMA range (i.e. VM_SHADOW_STACK) Reviewed-by: Alexandre Ghiti Reviewed-by: Zong Li Signed-off-by: Deepak Gupta --- arch/riscv/include/asm/pgtable.h | 7 +++ arch/riscv/mm/pgtable.c | 17 + 2 files changed, 24 insertions(+) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index ede43185ffdf..ccd2fa34afb8 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -416,6 +416,10 @@ static inline pte_t pte_wrprotect(pte_t pte) /* static inline pte_t pte_mkread(pte_t pte) */ +struct vm_area_struct; +pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma); +#define pte_mkwrite pte_mkwrite + static inline pte_t pte_mkwrite_novma(pte_t pte) { return __pte(pte_val(pte) | _PAGE_WRITE); @@ -749,6 +753,9 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd) return pte_pmd(pte_mkyoung(pmd_pte(pmd))); } +pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma); +#define pmd_mkwrite pmd_mkwrite + static inline pmd_t pmd_mkwrite_novma(pmd_t pmd) { return pte_pmd(pte_mkwrite_novma(pmd_pte(pmd))); diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c index 4ae67324f992..be5d38546bb3 100644 --- a/arch/riscv/mm/pgtable.c +++ b/arch/riscv/mm/pgtable.c @@ -155,3 +155,20 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, return pmd; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +pte_t pte_mkwrite(pte_t pte, struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_SHADOW_STACK) + return pte_mkwrite_shstk(pte); + + return pte_mkwrite_novma(pte); +} + +pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_SHADOW_STACK) + return pmd_mkwrite_shstk(pmd); + + return 
pmd_mkwrite_novma(pmd); +} + -- 2.34.1
[PATCH v12 16/28] riscv: signal: abstract header saving for setup_sigcontext
From: Andy Chiu The function save_v_state() served two purposes. First, it saved extension context into the signal stack. Then, it constructed the extension header if there was no fault. The second part is independent of the extension itself. As a result, we can pull that part out, so future extensions may reuse it. This patch adds arch_ext_list and makes setup_sigcontext() go through all possible extensions' save() callback. The callback returns a positive value indicating the size of the successfully saved extension. Then the kernel proceeds to construct the header for that extension. The kernel skips an extension if it does not exist, or if the saving fails for some reasons. The error code is propagated out on the later case. This patch does not introduce any functional changes. Signed-off-by: Andy Chiu --- arch/riscv/include/asm/vector.h | 3 ++ arch/riscv/kernel/signal.c | 62 +++-- 2 files changed, 44 insertions(+), 21 deletions(-) diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index e8a83f55be2b..05390538ea8a 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -407,6 +407,9 @@ static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; } #define riscv_v_thread_free(tsk) do {} while (0) #define riscv_v_setup_ctx_cache() do {} while (0) #define riscv_v_thread_alloc(tsk) do {} while (0) +#define get_cpu_vector_context() do {} while (0) +#define put_cpu_vector_context() do {} while (0) +#define riscv_v_vstate_set_restore(task, regs) do {} while (0) #endif /* CONFIG_RISCV_ISA_V */ diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 94e905eea1de..80c70dccf09f 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -68,18 +68,19 @@ static long save_fp_state(struct pt_regs *regs, #define restore_fp_state(task, regs) (0) #endif -#ifdef CONFIG_RISCV_ISA_V - -static long save_v_state(struct pt_regs *regs, void __user **sc_vec) +static long 
save_v_state(struct pt_regs *regs, void __user *sc_vec) { - struct __riscv_ctx_hdr __user *hdr; struct __sc_riscv_v_state __user *state; void __user *datap; long err; - hdr = *sc_vec; - /* Place state to the user's signal context space after the hdr */ - state = (struct __sc_riscv_v_state __user *)(hdr + 1); + if (!IS_ENABLED(CONFIG_RISCV_ISA_V) || + !((has_vector() || has_xtheadvector()) && + riscv_v_vstate_query(regs))) + return 0; + + /* Place state to the user's signal context spac */ + state = (struct __sc_riscv_v_state __user *)sc_vec; /* Point datap right after the end of __sc_riscv_v_state */ datap = state + 1; @@ -97,15 +98,11 @@ static long save_v_state(struct pt_regs *regs, void __user **sc_vec) err |= __put_user((__force void *)datap, &state->v_state.datap); /* Copy the whole vector content to user space datap. */ err |= __copy_to_user(datap, current->thread.vstate.datap, riscv_v_vsize); - /* Copy magic to the user space after saving all vector conetext */ - err |= __put_user(RISCV_V_MAGIC, &hdr->magic); - err |= __put_user(riscv_v_sc_size, &hdr->size); if (unlikely(err)) - return err; + return -EFAULT; - /* Only progress the sv_vec if everything has done successfully */ - *sc_vec += riscv_v_sc_size; - return 0; + /* Only return the size if everything has done successfully */ + return riscv_v_sc_size; } /* @@ -142,10 +139,20 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) */ return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); } -#else -#define save_v_state(task, regs) (0) -#define __restore_v_state(task, regs) (0) -#endif + +struct arch_ext_priv { + __u32 magic; + long (*save)(struct pt_regs *regs, void __user *sc_vec); +}; + +struct arch_ext_priv arch_ext_list[] = { + { + .magic = RISCV_V_MAGIC, + .save = &save_v_state, + }, +}; + +const size_t nr_arch_exts = ARRAY_SIZE(arch_ext_list); static long restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) @@ -276,7 +283,8 @@ static long 
setup_sigcontext(struct rt_sigframe __user *frame, { struct sigcontext __user *sc = &frame->uc.uc_mcontext; struct __riscv_ctx_hdr __user *sc_ext_ptr = &sc->sc_extdesc.hdr; - long err; + struct arch_ext_priv *arch_ext; + long err, i, ext_size; /* sc_regs is structured the same as the start of pt_regs */ err = __copy_to_user(&sc->sc_regs, regs, sizeof(sc->sc_regs)); @@ -284,8 +292,20 @@ static long setup_sigcontext(struct rt_sigframe __user *frame, if (has_fpu()) err |= save_f
[PATCH v12 12/28] riscv: Implements arch agnostic shadow stack prctls
Implement architecture agnostic prctls() interface for setting and getting shadow stack status. prctls implemented are PR_GET_SHADOW_STACK_STATUS, PR_SET_SHADOW_STACK_STATUS and PR_LOCK_SHADOW_STACK_STATUS. As part of PR_SET_SHADOW_STACK_STATUS/PR_GET_SHADOW_STACK_STATUS, only PR_SHADOW_STACK_ENABLE is implemented because RISCV allows each mode to write to their own shadow stack using `sspush` or `ssamoswap`. PR_LOCK_SHADOW_STACK_STATUS locks current configuration of shadow stack enabling. Signed-off-by: Deepak Gupta --- arch/riscv/include/asm/usercfi.h | 18 ++- arch/riscv/kernel/process.c | 8 +++ arch/riscv/kernel/usercfi.c | 110 +++ 3 files changed, 135 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/usercfi.h b/arch/riscv/include/asm/usercfi.h index 82d28ac98d76..c4dcd256f19a 100644 --- a/arch/riscv/include/asm/usercfi.h +++ b/arch/riscv/include/asm/usercfi.h @@ -7,6 +7,7 @@ #ifndef __ASSEMBLY__ #include +#include struct task_struct; struct kernel_clone_args; @@ -14,7 +15,8 @@ struct kernel_clone_args; #ifdef CONFIG_RISCV_USER_CFI struct cfi_status { unsigned long ubcfi_en : 1; /* Enable for backward cfi. 
*/ - unsigned long rsvd : ((sizeof(unsigned long) * 8) - 1); + unsigned long ubcfi_locked : 1; + unsigned long rsvd : ((sizeof(unsigned long) * 8) - 2); unsigned long user_shdw_stk; /* Current user shadow stack pointer */ unsigned long shdw_stk_base; /* Base address of shadow stack */ unsigned long shdw_stk_size; /* size of shadow stack */ @@ -27,6 +29,12 @@ void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned unsigned long get_shstk_base(struct task_struct *task, unsigned long *size); void set_active_shstk(struct task_struct *task, unsigned long shstk_addr); bool is_shstk_enabled(struct task_struct *task); +bool is_shstk_locked(struct task_struct *task); +bool is_shstk_allocated(struct task_struct *task); +void set_shstk_lock(struct task_struct *task); +void set_shstk_status(struct task_struct *task, bool enable); + +#define PR_SHADOW_STACK_SUPPORTED_STATUS_MASK (PR_SHADOW_STACK_ENABLE) #else @@ -42,6 +50,14 @@ bool is_shstk_enabled(struct task_struct *task); #define is_shstk_enabled(task) false +#define is_shstk_locked(task) false + +#define is_shstk_allocated(task) false + +#define set_shstk_lock(task) + +#define set_shstk_status(task, enable) + #endif /* CONFIG_RISCV_USER_CFI */ #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 99acb6342a37..cd11667593fe 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -153,6 +153,14 @@ void start_thread(struct pt_regs *regs, unsigned long pc, regs->epc = pc; regs->sp = sp; + /* +* clear shadow stack state on exec. +* libc will set it later via prctl. 
+*/ + set_shstk_status(current, false); + set_shstk_base(current, 0, 0); + set_active_shstk(current, 0); + #ifdef CONFIG_64BIT regs->status &= ~SR_UXL; diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c index 73cf87dab186..b93b324eed26 100644 --- a/arch/riscv/kernel/usercfi.c +++ b/arch/riscv/kernel/usercfi.c @@ -24,6 +24,16 @@ bool is_shstk_enabled(struct task_struct *task) return task->thread_info.user_cfi_state.ubcfi_en ? true : false; } +bool is_shstk_allocated(struct task_struct *task) +{ + return task->thread_info.user_cfi_state.shdw_stk_base ? true : false; +} + +bool is_shstk_locked(struct task_struct *task) +{ + return task->thread_info.user_cfi_state.ubcfi_locked ? true : false; +} + void set_shstk_base(struct task_struct *task, unsigned long shstk_addr, unsigned long size) { task->thread_info.user_cfi_state.shdw_stk_base = shstk_addr; @@ -42,6 +52,26 @@ void set_active_shstk(struct task_struct *task, unsigned long shstk_addr) task->thread_info.user_cfi_state.user_shdw_stk = shstk_addr; } +void set_shstk_status(struct task_struct *task, bool enable) +{ + if (!cpu_supports_shadow_stack()) + return; + + task->thread_info.user_cfi_state.ubcfi_en = enable ? 1 : 0; + + if (enable) + task->thread.envcfg |= ENVCFG_SSE; + else + task->thread.envcfg &= ~ENVCFG_SSE; + + csr_write(CSR_ENVCFG, task->thread.envcfg); +} + +void set_shstk_lock(struct task_struct *task) +{ + task->thread_info.user_cfi_state.ubcfi_locked = 1; +} + /* * If size is 0, then to be compatible with regular stack we want it to be as big as * regular stack. Else PAGE_ALIGN it and return back @@ -262,3 +292,83 @@ void shstk_release(struct task_struct *tsk) vm_munmap(base, size); set_shstk_base(tsk, 0, 0); } + +int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status) +{ + unsigned long bcfi_status = 0; + +
[PATCH v12 14/28] riscv: Implements arch agnostic indirect branch tracking prctls
prctls implemented are: PR_SET_INDIR_BR_LP_STATUS, PR_GET_INDIR_BR_LP_STATUS and PR_LOCK_INDIR_BR_LP_STATUS Signed-off-by: Deepak Gupta --- arch/riscv/include/asm/usercfi.h | 16 +++- arch/riscv/kernel/entry.S| 2 +- arch/riscv/kernel/process.c | 5 +++ arch/riscv/kernel/usercfi.c | 79 4 files changed, 100 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/usercfi.h b/arch/riscv/include/asm/usercfi.h index c4dcd256f19a..a8cec7c14d1d 100644 --- a/arch/riscv/include/asm/usercfi.h +++ b/arch/riscv/include/asm/usercfi.h @@ -16,7 +16,9 @@ struct kernel_clone_args; struct cfi_status { unsigned long ubcfi_en : 1; /* Enable for backward cfi. */ unsigned long ubcfi_locked : 1; - unsigned long rsvd : ((sizeof(unsigned long) * 8) - 2); + unsigned long ufcfi_en : 1; /* Enable for forward cfi. Note that ELP goes in sstatus */ + unsigned long ufcfi_locked : 1; + unsigned long rsvd : ((sizeof(unsigned long) * 8) - 4); unsigned long user_shdw_stk; /* Current user shadow stack pointer */ unsigned long shdw_stk_base; /* Base address of shadow stack */ unsigned long shdw_stk_size; /* size of shadow stack */ @@ -33,6 +35,10 @@ bool is_shstk_locked(struct task_struct *task); bool is_shstk_allocated(struct task_struct *task); void set_shstk_lock(struct task_struct *task); void set_shstk_status(struct task_struct *task, bool enable); +bool is_indir_lp_enabled(struct task_struct *task); +bool is_indir_lp_locked(struct task_struct *task); +void set_indir_lp_status(struct task_struct *task, bool enable); +void set_indir_lp_lock(struct task_struct *task); #define PR_SHADOW_STACK_SUPPORTED_STATUS_MASK (PR_SHADOW_STACK_ENABLE) @@ -58,6 +64,14 @@ void set_shstk_status(struct task_struct *task, bool enable); #define set_shstk_status(task, enable) +#define is_indir_lp_enabled(task) false + +#define is_indir_lp_locked(task) false + +#define set_indir_lp_status(task, enable) + +#define set_indir_lp_lock(task) + #endif /* CONFIG_RISCV_USER_CFI */ #endif /* __ASSEMBLY__ */ diff --git 
a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 68c99124ea55..00494b54ff4a 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -143,7 +143,7 @@ SYM_CODE_START(handle_exception) * Disable the FPU/Vector to detect illegal usage of floating point * or vector in kernel space. */ - li t0, SR_SUM | SR_FS_VS + li t0, SR_SUM | SR_FS_VS | SR_ELP REG_L s0, TASK_TI_USER_SP(tp) csrrc s1, CSR_STATUS, t0 diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index cd11667593fe..4587201dd81d 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -160,6 +160,11 @@ void start_thread(struct pt_regs *regs, unsigned long pc, set_shstk_status(current, false); set_shstk_base(current, 0, 0); set_active_shstk(current, 0); + /* +* disable indirect branch tracking on exec. +* libc will enable it later via prctl. +*/ + set_indir_lp_status(current, false); #ifdef CONFIG_64BIT regs->status &= ~SR_UXL; diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c index b93b324eed26..7937bcef9271 100644 --- a/arch/riscv/kernel/usercfi.c +++ b/arch/riscv/kernel/usercfi.c @@ -72,6 +72,35 @@ void set_shstk_lock(struct task_struct *task) task->thread_info.user_cfi_state.ubcfi_locked = 1; } +bool is_indir_lp_enabled(struct task_struct *task) +{ + return task->thread_info.user_cfi_state.ufcfi_en ? true : false; +} + +bool is_indir_lp_locked(struct task_struct *task) +{ + return task->thread_info.user_cfi_state.ufcfi_locked ? true : false; +} + +void set_indir_lp_status(struct task_struct *task, bool enable) +{ + if (!cpu_supports_indirect_br_lp_instr()) + return; + + task->thread_info.user_cfi_state.ufcfi_en = enable ? 
1 : 0; + + if (enable) + task->thread.envcfg |= ENVCFG_LPE; + else + task->thread.envcfg &= ~ENVCFG_LPE; + + csr_write(CSR_ENVCFG, task->thread.envcfg); +} + +void set_indir_lp_lock(struct task_struct *task) +{ + task->thread_info.user_cfi_state.ufcfi_locked = 1; +} /* * If size is 0, then to be compatible with regular stack we want it to be as big as * regular stack. Else PAGE_ALIGN it and return back @@ -372,3 +401,53 @@ int arch_lock_shadow_stack_status(struct task_struct *task, return 0; } + +int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status) +{ + unsigned long fcfi_status = 0; + + if (!cpu_supports_indirect_br_lp_instr()) + return -EINVAL; + + /* indirect branch tracking is enabled on the task or not */ + fcfi_status |= (is_indir_lp_enabled(t) ? PR_INDIR_BR_LP_ENABLE : 0); + + return copy_to_user(status, &fcfi_st
[PATCH v12 13/28] prctl: arch-agnostic prctl for indirect branch tracking
Three architectures (x86, aarch64, riscv) support an indirect branch tracking feature in a very similar fashion. At a very high level, indirect branch tracking is a CPU feature where the CPU tracks branches which use a memory operand to perform control transfer in a program. As part of this tracking of indirect branches, the CPU enters a state where it expects a landing pad instr on the target, and if one is not found the CPU raises a fault (architecture dependent). x86 landing pad instr - `ENDBRANCH` aarch64 landing pad instr - `BTI` riscv landing pad instr - `lpad` Given that three major arches have support for indirect branch tracking, this patch makes the `prctl` for indirect branch tracking arch agnostic. To allow userspace to enable this feature for itself, the following prctls are defined: - PR_GET_INDIR_BR_LP_STATUS: Gets current configured status for indirect branch tracking. - PR_SET_INDIR_BR_LP_STATUS: Sets a configuration for indirect branch tracking. Following status options are allowed - PR_INDIR_BR_LP_ENABLE: Enables indirect branch tracking on user thread. - PR_INDIR_BR_LP_DISABLE: Disables indirect branch tracking on user thread. - PR_LOCK_INDIR_BR_LP_STATUS: Locks configured status for indirect branch tracking for user thread.
Signed-off-by: Deepak Gupta Reviewed-by: Mark Brown --- include/linux/cpu.h| 4 include/uapi/linux/prctl.h | 27 +++ kernel/sys.c | 30 ++ 3 files changed, 61 insertions(+) diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 6a0a8f1c7c90..fb0c394430c6 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -204,4 +204,8 @@ static inline bool cpu_mitigations_auto_nosmt(void) } #endif +int arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status); +int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status); +int arch_lock_indir_br_lp_status(struct task_struct *t, unsigned long status); + #endif /* _LINUX_CPU_H_ */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 5c6080680cb2..6cd90460cbad 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -353,4 +353,31 @@ struct prctl_mm_map { */ #define PR_LOCK_SHADOW_STACK_STATUS 76 +/* + * Get the current indirect branch tracking configuration for the current + * thread, this will be the value configured via PR_SET_INDIR_BR_LP_STATUS. + */ +#define PR_GET_INDIR_BR_LP_STATUS 77 + +/* + * Set the indirect branch tracking configuration. PR_INDIR_BR_LP_ENABLE will + * enable cpu feature for user thread, to track all indirect branches and ensure + * they land on arch defined landing pad instruction. + * x86 - If enabled, an indirect branch must land on `ENDBRANCH` instruction. + * arch64 - If enabled, an indirect branch must land on `BTI` instruction. + * riscv - If enabled, an indirect branch must land on `lpad` instruction. + * PR_INDIR_BR_LP_DISABLE will disable feature for user thread and indirect + * branches will no more be tracked by cpu to land on arch defined landing pad + * instruction. + */ +#define PR_SET_INDIR_BR_LP_STATUS 78 +# define PR_INDIR_BR_LP_ENABLE(1UL << 0) + +/* + * Prevent further changes to the specified indirect branch tracking + * configuration. 
All bits may be locked via this call, including + * undefined bits. + */ +#define PR_LOCK_INDIR_BR_LP_STATUS 79 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index cb366ff8703a..f347f3518d0b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2336,6 +2336,21 @@ int __weak arch_lock_shadow_stack_status(struct task_struct *t, unsigned long st return -EINVAL; } +int __weak arch_get_indir_br_lp_status(struct task_struct *t, unsigned long __user *status) +{ + return -EINVAL; +} + +int __weak arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status) +{ + return -EINVAL; +} + +int __weak arch_lock_indir_br_lp_status(struct task_struct *t, unsigned long status) +{ + return -EINVAL; +} + #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE) #ifdef CONFIG_ANON_VMA_NAME @@ -2811,6 +2826,21 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EINVAL; error = arch_lock_shadow_stack_status(me, arg2); break; + case PR_GET_INDIR_BR_LP_STATUS: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = arch_get_indir_br_lp_status(me, (unsigned long __user *)arg2); + break; + case PR_SET_INDIR_BR_LP_STATUS: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = arch_set_indir_br_lp_status(me, arg2); + break; + case PR_LOCK_INDIR_BR_LP_STATUS: + if (arg3 || arg4 || arg5) + return -
[PATCH v12 15/28] riscv/traps: Introduce software check exception
zicfiss / zicfilp introduces a new exception to priv isa `software check exception` with cause code = 18. This patch implements software check exception. Additionally it implements a cfi violation handler which checks for code in xtval. If xtval=2, it means that sw check exception happened because of an indirect branch not landing on 4 byte aligned PC or not landing on `lpad` instruction or label value embedded in `lpad` not matching label value setup in `x7`. If xtval=3, it means that sw check exception happened because of mismatch between link register (x1 or x5) and top of shadow stack (on execution of `sspopchk`). In case of cfi violation, SIGSEGV is raised with code=SEGV_CPERR. SEGV_CPERR was introduced by x86 shadow stack patches. Reviewed-by: Zong Li Signed-off-by: Deepak Gupta --- arch/riscv/include/asm/asm-prototypes.h | 1 + arch/riscv/include/asm/entry-common.h | 2 ++ arch/riscv/kernel/entry.S | 3 +++ arch/riscv/kernel/traps.c | 43 + 4 files changed, 49 insertions(+) diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index cd627ec289f1..5a27cefd7805 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -51,6 +51,7 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_u); DECLARE_DO_ERROR_INFO(do_trap_ecall_s); DECLARE_DO_ERROR_INFO(do_trap_ecall_m); DECLARE_DO_ERROR_INFO(do_trap_break); +DECLARE_DO_ERROR_INFO(do_trap_software_check); asmlinkage void handle_bad_stack(struct pt_regs *regs); asmlinkage void do_page_fault(struct pt_regs *regs); diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h index b28ccc6cdeea..34ed149af5d1 100644 --- a/arch/riscv/include/asm/entry-common.h +++ b/arch/riscv/include/asm/entry-common.h @@ -40,4 +40,6 @@ static inline int handle_misaligned_store(struct pt_regs *regs) } #endif +bool handle_user_cfi_violation(struct pt_regs *regs); + #endif /* _ASM_RISCV_ENTRY_COMMON_H */ diff --git a/arch/riscv/kernel/entry.S 
b/arch/riscv/kernel/entry.S index 00494b54ff4a..9c00cac3f6f2 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -472,6 +472,9 @@ SYM_DATA_START_LOCAL(excp_vect_table) RISCV_PTR do_page_fault /* load page fault */ RISCV_PTR do_trap_unknown RISCV_PTR do_page_fault /* store page fault */ + RISCV_PTR do_trap_unknown /* cause=16 */ + RISCV_PTR do_trap_unknown /* cause=17 */ + RISCV_PTR do_trap_software_check /* cause=18 is sw check exception */ SYM_DATA_END_LABEL(excp_vect_table, SYM_L_LOCAL, excp_vect_table_end) #ifndef CONFIG_MMU diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 8ff8e8b36524..3f7709f4595a 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -354,6 +354,49 @@ void do_trap_ecall_u(struct pt_regs *regs) } +#define CFI_TVAL_FCFI_CODE 2 +#define CFI_TVAL_BCFI_CODE 3 +/* handle cfi violations */ +bool handle_user_cfi_violation(struct pt_regs *regs) +{ + bool ret = false; + unsigned long tval = csr_read(CSR_TVAL); + + if ((tval == CFI_TVAL_FCFI_CODE && cpu_supports_indirect_br_lp_instr()) || + (tval == CFI_TVAL_BCFI_CODE && cpu_supports_shadow_stack())) { + do_trap_error(regs, SIGSEGV, SEGV_CPERR, regs->epc, + "Oops - control flow violation"); + ret = true; + } + + return ret; +} + +/* + * software check exception is defined with risc-v cfi spec. Software check + * exception is raised when:- + * a) An indirect branch doesn't land on 4 byte aligned PC or `lpad` + *instruction or `label` value programmed in `lpad` instr doesn't + *match with value setup in `x7`. reported code in `xtval` is 2. + * b) `sspopchk` instruction finds a mismatch between top of shadow stack (ssp) + *and x1/x5. reported code in `xtval` is 3. 
+ */ +asmlinkage __visible __trap_section void do_trap_software_check(struct pt_regs *regs) +{ + if (user_mode(regs)) { + irqentry_enter_from_user_mode(regs); + + /* not a cfi violation, then merge into flow of unknown trap handler */ + if (!handle_user_cfi_violation(regs)) + do_trap_unknown(regs); + + irqentry_exit_to_user_mode(regs); + } else { + /* sw check exception coming from kernel is a bug in kernel */ + die(regs, "Kernel BUG"); + } +} + #ifdef CONFIG_MMU asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs) { -- 2.34.1
[PATCH v12 17/28] riscv/signal: save and restore of shadow stack for signal
Save shadow stack pointer in sigcontext structure while delivering signal. Restore shadow stack pointer from sigcontext on sigreturn. As part of the save operation, the kernel uses `ssamoswap` to save a snapshot of the current shadow stack pointer on the shadow stack itself (this can be called a save token). During restore on sigreturn, the kernel retrieves the token from the top of the shadow stack and validates it. This ensures that user mode can't arbitrarily pivot to any shadow stack address without having a token, and thus provides strong security assurance in the window between signal delivery and sigreturn. Use an ABI compatible way of saving/restoring the shadow stack pointer into the signal stack. This follows what the Vector extension does, where extra registers are placed in the form of extension header + extension body on the stack. The extension header indicates the size of the extra architectural states plus the size of the header itself, and a magic identifier of the extension. Then, the extension's body contains the new architectural states in the form defined by uapi.
Signed-off-by: Andy Chiu Signed-off-by: Deepak Gupta --- arch/riscv/include/asm/usercfi.h | 10 arch/riscv/include/uapi/asm/ptrace.h | 4 ++ arch/riscv/include/uapi/asm/sigcontext.h | 1 + arch/riscv/kernel/signal.c | 80 arch/riscv/kernel/usercfi.c | 56 ++ 5 files changed, 151 insertions(+) diff --git a/arch/riscv/include/asm/usercfi.h b/arch/riscv/include/asm/usercfi.h index a8cec7c14d1d..361f59edbdef 100644 --- a/arch/riscv/include/asm/usercfi.h +++ b/arch/riscv/include/asm/usercfi.h @@ -8,6 +8,7 @@ #ifndef __ASSEMBLY__ #include #include +#include struct task_struct; struct kernel_clone_args; @@ -35,6 +36,9 @@ bool is_shstk_locked(struct task_struct *task); bool is_shstk_allocated(struct task_struct *task); void set_shstk_lock(struct task_struct *task); void set_shstk_status(struct task_struct *task, bool enable); +unsigned long get_active_shstk(struct task_struct *task); +int restore_user_shstk(struct task_struct *tsk, unsigned long shstk_ptr); +int save_user_shstk(struct task_struct *tsk, unsigned long *saved_shstk_ptr); bool is_indir_lp_enabled(struct task_struct *task); bool is_indir_lp_locked(struct task_struct *task); void set_indir_lp_status(struct task_struct *task, bool enable); @@ -72,6 +76,12 @@ void set_indir_lp_lock(struct task_struct *task); #define set_indir_lp_lock(task) +#define restore_user_shstk(tsk, shstk_ptr) -EINVAL + +#define save_user_shstk(tsk, saved_shstk_ptr) -EINVAL + +#define get_active_shstk(task) 0UL + #endif /* CONFIG_RISCV_USER_CFI */ #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h index a38268b19c3d..659ea3af5680 100644 --- a/arch/riscv/include/uapi/asm/ptrace.h +++ b/arch/riscv/include/uapi/asm/ptrace.h @@ -127,6 +127,10 @@ struct __riscv_v_regset_state { */ #define RISCV_MAX_VLENB (8192) +struct __sc_riscv_cfi_state { + unsigned long ss_ptr; /* shadow stack pointer */ +}; + #endif /* __ASSEMBLY__ */ #endif /* _UAPI_ASM_RISCV_PTRACE_H */ diff --git 
a/arch/riscv/include/uapi/asm/sigcontext.h b/arch/riscv/include/uapi/asm/sigcontext.h index cd4f175dc837..f37e4beffe03 100644 --- a/arch/riscv/include/uapi/asm/sigcontext.h +++ b/arch/riscv/include/uapi/asm/sigcontext.h @@ -10,6 +10,7 @@ /* The Magic number for signal context frame header. */ #define RISCV_V_MAGIC 0x53465457 +#define RISCV_ZICFISS_MAGIC0x9487 #define END_MAGIC 0x0 /* The size of END signal context header. */ diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 80c70dccf09f..a7472a6fcdca 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -22,11 +22,13 @@ #include #include #include +#include unsigned long signal_minsigstksz __ro_after_init; extern u32 __user_rt_sigreturn[2]; static size_t riscv_v_sc_size __ro_after_init; +static size_t riscv_zicfiss_sc_size __ro_after_init; #define DEBUG_SIG 0 @@ -140,6 +142,62 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); } +static long save_cfiss_state(struct pt_regs *regs, void __user *sc_cfi) +{ + struct __sc_riscv_cfi_state __user *state = sc_cfi; + unsigned long ss_ptr = 0; + long err = 0; + + if (!IS_ENABLED(CONFIG_RISCV_USER_CFI) || !is_shstk_enabled(current)) + return 0; + + /* +* Save a pointer to shadow stack itself on shadow stack as a form of token. +* A token on shadow gives following properties +* - Safe save and restore for shadow stack switching. Any save of shadow stack +* must have had saved a token on shadow stack. Similarly any restore of shadow +* stack must check the token before restore. Since writing to shadow stack with +* address
[PATCH v12 21/28] riscv: Add Firmware Feature SBI extensions definitions
From: Clément Léger Add necessary SBI definitions to use the FWFT extension. Signed-off-by: Clément Léger Reviewed-by: Zong Li --- arch/riscv/include/asm/sbi.h | 26 ++ 1 file changed, 26 insertions(+) diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 3d250824178b..23bfb254e3f4 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -35,6 +35,7 @@ enum sbi_ext_id { SBI_EXT_DBCN = 0x4442434E, SBI_EXT_STA = 0x535441, SBI_EXT_NACL = 0x4E41434C, + SBI_EXT_FWFT = 0x46574654, /* Experimentals extensions must lie within this range */ SBI_EXT_EXPERIMENTAL_START = 0x0800, @@ -401,6 +402,31 @@ enum sbi_ext_nacl_feature { #define SBI_NACL_SHMEM_SRET_X(__i) ((__riscv_xlen / 8) * (__i)) #define SBI_NACL_SHMEM_SRET_X_LAST 31 +/* SBI function IDs for FW feature extension */ +#define SBI_EXT_FWFT_SET 0x0 +#define SBI_EXT_FWFT_GET 0x1 + +enum sbi_fwft_feature_t { + SBI_FWFT_MISALIGNED_EXC_DELEG = 0x0, + SBI_FWFT_LANDING_PAD= 0x1, + SBI_FWFT_SHADOW_STACK = 0x2, + SBI_FWFT_DOUBLE_TRAP= 0x3, + SBI_FWFT_PTE_AD_HW_UPDATING = 0x4, + SBI_FWFT_LOCAL_RESERVED_START = 0x5, + SBI_FWFT_LOCAL_RESERVED_END = 0x3fff, + SBI_FWFT_LOCAL_PLATFORM_START = 0x4000, + SBI_FWFT_LOCAL_PLATFORM_END = 0x7fff, + + SBI_FWFT_GLOBAL_RESERVED_START = 0x8000, + SBI_FWFT_GLOBAL_RESERVED_END= 0xbfff, + SBI_FWFT_GLOBAL_PLATFORM_START = 0xc000, + SBI_FWFT_GLOBAL_PLATFORM_END= 0x, +}; + +#define SBI_FWFT_GLOBAL_FEATURE_BIT(1 << 31) +#define SBI_FWFT_PLATFORM_FEATURE_BIT (1 << 30) + +#define SBI_FWFT_SET_FLAG_LOCK (1 << 0) /* SBI spec version fields */ #define SBI_SPEC_VERSION_DEFAULT 0x1 -- 2.34.1
[PATCH v12 26/28] riscv: Documentation for landing pad / indirect branch tracking
Adding documentation on landing pad aka indirect branch tracking on riscv and kernel interfaces exposed so that user tasks can enable it. Reviewed-by: Zong Li Signed-off-by: Deepak Gupta --- Documentation/arch/riscv/index.rst | 1 + Documentation/arch/riscv/zicfilp.rst | 115 +++ 2 files changed, 116 insertions(+) diff --git a/Documentation/arch/riscv/index.rst b/Documentation/arch/riscv/index.rst index eecf347ce849..be7237b69682 100644 --- a/Documentation/arch/riscv/index.rst +++ b/Documentation/arch/riscv/index.rst @@ -14,6 +14,7 @@ RISC-V architecture uabi vector cmodx +zicfilp features diff --git a/Documentation/arch/riscv/zicfilp.rst b/Documentation/arch/riscv/zicfilp.rst new file mode 100644 index ..a188d78fcde6 --- /dev/null +++ b/Documentation/arch/riscv/zicfilp.rst @@ -0,0 +1,115 @@ +.. SPDX-License-Identifier: GPL-2.0 + +:Author: Deepak Gupta +:Date: 12 January 2024 + + +Tracking indirect control transfers on RISC-V Linux + + +This document briefly describes the interface provided to userspace by Linux +to enable indirect branch tracking for user mode applications on RISC-V + +1. Feature Overview + + +Memory corruption issues usually result in crashes, however when in hands of +an adversary and if used creatively can result in a variety of security issues. + +One of those security issues can be code re-use attacks on program where adversary +can use corrupt function pointers and chain them together to perform jump oriented +programming (JOP) or call oriented programming (COP) and thus compromising control +flow integrity (CFI) of the program. + +Function pointers live in read-write memory and thus are susceptible to corruption +and allows an adversary to reach any program counter (PC) in address space. On +RISC-V zicfilp extension enforces a restriction on such indirect control +transfers: + +- indirect control transfers must land on a landing pad instruction ``lpad``. + There are two exceptions to this rule: + + - rs1 = x1 or rs1 = x5, i.e. 
a return from a function and returns are +protected using shadow stack (see zicfiss.rst) + + - rs1 = x7. On RISC-V compiler usually does below to reach function +which is beyond the offset reachable by a J-type instruction:: + + auipc x7, <imm> + jalr (x7) + + Such forms of indirect control transfer are still immutable and don't rely +on memory and thus rs1=x7 is exempted from tracking and considered software +guarded jumps. + +``lpad`` instruction is pseudo of ``auipc rd, <imm_20bit>`` with ``rd=x0`` and +is a HINT nop. ``lpad`` instruction must be aligned on 4 byte boundary and +compares 20 bit immediate with x7. If ``imm_20bit`` == 0, CPU doesn't perform any +comparison with ``x7``. If ``imm_20bit`` != 0, then ``imm_20bit`` must match +``x7`` else CPU will raise ``software check exception`` (``cause=18``) with +``*tval = 2``. + +Compiler can generate a hash over function signatures and setup them (truncated +to 20bit) in x7 at callsites and function prologues can have ``lpad`` with same +function hash. This further reduces number of program counters a call site can +reach. + +2. ELF and psABI +- + +Toolchain sets up :c:macro:`GNU_PROPERTY_RISCV_FEATURE_1_FCFI` for property +:c:macro:`GNU_PROPERTY_RISCV_FEATURE_1_AND` in notes section of the object file. + +3. Linux enabling +-- + +User space programs can have multiple shared objects loaded in its address space +and it's a difficult task to make sure all the dependencies have been compiled +with support of indirect branch. Thus it's left to dynamic loader to enable +indirect branch tracking for the program. + +4. prctl() enabling + + +:c:macro:`PR_SET_INDIR_BR_LP_STATUS` / :c:macro:`PR_GET_INDIR_BR_LP_STATUS` / +:c:macro:`PR_LOCK_INDIR_BR_LP_STATUS` are three prctls added to manage indirect +branch tracking. prctls are arch agnostic and return -EINVAL on other arches. 
+ +* prctl(PR_SET_INDIR_BR_LP_STATUS, unsigned long arg) + +If arg1 is :c:macro:`PR_INDIR_BR_LP_ENABLE` and if CPU supports ``zicfilp`` +then kernel will enable indirect branch tracking for the task. Dynamic loader +can issue this :c:macro:`prctl` once it has determined that all the objects +loaded in address space support indirect branch tracking. Additionally if there +is a `dlopen` to an object which wasn't compiled with ``zicfilp``, dynamic +loader can issue this prctl with arg1 set to 0 (i.e. +:c:macro:`PR_INDIR_BR_LP_ENABLE` being clear) + +* prctl(PR_GET_INDIR_BR_LP_STATUS, unsigned long arg) + +Returns current status of indirect branch tracking. If enabled it'll return +:c:macro:`PR_INDIR_BR_LP_ENABLE`. + +* prctl(PR_LOCK_INDIR_BR_LP_STATUS, unsigned long arg) + +Locks current status of indirect branch tracking on the task. User space may +want to run with stric
[PATCH v12 23/28] riscv: kernel command line option to opt out of user cfi
This commit adds a kernel command line option using which user cfi can be disabled. Signed-off-by: Deepak Gupta --- arch/riscv/kernel/usercfi.c | 21 + 1 file changed, 21 insertions(+) diff --git a/arch/riscv/kernel/usercfi.c b/arch/riscv/kernel/usercfi.c index d31d89618763..813162ce4f15 100644 --- a/arch/riscv/kernel/usercfi.c +++ b/arch/riscv/kernel/usercfi.c @@ -17,6 +17,8 @@ #include #include +bool disable_riscv_usercfi; + #define SHSTK_ENTRY_SIZE sizeof(void *) bool is_shstk_enabled(struct task_struct *task) @@ -396,6 +398,9 @@ int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status) unsigned long size = 0, addr = 0; bool enable_shstk = false; + if (disable_riscv_usercfi) + return 0; + if (!cpu_supports_shadow_stack()) return -EINVAL; @@ -475,6 +480,9 @@ int arch_set_indir_br_lp_status(struct task_struct *t, unsigned long status) { bool enable_indir_lp = false; + if (disable_riscv_usercfi) + return 0; + if (!cpu_supports_indirect_br_lp_instr()) return -EINVAL; @@ -507,3 +515,16 @@ int arch_lock_indir_br_lp_status(struct task_struct *task, return 0; } + +static int __init setup_global_riscv_enable(char *str) +{ + if (strcmp(str, "true") == 0) + disable_riscv_usercfi = true; + + pr_info("Setting riscv usercfi to be %s\n", + (disable_riscv_usercfi ? "disabled" : "enabled")); + + return 1; +} + +__setup("disable_riscv_usercfi=", setup_global_riscv_enable); -- 2.34.1
[PATCH v12 24/28] arch/riscv: compile vdso with landing pad
From: Jim Shu user mode tasks compiled with zicfilp may call indirectly into vdso (like hwprobe indirect calls). Add landing pad compile support in vdso. A landing pad in the vdso will be a nop for tasks which have not enabled landing pad. This patch allows user mode tasks to run with cfi enabled and does no harm otherwise. Future work can be done on this to do below - labeled landing pad on vdso functions (whenever labeling support shows up in gnu-toolchain) - emit shadow stack instructions only in vdso compiled objects as part of kernel compile. Signed-off-by: Jim Shu Reviewed-by: Zong Li Signed-off-by: Deepak Gupta --- arch/riscv/Makefile | 5 +++- arch/riscv/include/asm/assembler.h| 44 +++ arch/riscv/kernel/vdso/Makefile | 12 ++ arch/riscv/kernel/vdso/flush_icache.S | 4 arch/riscv/kernel/vdso/getcpu.S | 4 arch/riscv/kernel/vdso/rt_sigreturn.S | 4 arch/riscv/kernel/vdso/sys_hwprobe.S | 4 7 files changed, 76 insertions(+), 1 deletion(-) diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 13fbc0f94238..eca94246cda6 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -88,9 +88,12 @@ riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas # Check if the toolchain supports Zabha riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZABHA) := $(riscv-march-y)_zabha +KBUILD_BASE_ISA = -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') +export KBUILD_BASE_ISA + # Remove F,D,V from isa string for all. 
Keep extensions between "fd" and "v" by # matching non-v and non-multi-letter extensions out with the filter ([^v_]*) -KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') +KBUILD_CFLAGS += $(KBUILD_BASE_ISA) KBUILD_AFLAGS += -march=$(riscv-march-y) diff --git a/arch/riscv/include/asm/assembler.h b/arch/riscv/include/asm/assembler.h index 44b1457d3e95..a058ea5e9c58 100644 --- a/arch/riscv/include/asm/assembler.h +++ b/arch/riscv/include/asm/assembler.h @@ -80,3 +80,47 @@ .endm #endif /* __ASM_ASSEMBLER_H */ + +#if defined(CONFIG_RISCV_USER_CFI) && (__riscv_xlen == 64) +.macro vdso_lpad +lpad 0 +.endm +#else +.macro vdso_lpad +.endm +#endif + +/* + * This macro emits a program property note section identifying + * architecture features which require special handling, mainly for + * use in assembly files included in the VDSO. + */ +#define NT_GNU_PROPERTY_TYPE_0 5 +#define GNU_PROPERTY_RISCV_FEATURE_1_AND 0xc000 + +#define GNU_PROPERTY_RISCV_FEATURE_1_ZICFILP (1U << 0) +#define GNU_PROPERTY_RISCV_FEATURE_1_ZICFISS (1U << 1) + +#if defined(CONFIG_RISCV_USER_CFI) && (__riscv_xlen == 64) +#define GNU_PROPERTY_RISCV_FEATURE_1_DEFAULT \ + (GNU_PROPERTY_RISCV_FEATURE_1_ZICFILP) +#endif + +#ifdef GNU_PROPERTY_RISCV_FEATURE_1_DEFAULT +.macro emit_riscv_feature_1_and, feat = GNU_PROPERTY_RISCV_FEATURE_1_DEFAULT + .pushsection .note.gnu.property, "a" + .p2align3 + .word 4 + .word 16 + .word NT_GNU_PROPERTY_TYPE_0 + .asciz "GNU" + .word GNU_PROPERTY_RISCV_FEATURE_1_AND + .word 4 + .word \feat + .word 0 + .popsection +.endm +#else +.macro emit_riscv_feature_1_and, feat = 0 +.endm +#endif diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 9a1b555e8733..daa10c2b0dd1 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -13,12 +13,18 @@ vdso-syms += flush_icache vdso-syms += hwprobe vdso-syms += sys_hwprobe +ifdef CONFIG_RISCV_USER_CFI +LPAD_MARCH = _zicfilp +endif + 
# Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o ccflags-y := -fno-stack-protector ccflags-y += -DDISABLE_BRANCH_PROFILING ccflags-y += -fno-builtin +ccflags-y += $(KBUILD_BASE_ISA)$(LPAD_MARCH) +asflags-y += $(KBUILD_BASE_ISA)$(LPAD_MARCH) ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) @@ -40,6 +46,12 @@ endif CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) +# Disable profiling and instrumentation for VDSO code +GCOV_PROFILE := n +KCOV_INSTRUMENT := n +KASAN_SANITIZE := n +UBSAN_SANITIZE := n + # Force dependency $(obj)/vdso.o: $(obj)/vdso.so diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S index 8f884227e8bc..e4c56970905e 100644 --- a/arch/riscv/kernel/vdso/flush_icache.S +++ b/arch/riscv/kernel/vdso/flush_icache.S @@ -5,11 +5,13 @@ #include #include +#include .text /* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */ SYM_FUNC_START(__vdso_flush_icache) .cfi_startproc + vdso_lpad #ifdef CONFIG_SMP li a7, __NR_riscv_fl
[PATCH v12 25/28] riscv: create a config for shadow stack and landing pad instr support
This patch creates a config for shadow stack support and landing pad instr support. Shadow stack support and landing instr support can be enabled by selecting `CONFIG_RISCV_USER_CFI`. Selecting `CONFIG_RISCV_USER_CFI` wires up path to enumerate CPU support and if cpu support exists, kernel will support cpu assisted user mode cfi. If CONFIG_RISCV_USER_CFI is selected, select `ARCH_USES_HIGH_VMA_FLAGS`, `ARCH_HAS_USER_SHADOW_STACK` and DYNAMIC_SIGFRAME for riscv. Reviewed-by: Zong Li Signed-off-by: Deepak Gupta --- arch/riscv/Kconfig | 20 1 file changed, 20 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 7612c52e9b1e..0a2e50f056e8 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -250,6 +250,26 @@ config ARCH_HAS_BROKEN_DWARF5 # https://github.com/llvm/llvm-project/commit/7ffabb61a5569444b5ac9322e22e5471cc5e4a77 depends on LD_IS_LLD && LLD_VERSION < 18 +config RISCV_USER_CFI + def_bool y + bool "riscv userspace control flow integrity" + depends on 64BIT && $(cc-option,-mabi=lp64 -march=rv64ima_zicfiss) + depends on RISCV_ALTERNATIVE + select ARCH_HAS_USER_SHADOW_STACK + select ARCH_USES_HIGH_VMA_FLAGS + select DYNAMIC_SIGFRAME + help + Provides CPU assisted control flow integrity to userspace tasks. + Control flow integrity is provided by implementing shadow stack for + backward edge and indirect branch tracking for forward edge in program. + Shadow stack protection is a hardware feature that detects function + return address corruption. This helps mitigate ROP attacks. + Indirect branch tracking enforces that all indirect branches must land + on a landing pad instruction else CPU will fault. This mitigates against + JOP / COP attacks. Applications must be enabled to use it, and old user- + space does not get protection "for free". + default y + config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT default 8 -- 2.34.1
Re: [PATCH v11 01/27] mm: VM_SHADOW_STACK definition for riscv
On Mon, Mar 10, 2025 at 11:42 PM Deepak Gupta wrote: > > VM_HIGH_ARCH_5 is used for riscv > > Signed-off-by: Deepak Gupta > --- > include/linux/mm.h | 7 +++ > 1 file changed, 7 insertions(+) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 7b1068ddcbb7..1ef231cbc8fe 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -378,6 +378,13 @@ extern unsigned int kobjsize(const void *objp); > # define VM_SHADOW_STACK VM_HIGH_ARCH_6 > #endif > > +#if defined(CONFIG_RISCV_USER_CFI) > +/* > + * Following x86 and picking up the same bitpos. > + */ > +# define VM_SHADOW_STACK VM_HIGH_ARCH_5 > +#endif > + > #ifndef VM_SHADOW_STACK > # define VM_SHADOW_STACK VM_NONE > #endif > LGTM. Reviewed-by: Zong Li > -- > 2.34.1 > > > ___ > linux-riscv mailing list > linux-ri...@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
Re: [PATCH v11 05/27] riscv: usercfi state for task and save/restore of CSR_SSP on trap entry/exit
On Mon, Mar 10, 2025 at 11:42 PM Deepak Gupta wrote: > > Carves out space in arch specific thread struct for cfi status and shadow > stack in usermode on riscv. > > This patch does following > - defines a new structure cfi_status with status bit for cfi feature > - defines shadow stack pointer, base and size in cfi_status structure > - defines offsets to new member fields in thread in asm-offsets.c > - Saves and restore shadow stack pointer on trap entry (U --> S) and exit > (S --> U) > > Shadow stack save/restore is gated on feature availability and implemented > using alternative. CSR can be context switched in `switch_to` as well but > as soon as kernel shadow stack support gets rolled in, shadow stack pointer > will need to be switched at trap entry/exit point (much like `sp`). It can > be argued that kernel using shadow stack deployment scenario may not be as > prevalent as user mode using this feature. But even if there is some > minimal deployment of kernel shadow stack, that means that it needs to be > supported. And thus save/restore of shadow stack pointer in entry.S instead > of in `switch_to.h`. 
> > Signed-off-by: Deepak Gupta > Reviewed-by: Charlie Jenkins > --- > arch/riscv/include/asm/processor.h | 1 + > arch/riscv/include/asm/thread_info.h | 3 +++ > arch/riscv/include/asm/usercfi.h | 24 > arch/riscv/kernel/asm-offsets.c | 4 > arch/riscv/kernel/entry.S| 26 ++ > 5 files changed, 58 insertions(+) > > diff --git a/arch/riscv/include/asm/processor.h > b/arch/riscv/include/asm/processor.h > index e3aba3336e63..d851bb5c6da0 100644 > --- a/arch/riscv/include/asm/processor.h > +++ b/arch/riscv/include/asm/processor.h > @@ -14,6 +14,7 @@ > > #include > #include > +#include > > #define arch_get_mmap_end(addr, len, flags)\ > ({ \ > diff --git a/arch/riscv/include/asm/thread_info.h > b/arch/riscv/include/asm/thread_info.h > index f5916a70879a..a0cfe00c2ca6 100644 > --- a/arch/riscv/include/asm/thread_info.h > +++ b/arch/riscv/include/asm/thread_info.h > @@ -62,6 +62,9 @@ struct thread_info { > longuser_sp;/* User stack pointer */ > int cpu; > unsigned long syscall_work; /* SYSCALL_WORK_ flags */ > +#ifdef CONFIG_RISCV_USER_CFI > + struct cfi_status user_cfi_state; > +#endif > #ifdef CONFIG_SHADOW_CALL_STACK > void*scs_base; > void*scs_sp; > diff --git a/arch/riscv/include/asm/usercfi.h > b/arch/riscv/include/asm/usercfi.h > new file mode 100644 > index ..5f2027c51917 > --- /dev/null > +++ b/arch/riscv/include/asm/usercfi.h > @@ -0,0 +1,24 @@ > +/* SPDX-License-Identifier: GPL-2.0 > + * Copyright (C) 2024 Rivos, Inc. > + * Deepak Gupta > + */ > +#ifndef _ASM_RISCV_USERCFI_H > +#define _ASM_RISCV_USERCFI_H > + > +#ifndef __ASSEMBLY__ > +#include > + > +#ifdef CONFIG_RISCV_USER_CFI > +struct cfi_status { > + unsigned long ubcfi_en : 1; /* Enable for backward cfi. 
*/ > + unsigned long rsvd : ((sizeof(unsigned long) * 8) - 1); > + unsigned long user_shdw_stk; /* Current user shadow stack pointer */ > + unsigned long shdw_stk_base; /* Base address of shadow stack */ > + unsigned long shdw_stk_size; /* size of shadow stack */ > +}; > + > +#endif /* CONFIG_RISCV_USER_CFI */ > + > +#endif /* __ASSEMBLY__ */ > + > +#endif /* _ASM_RISCV_USERCFI_H */ > diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c > index e89455a6a0e5..0c188aaf3925 100644 > --- a/arch/riscv/kernel/asm-offsets.c > +++ b/arch/riscv/kernel/asm-offsets.c > @@ -50,6 +50,10 @@ void asm_offsets(void) > #endif > > OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu); > +#ifdef CONFIG_RISCV_USER_CFI > + OFFSET(TASK_TI_CFI_STATUS, task_struct, thread_info.user_cfi_state); > + OFFSET(TASK_TI_USER_SSP, task_struct, > thread_info.user_cfi_state.user_shdw_stk); > +#endif > OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]); > OFFSET(TASK_THREAD_F1, task_struct, thread.fstate.f[1]); > OFFSET(TASK_THREAD_F2, task_struct, thread.fstate.f[2]); > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > index 33a5a9f2a0d4..68c99124ea55 100644 > --- a/arch/riscv/kernel/entry.S > +++ b/arch/riscv/kernel/entry.S > @@ -147,6 +147,20 @@ SYM_CODE_START(handle_exception) > > REG_L s0, TASK_TI_USER_SP(tp) > csrrc s1, CSR_STATUS, t0 > + /* > +* If previous mode was U, capture shadow stack pointer and save it > away > +* Zero CSR_SSP at the same time for sanitization. > +*/ > + ALTERNATIVE("nop; nop; nop; nop", > + __stringify(\ > + andi s2, s1, SR_SPP;\ > +
Re: [PATCH v11 07/27] riscv mm: manufacture shadow stack pte
On Mon, Mar 10, 2025 at 11:42 PM Deepak Gupta wrote: > > This patch implements creating shadow stack pte (on riscv). Creating > shadow stack PTE on riscv means that clearing RWX and then setting W=1. > > Signed-off-by: Deepak Gupta > Reviewed-by: Alexandre Ghiti > --- > arch/riscv/include/asm/pgtable.h | 10 ++ > 1 file changed, 10 insertions(+) > > diff --git a/arch/riscv/include/asm/pgtable.h > b/arch/riscv/include/asm/pgtable.h > index 8c528cd7347a..ede43185ffdf 100644 > --- a/arch/riscv/include/asm/pgtable.h > +++ b/arch/riscv/include/asm/pgtable.h > @@ -421,6 +421,11 @@ static inline pte_t pte_mkwrite_novma(pte_t pte) > return __pte(pte_val(pte) | _PAGE_WRITE); > } > > +static inline pte_t pte_mkwrite_shstk(pte_t pte) > +{ > + return __pte((pte_val(pte) & ~(_PAGE_LEAF)) | _PAGE_WRITE); > +} > + > /* static inline pte_t pte_mkexec(pte_t pte) */ > > static inline pte_t pte_mkdirty(pte_t pte) > @@ -749,6 +754,11 @@ static inline pmd_t pmd_mkwrite_novma(pmd_t pmd) > return pte_pmd(pte_mkwrite_novma(pmd_pte(pmd))); > } > > +static inline pmd_t pmd_mkwrite_shstk(pmd_t pte) > +{ > + return __pmd((pmd_val(pte) & ~(_PAGE_LEAF)) | _PAGE_WRITE); > +} > + > static inline pmd_t pmd_wrprotect(pmd_t pmd) > { > return pte_pmd(pte_wrprotect(pmd_pte(pmd))); > LGTM. Reviewed-by: Zong Li > -- > 2.34.1 > > > ___ > linux-riscv mailing list > linux-ri...@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
Re: [PATCH v11 13/27] prctl: arch-agnostic prctl for indirect branch tracking
On Mon, Mar 10, 2025 at 11:42 PM Deepak Gupta wrote: > > Three architectures (x86, aarch64, riscv) have support for indirect branch > tracking feature in a very similar fashion. On a very high level, indirect > branch tracking is a CPU feature where CPU tracks branches which uses > memory operand to perform control transfer in program. As part of this > tracking on indirect branches, CPU goes in a state where it expects a > landing pad instr on target and if not found then CPU raises some fault > (architecture dependent) > > x86 landing pad instr - `ENDBRANCH` > aarch64 landing pad instr - `BTI` > riscv landing instr - `lpad` > > Given that three major arches have support for indirect branch tracking, > This patch makes `prctl` for indirect branch tracking arch agnostic. > > To allow userspace to enable this feature for itself, following prtcls are > defined: > - PR_GET_INDIR_BR_LP_STATUS: Gets current configured status for indirect >branch tracking. > - PR_SET_INDIR_BR_LP_STATUS: Sets a configuration for indirect branch >tracking. >Following status options are allowed >- PR_INDIR_BR_LP_ENABLE: Enables indirect branch tracking on user > thread. >- PR_INDIR_BR_LP_DISABLE; Disables indirect branch tracking on user > thread. > - PR_LOCK_INDIR_BR_LP_STATUS: Locks configured status for indirect branch >tracking for user thread. 
> > Signed-off-by: Deepak Gupta > Reviewed-by: Mark Brown > --- > arch/riscv/include/asm/usercfi.h | 16 - > arch/riscv/kernel/entry.S| 2 +- > arch/riscv/kernel/process.c | 5 +++ > arch/riscv/kernel/usercfi.c | 76 > > include/linux/cpu.h | 4 +++ > include/uapi/linux/prctl.h | 27 ++ > kernel/sys.c | 30 > 7 files changed, 158 insertions(+), 2 deletions(-) > > diff --git a/arch/riscv/include/asm/usercfi.h > b/arch/riscv/include/asm/usercfi.h > index c4dcd256f19a..a8cec7c14d1d 100644 > --- a/arch/riscv/include/asm/usercfi.h > +++ b/arch/riscv/include/asm/usercfi.h > @@ -16,7 +16,9 @@ struct kernel_clone_args; > struct cfi_status { > unsigned long ubcfi_en : 1; /* Enable for backward cfi. */ > unsigned long ubcfi_locked : 1; > - unsigned long rsvd : ((sizeof(unsigned long) * 8) - 2); > + unsigned long ufcfi_en : 1; /* Enable for forward cfi. Note that ELP > goes in sstatus */ > + unsigned long ufcfi_locked : 1; > + unsigned long rsvd : ((sizeof(unsigned long) * 8) - 4); > unsigned long user_shdw_stk; /* Current user shadow stack pointer */ > unsigned long shdw_stk_base; /* Base address of shadow stack */ > unsigned long shdw_stk_size; /* size of shadow stack */ > @@ -33,6 +35,10 @@ bool is_shstk_locked(struct task_struct *task); > bool is_shstk_allocated(struct task_struct *task); > void set_shstk_lock(struct task_struct *task); > void set_shstk_status(struct task_struct *task, bool enable); > +bool is_indir_lp_enabled(struct task_struct *task); > +bool is_indir_lp_locked(struct task_struct *task); > +void set_indir_lp_status(struct task_struct *task, bool enable); > +void set_indir_lp_lock(struct task_struct *task); > > #define PR_SHADOW_STACK_SUPPORTED_STATUS_MASK (PR_SHADOW_STACK_ENABLE) > > @@ -58,6 +64,14 @@ void set_shstk_status(struct task_struct *task, bool > enable); > > #define set_shstk_status(task, enable) > > +#define is_indir_lp_enabled(task) false > + > +#define is_indir_lp_locked(task) false > + > +#define set_indir_lp_status(task, enable) > + > +#define 
set_indir_lp_lock(task) > + > #endif /* CONFIG_RISCV_USER_CFI */ > > #endif /* __ASSEMBLY__ */ > diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S > index 68c99124ea55..00494b54ff4a 100644 > --- a/arch/riscv/kernel/entry.S > +++ b/arch/riscv/kernel/entry.S > @@ -143,7 +143,7 @@ SYM_CODE_START(handle_exception) > * Disable the FPU/Vector to detect illegal usage of floating point > * or vector in kernel space. > */ > - li t0, SR_SUM | SR_FS_VS > + li t0, SR_SUM | SR_FS_VS | SR_ELP > > REG_L s0, TASK_TI_USER_SP(tp) > csrrc s1, CSR_STATUS, t0 > diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c > index cd11667593fe..4587201dd81d 100644 > --- a/arch/riscv/kernel/process.c > +++ b/arch/riscv/kernel/process.c > @@ -160,6 +160,11 @@ void start_thread(struct pt_regs *regs, unsigned long pc, > set_shstk_status(current, false); > set_shstk_base(current, 0, 0); > set_active_shstk(current, 0); > + /* > +* disable indirect branch tracking on exec. > +* libc will enable it later via prctl. > +*/ > + set_indir_lp_status(current, false); In set_indir_lp_status and set_shstk_status, the $senvcfg.LPE and $senvcfg.SSE fields are set. However, if the CPU does not support this CSR, writing to it will trigger an illegal instruc
Re: [PATCH v11 20/27] riscv: Add Firmware Feature SBI extensions definitions
On Mon, Mar 10, 2025 at 11:42 PM Deepak Gupta wrote: > > From: Clément Léger > > Add necessary SBI definitions to use the FWFT extension. > > Signed-off-by: Clément Léger > --- > arch/riscv/include/asm/sbi.h | 26 ++ > 1 file changed, 26 insertions(+) > > diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h > index 3d250824178b..23bfb254e3f4 100644 > --- a/arch/riscv/include/asm/sbi.h > +++ b/arch/riscv/include/asm/sbi.h > @@ -35,6 +35,7 @@ enum sbi_ext_id { > SBI_EXT_DBCN = 0x4442434E, > SBI_EXT_STA = 0x535441, > SBI_EXT_NACL = 0x4E41434C, > + SBI_EXT_FWFT = 0x46574654, > > /* Experimentals extensions must lie within this range */ > SBI_EXT_EXPERIMENTAL_START = 0x0800, > @@ -401,6 +402,31 @@ enum sbi_ext_nacl_feature { > > #define SBI_NACL_SHMEM_SRET_X(__i) ((__riscv_xlen / 8) * (__i)) > #define SBI_NACL_SHMEM_SRET_X_LAST 31 > +/* SBI function IDs for FW feature extension */ > +#define SBI_EXT_FWFT_SET 0x0 > +#define SBI_EXT_FWFT_GET 0x1 > + > +enum sbi_fwft_feature_t { > + SBI_FWFT_MISALIGNED_EXC_DELEG = 0x0, > + SBI_FWFT_LANDING_PAD= 0x1, > + SBI_FWFT_SHADOW_STACK = 0x2, > + SBI_FWFT_DOUBLE_TRAP= 0x3, > + SBI_FWFT_PTE_AD_HW_UPDATING = 0x4, > + SBI_FWFT_LOCAL_RESERVED_START = 0x5, > + SBI_FWFT_LOCAL_RESERVED_END = 0x3fff, > + SBI_FWFT_LOCAL_PLATFORM_START = 0x4000, > + SBI_FWFT_LOCAL_PLATFORM_END = 0x7fff, > + > + SBI_FWFT_GLOBAL_RESERVED_START = 0x8000, > + SBI_FWFT_GLOBAL_RESERVED_END= 0xbfff, > + SBI_FWFT_GLOBAL_PLATFORM_START = 0xc000, > + SBI_FWFT_GLOBAL_PLATFORM_END= 0x, > +}; > + > +#define SBI_FWFT_GLOBAL_FEATURE_BIT(1 << 31) > +#define SBI_FWFT_PLATFORM_FEATURE_BIT (1 << 30) > + > +#define SBI_FWFT_SET_FLAG_LOCK (1 << 0) > > /* SBI spec version fields */ > #define SBI_SPEC_VERSION_DEFAULT 0x1 > LGTM. Reviewed-by: Zong Li > -- > 2.34.1 > > > ___ > linux-riscv mailing list > linux-ri...@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
Re: [PATCH v11 26/27] riscv: Documentation for shadow stack on riscv
On Mon, Mar 10, 2025 at 11:44 PM Deepak Gupta wrote: > > Adding documentation on shadow stack for user mode on riscv and kernel > interfaces exposed so that user tasks can enable it. > > Signed-off-by: Deepak Gupta > --- > Documentation/arch/riscv/index.rst | 1 + > Documentation/arch/riscv/zicfiss.rst | 176 > +++ > 2 files changed, 177 insertions(+) > > diff --git a/Documentation/arch/riscv/index.rst > b/Documentation/arch/riscv/index.rst > index be7237b69682..e240eb0ceb70 100644 > --- a/Documentation/arch/riscv/index.rst > +++ b/Documentation/arch/riscv/index.rst > @@ -15,6 +15,7 @@ RISC-V architecture > vector > cmodx > zicfilp > +zicfiss > > features > > diff --git a/Documentation/arch/riscv/zicfiss.rst > b/Documentation/arch/riscv/zicfiss.rst > new file mode 100644 > index ..5ba389f15b3f > --- /dev/null > +++ b/Documentation/arch/riscv/zicfiss.rst > @@ -0,0 +1,176 @@ > +.. SPDX-License-Identifier: GPL-2.0 > + > +:Author: Deepak Gupta > +:Date: 12 January 2024 > + > += > +Shadow stack to protect function returns on RISC-V Linux > += > + > +This document briefly describes the interface provided to userspace by Linux > +to enable shadow stack for user mode applications on RISC-V > + > +1. Feature Overview > + > + > +Memory corruption issues usually result in crashes, however when in hands > of > +an adversary and if used creatively can result in a variety of security issues. > + > +One of those security issues can be code re-use attacks on program where > +adversary can use corrupt return addresses present on stack and chain them > +together to perform return oriented programming (ROP) and thus compromising > +control flow integrity (CFI) of the program. > + > +Return addresses live on stack and thus in read-write memory and thus are > +susceptible to corruption and allows an adversary to reach any program > counter > +(PC) in address space. 
On RISC-V ``zicfiss`` extension provides an alternate > +stack termed as shadow stack on which return addresses can be safely placed > in > +prolog of the function and retrieved in epilog. ``zicfiss`` extension makes > +following changes: > + > +- PTE encodings for shadow stack virtual memory > + An earlier reserved encoding in first stage translation i.e. > + PTE.R=0, PTE.W=1, PTE.X=0 becomes PTE encoding for shadow stack pages. > + > +- ``sspush x1/x5`` instruction pushes (stores) ``x1/x5`` to shadow stack. > + > +- ``sspopchk x1/x5`` instruction pops (loads) from shadow stack and compares > + with ``x1/x5`` and if un-equal, CPU raises ``software check exception`` > with > + ``*tval = 3`` > + > +Compiler toolchain makes sure that function prologue have ``sspush x1/x5`` to > +save return address on shadow stack in addition to regular stack. Similarly > +function epilogs have ``ld x5, offset(x2)`` followed by ``sspopchk x5`` to > +ensure that popped value from regular stack matches with popped value from > +shadow stack. > + > +2. Shadow stack protections and linux memory manager > +- > + > +As mentioned earlier, shadow stack get new page table encodings and thus have > +some special properties assigned to them and instructions that operate on > them > +as below: > + > +- Regular stores to shadow stack memory raises access store faults. This way > + shadow stack memory is protected from stray inadvertent writes. > + > +- Regular loads to shadow stack memory are allowed. This allows stack trace > + utilities or backtrace functions to read true callstack (not tampered). > + > +- Only shadow stack instructions can generate shadow stack load or shadow > stack > + store. > + > +- Shadow stack load / shadow stack store on read-only memory raises AMO/store > + page fault. Thus both ``sspush x1/x5`` and ``sspopchk x1/x5`` will raise > AMO/ > + store page fault. 
This simplifies COW handling in kernel. During fork, kernel > + can convert shadow stack pages into read-only memory (as it does for > regular > + read-write memory) and as soon as subsequent ``sspush`` or ``sspopchk`` in > + userspace is encountered, then kernel can perform COW. > + > +- Shadow stack load / shadow stack store on read-write, read-write-execute > + memory raises an access fault. This is a fatal condition because shadow > stack > + should never be operating on read-write, read-write-execute memory. > + > +3. ELF and psABI > +- > + > +Toolchain sets up :c:macro:`GNU_PROPERTY_RISCV_FEATURE_1_BCFI` for property > +:c:macro:`GNU_PROPERTY_RISCV_FEATURE_1_AND` in notes section of the object > file. > + > +4. Linux enabling > +-- > + > +User space programs can have multiple shared objects loaded in its address > space > +and it's a difficult task to make sure all the dependencies have been > co
Re: [PATCH v11 24/27] riscv: create a config for shadow stack and landing pad instr support
On Mon, Mar 10, 2025 at 11:44 PM Deepak Gupta wrote: > > This patch creates a config for shadow stack support and landing pad instr > support. Shadow stack support and landing instr support can be enabled by > selecting `CONFIG_RISCV_USER_CFI`. Selecting `CONFIG_RISCV_USER_CFI` wires > up path to enumerate CPU support and if cpu support exists, kernel will > support cpu assisted user mode cfi. > > If CONFIG_RISCV_USER_CFI is selected, select `ARCH_USES_HIGH_VMA_FLAGS`, > `ARCH_HAS_USER_SHADOW_STACK` and DYNAMIC_SIGFRAME for riscv. > > Signed-off-by: Deepak Gupta > --- > arch/riscv/Kconfig | 20 > 1 file changed, 20 insertions(+) > > diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig > index 7612c52e9b1e..0a2e50f056e8 100644 > --- a/arch/riscv/Kconfig > +++ b/arch/riscv/Kconfig > @@ -250,6 +250,26 @@ config ARCH_HAS_BROKEN_DWARF5 > # > https://github.com/llvm/llvm-project/commit/7ffabb61a5569444b5ac9322e22e5471cc5e4a77 > depends on LD_IS_LLD && LLD_VERSION < 18 > > +config RISCV_USER_CFI > + def_bool y > + bool "riscv userspace control flow integrity" > + depends on 64BIT && $(cc-option,-mabi=lp64 -march=rv64ima_zicfiss) > + depends on RISCV_ALTERNATIVE > + select ARCH_HAS_USER_SHADOW_STACK > + select ARCH_USES_HIGH_VMA_FLAGS > + select DYNAMIC_SIGFRAME > + help > + Provides CPU assisted control flow integrity to userspace tasks. > + Control flow integrity is provided by implementing shadow stack for > + backward edge and indirect branch tracking for forward edge in > program. > + Shadow stack protection is a hardware feature that detects function > + return address corruption. This helps mitigate ROP attacks. > + Indirect branch tracking enforces that all indirect branches must > land > + on a landing pad instruction else CPU will fault. This mitigates > against > + JOP / COP attacks. Applications must be enabled to use it, and old > user- > + space does not get protection "for free". 
> + default y > + > config ARCH_MMAP_RND_BITS_MIN > default 18 if 64BIT > default 8 > LGTM. Reviewed-by: Zong Li > -- > 2.34.1 > > > ___ > linux-riscv mailing list > linux-ri...@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
[PATCH v12 04/28] riscv: zicfiss / zicfilp extension csr and bit definitions
zicfiss and zicfilp extension gets enabled via b3 and b2 in *envcfg CSR. menvcfg controls enabling for S/HS mode. henvcfg control enabling for VS while senvcfg controls enabling for U/VU mode. zicfilp extension extends *status CSR to hold `expected landing pad` bit. A trap or interrupt can occur between an indirect jmp/call and target instr. `expected landing pad` bit from CPU is recorded into xstatus CSR so that when supervisor performs xret, `expected landing pad` state of CPU can be restored. zicfiss adds one new CSR - CSR_SSP: CSR_SSP contains current shadow stack pointer. Signed-off-by: Deepak Gupta Reviewed-by: Charlie Jenkins --- arch/riscv/include/asm/csr.h | 16 1 file changed, 16 insertions(+) diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 6fed42e37705..2f49b9663640 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -18,6 +18,15 @@ #define SR_MPP _AC(0x1800, UL) /* Previously Machine */ #define SR_SUM _AC(0x0004, UL) /* Supervisor User Memory Access */ +/* zicfilp landing pad status bit */ +#define SR_SPELP _AC(0x0080, UL) +#define SR_MPELP _AC(0x0200, UL) +#ifdef CONFIG_RISCV_M_MODE +#define SR_ELP SR_MPELP +#else +#define SR_ELP SR_SPELP +#endif + #define SR_FS _AC(0x6000, UL) /* Floating-point Status */ #define SR_FS_OFF _AC(0x, UL) #define SR_FS_INITIAL _AC(0x2000, UL) @@ -212,6 +221,8 @@ #define ENVCFG_PMM_PMLEN_16(_AC(0x3, ULL) << 32) #define ENVCFG_CBZE(_AC(1, UL) << 7) #define ENVCFG_CBCFE (_AC(1, UL) << 6) +#define ENVCFG_LPE (_AC(1, UL) << 2) +#define ENVCFG_SSE (_AC(1, UL) << 3) #define ENVCFG_CBIE_SHIFT 4 #define ENVCFG_CBIE(_AC(0x3, UL) << ENVCFG_CBIE_SHIFT) #define ENVCFG_CBIE_ILL_AC(0x0, UL) @@ -230,6 +241,11 @@ #define SMSTATEEN0_HSENVCFG(_ULL(1) << SMSTATEEN0_HSENVCFG_SHIFT) #define SMSTATEEN0_SSTATEEN0_SHIFT 63 #define SMSTATEEN0_SSTATEEN0 (_ULL(1) << SMSTATEEN0_SSTATEEN0_SHIFT) +/* + * zicfiss user mode csr + * CSR_SSP holds current shadow stack pointer. 
+ */ +#define CSR_SSP 0x011 /* mseccfg bits */ #define MSECCFG_PMMENVCFG_PMM -- 2.34.1
[PATCH v12 01/28] mm: VM_SHADOW_STACK definition for riscv
VM_HIGH_ARCH_5 is used for riscv Reviewed-by: Zong Li Signed-off-by: Deepak Gupta --- include/linux/mm.h | 7 +++ 1 file changed, 7 insertions(+) diff --git a/include/linux/mm.h b/include/linux/mm.h index 7b1068ddcbb7..1ef231cbc8fe 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -378,6 +378,13 @@ extern unsigned int kobjsize(const void *objp); # define VM_SHADOW_STACK VM_HIGH_ARCH_6 #endif +#if defined(CONFIG_RISCV_USER_CFI) +/* + * Following x86 and picking up the same bitpos. + */ +# define VM_SHADOW_STACK VM_HIGH_ARCH_5 +#endif + #ifndef VM_SHADOW_STACK # define VM_SHADOW_STACK VM_NONE #endif -- 2.34.1
[PATCH v12 06/28] riscv/mm : ensure PROT_WRITE leads to VM_READ | VM_WRITE
`arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ | VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ). This is to make sure that any existing apps using PROT_WRITE still work. Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings. Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE encodings for shadow stack. Above changes ensure that existing apps continue to work because underneath kernel will be picking `protection_map[VM_WRITE|VM_READ]` PTE encodings. Reviewed-by: Zong Li Signed-off-by: Deepak Gupta --- arch/riscv/include/asm/mman.h| 25 + arch/riscv/include/asm/pgtable.h | 1 + arch/riscv/kernel/sys_riscv.c| 10 ++ arch/riscv/mm/init.c | 2 +- 4 files changed, 37 insertions(+), 1 deletion(-) diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h new file mode 100644 index ..392c9c2d2e78 --- /dev/null +++ b/arch/riscv/include/asm/mman.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#include +#include +#include + +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, + unsigned long pkey __always_unused) +{ + unsigned long ret = 0; + + /* +* If PROT_WRITE was specified, force it to VM_READ | VM_WRITE. +* Only VM_WRITE means shadow stack. +*/ + if (prot & PROT_WRITE) + ret = (VM_READ | VM_WRITE); + return ret; +} + +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) + +#endif /* ! 
__ASM_MMAN_H__ */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 050fdc49b5ad..8c528cd7347a 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -178,6 +178,7 @@ extern struct pt_alloc_ops pt_ops __meminitdata; #define PAGE_READ_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) #define PAGE_WRITE_EXEC__pgprot(_PAGE_BASE | _PAGE_READ | \ _PAGE_EXEC | _PAGE_WRITE) +#define PAGE_SHADOWSTACK __pgprot(_PAGE_BASE | _PAGE_WRITE) #define PAGE_COPY PAGE_READ #define PAGE_COPY_EXEC PAGE_READ_EXEC diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index d77afe05578f..43a448bf254b 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -7,6 +7,7 @@ #include #include +#include static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -16,6 +17,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned long len, if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) return -EINVAL; + /* +* If PROT_WRITE is specified then extend that to PROT_READ +* protection_map[VM_WRITE] is now going to select shadow stack encodings. +* So specifying PROT_WRITE actually should select protection_map [VM_WRITE | VM_READ] +* If user wants to create shadow stack then they should use `map_shadow_stack` syscall. 
+*/ + if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ))) + prot |= PROT_READ; + return ksys_mmap_pgoff(addr, len, prot, flags, fd, offset >> (PAGE_SHIFT - page_shift_offset)); } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 15b2eda4c364..9d6661638d0b 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -342,7 +342,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); static const pgprot_t protection_map[16] = { [VM_NONE] = PAGE_NONE, [VM_READ] = PAGE_READ, - [VM_WRITE] = PAGE_COPY, + [VM_WRITE] = PAGE_SHADOWSTACK, [VM_WRITE | VM_READ]= PAGE_COPY, [VM_EXEC] = PAGE_EXEC, [VM_EXEC | VM_READ] = PAGE_READ_EXEC, -- 2.34.1
[PATCH net-next 3/6] netconsole: add 'sysdata' suffix to related functions
This commit appends a common "sysdata" suffix to functions responsible for appending data to sysdata. This change enhances code clarity and prevents naming conflicts with other "append" functions, particularly in anticipation of the upcoming inclusion of the `release` field in the next patch. Signed-off-by: Breno Leitao --- drivers/net/netconsole.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 0914d29b48d8e..970dfc3ac9d41 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -1224,7 +1224,7 @@ static void populate_configfs_item(struct netconsole_target *nt, init_target_config_group(nt, target_name); } -static int append_cpu_nr(struct netconsole_target *nt, int offset) +static int sysdata_append_cpu_nr(struct netconsole_target *nt, int offset) { /* Append cpu=%d at extradata_complete after userdata str */ return scnprintf(&nt->extradata_complete[offset], @@ -1232,7 +1232,7 @@ static int append_cpu_nr(struct netconsole_target *nt, int offset) raw_smp_processor_id()); } -static int append_taskname(struct netconsole_target *nt, int offset) +static int sysdata_append_taskname(struct netconsole_target *nt, int offset) { return scnprintf(&nt->extradata_complete[offset], MAX_EXTRADATA_ENTRY_LEN, " taskname=%s\n", @@ -1256,9 +1256,9 @@ static int prepare_extradata(struct netconsole_target *nt) goto out; if (nt->sysdata_fields & SYSDATA_CPU_NR) - extradata_len += append_cpu_nr(nt, extradata_len); + extradata_len += sysdata_append_cpu_nr(nt, extradata_len); if (nt->sysdata_fields & SYSDATA_TASKNAME) - extradata_len += append_taskname(nt, extradata_len); + extradata_len += sysdata_append_taskname(nt, extradata_len); WARN_ON_ONCE(extradata_len > MAX_EXTRADATA_ENTRY_LEN * MAX_EXTRADATA_ITEMS); -- 2.47.1
[PATCH net-next 0/6] netconsole: Add support for userdata release
I am submitting a series of patches that introduce a new feature for the netconsole subsystem, specifically the addition of the 'release' field to the sysdata structure. This feature allows the kernel release/version to be appended to the userdata dictionary in every message sent, enhancing the information available for debugging and monitoring purposes. This complements the already supported release prepend feature, which was added some time ago. The release prepend appends the release information at the message header, which is not ideal for two reasons: 1) It is difficult to determine if a message includes this information, making it hard and resource-intensive to parse. 2) When a message is fragmented, the release information is appended to every message fragment, consuming valuable space in the packet. The "release prepend" feature was created before the concept of userdata and sysdata. Now that this format has proven successful, we are implementing the release feature as part of this enhanced structure. This patch series aims to improve the netconsole subsystem by providing a more efficient and user-friendly way to include kernel release information in messages. I believe these changes will significantly aid in system analysis and troubleshooting. Suggested-by: Manu Bretelle Signed-off-by: Breno Leitao --- Breno Leitao (6): netconsole: introduce 'release' as a new sysdata field netconsole: implement configfs for release_enabled netconsole: add 'sysdata' suffix to related functions netconsole: append release to sysdata selftests: netconsole: Add tests for 'release' feature in sysdata docs: netconsole: document release feature Documentation/networking/netconsole.rst| 25 drivers/net/netconsole.c | 71 -- .../selftests/drivers/net/netcons_sysdata.sh | 44 +- 3 files changed, 133 insertions(+), 7 deletions(-) --- base-commit: 941defcea7e11ad7ff8f0d4856716dd637d757dd change-id: 20250314-netcons_release-dc1f1f5ca0f7 Best regards, -- Breno Leitao
[PATCH net-next 2/6] netconsole: implement configfs for release_enabled
Implement the configfs helpers to show and set release_enabled configfs directories under userdata. When enabled, set the feature bit in netconsole_target->sysdata_fields. Signed-off-by: Breno Leitao --- drivers/net/netconsole.c | 49 1 file changed, 49 insertions(+) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index b7f7ec39e8318..0914d29b48d8e 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -442,6 +442,19 @@ static ssize_t sysdata_taskname_enabled_show(struct config_item *item, return sysfs_emit(buf, "%d\n", taskname_enabled); } +static ssize_t sysdata_release_enabled_show(struct config_item *item, + char *buf) +{ + struct netconsole_target *nt = to_target(item->ci_parent); + bool release_enabled; + + mutex_lock(&dynamic_netconsole_mutex); + release_enabled = !!(nt->sysdata_fields & SYSDATA_RELEASE); + mutex_unlock(&dynamic_netconsole_mutex); + + return sysfs_emit(buf, "%d\n", release_enabled); +} + /* * This one is special -- targets created through the configfs interface * are not enabled (and the corresponding netpoll activated) by default. 
@@ -859,6 +872,40 @@ static void disable_sysdata_feature(struct netconsole_target *nt, nt->extradata_complete[nt->userdata_length] = 0; } +static ssize_t sysdata_release_enabled_store(struct config_item *item, +const char *buf, size_t count) +{ + struct netconsole_target *nt = to_target(item->ci_parent); + bool release_enabled, curr; + ssize_t ret; + + ret = kstrtobool(buf, &release_enabled); + if (ret) + return ret; + + mutex_lock(&dynamic_netconsole_mutex); + curr = !!(nt->sysdata_fields & SYSDATA_RELEASE); + if (release_enabled == curr) + goto unlock_ok; + + if (release_enabled && + count_extradata_entries(nt) >= MAX_EXTRADATA_ITEMS) { + ret = -ENOSPC; + goto unlock; + } + + if (release_enabled) + nt->sysdata_fields |= SYSDATA_RELEASE; + else + disable_sysdata_feature(nt, SYSDATA_RELEASE); + +unlock_ok: + ret = strnlen(buf, count); +unlock: + mutex_unlock(&dynamic_netconsole_mutex); + return ret; +} + static ssize_t sysdata_taskname_enabled_store(struct config_item *item, const char *buf, size_t count) { @@ -939,6 +986,7 @@ static ssize_t sysdata_cpu_nr_enabled_store(struct config_item *item, CONFIGFS_ATTR(userdatum_, value); CONFIGFS_ATTR(sysdata_, cpu_nr_enabled); CONFIGFS_ATTR(sysdata_, taskname_enabled); +CONFIGFS_ATTR(sysdata_, release_enabled); static struct configfs_attribute *userdatum_attrs[] = { &userdatum_attr_value, @@ -1000,6 +1048,7 @@ static void userdatum_drop(struct config_group *group, struct config_item *item) static struct configfs_attribute *userdata_attrs[] = { &sysdata_attr_cpu_nr_enabled, &sysdata_attr_taskname_enabled, + &sysdata_attr_release_enabled, NULL, }; -- 2.47.1
[PATCH net-next 5/6] selftests: netconsole: Add tests for 'release' feature in sysdata
Expands the self-tests to include the 'release' feature in sysdata. Verifies that enabling the 'release' feature appends the correct data and ensures that disabling it functions as expected. When enabled, the message should have an item similar to in the userdata: `release=$(uname -r)` Signed-off-by: Breno Leitao --- .../selftests/drivers/net/netcons_sysdata.sh | 44 -- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh index f351206ed1bda..a737e377bf085 100755 --- a/tools/testing/selftests/drivers/net/netcons_sysdata.sh +++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh @@ -42,6 +42,17 @@ function set_taskname() { echo 1 > "${NETCONS_PATH}/userdata/taskname_enabled" } +# Enable the release to be appended to sysdata +function set_release() { + if [[ ! -f "${NETCONS_PATH}/userdata/release_enabled" ]] + then + echo "Not able to enable release sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/release_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/release_enabled" +} + # Disable the sysdata cpu_nr feature function unset_cpu_nr() { echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled" @@ -52,6 +63,10 @@ function unset_taskname() { echo 0 > "${NETCONS_PATH}/userdata/taskname_enabled" } +function unset_release() { + echo 0 > "${NETCONS_PATH}/userdata/release_enabled" +} + # Test if MSG contains sysdata function validate_sysdata() { # OUTPUT_FILE will contain something like: @@ -93,6 +108,21 @@ function validate_sysdata() { pkill_socat } +function validate_release() { + RELEASE=$(uname -r) + + if [ ! -f "$OUTPUT_FILE" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! 
grep -q "release=${RELEASE}" "${OUTPUT_FILE}"; then + echo "FAIL: 'release=${RELEASE}' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi +} + # Test if MSG content exists in OUTPUT_FILE but no `cpu=` and `taskname=` # strings function validate_no_sysdata() { @@ -119,6 +149,12 @@ function validate_no_sysdata() { exit "${ksft_fail}" fi + if grep -q "release=" "${OUTPUT_FILE}"; then + echo "FAIL: 'release= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + rm "${OUTPUT_FILE}" } @@ -169,9 +205,11 @@ MSG="Test #1 from CPU${CPU}" set_cpu_nr # Enable taskname to be appended to sysdata set_taskname +set_release runtest # Make sure the message was received in the dst part # and exit +validate_release validate_sysdata # @@ -184,19 +222,19 @@ OUTPUT_FILE="/tmp/${TARGET}_2" MSG="Test #2 from CPU${CPU}" set_user_data runtest +validate_release validate_sysdata # === # TEST #3 -# Unset cpu_nr, so, no CPU should be appended. -# userdata is still set +# Unset all sysdata, fail if any userdata is set # === CPU=$((RANDOM % $(nproc))) OUTPUT_FILE="/tmp/${TARGET}_3" MSG="Test #3 from CPU${CPU}" -# Enable the auto population of cpu_nr unset_cpu_nr unset_taskname +unset_release runtest # At this time, cpu= shouldn't be present in the msg validate_no_sysdata -- 2.47.1
[PATCH net-next 6/6] docs: netconsole: document release feature
Add documentation explaining the kernel release auto-population feature in netconsole. This feature appends kernel version information to the userdata dictionary in every message sent when enabled via the `release_enabled` file in the configfs hierarchy. Signed-off-by: Breno Leitao --- Documentation/networking/netconsole.rst | 25 + 1 file changed, 25 insertions(+) diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst index ae82a6337a8d8..44f5a441cf813 100644 --- a/Documentation/networking/netconsole.rst +++ b/Documentation/networking/netconsole.rst @@ -268,6 +268,31 @@ Example:: In this example, the message was generated while "echo" was the current scheduled process. +Kernel release auto population in userdata +-- + +Within the netconsole configfs hierarchy, there is a file named `release_enabled` +located in the `userdata` directory. This file controls the kernel release +(version) auto-population feature, which appends the kernel release information +to userdata dictionary in every message sent. + +To enable the release auto-population:: + + echo 1 > /sys/kernel/config/netconsole/target1/userdata/release_enabled + +Example:: + + echo "This is a message" > /dev/kmsg + 12,607,22085407756,-;This is a message + release=6.14.0-rc6-01219-g3c027fbd941d + +.. note:: + + This feature provides the same data as the "release prepend" feature. + However, in this case, the release information is appended to the userdata + dictionary rather than being included in the message header. + + CPU number auto population in userdata -- -- 2.47.1
[PATCH v12 22/28] riscv: enable kernel access to shadow stack memory via FWFT sbi call
The kernel will have to perform shadow stack operations on the user shadow stack. For example, during signal delivery and sigreturn, a shadow stack token must be created and validated, respectively. Thus shadow stack access for the kernel must be enabled. In future, when kernel shadow stacks are enabled for the linux kernel, it must be enabled as early as possible for better coverage and to prevent imbalance between the regular stack and the shadow stack. The point right after `relocate_enable_mmu` has been done is the earliest at which it can be enabled. Reviewed-by: Zong Li Signed-off-by: Deepak Gupta --- arch/riscv/kernel/asm-offsets.c | 4 arch/riscv/kernel/head.S| 12 2 files changed, 16 insertions(+) diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c index 0c188aaf3925..21f99d5757b6 100644 --- a/arch/riscv/kernel/asm-offsets.c +++ b/arch/riscv/kernel/asm-offsets.c @@ -515,4 +515,8 @@ void asm_offsets(void) DEFINE(FREGS_A6,offsetof(struct __arch_ftrace_regs, a6)); DEFINE(FREGS_A7,offsetof(struct __arch_ftrace_regs, a7)); #endif + DEFINE(SBI_EXT_FWFT, SBI_EXT_FWFT); + DEFINE(SBI_EXT_FWFT_SET, SBI_EXT_FWFT_SET); + DEFINE(SBI_FWFT_SHADOW_STACK, SBI_FWFT_SHADOW_STACK); + DEFINE(SBI_FWFT_SET_FLAG_LOCK, SBI_FWFT_SET_FLAG_LOCK); } diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 356d5397b2a2..6244408ca917 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -164,6 +164,12 @@ secondary_start_sbi: call relocate_enable_mmu #endif call .Lsetup_trap_vector + li a7, SBI_EXT_FWFT + li a6, SBI_EXT_FWFT_SET + li a0, SBI_FWFT_SHADOW_STACK + li a1, 1 /* enable supervisor to access shadow stack access */ + li a2, SBI_FWFT_SET_FLAG_LOCK + ecall scs_load_current call smp_callin #endif /* CONFIG_SMP */ @@ -320,6 +326,12 @@ SYM_CODE_START(_start_kernel) la tp, init_task la sp, init_thread_union + THREAD_SIZE addi sp, sp, -PT_SIZE_ON_STACK + li a7, SBI_EXT_FWFT + li a6, SBI_EXT_FWFT_SET + li a0, SBI_FWFT_SHADOW_STACK + li a1, 1 /* enable supervisor to access shadow 
stack access */ + li a2, SBI_FWFT_SET_FLAG_LOCK + ecall scs_load_current #ifdef CONFIG_KASAN -- 2.34.1
[PATCH v12 03/28] riscv: zicfiss / zicfilp enumeration
This patch adds support for detecting zicfiss and zicfilp. zicfiss and zicfilp stand for the unprivileged integer spec extensions for shadow stack and branch tracking on indirect branches, respectively. This patch looks for zicfiss and zicfilp in the device tree and accordingly lights up the corresponding bit in the cpu feature bitmap. Furthermore this patch adds detection utility functions to return whether shadow stack or landing pads are supported by cpu. Reviewed-by: Zong Li Signed-off-by: Deepak Gupta --- arch/riscv/include/asm/cpufeature.h | 13 + arch/riscv/include/asm/hwcap.h | 2 ++ arch/riscv/include/asm/processor.h | 1 + arch/riscv/kernel/cpufeature.c | 13 + 4 files changed, 29 insertions(+) diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h index 569140d6e639..69007b8100ca 100644 --- a/arch/riscv/include/asm/cpufeature.h +++ b/arch/riscv/include/asm/cpufeature.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -137,4 +138,16 @@ static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsi return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); } +static inline bool cpu_supports_shadow_stack(void) +{ + return (IS_ENABLED(CONFIG_RISCV_USER_CFI) && + riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICFISS)); +} + +static inline bool cpu_supports_indirect_br_lp_instr(void) +{ + return (IS_ENABLED(CONFIG_RISCV_USER_CFI) && + riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICFILP)); +} + #endif diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 869da082252a..2dc4232bdb3e 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -100,6 +100,8 @@ #define RISCV_ISA_EXT_ZICCRSE 91 #define RISCV_ISA_EXT_SVADE92 #define RISCV_ISA_EXT_SVADU93 +#define RISCV_ISA_EXT_ZICFILP 94 +#define RISCV_ISA_EXT_ZICFISS 95 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/include/asm/processor.h 
b/arch/riscv/include/asm/processor.h index 5f56eb9d114a..e3aba3336e63 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -13,6 +13,7 @@ #include #include +#include #define arch_get_mmap_end(addr, len, flags)\ ({ \ diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index c6ba750536c3..82065cc55822 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -150,6 +150,15 @@ static int riscv_ext_svadu_validate(const struct riscv_isa_ext_data *data, return 0; } +static int riscv_cfi_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + if (!IS_ENABLED(CONFIG_RISCV_USER_CFI)) + return -EINVAL; + + return 0; +} + static const unsigned int riscv_zk_bundled_exts[] = { RISCV_ISA_EXT_ZBKB, RISCV_ISA_EXT_ZBKC, @@ -333,6 +342,10 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, riscv_ext_zicboz_validate), __RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicfilp, RISCV_ISA_EXT_ZICFILP, riscv_xlinuxenvcfg_exts, + riscv_cfi_validate), + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicfiss, RISCV_ISA_EXT_ZICFISS, riscv_xlinuxenvcfg_exts, + riscv_cfi_validate), __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), __RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND), __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), -- 2.34.1
[PATCH v12 19/28] riscv/ptrace: riscv cfi status and state via ptrace and in core files
Expose a new register type NT_RISCV_USER_CFI for risc-v cfi status and state. Intentionally both landing pad and shadow stack status and state are rolled into cfi state. Creating two different NT_RISCV_USER_XXX note types would not be useful and would waste a note type. Enabling or disabling of the feature is not allowed via the ptrace set interface. However, setting the `elp` state or setting the shadow stack pointer is allowed via the ptrace set interface. It is expected that `gdb` might need to fix up the `elp` state or the shadow stack pointer. Signed-off-by: Deepak Gupta --- arch/riscv/include/uapi/asm/ptrace.h | 18 arch/riscv/kernel/ptrace.c | 83 include/uapi/linux/elf.h | 1 + 3 files changed, 102 insertions(+) diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h index 659ea3af5680..e6571fba8a8a 100644 --- a/arch/riscv/include/uapi/asm/ptrace.h +++ b/arch/riscv/include/uapi/asm/ptrace.h @@ -131,6 +131,24 @@ struct __sc_riscv_cfi_state { unsigned long ss_ptr; /* shadow stack pointer */ }; +struct __cfi_status { + /* indirect branch tracking state */ + __u64 lp_en : 1; + __u64 lp_lock : 1; + __u64 elp_state : 1; + + /* shadow stack status */ + __u64 shstk_en : 1; + __u64 shstk_lock : 1; + + __u64 rsvd : sizeof(__u64) * 8 - 5; +}; + +struct user_cfi_state { + struct __cfi_status cfi_status; + __u64 shstk_ptr; +}; + #endif /* __ASSEMBLY__ */ #endif /* _UAPI_ASM_RISCV_PTRACE_H */ diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index ea67e9fb7a58..df8b7c6ab671 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -19,6 +19,7 @@ #include #include #include +#include enum riscv_regset { REGSET_X, @@ -31,6 +32,9 @@ enum riscv_regset { #ifdef CONFIG_RISCV_ISA_SUPM REGSET_TAGGED_ADDR_CTRL, #endif +#ifdef CONFIG_RISCV_USER_CFI + REGSET_CFI, +#endif }; static int riscv_gpr_get(struct task_struct *target, @@ -184,6 +188,75 @@ static int tagged_addr_ctrl_set(struct task_struct *target, } #endif +#ifdef CONFIG_RISCV_USER_CFI +static int 
riscv_cfi_get(struct task_struct *target, +const struct user_regset *regset, +struct membuf to) +{ + struct user_cfi_state user_cfi; + struct pt_regs *regs; + + regs = task_pt_regs(target); + + user_cfi.cfi_status.lp_en = is_indir_lp_enabled(target); + user_cfi.cfi_status.lp_lock = is_indir_lp_locked(target); + user_cfi.cfi_status.elp_state = !!(regs->status & SR_ELP); + + user_cfi.cfi_status.shstk_en = is_shstk_enabled(target); + user_cfi.cfi_status.shstk_lock = is_shstk_locked(target); + user_cfi.shstk_ptr = get_active_shstk(target); + + return membuf_write(&to, &user_cfi, sizeof(user_cfi)); +} + +/* + * Does it make sense to allowing enable / disable of cfi via ptrace? + * Not allowing enable / disable / locking control via ptrace for now. + * Setting shadow stack pointer is allowed. GDB might use it to unwind or + * some other fixup. Similarly gdb might want to suppress elp and may want + * to reset elp state. + */ +static int riscv_cfi_set(struct task_struct *target, +const struct user_regset *regset, +unsigned int pos, unsigned int count, +const void *kbuf, const void __user *ubuf) +{ + int ret; + struct user_cfi_state user_cfi; + struct pt_regs *regs; + + regs = task_pt_regs(target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_cfi, 0, -1); + if (ret) + return ret; + + /* +* Not allowing enabling or locking shadow stack or landing pad +* There is no disabling of shadow stack or landing pad via ptrace +* rsvd field should be set to zero so that if those fields are needed in future +*/ + if (user_cfi.cfi_status.lp_en || user_cfi.cfi_status.lp_lock || + user_cfi.cfi_status.shstk_en || user_cfi.cfi_status.shstk_lock || + user_cfi.cfi_status.rsvd) + return -EINVAL; + + /* If lpad is enabled on target and ptrace requests to set / clear elp, do that */ + if (is_indir_lp_enabled(target)) { + if (user_cfi.cfi_status.elp_state) /* set elp state */ + regs->status |= SR_ELP; + else + regs->status &= ~SR_ELP; /* clear elp state */ + } + + /* If shadow 
stack enabled on target, set new shadow stack pointer */ + if (is_shstk_enabled(target)) + set_active_shstk(target, user_cfi.shstk_ptr); + + return 0; +} +#endif + static const struct user_regset riscv_user_regset[] = { [REGSET_X] = { .core_note_type = NT_PRSTATUS, @@ -224,6 +297,16 @@ static const struct user_regset riscv_user_regset[] = {
[PATCH 0/4] ntsync: some small fixes for doc and selftests
This series contains four small fixes for the ntsync selftests and documentation. I split them into four patches because they fix different kinds of errors; if a single patch is preferred, I can squash them. Su Hui (4): selftests: ntsync: fix the wrong condition in wake_all selftests: ntsync: avoid possible overflow in 32-bit machine selftests: ntsync: update config docs: ntsync: update NTSYNC_IOC_* Documentation/userspace-api/ntsync.rst | 18 +- tools/testing/selftests/drivers/ntsync/config | 2 +- .../testing/selftests/drivers/ntsync/ntsync.c | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) -- 2.30.2
Re: [PATCH v3 02/17] riscv: sbi: add FWFT extension interface
On 13/03/2025 13:39, Andrew Jones wrote: > On Mon, Mar 10, 2025 at 04:12:09PM +0100, Clément Léger wrote: >> This SBI extensions enables supervisor mode to control feature that are >> under M-mode control (For instance, Svadu menvcfg ADUE bit, Ssdbltrp >> DTE, etc). >> >> Signed-off-by: Clément Léger >> --- >> arch/riscv/include/asm/sbi.h | 5 ++ >> arch/riscv/kernel/sbi.c | 97 >> 2 files changed, 102 insertions(+) >> >> diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h >> index bb077d0c912f..fc87c609c11a 100644 >> --- a/arch/riscv/include/asm/sbi.h >> +++ b/arch/riscv/include/asm/sbi.h >> @@ -503,6 +503,11 @@ int sbi_remote_hfence_vvma_asid(const struct cpumask >> *cpu_mask, >> unsigned long asid); >> long sbi_probe_extension(int ext); >> >> +int sbi_fwft_all_cpus_set(u32 feature, unsigned long value, unsigned long >> flags, >> + bool revert_on_failure); >> +int sbi_fwft_get(u32 feature, unsigned long *value); >> +int sbi_fwft_set(u32 feature, unsigned long value, unsigned long flags); >> + >> /* Check if current SBI specification version is 0.1 or not */ >> static inline int sbi_spec_is_0_1(void) >> { >> diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c >> index 1989b8cade1b..256910db1307 100644 >> --- a/arch/riscv/kernel/sbi.c >> +++ b/arch/riscv/kernel/sbi.c >> @@ -299,6 +299,103 @@ static int __sbi_rfence_v02(int fid, const struct >> cpumask *cpu_mask, >> return 0; >> } >> >> +int sbi_fwft_get(u32 feature, unsigned long *value) >> +{ >> +return -EOPNOTSUPP; >> +} >> + >> +/** >> + * sbi_fwft_set() - Set a feature on all online cpus > > copy+paste of description from sbi_fwft_all_cpus_set(). This function > only sets the feature on the calling hart. > >> + * @feature: The feature to be set >> + * @value: The feature value to be set >> + * @flags: FWFT feature set flags >> + * >> + * Return: 0 on success, appropriate linux error code otherwise. 
>> + */ >> +int sbi_fwft_set(u32 feature, unsigned long value, unsigned long flags) >> +{ >> +return -EOPNOTSUPP; >> +} >> + >> +struct fwft_set_req { >> +u32 feature; >> +unsigned long value; >> +unsigned long flags; >> +cpumask_t mask; >> +}; >> + >> +static void cpu_sbi_fwft_set(void *arg) >> +{ >> +struct fwft_set_req *req = arg; >> + >> +if (sbi_fwft_set(req->feature, req->value, req->flags)) >> +cpumask_clear_cpu(smp_processor_id(), &req->mask); >> +} >> + >> +static int sbi_fwft_feature_local_set(u32 feature, unsigned long value, >> + unsigned long flags, >> + bool revert_on_fail) >> +{ >> +int ret; >> +unsigned long prev_value; >> +cpumask_t tmp; >> +struct fwft_set_req req = { >> +.feature = feature, >> +.value = value, >> +.flags = flags, >> +}; >> + >> +cpumask_copy(&req.mask, cpu_online_mask); >> + >> +/* We can not revert if features are locked */ >> +if (revert_on_fail && flags & SBI_FWFT_SET_FLAG_LOCK) > > Should use () around the flags &. I thought checkpatch complained about > that? > >> +return -EINVAL; >> + >> +/* Reset value is the same for all cpus, read it once. */ > > How do we know we're reading the reset value? sbi_fwft_all_cpus_set() may > be called multiple times on the same feature. And harts may have had > sbi_fwft_set() called on them independently. I think we should drop the > whole prev_value optimization. That's actually used for revert_on_failure as well not only the optimization. > >> +ret = sbi_fwft_get(feature, &prev_value); >> +if (ret) >> +return ret; >> + >> +/* Feature might already be set to the value we want */ >> +if (prev_value == value) >> +return 0; >> + >> +on_each_cpu_mask(&req.mask, cpu_sbi_fwft_set, &req, 1); >> +if (cpumask_equal(&req.mask, cpu_online_mask)) >> +return 0; >> + >> +pr_err("Failed to set feature %x for all online cpus, reverting\n", >> + feature); > > nit: I'd let the above line stick out. We have 100 chars. 
> >> + >> +req.value = prev_value; >> +cpumask_copy(&tmp, &req.mask); >> +on_each_cpu_mask(&req.mask, cpu_sbi_fwft_set, &req, 1); >> +if (cpumask_equal(&req.mask, &tmp)) >> +return 0; > > I'm not sure we want the revert_on_fail support either. What happens when > the revert fails and we return -EINVAL below? Also returning zero when > revert succeeds means the caller won't know if we successfully set what > we wanted or just successfully reverted. So that might actually be needed for features that needs to be enabled on all hart or not enabled at all. If we fail to enable all of them, them the hart will be in some non coherent state between the harts. The returned error code though is wrong and I'm not sure we would have a way to gracefully h
Re: [PATCH v3 05/17] riscv: misaligned: use on_each_cpu() for scalar misaligned access probing
On 13/03/2025 13:57, Andrew Jones wrote: > On Mon, Mar 10, 2025 at 04:12:12PM +0100, Clément Léger wrote: >> schedule_on_each_cpu() was used without any good reason while documented >> as very slow. This call was in the boot path, so better use >> on_each_cpu() for scalar misaligned checking. Vector misaligned check >> still needs to use schedule_on_each_cpu() since it requires irqs to be >> enabled but that's less of a problem since this code is ran in a kthread. >> Add a comment to explicit that. >> >> Signed-off-by: Clément Léger >> --- >> arch/riscv/kernel/traps_misaligned.c | 9 +++-- >> 1 file changed, 7 insertions(+), 2 deletions(-) >> >> diff --git a/arch/riscv/kernel/traps_misaligned.c >> b/arch/riscv/kernel/traps_misaligned.c >> index 90ac74191357..ffac424faa88 100644 >> --- a/arch/riscv/kernel/traps_misaligned.c >> +++ b/arch/riscv/kernel/traps_misaligned.c >> @@ -616,6 +616,11 @@ bool >> check_vector_unaligned_access_emulated_all_cpus(void) >> return false; >> } >> >> +/* >> + * While being documented as very slow, schedule_on_each_cpu() is used >> + * since kernel_vector_begin() expects irqs to be enabled or it will >> panic(). > > which expects Hum that would yield the following: "schedule_on_each_cpu() is used since kernel_vector_begin() that is called inside the vector code 'which' expects irqs to be enabled or it will panic()." which seems wrong as well. 
I guess something like this would be better: "While being documented as very slow, schedule_on_each_cpu() is used since kernel_vector_begin() expects irqs to be enabled or it will panic()" Thanks, Clément > >> + */ >> schedule_on_each_cpu(check_vector_unaligned_access_emulated); >> >> for_each_online_cpu(cpu) >> @@ -636,7 +641,7 @@ bool >> check_vector_unaligned_access_emulated_all_cpus(void) >> >> static bool unaligned_ctl __read_mostly; >> >> -static void check_unaligned_access_emulated(struct work_struct *work >> __always_unused) >> +static void check_unaligned_access_emulated(void *arg __always_unused) >> { >> int cpu = smp_processor_id(); >> long *mas_ptr = per_cpu_ptr(&misaligned_access_speed, cpu); >> @@ -677,7 +682,7 @@ bool check_unaligned_access_emulated_all_cpus(void) >> * accesses emulated since tasks requesting such control can run on any >> * CPU. >> */ >> -schedule_on_each_cpu(check_unaligned_access_emulated); >> +on_each_cpu(check_unaligned_access_emulated, NULL, 1); >> >> for_each_online_cpu(cpu) >> if (per_cpu(misaligned_access_speed, cpu) >> -- >> 2.47.2 >> > > Reviewed-by: Andrew Jones
Re: [PATCH v3 06/17] riscv: misaligned: use correct CONFIG_ ifdef for misaligned_access_speed
On 13/03/2025 14:06, Andrew Jones wrote: > On Mon, Mar 10, 2025 at 04:12:13PM +0100, Clément Léger wrote: >> misaligned_access_speed is defined under CONFIG_RISCV_SCALAR_MISALIGNED >> but was used under CONFIG_RISCV_PROBE_UNALIGNED_ACCESS. Fix that by >> using the correct config option. >> >> Signed-off-by: Clément Léger >> --- >> arch/riscv/kernel/traps_misaligned.c | 2 +- >> 1 file changed, 1 insertion(+), 1 deletion(-) >> >> diff --git a/arch/riscv/kernel/traps_misaligned.c >> b/arch/riscv/kernel/traps_misaligned.c >> index ffac424faa88..7fe25adf2539 100644 >> --- a/arch/riscv/kernel/traps_misaligned.c >> +++ b/arch/riscv/kernel/traps_misaligned.c >> @@ -362,7 +362,7 @@ static int handle_scalar_misaligned_load(struct pt_regs >> *regs) >> >> perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); >> >> -#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS >> +#ifdef CONFIG_RISCV_SCALAR_MISALIGNED >> *this_cpu_ptr(&misaligned_access_speed) = >> RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; >> #endif > > Sure, but CONFIG_RISCV_PROBE_UNALIGNED_ACCESS selects > CONFIG_RISCV_SCALAR_MISALIGNED, so this isn't fixing anything. Indeed, that is not fixing anything (hence no Fixes tag), it compiles as a side effect of Kconfig dependencies. > Changing it > does make sense though since this line in handle_scalar_misaligned_load() > "belongs" to check_unaligned_access_emulated() which is also under > CONFIG_RISCV_SCALAR_MISALIGNED. Anyway, all this unaligned configs need a > major cleanup. Yes, as I said, I'd be advocating to remove all that ifdefery mess. Thanks, Clément > > > Reviewed-by: Andrew Jones > > Thanks, > drew > >> >> -- >> 2.47.2 >> >> >> -- >> kvm-riscv mailing list >> kvm-ri...@lists.infradead.org >> http://lists.infradead.org/mailman/listinfo/kvm-riscv
Re: [PATCH v11 06/27] riscv/mm : ensure PROT_WRITE leads to VM_READ | VM_WRITE
On Mon, Mar 10, 2025 at 11:42 PM Deepak Gupta wrote: > > `arch_calc_vm_prot_bits` is implemented on risc-v to return VM_READ | > VM_WRITE if PROT_WRITE is specified. Similarly `riscv_sys_mmap` is > updated to convert all incoming PROT_WRITE to (PROT_WRITE | PROT_READ). > This is to make sure that any existing apps using PROT_WRITE still work. > > Earlier `protection_map[VM_WRITE]` used to pick read-write PTE encodings. > Now `protection_map[VM_WRITE]` will always pick PAGE_SHADOWSTACK PTE > encodings for shadow stack. Above changes ensure that existing apps > continue to work because underneath kernel will be picking > `protection_map[VM_WRITE|VM_READ]` PTE encodings. > > Signed-off-by: Deepak Gupta > --- > arch/riscv/include/asm/mman.h| 25 + > arch/riscv/include/asm/pgtable.h | 1 + > arch/riscv/kernel/sys_riscv.c| 10 ++ > arch/riscv/mm/init.c | 2 +- > 4 files changed, 37 insertions(+), 1 deletion(-) > > diff --git a/arch/riscv/include/asm/mman.h b/arch/riscv/include/asm/mman.h > new file mode 100644 > index ..392c9c2d2e78 > --- /dev/null > +++ b/arch/riscv/include/asm/mman.h > @@ -0,0 +1,25 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef __ASM_MMAN_H__ > +#define __ASM_MMAN_H__ > + > +#include > +#include > +#include > + > +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, > + unsigned long pkey > __always_unused) > +{ > + unsigned long ret = 0; > + > + /* > +* If PROT_WRITE was specified, force it to VM_READ | VM_WRITE. > +* Only VM_WRITE means shadow stack. > +*/ > + if (prot & PROT_WRITE) > + ret = (VM_READ | VM_WRITE); > + return ret; > +} > + > +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) > + > +#endif /* ! 
__ASM_MMAN_H__ */ > diff --git a/arch/riscv/include/asm/pgtable.h > b/arch/riscv/include/asm/pgtable.h > index 050fdc49b5ad..8c528cd7347a 100644 > --- a/arch/riscv/include/asm/pgtable.h > +++ b/arch/riscv/include/asm/pgtable.h > @@ -178,6 +178,7 @@ extern struct pt_alloc_ops pt_ops __meminitdata; > #define PAGE_READ_EXEC __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC) > #define PAGE_WRITE_EXEC__pgprot(_PAGE_BASE | _PAGE_READ | > \ > _PAGE_EXEC | _PAGE_WRITE) > +#define PAGE_SHADOWSTACK __pgprot(_PAGE_BASE | _PAGE_WRITE) > > #define PAGE_COPY PAGE_READ > #define PAGE_COPY_EXEC PAGE_READ_EXEC > diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c > index d77afe05578f..43a448bf254b 100644 > --- a/arch/riscv/kernel/sys_riscv.c > +++ b/arch/riscv/kernel/sys_riscv.c > @@ -7,6 +7,7 @@ > > #include > #include > +#include > > static long riscv_sys_mmap(unsigned long addr, unsigned long len, >unsigned long prot, unsigned long flags, > @@ -16,6 +17,15 @@ static long riscv_sys_mmap(unsigned long addr, unsigned > long len, > if (unlikely(offset & (~PAGE_MASK >> page_shift_offset))) > return -EINVAL; > > + /* > +* If PROT_WRITE is specified then extend that to PROT_READ > +* protection_map[VM_WRITE] is now going to select shadow stack > encodings. > +* So specifying PROT_WRITE actually should select protection_map > [VM_WRITE | VM_READ] > +* If user wants to create shadow stack then they should use > `map_shadow_stack` syscall. 
> +*/ > + if (unlikely((prot & PROT_WRITE) && !(prot & PROT_READ))) > + prot |= PROT_READ; > + > return ksys_mmap_pgoff(addr, len, prot, flags, fd, >offset >> (PAGE_SHIFT - page_shift_offset)); > } > diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c > index 15b2eda4c364..9d6661638d0b 100644 > --- a/arch/riscv/mm/init.c > +++ b/arch/riscv/mm/init.c > @@ -342,7 +342,7 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata > __aligned(PAGE_SIZE); > static const pgprot_t protection_map[16] = { > [VM_NONE] = PAGE_NONE, > [VM_READ] = PAGE_READ, > - [VM_WRITE] = PAGE_COPY, > + [VM_WRITE] = PAGE_SHADOWSTACK, > [VM_WRITE | VM_READ]= PAGE_COPY, > [VM_EXEC] = PAGE_EXEC, > [VM_EXEC | VM_READ] = PAGE_READ_EXEC, > LGTM. Reviewed-by: Zong Li > -- > 2.34.1 > > > ___ > linux-riscv mailing list > linux-ri...@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
Re: [PATCH v11 23/27] arch/riscv: compile vdso with landing pad
On Mon, Mar 10, 2025 at 11:44 PM Deepak Gupta wrote: > > From: Jim Shu > > user mode tasks compiled with zicfilp may call indirectly into vdso (like > hwprobe indirect calls). Add landing pad compile support in vdso. vdso > with landing pad in it will be nop for tasks which have not enabled > landing pad. > This patch allows to run user mode tasks with cfi eanbled and do no harm. > > Future work can be done on this to do below > - labeled landing pad on vdso functions (whenever labeling support shows >up in gnu-toolchain) > - emit shadow stack instructions only in vdso compiled objects as part of >kernel compile. > > Signed-off-by: Jim Shu > Signed-off-by: Deepak Gupta > --- > arch/riscv/Makefile | 7 +- > arch/riscv/include/asm/assembler.h| 44 > +++ > arch/riscv/kernel/vdso/Makefile | 12 ++ > arch/riscv/kernel/vdso/flush_icache.S | 4 > arch/riscv/kernel/vdso/getcpu.S | 4 > arch/riscv/kernel/vdso/rt_sigreturn.S | 4 > arch/riscv/kernel/vdso/sys_hwprobe.S | 4 > 7 files changed, 78 insertions(+), 1 deletion(-) > > diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile > index 13fbc0f94238..ea9468af2cb4 100644 > --- a/arch/riscv/Makefile > +++ b/arch/riscv/Makefile > @@ -87,10 +87,15 @@ riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := > $(riscv-march-y)_zacas > > # Check if the toolchain supports Zabha > riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZABHA) := $(riscv-march-y)_zabha > +# Check if the toolchain supports Zihintpause extension > +riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE) := > $(riscv-march-y)_zihintpause I think we don't need this, it has removed by the '6da111574baf ("riscv: Provide a definition for 'pause'")'. Apart from that, this patch looks good to me. Reviewed-by: Zong Li > + > +KBUILD_BASE_ISA = -march=$(shell echo $(riscv-march-y) | sed -E > 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') > +export KBUILD_BASE_ISA > > # Remove F,D,V from isa string for all. 
Keep extensions between "fd" and "v" > by > # matching non-v and non-multi-letter extensions out with the filter ([^v_]*) > -KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E > 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/') > +KBUILD_CFLAGS += $(KBUILD_BASE_ISA) > > KBUILD_AFLAGS += -march=$(riscv-march-y) > > diff --git a/arch/riscv/include/asm/assembler.h > b/arch/riscv/include/asm/assembler.h > index 44b1457d3e95..a058ea5e9c58 100644 > --- a/arch/riscv/include/asm/assembler.h > +++ b/arch/riscv/include/asm/assembler.h > @@ -80,3 +80,47 @@ > .endm > > #endif /* __ASM_ASSEMBLER_H */ > + > +#if defined(CONFIG_RISCV_USER_CFI) && (__riscv_xlen == 64) > +.macro vdso_lpad > +lpad 0 > +.endm > +#else > +.macro vdso_lpad > +.endm > +#endif > + > +/* > + * This macro emits a program property note section identifying > + * architecture features which require special handling, mainly for > + * use in assembly files included in the VDSO. > + */ > +#define NT_GNU_PROPERTY_TYPE_0 5 > +#define GNU_PROPERTY_RISCV_FEATURE_1_AND 0xc000 > + > +#define GNU_PROPERTY_RISCV_FEATURE_1_ZICFILP (1U << 0) > +#define GNU_PROPERTY_RISCV_FEATURE_1_ZICFISS (1U << 1) > + > +#if defined(CONFIG_RISCV_USER_CFI) && (__riscv_xlen == 64) > +#define GNU_PROPERTY_RISCV_FEATURE_1_DEFAULT \ > + (GNU_PROPERTY_RISCV_FEATURE_1_ZICFILP) > +#endif > + > +#ifdef GNU_PROPERTY_RISCV_FEATURE_1_DEFAULT > +.macro emit_riscv_feature_1_and, feat = GNU_PROPERTY_RISCV_FEATURE_1_DEFAULT > + .pushsection .note.gnu.property, "a" > + .p2align3 > + .word 4 > + .word 16 > + .word NT_GNU_PROPERTY_TYPE_0 > + .asciz "GNU" > + .word GNU_PROPERTY_RISCV_FEATURE_1_AND > + .word 4 > + .word \feat > + .word 0 > + .popsection > +.endm > +#else > +.macro emit_riscv_feature_1_and, feat = 0 > +.endm > +#endif > diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile > index 9a1b555e8733..daa10c2b0dd1 100644 > --- a/arch/riscv/kernel/vdso/Makefile > +++ b/arch/riscv/kernel/vdso/Makefile > @@ -13,12 +13,18 @@ 
vdso-syms += flush_icache > vdso-syms += hwprobe > vdso-syms += sys_hwprobe > > +ifdef CONFIG_RISCV_USER_CFI > +LPAD_MARCH = _zicfilp > +endif > + > # Files to link into the vdso > obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o > > ccflags-y := -fno-stack-protector > ccflags-y += -DDISABLE_BRANCH_PROFILING > ccflags-y += -fno-builtin > +ccflags-y += $(KBUILD_BASE_ISA)$(LPAD_MARCH) > +asflags-y += $(KBUILD_BASE_ISA)$(LPAD_MARCH) > > ifneq ($(c-gettimeofday-y),) >CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) > @@ -40,6 +46,12 @@ endif > CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) > CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) > > +# Disable profiling and instrumentation for VDSO code > +GCOV_PROFILE := n >
Re: [PATCH v11 27/27] kselftest/riscv: kselftest for user mode cfi
On Tue, Mar 11, 2025 at 1:50 AM Deepak Gupta wrote: > > Adds kselftest for RISC-V control flow integrity implementation for user > mode. There is not a lot going on in kernel for enabling landing pad for > user mode. cfi selftest are intended to be compiled with zicfilp and > zicfiss enabled compiler. Thus kselftest simply checks if landing pad and > shadow stack for the binary and process are enabled or not. selftest then > register a signal handler for SIGSEGV. Any control flow violation are > reported as SIGSEGV with si_code = SEGV_CPERR. Test will fail on receiving > any SEGV_CPERR. Shadow stack part has more changes in kernel and thus there > are separate tests for that > > - Exercise `map_shadow_stack` syscall > - `fork` test to make sure COW works for shadow stack pages > - gup tests > Kernel uses FOLL_FORCE when access happens to memory via > /proc//mem. Not breaking that for shadow stack. > - signal test. Make sure signal delivery results in token creation on > shadow stack and consumes (and verifies) token on sigreturn > - shadow stack protection test. 
attempts to write using regular store > instruction on shadow stack memory must result in access faults > > Test outut > == > > """ > TAP version 13 > 1..5 > This is to ensure shadow stack is indeed enabled and working > This is to ensure shadow stack is indeed enabled and working > ok 1 shstk fork test > ok 2 map shadow stack syscall > ok 3 shadow stack gup tests > ok 4 shadow stack signal tests > ok 5 memory protections of shadow stack memory > """ > > Signed-off-by: Deepak Gupta > --- > tools/testing/selftests/riscv/Makefile | 2 +- > tools/testing/selftests/riscv/cfi/.gitignore | 3 + > tools/testing/selftests/riscv/cfi/Makefile | 10 + > tools/testing/selftests/riscv/cfi/cfi_rv_test.h| 84 + > tools/testing/selftests/riscv/cfi/riscv_cfi_test.c | 78 + > tools/testing/selftests/riscv/cfi/shadowstack.c| 375 > + > tools/testing/selftests/riscv/cfi/shadowstack.h| 37 ++ > 7 files changed, 588 insertions(+), 1 deletion(-) > > diff --git a/tools/testing/selftests/riscv/Makefile > b/tools/testing/selftests/riscv/Makefile > index 099b8c1f46f8..5671b4405a12 100644 > --- a/tools/testing/selftests/riscv/Makefile > +++ b/tools/testing/selftests/riscv/Makefile > @@ -5,7 +5,7 @@ > ARCH ?= $(shell uname -m 2>/dev/null || echo not) > > ifneq (,$(filter $(ARCH),riscv)) > -RISCV_SUBTARGETS ?= abi hwprobe mm sigreturn vector > +RISCV_SUBTARGETS ?= abi hwprobe mm sigreturn vector cfi > else > RISCV_SUBTARGETS := > endif > diff --git a/tools/testing/selftests/riscv/cfi/.gitignore > b/tools/testing/selftests/riscv/cfi/.gitignore > new file mode 100644 > index ..82545863bac6 > --- /dev/null > +++ b/tools/testing/selftests/riscv/cfi/.gitignore > @@ -0,0 +1,3 @@ > +cfitests > +riscv_cfi_test > +shadowstack > diff --git a/tools/testing/selftests/riscv/cfi/Makefile > b/tools/testing/selftests/riscv/cfi/Makefile > new file mode 100644 > index ..b65f7ff38a32 > --- /dev/null > +++ b/tools/testing/selftests/riscv/cfi/Makefile > @@ -0,0 +1,10 @@ > +CFLAGS += -I$(top_srcdir)/tools/include > + > 
+CFLAGS += -march=rv64gc_zicfilp_zicfiss > + > +TEST_GEN_PROGS := cfitests > + > +include ../../lib.mk > + > +$(OUTPUT)/cfitests: riscv_cfi_test.c shadowstack.c > + $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^ > diff --git a/tools/testing/selftests/riscv/cfi/cfi_rv_test.h > b/tools/testing/selftests/riscv/cfi/cfi_rv_test.h > new file mode 100644 > index ..a9d5d6f8e29c > --- /dev/null > +++ b/tools/testing/selftests/riscv/cfi/cfi_rv_test.h > @@ -0,0 +1,84 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > + > +#ifndef SELFTEST_RISCV_CFI_H > +#define SELFTEST_RISCV_CFI_H > +#include > +#include > +#include "shadowstack.h" > + > +#define RISCV_CFI_SELFTEST_COUNT RISCV_SHADOW_STACK_TESTS 'RISCV_CFI_SELFTEST_COUNT' doesn't seems to be used anywhere > + > +#define CHILD_EXIT_CODE_SSWRITE10 > +#define CHILD_EXIT_CODE_SIG_TEST 11 > + > +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ > +({ \ > + register long _num __asm__ ("a7") = (num); \ > + register long _arg1 __asm__ ("a0") = (long)(arg1); \ > + register long _arg2 __asm__ ("a1") = (long)(arg2); \ > + register long _arg3 __asm__ ("a2") = (long)(arg3); \ > + register long _arg4 __asm__ ("a3") = (long)(arg4); \ > + register long _arg5 __asm__ ("a4") = (long)(arg5); \ > + \ > + __asm__ volatile( \ > + "ecall\n" \ > + : "+r" \ >
Re: [PATCH v11 03/27] riscv: zicfiss / zicfilp enumeration
On Mon, Mar 10, 2025 at 11:42 PM Deepak Gupta wrote: > > This patch adds support for detecting zicfiss and zicfilp. zicfiss and > zicfilp stands for unprivleged integer spec extension for shadow stack > and branch tracking on indirect branches, respectively. > > This patch looks for zicfiss and zicfilp in device tree and accordinlgy > lights up bit in cpu feature bitmap. Furthermore this patch adds detection > utility functions to return whether shadow stack or landing pads are > supported by cpu. > > Signed-off-by: Deepak Gupta > --- > arch/riscv/include/asm/cpufeature.h | 13 + > arch/riscv/include/asm/hwcap.h | 2 ++ > arch/riscv/include/asm/processor.h | 1 + > arch/riscv/kernel/cpufeature.c | 13 + > 4 files changed, 29 insertions(+) > > diff --git a/arch/riscv/include/asm/cpufeature.h > b/arch/riscv/include/asm/cpufeature.h > index 569140d6e639..69007b8100ca 100644 > --- a/arch/riscv/include/asm/cpufeature.h > +++ b/arch/riscv/include/asm/cpufeature.h > @@ -12,6 +12,7 @@ > #include > #include > #include > +#include > #include > #include > > @@ -137,4 +138,16 @@ static __always_inline bool > riscv_cpu_has_extension_unlikely(int cpu, const unsi > return __riscv_isa_extension_available(hart_isa[cpu].isa, ext); > } > > +static inline bool cpu_supports_shadow_stack(void) > +{ > + return (IS_ENABLED(CONFIG_RISCV_USER_CFI) && > + riscv_cpu_has_extension_unlikely(smp_processor_id(), > RISCV_ISA_EXT_ZICFISS)); > +} > + > +static inline bool cpu_supports_indirect_br_lp_instr(void) > +{ > + return (IS_ENABLED(CONFIG_RISCV_USER_CFI) && > + riscv_cpu_has_extension_unlikely(smp_processor_id(), > RISCV_ISA_EXT_ZICFILP)); > +} > + > #endif > diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h > index 869da082252a..2dc4232bdb3e 100644 > --- a/arch/riscv/include/asm/hwcap.h > +++ b/arch/riscv/include/asm/hwcap.h > @@ -100,6 +100,8 @@ > #define RISCV_ISA_EXT_ZICCRSE 91 > #define RISCV_ISA_EXT_SVADE92 > #define RISCV_ISA_EXT_SVADU93 > +#define 
RISCV_ISA_EXT_ZICFILP 94 > +#define RISCV_ISA_EXT_ZICFISS 95 > > #define RISCV_ISA_EXT_XLINUXENVCFG 127 > > diff --git a/arch/riscv/include/asm/processor.h > b/arch/riscv/include/asm/processor.h > index 5f56eb9d114a..e3aba3336e63 100644 > --- a/arch/riscv/include/asm/processor.h > +++ b/arch/riscv/include/asm/processor.h > @@ -13,6 +13,7 @@ > #include > > #include > +#include > > #define arch_get_mmap_end(addr, len, flags)\ > ({ \ > diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c > index c6ba750536c3..82065cc55822 100644 > --- a/arch/riscv/kernel/cpufeature.c > +++ b/arch/riscv/kernel/cpufeature.c > @@ -150,6 +150,15 @@ static int riscv_ext_svadu_validate(const struct > riscv_isa_ext_data *data, > return 0; > } > > +static int riscv_cfi_validate(const struct riscv_isa_ext_data *data, > + const unsigned long *isa_bitmap) > +{ > + if (!IS_ENABLED(CONFIG_RISCV_USER_CFI)) > + return -EINVAL; > + > + return 0; > +} > + > static const unsigned int riscv_zk_bundled_exts[] = { > RISCV_ISA_EXT_ZBKB, > RISCV_ISA_EXT_ZBKC, > @@ -333,6 +342,10 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { > __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, > riscv_xlinuxenvcfg_exts, > riscv_ext_zicboz_validate), > __RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE), > + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicfilp, RISCV_ISA_EXT_ZICFILP, > riscv_xlinuxenvcfg_exts, > + riscv_cfi_validate), > + __RISCV_ISA_EXT_SUPERSET_VALIDATE(zicfiss, RISCV_ISA_EXT_ZICFISS, > riscv_xlinuxenvcfg_exts, > + riscv_cfi_validate), > __RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR), > __RISCV_ISA_EXT_DATA(zicond, RISCV_ISA_EXT_ZICOND), > __RISCV_ISA_EXT_DATA(zicsr, RISCV_ISA_EXT_ZICSR), > LGTM. Reviewed-by: Zong Li > -- > 2.34.1 > > > ___ > linux-riscv mailing list > linux-ri...@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
Re: [PATCH v11 21/27] riscv: enable kernel access to shadow stack memory via FWFT sbi call
On Mon, Mar 10, 2025 at 11:44 PM Deepak Gupta wrote: > > Kernel will have to perform shadow stack operations on user shadow stack. > Like during signal delivery and sigreturn, shadow stack token must be > created and validated respectively. Thus shadow stack access for kernel > must be enabled. > > In future when kernel shadow stacks are enabled for linux kernel, it must > be enabled as early as possible for better coverage and prevent imbalance > between regular stack and shadow stack. After `relocate_enable_mmu` has > been done, this is as early as possible it can enabled. > > Signed-off-by: Deepak Gupta > --- > arch/riscv/kernel/asm-offsets.c | 4 > arch/riscv/kernel/head.S| 12 > 2 files changed, 16 insertions(+) > > diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c > index 0c188aaf3925..21f99d5757b6 100644 > --- a/arch/riscv/kernel/asm-offsets.c > +++ b/arch/riscv/kernel/asm-offsets.c > @@ -515,4 +515,8 @@ void asm_offsets(void) > DEFINE(FREGS_A6,offsetof(struct __arch_ftrace_regs, a6)); > DEFINE(FREGS_A7,offsetof(struct __arch_ftrace_regs, a7)); > #endif > + DEFINE(SBI_EXT_FWFT, SBI_EXT_FWFT); > + DEFINE(SBI_EXT_FWFT_SET, SBI_EXT_FWFT_SET); > + DEFINE(SBI_FWFT_SHADOW_STACK, SBI_FWFT_SHADOW_STACK); > + DEFINE(SBI_FWFT_SET_FLAG_LOCK, SBI_FWFT_SET_FLAG_LOCK); > } > diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S > index 356d5397b2a2..6244408ca917 100644 > --- a/arch/riscv/kernel/head.S > +++ b/arch/riscv/kernel/head.S > @@ -164,6 +164,12 @@ secondary_start_sbi: > call relocate_enable_mmu > #endif > call .Lsetup_trap_vector > + li a7, SBI_EXT_FWFT > + li a6, SBI_EXT_FWFT_SET > + li a0, SBI_FWFT_SHADOW_STACK > + li a1, 1 /* enable supervisor to access shadow stack access */ > + li a2, SBI_FWFT_SET_FLAG_LOCK > + ecall > scs_load_current > call smp_callin > #endif /* CONFIG_SMP */ > @@ -320,6 +326,12 @@ SYM_CODE_START(_start_kernel) > la tp, init_task > la sp, init_thread_union + THREAD_SIZE > addi sp, sp, 
-PT_SIZE_ON_STACK > + li a7, SBI_EXT_FWFT > + li a6, SBI_EXT_FWFT_SET > + li a0, SBI_FWFT_SHADOW_STACK > + li a1, 1 /* enable supervisor to access shadow stack access */ > + li a2, SBI_FWFT_SET_FLAG_LOCK > + ecall > scs_load_current > > #ifdef CONFIG_KASAN > LGTM. Reviewed-by: Zong Li > -- > 2.34.1 > > > ___ > linux-riscv mailing list > linux-ri...@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-riscv
Re: [PATCH v3 14/17] RISC-V: KVM: add SBI extension init()/deinit() functions
On 13/03/2025 15:27, Andrew Jones wrote: > On Mon, Mar 10, 2025 at 04:12:21PM +0100, Clément Léger wrote: >> The FWFT SBI extension will need to dynamically allocate memory and do >> init time specific initialization. Add an init/deinit callbacks that >> allows to do so. >> >> Signed-off-by: Clément Léger >> --- >> arch/riscv/include/asm/kvm_vcpu_sbi.h | 9 + >> arch/riscv/kvm/vcpu.c | 2 ++ >> arch/riscv/kvm/vcpu_sbi.c | 29 +++ >> 3 files changed, 40 insertions(+) >> >> diff --git a/arch/riscv/include/asm/kvm_vcpu_sbi.h >> b/arch/riscv/include/asm/kvm_vcpu_sbi.h >> index 4ed6203cdd30..bcb90757b149 100644 >> --- a/arch/riscv/include/asm/kvm_vcpu_sbi.h >> +++ b/arch/riscv/include/asm/kvm_vcpu_sbi.h >> @@ -49,6 +49,14 @@ struct kvm_vcpu_sbi_extension { >> >> /* Extension specific probe function */ >> unsigned long (*probe)(struct kvm_vcpu *vcpu); >> + >> +/* >> + * Init/deinit function called once during VCPU init/destroy. These >> + * might be use if the SBI extensions need to allocate or do specific >> + * init time only configuration. 
>> + */ >> +int (*init)(struct kvm_vcpu *vcpu); >> +void (*deinit)(struct kvm_vcpu *vcpu); >> }; >> >> void kvm_riscv_vcpu_sbi_forward(struct kvm_vcpu *vcpu, struct kvm_run *run); >> @@ -69,6 +77,7 @@ const struct kvm_vcpu_sbi_extension *kvm_vcpu_sbi_find_ext( >> bool riscv_vcpu_supports_sbi_ext(struct kvm_vcpu *vcpu, int idx); >> int kvm_riscv_vcpu_sbi_ecall(struct kvm_vcpu *vcpu, struct kvm_run *run); >> void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu); >> +void kvm_riscv_vcpu_sbi_deinit(struct kvm_vcpu *vcpu); >> >> int kvm_riscv_vcpu_get_reg_sbi_sta(struct kvm_vcpu *vcpu, unsigned long >> reg_num, >> unsigned long *reg_val); >> diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c >> index 60d684c76c58..877bcc85c067 100644 >> --- a/arch/riscv/kvm/vcpu.c >> +++ b/arch/riscv/kvm/vcpu.c >> @@ -185,6 +185,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) >> >> void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) >> { >> +kvm_riscv_vcpu_sbi_deinit(vcpu); >> + >> /* Cleanup VCPU AIA context */ >> kvm_riscv_vcpu_aia_deinit(vcpu); >> >> diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c >> index d1c83a77735e..858ddefd7e7f 100644 >> --- a/arch/riscv/kvm/vcpu_sbi.c >> +++ b/arch/riscv/kvm/vcpu_sbi.c >> @@ -505,8 +505,37 @@ void kvm_riscv_vcpu_sbi_init(struct kvm_vcpu *vcpu) >> continue; >> } >> >> +if (!ext->default_disabled && ext->init && >> +ext->init(vcpu) != 0) { >> +scontext->ext_status[idx] = >> KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE; >> +continue; >> +} > > I think this new block should be below the assignment below (and it can > drop the continue) and it shouldn't check default_disabled (as I've done > below). IOW, we should always run ext->init when there is one to run here. > Otherwise, I how will it get run later? Ok, i did not saw that there was a possibility to enable the extension at a later time. I'll fix that. Thanks, Clément > >> + >> scontext->ext_status[idx] = ext->default_disabled ? 
>> KVM_RISCV_SBI_EXT_STATUS_DISABLED : >> KVM_RISCV_SBI_EXT_STATUS_ENABLED; > > if (ext->init && ext->init(vcpu)) > scontext->ext_status[idx] = > KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE; > >> } >> } >> + >> +void kvm_riscv_vcpu_sbi_deinit(struct kvm_vcpu *vcpu) >> +{ >> +struct kvm_vcpu_sbi_context *scontext = &vcpu->arch.sbi_context; >> +const struct kvm_riscv_sbi_extension_entry *entry; >> +const struct kvm_vcpu_sbi_extension *ext; >> +int idx, i; >> + >> +for (i = 0; i < ARRAY_SIZE(sbi_ext); i++) { >> +entry = &sbi_ext[i]; >> +ext = entry->ext_ptr; >> +idx = entry->ext_idx; >> + >> +if (idx < 0 || idx >= ARRAY_SIZE(scontext->ext_status)) >> +continue; >> + >> +if (scontext->ext_status[idx] == >> KVM_RISCV_SBI_EXT_STATUS_UNAVAILABLE || >> +!ext->deinit) >> +continue; >> + >> +ext->deinit(vcpu); >> +} >> +} >> -- >> 2.47.2 >> > > Thanks, > drew
Re: [PATCH v11 13/27] prctl: arch-agnostic prctl for indirect branch tracking
On Fri, Mar 14, 2025 at 04:25:59PM +0800, Zong Li wrote: On Mon, Mar 10, 2025 at 11:42 PM Deepak Gupta wrote: Three architectures (x86, aarch64, riscv) have support for indirect branch tracking feature in a very similar fashion. On a very high level, indirect branch tracking is a CPU feature where CPU tracks branches which uses memory operand to perform control transfer in program. As part of this tracking on indirect branches, CPU goes in a state where it expects a landing pad instr on target and if not found then CPU raises some fault (architecture dependent) x86 landing pad instr - `ENDBRANCH` aarch64 landing pad instr - `BTI` riscv landing instr - `lpad` Given that three major arches have support for indirect branch tracking, This patch makes `prctl` for indirect branch tracking arch agnostic. To allow userspace to enable this feature for itself, following prtcls are defined: - PR_GET_INDIR_BR_LP_STATUS: Gets current configured status for indirect branch tracking. - PR_SET_INDIR_BR_LP_STATUS: Sets a configuration for indirect branch tracking. Following status options are allowed - PR_INDIR_BR_LP_ENABLE: Enables indirect branch tracking on user thread. - PR_INDIR_BR_LP_DISABLE; Disables indirect branch tracking on user thread. - PR_LOCK_INDIR_BR_LP_STATUS: Locks configured status for indirect branch tracking for user thread. Signed-off-by: Deepak Gupta Reviewed-by: Mark Brown --- arch/riscv/include/asm/usercfi.h | 16 - arch/riscv/kernel/entry.S| 2 +- arch/riscv/kernel/process.c | 5 +++ arch/riscv/kernel/usercfi.c | 76 include/linux/cpu.h | 4 +++ include/uapi/linux/prctl.h | 27 ++ kernel/sys.c | 30 7 files changed, 158 insertions(+), 2 deletions(-) diff --git a/arch/riscv/include/asm/usercfi.h b/arch/riscv/include/asm/usercfi.h index c4dcd256f19a..a8cec7c14d1d 100644 --- a/arch/riscv/include/asm/usercfi.h +++ b/arch/riscv/include/asm/usercfi.h @@ -16,7 +16,9 @@ struct kernel_clone_args; struct cfi_status { unsigned long ubcfi_en : 1; /* Enable for backward cfi. 
*/ unsigned long ubcfi_locked : 1; - unsigned long rsvd : ((sizeof(unsigned long) * 8) - 2); + unsigned long ufcfi_en : 1; /* Enable for forward cfi. Note that ELP goes in sstatus */ + unsigned long ufcfi_locked : 1; + unsigned long rsvd : ((sizeof(unsigned long) * 8) - 4); unsigned long user_shdw_stk; /* Current user shadow stack pointer */ unsigned long shdw_stk_base; /* Base address of shadow stack */ unsigned long shdw_stk_size; /* size of shadow stack */ @@ -33,6 +35,10 @@ bool is_shstk_locked(struct task_struct *task); bool is_shstk_allocated(struct task_struct *task); void set_shstk_lock(struct task_struct *task); void set_shstk_status(struct task_struct *task, bool enable); +bool is_indir_lp_enabled(struct task_struct *task); +bool is_indir_lp_locked(struct task_struct *task); +void set_indir_lp_status(struct task_struct *task, bool enable); +void set_indir_lp_lock(struct task_struct *task); #define PR_SHADOW_STACK_SUPPORTED_STATUS_MASK (PR_SHADOW_STACK_ENABLE) @@ -58,6 +64,14 @@ void set_shstk_status(struct task_struct *task, bool enable); #define set_shstk_status(task, enable) +#define is_indir_lp_enabled(task) false + +#define is_indir_lp_locked(task) false + +#define set_indir_lp_status(task, enable) + +#define set_indir_lp_lock(task) + #endif /* CONFIG_RISCV_USER_CFI */ #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 68c99124ea55..00494b54ff4a 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -143,7 +143,7 @@ SYM_CODE_START(handle_exception) * Disable the FPU/Vector to detect illegal usage of floating point * or vector in kernel space. 
*/ - li t0, SR_SUM | SR_FS_VS + li t0, SR_SUM | SR_FS_VS | SR_ELP REG_L s0, TASK_TI_USER_SP(tp) csrrc s1, CSR_STATUS, t0 diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index cd11667593fe..4587201dd81d 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -160,6 +160,11 @@ void start_thread(struct pt_regs *regs, unsigned long pc, set_shstk_status(current, false); set_shstk_base(current, 0, 0); set_active_shstk(current, 0); + /* +* disable indirect branch tracking on exec. +* libc will enable it later via prctl. +*/ + set_indir_lp_status(current, false); In set_indir_lp_status and set_shstk_status, the $senvcfg.LPE and $senvcfg.SSE fields are set. However, if the CPU does not support this CSR, writing to it will trigger an illegal instruction exception. Should we add sanity checks to handle this situation? Thanks hmm. these were two patches. something happened in my workflow and two were s
Re: [PATCH] Documentation: kcsan: fix "Plain Accesses and Data Races" URL in kcsan.rst
Hello, Ignacio Encinas Rubio wrote: > On 12/3/25 23:36, Jonathan Corbet wrote: >> It would be best, of course, to get the memory-model documentation >> properly into our built docs...someday... > > I hadn't thought about this. If this sentiment is shared by the LKMM > people I would be happy to work on this. Has this ever been > proposed/discussed before? > This might be something Jon would like to keep secret, but ... See the message and the thread it belongs to at: https://lore.kernel.org/lkml/pine.lnx.4.44l0.1907310947340.1497-100...@iolanthe.rowland.org/ It happened in 2019 responding to Mauro's attempt at conversion of LKMM docs. I haven't seen any change in sentiment among LKMM maintainers since. Your way forward would be to keep those .txt files *pure plain text* and to convert them on-the-fly into reST. Of course only if such an effort sounds worthwhile to you. Another approach might be to include those docs literally. A similar approach has been applied to Documentation/ atomic_t.txt atomic_bitops.txt memory-barriers.txt Regards, Akira
[PATCH v12 20/28] riscv/hwprobe: zicfilp / zicfiss enumeration in hwprobe
Adding enumeration of zicfilp and zicfiss extensions in hwprobe syscall. Reviewed-by: Zong Li Signed-off-by: Deepak Gupta --- arch/riscv/include/uapi/asm/hwprobe.h | 2 ++ arch/riscv/kernel/sys_hwprobe.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index c3c1cc951cb9..c1b537b50158 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -73,6 +73,8 @@ struct riscv_hwprobe { #defineRISCV_HWPROBE_EXT_ZCMOP (1ULL << 47) #defineRISCV_HWPROBE_EXT_ZAWRS (1ULL << 48) #defineRISCV_HWPROBE_EXT_SUPM (1ULL << 49) +#defineRISCV_HWPROBE_EXT_ZICFILP (1ULL << 50) +#defineRISCV_HWPROBE_EXT_ZICFISS (1ULL << 51) #define RISCV_HWPROBE_KEY_CPUPERF_05 #defineRISCV_HWPROBE_MISALIGNED_UNKNOWN(0 << 0) #defineRISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0) diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index bcd3b816306c..d802ff707913 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -108,6 +108,8 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, EXT_KEY(ZCB); EXT_KEY(ZCMOP); EXT_KEY(ZICBOZ); + EXT_KEY(ZICFILP); + EXT_KEY(ZICFISS); EXT_KEY(ZICOND); EXT_KEY(ZIHINTNTL); EXT_KEY(ZIHINTPAUSE); -- 2.34.1
[PATCH v4 0/2] slab: Introduce kmalloc_obj() and family
Hi, Here's a refresh and update on the kmalloc_obj() API proposal. Please see patch 2 for the specific details. And note that this is obviously not v6.15 material! :) Thanks! -Kees v4: - split __flex_counter() out and add appropriate helpers - add flex array examples to commit log - add "size" details to commit log - add treewide conversion details to commit log - improve treewide Coccinelle scripting - fix documentation typos v3: https://lore.kernel.org/lkml/20240822231324.make.666-k...@kernel.org/ v2: https://lore.kernel.org/lkml/20240807235433.work.317-k...@kernel.org/ v1: https://lore.kernel.org/lkml/20240719192744.work.264-k...@kernel.org/ Kees Cook (2): compiler_types: Introduce __flex_counter() and family slab: Introduce kmalloc_obj() and family Documentation/process/deprecated.rst | 42 +++ include/linux/compiler_types.h | 31 + include/linux/overflow.h | 36 ++ include/linux/slab.h | 170 +++ 4 files changed, 279 insertions(+) -- 2.34.1
[PATCH v4 1/2] compiler_types: Introduce __flex_counter() and family
Introduce __flex_counter() which wraps __builtin_counted_by_ref(), as newly introduced by GCC[1] and Clang[2]. Use of __flex_counter() allows access to the counter member of a struct's flexible array member when it has been annotated with __counted_by(). Introduce typeof_flex_counter(), can_set_flex_counter(), and set_flex_counter() to provide the needed _Generic() wrappers to get sane results out of __flex_counter(). For example, with: struct foo { int counter; short array[] __counted_by(counter); } *p; __flex_counter(p->array) will resolve to: &p->counter typeof_flex_counter(p->array) will resolve to "int". (If p->array was not annotated, it would resolve to "size_t".) can_set_flex_counter(p->array, COUNT) is the same as: COUNT <= type_max(p->counter) && COUNT >= type_min(p->counter) (If p->array was not annotated it would return true since everything fits in size_t.) set_flex_counter(p->array, COUNT) is the same as: p->counter = COUNT; (It is a no-op if p->array is not annotated with __counted_by().) Signed-off-by: Kees Cook --- Cc: Miguel Ojeda Cc: "Gustavo A. R. 
Silva" Cc: Nathan Chancellor Cc: Peter Zijlstra Cc: Nick Desaulniers Cc: Marco Elver Cc: Przemek Kitszel Cc: linux-harden...@vger.kernel.org --- include/linux/compiler_types.h | 31 + include/linux/overflow.h | 36 ++ 2 files changed, 67 insertions(+) diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 981cc3d7e3aa..8b45ecfad5b1 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -453,6 +453,37 @@ struct ftrace_likely_data { #define __annotated(var, attr) (false) #endif +/* + * Optional: only supported since gcc >= 15, clang >= 19 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html#index-_005f_005fbuiltin_005fcounted_005fby_005fref + * clang: https://github.com/llvm/llvm-project/pull/102549 + */ +#if __has_builtin(__builtin_counted_by_ref) +/** + * __flex_counter() - Get pointer to counter member for the given + *flexible array, if it was annotated with __counted_by() + * @FAM: Pointer to flexible array member of an addressable struct instance + * + * For example, with: + * + * struct foo { + * int counter; + * short array[] __counted_by(counter); + * } *p; + * + * __flex_counter(p->array) will resolve to &p->counter. + * + * Note that Clang may not allow this to be assigned to a separate + * variable; it must be used directly. + * + * If p->array is unannotated, this returns (void *)NULL. 
+ */ +#define __flex_counter(FAM)__builtin_counted_by_ref(FAM) +#else +#define __flex_counter(FAM)((void *)NULL) +#endif + /* * Some versions of gcc do not mark 'asm goto' volatile: * diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 0c7e3dcfe867..e2b81cb5576e 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -440,4 +440,40 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) #define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT)\ _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, }) +/** + * typeof_flex_counter() - Return the type of the counter variable of a given + * flexible array member annotated by __counted_by(). + * @FAM: Pointer to the flexible array member within a given struct. + * + * Returns "size_t" if no annotation exists. + */ +#define typeof_flex_counter(FAM) \ + typeof(_Generic(__flex_counter(FAM),\ + void *: (size_t)0, \ + default: *__flex_counter(FAM))) + +/** can_set_flex_counter() - Check if the counter associated with the given + * flexible array member can represent a value. + * @FAM: Pointer to the flexible array member within a given struct. + * @COUNT: Value to check against the __counted_by annotated @FAM's counter. + */ +#define can_set_flex_counter(FAM, COUNT) \ + (!overflows_type(COUNT, typeof_flex_counter(FAM))) + +/** + * set_flex_counter() - Set the counter associated with the given flexible + * array member that has been annotated by __counted_by(). + * @FAM: Pointer to the flexible array member within a given struct. + * @COUNT: Value to store to the __counted_by annotated @FAM's counter. + * + * This is a no-op if no annotation exists. Count needs to be checked with + * can_set_flex_counter(FAM, COUNT) before using this function. + */ +#define set_flex_counter(FAM, COUNT) \ +({ \ + *_Generic(__flex_counter(FAM), \ + void *: &(size_t){ 0 }, \ +
[PATCH v4 2/2] slab: Introduce kmalloc_obj() and family
Introduce type-aware kmalloc-family helpers to replace the common idioms for single, array, and flexible object allocations: ptr = kmalloc(sizeof(*ptr), gfp); ptr = kmalloc(sizeof(struct some_obj_name), gfp); ptr = kzalloc(sizeof(*ptr), gfp); ptr = kmalloc_array(count, sizeof(*ptr), gfp); ptr = kcalloc(count, sizeof(*ptr), gfp); ptr = kmalloc(struct_size(ptr, flex_member, count), gfp); These become, respectively: kmalloc_obj(ptr, gfp); kmalloc_obj(ptr, gfp); kzalloc_obj(ptr, gfp); kmalloc_objs(ptr, count, gfp); kzalloc_objs(ptr, count, gfp); kmalloc_flex(ptr, flex_member, count, gfp); Beyond the other benefits outlined below, the primary ergonomic benefit is the elimination of type redundancy (and the elimination of potential type mismatches), as the existing kmalloc assignment code pattern must always repeat the variable or the variable type on the right hand side. These each return the assigned value of ptr (which may be NULL on failure). For cases where the total size of the allocation is needed, the kmalloc_obj_sz(), kmalloc_objs_sz(), and kmalloc_flex_sz() family of macros can be used. For example: info->size = struct_size(ptr, flex_member, count); ptr = kmalloc(info->size, gfp); becomes: kmalloc_flex_sz(ptr, flex_member, count, gfp, &info->size); With the *_sz() helpers, it becomes possible to do bounds checking of the final size to make sure no arithmetic overflow has happened that exceeds the storage size of the target size variable. e.g. it was possible before to end up wrapping an allocation size and not noticing, thereby allocating too small a size. (Most of Linux's exposure on that particular problem is via newly written code as we already did bulk conversions[1], but we continue to have a steady stream of patches catching additional cases[2] that would just go away with this API.) 
Internal introspection of the allocated type now becomes possible, allowing for future alignment-aware choices to be made by the allocator and future hardening work that can be type sensitive. For example, adding __alignof(*ptr) as an argument to the internal allocators so that appropriate/efficient alignment choices can be made, or being able to correctly choose per-allocation offset randomization within a bucket that does not break alignment requirements. For the flexible array helpers, the internal use of __flex_counter() allows for automatically setting the counter member of a struct's flexible array member when it has been annotated with __counted_by(), avoiding any missed early size initializations while __counted_by() annotations are added to the kernel. Additionally, this also checks for "too large" allocations based on the type size of the counter variable. For example: if (count > type_max(ptr->flex_count)) fail...; info->size = struct_size(ptr, flex_member, count); ptr = kmalloc(info->size, gfp); ptr->flex_count = count; becomes (n.b. unchanged from earlier example): kmalloc_flex_sz(ptr, flex_member, count, gfp, &info->size); ptr->flex_count = count; Note that manual initialization of the flexible array counter is still required (at some point) after allocation as not all compiler versions support the __counted_by annotation yet. But doing it internally makes sure they cannot be missed when __counted_by _is_ available, meaning that the bounds checker will not trip due to the lack of "early enough" initializations that used to work before enabling the stricter bounds checking. For example: kmalloc_flex(ptr, flex_member, count); fill(ptr->flex, count); ptr->flex_count = count; This works correctly before adding a __counted_by annotation (since nothing is checking ptr->flex accesses against ptr->flex_count). After adding the annotation, the bounds sanitizer would trip during fill() because ptr->flex_count wasn't set yet. 
But with kmalloc_flex() setting ptr->flex_count internally at allocation time, the existing code works without needing to move the ptr->flex_count assignment before the call to fill(). (This has been a stumbling block for __counted_by adoption.) Replacing all existing simple code patterns found via Coccinelle[3] shows what could be replaced immediately (also saving roughly 2200 lines): 7568 files changed, 16342 insertions(+), 18580 deletions(-) This would take us from 23927 k*alloc assignments to 8378: $ git grep ' = kv\?[mzcv]alloc\(\|_array\)(' | wc -l 23927 $ git reset --hard HEAD^ HEAD is now at 8bccc91e6cdf treewide: kmalloc_obj conversion $ git grep ' = kv\?[mzcv]alloc\(\|_array\)(' | wc -l 8378 This treewide change could be done at the end of the merge window just before -rc1 is released (as is common for treewide changes). Handling this API change in backports to -stable should be possible without much hassle by backporting the __flex_counter() patch and th
Re: [PATCH v4 1/2] compiler_types: Introduce __flex_counter() and family
Hi Kees, On 3/14/25 8:15 PM, Kees Cook wrote: > diff --git a/include/linux/overflow.h b/include/linux/overflow.h > index 0c7e3dcfe867..e2b81cb5576e 100644 > --- a/include/linux/overflow.h > +++ b/include/linux/overflow.h > @@ -440,4 +440,40 @@ static inline size_t __must_check size_sub(size_t > minuend, size_t subtrahend) > #define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \ > _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, }) > > +/** > + * typeof_flex_counter() - Return the type of the counter variable of a given > + * flexible array member annotated by __counted_by(). > + * @FAM: Pointer to the flexible array member within a given struct. > + * > + * Returns "size_t" if no annotation exists. Please use * Returns: instead so that kernel-doc can make a special doc section for it. Same for patch 2/2. > + */ > +#define typeof_flex_counter(FAM) \ > + typeof(_Generic(__flex_counter(FAM),\ > + void *: (size_t)0, \ > + default: *__flex_counter(FAM))) > + > +/** can_set_flex_counter() - Check if the counter associated with the given Needs a newline between /** and the function name, as in set_flex_counter() below. > + * flexible array member can represent a value. > + * @FAM: Pointer to the flexible array member within a given struct. > + * @COUNT: Value to check against the __counted_by annotated @FAM's counter. > + */ > +#define can_set_flex_counter(FAM, COUNT) \ > + (!overflows_type(COUNT, typeof_flex_counter(FAM))) > + > +/** > + * set_flex_counter() - Set the counter associated with the given flexible > + * array member that has been annoated by > __counted_by(). > + * @FAM: Pointer to the flexible array member within a given struct. > + * @COUNT: Value to store to the __counted_by annotated @FAM's counter. > + * > + * This is a no-op if no annotation exists. Count needs to be checked with > + * can_set_flex_counter(FAM, COUNT) before using this function. 
> + */ > +#define set_flex_counter(FAM, COUNT) \ > +({ \ > + *_Generic(__flex_counter(FAM), \ > + void *: &(size_t){ 0 }, \ > + default: __flex_counter(FAM)) = (COUNT); \ > +}) > + > #endif /* __LINUX_OVERFLOW_H */ -- ~Randy
[PATCH net-next 4/6] netconsole: append release to sysdata
Append init_utsname()->release to the sysdata buffer before sending the message when the release feature is enabled. Signed-off-by: Breno Leitao --- drivers/net/netconsole.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c index 970dfc3ac9d41..0a7981ef752c7 100644 --- a/drivers/net/netconsole.c +++ b/drivers/net/netconsole.c @@ -1238,6 +1238,14 @@ static int sysdata_append_taskname(struct netconsole_target *nt, int offset) MAX_EXTRADATA_ENTRY_LEN, " taskname=%s\n", current->comm); } + +static int sysdata_append_release(struct netconsole_target *nt, int offset) +{ + return scnprintf(&nt->extradata_complete[offset], +MAX_EXTRADATA_ENTRY_LEN, " release=%s\n", +init_utsname()->release); +} + /* * prepare_extradata - append sysdata at extradata_complete in runtime * @nt: target to send message to @@ -1259,6 +1267,8 @@ static int prepare_extradata(struct netconsole_target *nt) extradata_len += sysdata_append_cpu_nr(nt, extradata_len); if (nt->sysdata_fields & SYSDATA_TASKNAME) extradata_len += sysdata_append_taskname(nt, extradata_len); + if (nt->sysdata_fields & SYSDATA_RELEASE) + extradata_len += sysdata_append_release(nt, extradata_len); WARN_ON_ONCE(extradata_len > MAX_EXTRADATA_ENTRY_LEN * MAX_EXTRADATA_ITEMS); -- 2.47.1