From: Jiang Liu <jiang....@huawei.com>

Implement basic lazy save and restore of the FPSIMD registers: FPSIMD state is
restored only on demand, and saved only if it has actually been loaded onto the
hardware.
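
The scheme amounts to a small per-task state machine around the new on_hw flag.
The following is a minimal, userspace-only C sketch of that idea (illustrative
only, not part of the patch): the helper names are simplified stand-ins for the
fpsimd_*_lazy()/do_fpsimd_acc() paths in the diff below, and the hardware trap
and register accesses are reduced to stubs.

/*
 * Illustrative, userspace-only sketch of the lazy save/restore idea above.
 * Not part of the patch: the access trap, CPACR_EL1 handling and the real
 * register load/save are stubbed out so only the bookkeeping remains.
 */
#include <stdbool.h>
#include <stdio.h>

struct fpsimd_state {
	bool on_hw;		/* is this task's FP/SIMD state live in the registers? */
};

static bool trap_enabled;	/* stand-in for trapping FP/SIMD accesses */

static void hw_load(struct fpsimd_state *s) { (void)s; printf("  load FP regs\n"); }
static void hw_save(struct fpsimd_state *s) { (void)s; printf("  save FP regs\n"); }

/* Switch in: load nothing yet, just arm the trap (cf. fpsimd_load_state_lazy). */
static void load_state_lazy(struct fpsimd_state *s)
{
	s->on_hw = false;
	trap_enabled = true;
}

/* Switch out: save only if the task actually used FP/SIMD (cf. fpsimd_save_state_lazy). */
static void save_state_lazy(struct fpsimd_state *s)
{
	if (s->on_hw)
		hw_save(s);
}

/* First FP/SIMD instruction after a switch lands here (cf. do_fpsimd_acc). */
static void access_trap(struct fpsimd_state *s)
{
	trap_enabled = false;
	hw_load(s);
	s->on_hw = true;
}

int main(void)
{
	struct fpsimd_state task = { .on_hw = false };

	printf("slice 1: task never touches FP/SIMD\n");
	load_state_lazy(&task);
	save_state_lazy(&task);		/* nothing to save */

	printf("slice 2: task executes an FP instruction\n");
	load_state_lazy(&task);
	if (trap_enabled)
		access_trap(&task);	/* on-demand restore */
	save_state_lazy(&task);		/* state was live, so save it */

	return 0;
}

The actual patch additionally gates the lazy path behind the fpsimd_lazy_mode
static key, so the existing eager behaviour is preserved while the key is off;
the sketch ignores that switch for brevity.
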
Signed-off-by: Jiang Liu <jiang....@huawei.com>
Cc: Jiang Liu <liu...@gmail.com>
---
 arch/arm64/include/asm/fpsimd.h |  17 ++---
 arch/arm64/kernel/fpsimd.c      | 150 ++++++++++++++++++++++++++++++++++++++--
 arch/arm64/kernel/process.c     |   4 +-
 arch/arm64/kernel/signal.c      |  13 ++--
 arch/arm64/kernel/signal32.c    |  13 ++--
 5 files changed, 164 insertions(+), 33 deletions(-)

diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 4c2bc80..725b225 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -35,6 +35,7 @@ struct fpsimd_state {
 			__uint128_t vregs[32];
 			u32 fpsr;
 			u32 fpcr;
+			bool on_hw;	/* soft state: whether loaded onto hw */
 		};
 	};
 };
@@ -54,21 +55,15 @@ struct fpsimd_state {
 
 struct task_struct;
 
-/* Clear FP status register, so it doesn't affect new FP context */
-static inline void fpsimd_init_hw_state(void)
-{
-	int val = AARCH64_FPCR_DEFAULT_VAL;
-
-	asm ("msr fpcr, %x0\n"
-	     "msr fpsr, xzr\n"
-	     : : "r"(val));
-}
-
 extern void fpsimd_save_state(struct fpsimd_state *state);
 extern void fpsimd_load_state(struct fpsimd_state *state);
-
 extern void fpsimd_thread_switch(struct task_struct *next);
 extern void fpsimd_flush_thread(void);
+extern void fpsimd_dup_state(struct fpsimd_state *src,
+			     struct fpsimd_state *dst);
+extern void fpsimd_save_sigctx(struct fpsimd_state *state);
+extern void fpsimd_prepare_sigctx(struct fpsimd_state *ctx);
+extern void fpsimd_restore_sigctx(struct fpsimd_state *ctx);
 
 #endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 12a25e5..2208ba3 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -4,6 +4,8 @@
  * Copyright (C) 2012 ARM Ltd.
  * Author: Catalin Marinas <catalin.mari...@arm.com>
  *
+ * Copyright (C) Jiang Liu <jiang....@huawei.com>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
@@ -22,6 +24,7 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/hardirq.h>
+#include <linux/jump_label.h>
 
 #include <asm/fpsimd.h>
 #include <asm/cputype.h>
@@ -33,13 +36,91 @@
 #define FPEXC_IXF	(1 << 4)
 #define FPEXC_IDF	(1 << 7)
 
+static struct static_key fpsimd_lazy_mode = STATIC_KEY_INIT_FALSE;
+
+static inline void fpsimd_set_on_hw(struct fpsimd_state *state)
+{
+	state->on_hw = true;
+}
+
+static inline void fpsimd_clear_on_hw(struct fpsimd_state *state)
+{
+	state->on_hw = false;
+}
+
+static inline bool fpsimd_is_on_hw(struct fpsimd_state *state)
+{
+	return state->on_hw;
+}
+
+/* Clear FP status register, so it doesn't affect new FP context */
+static inline void fpsimd_init_hw_state(void)
+{
+	int val = AARCH64_FPCR_DEFAULT_VAL;
+
+	asm ("msr fpcr, %x0\n"
+	     "msr fpsr, xzr\n"
+	     : : "r"(val));
+}
+
+static inline void fpsimd_enable_trap(void)
+{
+	u32 __val;
+
+	asm volatile ("mrs %0, cpacr_el1\n"
+		      "and %w0, %w0, #0xFFCFFFFF\n"
+		      "msr cpacr_el1, %0"
+		      : "=&r" (__val));
+}
+
+static inline void fpsimd_disable_trap(void)
+{
+	u32 __val;
+
+	asm volatile ("mrs %0, cpacr_el1\n"
+		      "orr %w0, %w0, #0x00300000\n"
+		      "msr cpacr_el1, %0"
+		      : "=&r" (__val));
+}
+
+/*
+ * If lazy mode is enabled, the caller needs to disable preemption
+ * when calling fpsimd_load_state_lazy() and fpsimd_save_state_lazy().
+ */
+static void fpsimd_load_state_lazy(struct fpsimd_state *state)
+{
+	if (static_key_false(&fpsimd_lazy_mode)) {
+		fpsimd_clear_on_hw(state);
+		fpsimd_enable_trap();
+	} else {
+		fpsimd_load_state(state);
+	}
+}
+
+static void fpsimd_save_state_lazy(struct fpsimd_state *state)
+{
+	if (static_key_false(&fpsimd_lazy_mode)) {
+		if (!fpsimd_is_on_hw(state))
+			return;
+	}
+
+	fpsimd_save_state(state);
+}
+
 /*
  * Trapped FP/ASIMD access.
  */
 void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs)
 {
-	/* TODO: implement lazy context saving/restoring */
-	WARN_ON(1);
+	struct fpsimd_state *state = &current->thread.fpsimd_state;
+
+	if (static_key_false(&fpsimd_lazy_mode)) {
+		fpsimd_disable_trap();
+		fpsimd_load_state(state);
+		fpsimd_set_on_hw(state);
+	} else {
+		WARN_ON(1);
+	}
 }
 
 /*
@@ -73,9 +154,9 @@ void fpsimd_thread_switch(struct task_struct *next)
 {
 	/* check if not kernel threads */
 	if (current->mm)
-		fpsimd_save_state(&current->thread.fpsimd_state);
+		fpsimd_save_state_lazy(&current->thread.fpsimd_state);
 	if (next->mm)
-		fpsimd_load_state(&next->thread.fpsimd_state);
+		fpsimd_load_state_lazy(&next->thread.fpsimd_state);
 }
 
 void fpsimd_flush_thread(void)
 {
@@ -87,7 +168,59 @@ void fpsimd_flush_thread(void)
 #if (AARCH64_FPCR_DEFAULT_VAL != 0)
 	state->fpcr = AARCH64_FPCR_DEFAULT_VAL;
 #endif
-	fpsimd_load_state(state);
+	fpsimd_load_state_lazy(state);
+	preempt_enable();
+}
+
+/*
+ * 'src' has already been copied into 'dst' when this is called, so we only
+ * need to save the FPSIMD registers into 'dst' if 'src' is loaded on hardware.
+ */
+void fpsimd_dup_state(struct fpsimd_state *src, struct fpsimd_state *dst)
+{
+	BUG_ON(src != &current->thread.fpsimd_state);
+	if (static_key_false(&fpsimd_lazy_mode)) {
+		preempt_disable();
+		if (fpsimd_is_on_hw(src))
+			fpsimd_save_state(dst);
+		fpsimd_clear_on_hw(dst);
+		preempt_enable();
+	} else {
+		fpsimd_save_state(dst);
+	}
+}
+
+void fpsimd_save_sigctx(struct fpsimd_state *state)
+{
+	preempt_disable();
+	fpsimd_save_state_lazy(state);
+	preempt_enable();
+}
+
+/* The old FPSIMD context has already been saved into the sigframe when this is called. */
+void fpsimd_prepare_sigctx(struct fpsimd_state *ctx)
+{
+	if (static_key_false(&fpsimd_lazy_mode)) {
+		preempt_disable();
+		if (fpsimd_is_on_hw(ctx)) {
+			fpsimd_init_hw_state();
+		} else {
+			ctx->fpsr = 0;
+			ctx->fpcr = AARCH64_FPCR_DEFAULT_VAL;
+		}
+		preempt_enable();
+	} else {
+		fpsimd_init_hw_state();
+	}
+}
+
+void fpsimd_restore_sigctx(struct fpsimd_state *ctx)
+{
+	struct fpsimd_state *state = &current->thread.fpsimd_state;
+
+	preempt_disable();
+	*state = *ctx;
+	fpsimd_load_state_lazy(state);
 	preempt_enable();
 }
 
@@ -103,7 +236,10 @@ void kernel_neon_begin(void)
 	preempt_disable();
 
 	if (current->mm)
-		fpsimd_save_state(&current->thread.fpsimd_state);
+		fpsimd_save_state_lazy(&current->thread.fpsimd_state);
+
+	if (static_key_false(&fpsimd_lazy_mode))
+		fpsimd_disable_trap();
 
 	fpsimd_init_hw_state();
 }
@@ -112,7 +248,7 @@ EXPORT_SYMBOL(kernel_neon_begin);
 void kernel_neon_end(void)
 {
 	if (current->mm)
-		fpsimd_load_state(&current->thread.fpsimd_state);
+		fpsimd_load_state_lazy(&current->thread.fpsimd_state);
 
 	preempt_enable();
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 7ae8a1f..0176fac 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -195,8 +195,10 @@ void release_thread(struct task_struct *dead_task)
 
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
-	fpsimd_save_state(&current->thread.fpsimd_state);
+	BUG_ON(src != current);
 	*dst = *src;
+	fpsimd_dup_state(&src->thread.fpsimd_state, &dst->thread.fpsimd_state);
+
 	return 0;
 }
 
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index 6d80612..b6fe0d1 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -51,8 +51,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
 	int err;
 
 	/* dump the hardware registers to the fpsimd_state structure */
-	fpsimd_save_state(fpsimd);
-	fpsimd_init_hw_state();
+	fpsimd_save_sigctx(fpsimd);
 
 	/* copy the FP and status/control registers */
 	err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs));
@@ -63,6 +62,9 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx)
 	__put_user_error(FPSIMD_MAGIC, &ctx->head.magic, err);
 	__put_user_error(sizeof(struct fpsimd_context), &ctx->head.size, err);
 
+	if (!err)
+		fpsimd_prepare_sigctx(fpsimd);
+
 	return err ? -EFAULT : 0;
 }
 
@@ -87,11 +89,8 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
 	__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
 
 	/* load the hardware registers from the fpsimd_state structure */
-	if (!err) {
-		preempt_disable();
-		fpsimd_load_state(&fpsimd);
-		preempt_enable();
-	}
+	if (!err)
+		fpsimd_restore_sigctx(&fpsimd);
 
 	return err ? -EFAULT : 0;
 }
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index cb2cb41..8b4cb89 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -247,8 +247,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
 	 * Note that this also saves V16-31, which aren't visible
 	 * in AArch32.
 	 */
-	fpsimd_save_state(fpsimd);
-	fpsimd_init_hw_state();
+	fpsimd_save_sigctx(fpsimd);
 
 	/* Place structure header on the stack */
 	__put_user_error(magic, &frame->magic, err);
@@ -276,6 +275,9 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame)
 	__put_user_error(0, &frame->ufp_exc.fpinst, err);
 	__put_user_error(0, &frame->ufp_exc.fpinst2, err);
 
+	if (!err)
+		fpsimd_prepare_sigctx(fpsimd);
+
 	return err ? -EFAULT : 0;
 }
 
@@ -311,11 +313,8 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame)
 	 * We don't need to touch the exception register, so
 	 * reload the hardware state.
 	 */
-	if (!err) {
-		preempt_disable();
-		fpsimd_load_state(&fpsimd);
-		preempt_enable();
-	}
+	if (!err)
+		fpsimd_restore_sigctx(&fpsimd);
 
 	return err ? -EFAULT : 0;
 }
-- 
1.8.1.2