On 06/06/2019 10:31, Julien Thierry wrote: > When using IRQ priority masking to disable interrupts, in order to deal > with the PSR.I state, local_irq_save() would convert the I bit into a > PMR value (GIC_PRIO_IRQOFF). This resulted in local_irq_restore() > potentially modifying the value of PMR in undesired location due to the > state of PSR.I upon flag saving [1]. > > In an attempt to solve this issue in a less hackish manner, introduce > a bit (GIC_PRIO_IGNORE_PMR) for the PMR values that can represent > whether PSR.I is being used to disable interrupts, in which case it > takes precedence of the status of interrupt masking via PMR. > > GIC_PRIO_IGNORE_PMR is chosen such that (<pmr_value> | > GIC_PRIO_IGNORE_PMR) does not mask more interrupts than <pmr_value> as > some sections (e.g. arch_cpu_idle(), interrupt acknowledge path) > requires PMR not to mask interrupts that could be signaled to the > CPU when using only PSR.I. >
s/GIC_PRIO_IGNORE_PMR/GIC_PRIO_PSR_I_SET/ > [1] https://www.spinics.net/lists/arm-kernel/msg716956.html > > Fixes: commit 4a503217ce37 ("arm64: irqflags: Use ICC_PMR_EL1 for interrupt > masking") > Signed-off-by: Julien Thierry <julien.thie...@arm.com> > Reported-by: Zenghui Yu <yuzeng...@huawei.com> > Cc: Steven Rostedt <rost...@goodmis.org> > Cc: Wei Li <liwei...@huawei.com> > Cc: Catalin Marinas <catalin.mari...@arm.com> > Cc: Will Deacon <will.dea...@arm.com> > Cc: Christoffer Dall <christoffer.d...@arm.com> > Cc: Marc Zyngier <marc.zyng...@arm.com> > Cc: James Morse <james.mo...@arm.com> > Cc: Suzuki K Pouloze <suzuki.poul...@arm.com> > Cc: Oleg Nesterov <o...@redhat.com> > --- > arch/arm64/include/asm/arch_gicv3.h | 4 ++- > arch/arm64/include/asm/daifflags.h | 68 > ++++++++++++++++++++++--------------- > arch/arm64/include/asm/irqflags.h | 67 +++++++++++++++--------------------- > arch/arm64/include/asm/kvm_host.h | 7 ++-- > arch/arm64/include/asm/ptrace.h | 10 ++++-- > arch/arm64/kernel/entry.S | 38 ++++++++++++++++++--- > arch/arm64/kernel/process.c | 2 +- > arch/arm64/kernel/smp.c | 8 +++-- > arch/arm64/kvm/hyp/switch.c | 2 +- > 9 files changed, 123 insertions(+), 83 deletions(-) > > diff --git a/arch/arm64/include/asm/arch_gicv3.h > b/arch/arm64/include/asm/arch_gicv3.h > index 14b41dd..9e991b6 100644 > --- a/arch/arm64/include/asm/arch_gicv3.h > +++ b/arch/arm64/include/asm/arch_gicv3.h > @@ -163,7 +163,9 @@ static inline bool gic_prio_masking_enabled(void) > > static inline void gic_pmr_mask_irqs(void) > { > - BUILD_BUG_ON(GICD_INT_DEF_PRI <= GIC_PRIO_IRQOFF); > + BUILD_BUG_ON(GICD_INT_DEF_PRI < (GIC_PRIO_IRQOFF | > + GIC_PRIO_PSR_I_SET)); > + BUILD_BUG_ON(GICD_INT_DEF_PRI >= GIC_PRIO_IRQON); > gic_write_pmr(GIC_PRIO_IRQOFF); > } > > diff --git a/arch/arm64/include/asm/daifflags.h > b/arch/arm64/include/asm/daifflags.h > index db452aa..f93204f 100644 > --- a/arch/arm64/include/asm/daifflags.h > +++ b/arch/arm64/include/asm/daifflags.h > @@ -18,6 +18,7 @@ > > #include <linux/irqflags.h> > > +#include <asm/arch_gicv3.h> > #include <asm/cpufeature.h> > > #define DAIF_PROCCTX 0 > @@ -32,6 +33,11 @@ static inline void local_daif_mask(void) > : > : > : "memory"); > + > + /* Don't really care for a dsb here, we don't intend to enable IRQs */ > + if (system_uses_irq_prio_masking()) > + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); > + > trace_hardirqs_off(); > } > > @@ -43,7 +49,7 @@ static inline unsigned long local_daif_save(void) > > if (system_uses_irq_prio_masking()) { > /* If IRQs are masked with PMR, reflect it in the flags */ > - if (read_sysreg_s(SYS_ICC_PMR_EL1) <= GIC_PRIO_IRQOFF) > + if (read_sysreg_s(SYS_ICC_PMR_EL1) != GIC_PRIO_IRQON) > flags |= PSR_I_BIT; > } > > @@ -59,36 +65,44 @@ static inline void local_daif_restore(unsigned long flags) > if (!irq_disabled) { > trace_hardirqs_on(); > > - if (system_uses_irq_prio_masking()) > - arch_local_irq_enable(); > - } else if (!(flags & PSR_A_BIT)) { > - /* > - * If interrupts are disabled but we can take > - * asynchronous errors, we can take NMIs > - */ > if (system_uses_irq_prio_masking()) { > - flags &= ~PSR_I_BIT; > + gic_write_pmr(GIC_PRIO_IRQON); > + dsb(sy); > + } > + } else if (system_uses_irq_prio_masking()) { > + u64 pmr; > + > + if (!(flags & PSR_A_BIT)) { > /* > - * There has been concern that the write to daif > - * might be reordered before this write to PMR. > - * From the ARM ARM DDI 0487D.a, section D1.7.1 > - * "Accessing PSTATE fields": > - * Writes to the PSTATE fields have side-effects on > - * various aspects of the PE operation. All of these > - * side-effects are guaranteed: > - * - Not to be visible to earlier instructions in > - * the execution stream. > - * - To be visible to later instructions in the > - * execution stream > - * > - * Also, writes to PMR are self-synchronizing, so no > - * interrupts with a lower priority than PMR is signaled > - * to the PE after the write. > - * > - * So we don't need additional synchronization here. > + * If interrupts are disabled but we can take > + * asynchronous errors, we can take NMIs > */ > - arch_local_irq_disable(); > + flags &= ~PSR_I_BIT; > + pmr = GIC_PRIO_IRQOFF; > + } else { > + pmr = GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET; > } > + > + /* > + * There has been concern that the write to daif > + * might be reordered before this write to PMR. > + * From the ARM ARM DDI 0487D.a, section D1.7.1 > + * "Accessing PSTATE fields": > + * Writes to the PSTATE fields have side-effects on > + * various aspects of the PE operation. All of these > + * side-effects are guaranteed: > + * - Not to be visible to earlier instructions in > + * the execution stream. > + * - To be visible to later instructions in the > + * execution stream > + * > + * Also, writes to PMR are self-synchronizing, so no > + * interrupts with a lower priority than PMR is signaled > + * to the PE after the write. > + * > + * So we don't need additional synchronization here. > + */ > + gic_write_pmr(pmr); > } > > write_sysreg(flags, daif); > diff --git a/arch/arm64/include/asm/irqflags.h > b/arch/arm64/include/asm/irqflags.h > index fbe1aba..b6f757f 100644 > --- a/arch/arm64/include/asm/irqflags.h > +++ b/arch/arm64/include/asm/irqflags.h > @@ -67,43 +67,46 @@ static inline void arch_local_irq_disable(void) > */ > static inline unsigned long arch_local_save_flags(void) > { > - unsigned long daif_bits; > unsigned long flags; > > - daif_bits = read_sysreg(daif); > - > - /* > - * The asm is logically equivalent to: > - * > - * if (system_uses_irq_prio_masking()) > - * flags = (daif_bits & PSR_I_BIT) ? > - * GIC_PRIO_IRQOFF : > - * read_sysreg_s(SYS_ICC_PMR_EL1); > - * else > - * flags = daif_bits; > - */ > asm volatile(ALTERNATIVE( > - "mov %0, %1\n" > - "nop\n" > - "nop", > - __mrs_s("%0", SYS_ICC_PMR_EL1) > - "ands %1, %1, " __stringify(PSR_I_BIT) "\n" > - "csel %0, %0, %2, eq", > - ARM64_HAS_IRQ_PRIO_MASKING) > - : "=&r" (flags), "+r" (daif_bits) > - : "r" ((unsigned long) GIC_PRIO_IRQOFF) > - : "cc", "memory"); > + "mrs %0, daif", > + __mrs_s("%0", SYS_ICC_PMR_EL1), > + ARM64_HAS_IRQ_PRIO_MASKING) > + : "=&r" (flags) > + : > + : "memory"); I think this is worth a comment here, as you're changing the semantics of arch_local_save_flags(). Maybe just indicating that the only thing this should be used for is to carry the interrupt state, and nothing else. > > return flags; > } > > +static inline int arch_irqs_disabled_flags(unsigned long flags) > +{ > + int res; > + > + asm volatile(ALTERNATIVE( > + "and %w0, %w1, #" __stringify(PSR_I_BIT), > + "eor %w0, %w1, #" __stringify(GIC_PRIO_IRQOFF), > + ARM64_HAS_IRQ_PRIO_MASKING) > + : "=&r" (res) > + : "r" ((int) flags) > + : "memory"); > + > + return res; > +} > + > static inline unsigned long arch_local_irq_save(void) > { > unsigned long flags; > > flags = arch_local_save_flags(); > > - arch_local_irq_disable(); > + /* > + * There are too many states with IRQs disabled, just keep the current > + * state if interrupts are already disabled/masked. > + */ > + if (!arch_irqs_disabled_flags(flags)) > + arch_local_irq_disable(); > > return flags; > } > @@ -124,21 +127,5 @@ static inline void arch_local_irq_restore(unsigned long > flags) > : "memory"); > } > > -static inline int arch_irqs_disabled_flags(unsigned long flags) > -{ > - int res; > - > - asm volatile(ALTERNATIVE( > - "and %w0, %w1, #" __stringify(PSR_I_BIT) "\n" > - "nop", > - "cmp %w1, #" __stringify(GIC_PRIO_IRQOFF) "\n" > - "cset %w0, ls", > - ARM64_HAS_IRQ_PRIO_MASKING) > - : "=&r" (res) > - : "r" ((int) flags) > - : "cc", "memory"); > - > - return res; > -} > #endif > #endif > diff --git a/arch/arm64/include/asm/kvm_host.h > b/arch/arm64/include/asm/kvm_host.h > index 4bcd9c1..fdc0e1c 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -608,11 +608,12 @@ static inline void kvm_arm_vhe_guest_enter(void) > * will not signal the CPU of interrupts of lower priority, and the > * only way to get out will be via guest exceptions. > * Naturally, we want to avoid this. > + * > + * local_daif_mask() already sets IGNORE_PMR, we just need a GIC_PRIO_PSR_I_SET? > + * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. > */ > - if (system_uses_irq_prio_masking()) { > - gic_write_pmr(GIC_PRIO_IRQON); > + if (system_uses_irq_prio_masking()) > dsb(sy); > - } > } > > static inline void kvm_arm_vhe_guest_exit(void) > diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h > index b2de329..da22422 100644 > --- a/arch/arm64/include/asm/ptrace.h > +++ b/arch/arm64/include/asm/ptrace.h > @@ -35,9 +35,15 @@ > * means masking more IRQs (or at least that the same IRQs remain masked). > * > * To mask interrupts, we clear the most significant bit of PMR. > + * > + * Some code sections either automatically switch back to PSR.I or explicitly > + * require to not use priority masking. If bit GIC_PRIO_PSR_I_SET is included > + * in the the priority mask, it indicates that PSR.I should be set and > + * interrupt disabling temporarily does not rely on IRQ priorities. > */ > -#define GIC_PRIO_IRQON 0xf0 > -#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) > +#define GIC_PRIO_IRQON 0xc0 > +#define GIC_PRIO_IRQOFF (GIC_PRIO_IRQON & ~0x80) > +#define GIC_PRIO_PSR_I_SET (1 << 4) > > /* Additional SPSR bits not exposed in the UABI */ > #define PSR_IL_BIT (1 << 20) > diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S > index 46358a3..7f92e4b 100644 > --- a/arch/arm64/kernel/entry.S > +++ b/arch/arm64/kernel/entry.S > @@ -258,6 +258,7 @@ alternative_else_nop_endif > /* > * Registers that may be useful after this macro is invoked: > * > + * x20 - ICC_PMR_EL1 > * x21 - aborted SP > * x22 - aborted PC > * x23 - aborted PSTATE > @@ -449,6 +450,24 @@ alternative_endif > .endm > #endif > > + .macro gic_prio_kentry_setup, tmp:req > +#ifdef CONFIG_ARM64_PSEUDO_NMI > + alternative_if ARM64_HAS_IRQ_PRIO_MASKING > + mov x20, #(GIC_PRIO_PSR_I_SET | GIC_PRIO_IRQON) > + msr_s SYS_ICC_PMR_EL1, x20 I find the implicit use of x20 quite dangerous, but hey. I guess that the context makes that fairly explicit. > + alternative_else_nop_endif > +#endif > + .endm > + > + .macro gic_prio_irq_setup, pmr:req, tmp:req > +#ifdef CONFIG_ARM64_PSEUDO_NMI > + alternative_if ARM64_HAS_IRQ_PRIO_MASKING > + orr \tmp, \pmr, #GIC_PRIO_PSR_I_SET > + msr_s SYS_ICC_PMR_EL1, \tmp > + alternative_else_nop_endif > +#endif > + .endm > + > .text > > /* > @@ -627,6 +646,7 @@ el1_dbg: > cmp x24, #ESR_ELx_EC_BRK64 // if BRK64 > cinc x24, x24, eq // set bit '0' > tbz x24, #0, el1_inv // EL1 only > + gic_prio_kentry_setup tmp=x3 > mrs x0, far_el1 > mov x2, sp // struct pt_regs > bl do_debug_exception > @@ -644,12 +664,10 @@ ENDPROC(el1_sync) > .align 6 > el1_irq: > kernel_entry 1 > + gic_prio_irq_setup pmr=x20, tmp=x1 > enable_da_f > > #ifdef CONFIG_ARM64_PSEUDO_NMI > -alternative_if ARM64_HAS_IRQ_PRIO_MASKING > - ldr x20, [sp, #S_PMR_SAVE] > -alternative_else_nop_endif > test_irqs_unmasked res=x0, pmr=x20 > cbz x0, 1f > bl asm_nmi_enter > @@ -679,8 +697,9 @@ alternative_else_nop_endif > > #ifdef CONFIG_ARM64_PSEUDO_NMI > /* > - * if IRQs were disabled when we received the interrupt, we have an NMI > - * and we are not re-enabling interrupt upon eret. Skip tracing. > + * When using IRQ priority masking, we can get spurious interrupts while > + * PMR is set to GIC_PRIO_IRQOFF. An NMI might also have occurred in a > + * section with interrupts disabled. Skip tracing in those cases. > */ > test_irqs_unmasked res=x0, pmr=x20 > cbz x0, 1f > @@ -809,6 +828,7 @@ el0_ia: > * Instruction abort handling > */ > mrs x26, far_el1 > + gic_prio_kentry_setup tmp=x0 > enable_da_f > #ifdef CONFIG_TRACE_IRQFLAGS > bl trace_hardirqs_off > @@ -854,6 +874,7 @@ el0_sp_pc: > * Stack or PC alignment exception handling > */ > mrs x26, far_el1 > + gic_prio_kentry_setup tmp=x0 > enable_da_f > #ifdef CONFIG_TRACE_IRQFLAGS > bl trace_hardirqs_off > @@ -888,6 +909,7 @@ el0_dbg: > * Debug exception handling > */ > tbnz x24, #0, el0_inv // EL0 only > + gic_prio_kentry_setup tmp=x3 > mrs x0, far_el1 > mov x1, x25 > mov x2, sp > @@ -909,7 +931,9 @@ ENDPROC(el0_sync) > el0_irq: > kernel_entry 0 > el0_irq_naked: > + gic_prio_irq_setup pmr=x20, tmp=x0 > enable_da_f > + > #ifdef CONFIG_TRACE_IRQFLAGS > bl trace_hardirqs_off > #endif > @@ -931,6 +955,7 @@ ENDPROC(el0_irq) > el1_error: > kernel_entry 1 > mrs x1, esr_el1 > + gic_prio_kentry_setup tmp=x2 > enable_dbg > mov x0, sp > bl do_serror > @@ -941,6 +966,7 @@ el0_error: > kernel_entry 0 > el0_error_naked: > mrs x1, esr_el1 > + gic_prio_kentry_setup tmp=x2 > enable_dbg > mov x0, sp > bl do_serror > @@ -965,6 +991,7 @@ work_pending: > */ > ret_to_user: > disable_daif > + gic_prio_kentry_setup tmp=x3 > ldr x1, [tsk, #TSK_TI_FLAGS] > and x2, x1, #_TIF_WORK_MASK > cbnz x2, work_pending > @@ -981,6 +1008,7 @@ ENDPROC(ret_to_user) > */ > .align 6 > el0_svc: > + gic_prio_kentry_setup tmp=x1 > mov x0, sp > bl el0_svc_handler > b ret_to_user > diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c > index 3767fb2..58efc37 100644 > --- a/arch/arm64/kernel/process.c > +++ b/arch/arm64/kernel/process.c > @@ -94,7 +94,7 @@ static void __cpu_do_idle_irqprio(void) > * be raised. > */ > pmr = gic_read_pmr(); > - gic_write_pmr(GIC_PRIO_IRQON); > + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); > > __cpu_do_idle(); > > diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c > index bb4b3f0..4deaee3 100644 > --- a/arch/arm64/kernel/smp.c > +++ b/arch/arm64/kernel/smp.c > @@ -192,11 +192,13 @@ static void init_gic_priority_masking(void) > > WARN_ON(!(cpuflags & PSR_I_BIT)); > > - gic_write_pmr(GIC_PRIO_IRQOFF); > - > /* We can only unmask PSR.I if we can take aborts */ > - if (!(cpuflags & PSR_A_BIT)) > + if (!(cpuflags & PSR_A_BIT)) { > + gic_write_pmr(GIC_PRIO_IRQOFF); > write_sysreg(cpuflags & ~PSR_I_BIT, daif); > + } else { > + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); > + } > } > > /* > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c > index 8799e0c..b89fcf0 100644 > --- a/arch/arm64/kvm/hyp/switch.c > +++ b/arch/arm64/kvm/hyp/switch.c > @@ -615,7 +615,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) > * Naturally, we want to avoid this. > */ > if (system_uses_irq_prio_masking()) { > - gic_write_pmr(GIC_PRIO_IRQON); > + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); > dsb(sy); > } > > -- > 1.9.1 > Thanks, M. -- Jazz is not dead. It just smells funny...