The hdec interrupt handler is reported to sometimes fire in Linux if KVM leaves it pending after a guest exists. This is harmless, so there is a no-op handler for it.
The interrupt handler currently uses the regular kernel stack. Change this to avoid touching the stack entirely. This should be the last place where the regular Linux stack can be accessed with asynchronous interrupts (including PMI) soft-masked. It might be possible to take advantage of this invariant, e.g., to context switch the kernel stack SLB entry without clearing MSR[EE]. Signed-off-by: Nicholas Piggin <npig...@gmail.com> --- arch/powerpc/include/asm/time.h | 1 - arch/powerpc/kernel/exceptions-64s.S | 25 ++++++++++++++++++++----- arch/powerpc/kernel/time.c | 9 --------- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 54f4ec1f9fab..0e17c41c4f46 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -24,7 +24,6 @@ extern struct clock_event_device decrementer_clockevent; extern void generic_calibrate_decr(void); -extern void hdec_interrupt(struct pt_regs *regs); /* Some sane defaults: 125 MHz timebase, 1GHz processor */ extern unsigned long ppc_proc_freq; diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index a4ceb88c53c4..3f4b7dfa800b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1505,6 +1505,8 @@ INT_DEFINE_BEGIN(hdecrementer) IVEC=0x980 IHSRR=EXC_HV IAREA=PACA_EXGEN + ISTACK=0 + IRECONCILE=0 IKVM_REAL=1 IKVM_VIRT=1 INT_DEFINE_END(hdecrementer) @@ -1516,11 +1518,24 @@ EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80) GEN_INT_ENTRY hdecrementer, GEN_VIRT EXC_VIRT_END(hdecrementer, 0x4980, 0x80) EXC_COMMON_BEGIN(hdecrementer_common) - GEN_COMMON hdecrementer - bl save_nvgprs - addi r3,r1,STACK_FRAME_OVERHEAD - bl hdec_interrupt - b ret_from_except + __GEN_COMMON_ENTRY hdecrementer + /* + * Hypervisor decrementer interrupts not caught by the KVM test + * shouldn't occur but are sometimes left pending on exit from a KVM + * guest. We don't need to do anything to clear them, as they are + * edge-triggered. + * + * Be careful to avoid touching the kernel stack. + */ + ld r10,PACA_EXGEN+EX_CTR(r13) + mtctr r10 + mtcrf 0x80,r9 + ld r9,PACA_EXGEN+EX_R9(r13) + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) + ld r13,PACA_EXGEN+EX_R13(r13) + HRFI_TO_KERNEL GEN_KVM hdecrementer diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 694522308cd5..bebc8c440289 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -663,15 +663,6 @@ void timer_broadcast_interrupt(void) } #endif -/* - * Hypervisor decrementer interrupts shouldn't occur but are sometimes - * left pending on exit from a KVM guest. We don't need to do anything - * to clear them, as they are edge-triggered. - */ -void hdec_interrupt(struct pt_regs *regs) -{ -} - #ifdef CONFIG_SUSPEND static void generic_suspend_disable_irqs(void) { -- 2.22.0