From: Lai Jiangshan <la...@linux.alibaba.com> The commit 929bacec21478("x86/entry/64: De-Xen-ify our NMI code") simplified the NMI code by changing paravirt code into native code and left a comment about "inspecting RIP instead". But until now, "inspecting RIP instead" has not been made happened and this patch tries to complete it.
Signed-off-by: Lai Jiangshan <la...@linux.alibaba.com> --- arch/x86/entry/entry_64.S | 46 +++++++++++---------------------------- 1 file changed, 13 insertions(+), 33 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index cad08703c4ad..cb6b8a6c6652 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1268,32 +1268,12 @@ SYM_CODE_START(asm_exc_nmi) je nested_nmi /* - * Now test if the previous stack was an NMI stack. This covers - * the case where we interrupt an outer NMI after it clears - * "NMI executing" but before IRET. We need to be careful, though: - * there is one case in which RSP could point to the NMI stack - * despite there being no NMI active: naughty userspace controls - * RSP at the very beginning of the SYSCALL targets. We can - * pull a fast one on naughty userspace, though: we program - * SYSCALL to mask DF, so userspace cannot cause DF to be set - * if it controls the kernel's RSP. We set DF before we clear - * "NMI executing". + * Now test if we interrupt an outer NMI after it clears + * "NMI executing" but before iret. */ - lea 6*8(%rsp), %rdx - /* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */ - cmpq %rdx, 4*8(%rsp) - /* If the stack pointer is above the NMI stack, this is a normal NMI */ - ja first_nmi - - subq $EXCEPTION_STKSZ, %rdx - cmpq %rdx, 4*8(%rsp) - /* If it is below the NMI stack, it is a normal NMI */ - jb first_nmi - - /* Ah, it is within the NMI stack. */ - - testb $(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp) - jz first_nmi /* RSP was user controlled. */ + movq $nmi_executing_cleared, %rdx + cmpq 8(%rsp), %rdx + jne first_nmi /* This is a nested NMI. */ @@ -1438,16 +1418,16 @@ nmi_restore: addq $6*8, %rsp /* - * Clear "NMI executing". Set DF first so that we can easily - * distinguish the remaining code between here and IRET from - * the SYSCALL entry and exit paths. - * - * We arguably should just inspect RIP instead, but I (Andy) wrote - * this code when I had the misapprehension that Xen PV supported - * NMIs, and Xen PV would break that approach. + * Clear "NMI executing". It also leaves a window after it before + * iret which should be also considered to be "NMI executing" albeit + * with "NMI executing" variable being zero. So we should also check + * the RIP after it when checking "NMI executing". See the code + * before nested_nmi. No code is allowed to be added to between + * clearing "NMI executing" and iret unless we check a larger window + * with a range of RIPs instead of currently a single-RIP window. */ - std movq $0, 5*8(%rsp) /* clear "NMI executing" */ +nmi_executing_cleared: /* * iretq reads the "iret" frame and exits the NMI stack in a -- 2.19.1.6.gb485710b