From: Lai Jiangshan <la...@linux.alibaba.com>

The commit 929bacec21478("x86/entry/64: De-Xen-ify our NMI code") simplified
the NMI code by changing paravirt code into native code and left a comment
about "inspecting RIP instead".  But until now, "inspecting RIP instead"
has not been made happened and this patch tries to complete it.

Signed-off-by: Lai Jiangshan <la...@linux.alibaba.com>
---
 arch/x86/entry/entry_64.S | 46 +++++++++++----------------------------
 1 file changed, 13 insertions(+), 33 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index cad08703c4ad..cb6b8a6c6652 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1268,32 +1268,12 @@ SYM_CODE_START(asm_exc_nmi)
        je      nested_nmi
 
        /*
-        * Now test if the previous stack was an NMI stack.  This covers
-        * the case where we interrupt an outer NMI after it clears
-        * "NMI executing" but before IRET.  We need to be careful, though:
-        * there is one case in which RSP could point to the NMI stack
-        * despite there being no NMI active: naughty userspace controls
-        * RSP at the very beginning of the SYSCALL targets.  We can
-        * pull a fast one on naughty userspace, though: we program
-        * SYSCALL to mask DF, so userspace cannot cause DF to be set
-        * if it controls the kernel's RSP.  We set DF before we clear
-        * "NMI executing".
+        * Now test if we interrupt an outer NMI after it clears
+        * "NMI executing" but before iret.
         */
-       lea     6*8(%rsp), %rdx
-       /* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) 
*/
-       cmpq    %rdx, 4*8(%rsp)
-       /* If the stack pointer is above the NMI stack, this is a normal NMI */
-       ja      first_nmi
-
-       subq    $EXCEPTION_STKSZ, %rdx
-       cmpq    %rdx, 4*8(%rsp)
-       /* If it is below the NMI stack, it is a normal NMI */
-       jb      first_nmi
-
-       /* Ah, it is within the NMI stack. */
-
-       testb   $(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp)
-       jz      first_nmi       /* RSP was user controlled. */
+       movq    $nmi_executing_cleared, %rdx
+       cmpq    8(%rsp), %rdx
+       jne     first_nmi
 
        /* This is a nested NMI. */
 
@@ -1438,16 +1418,16 @@ nmi_restore:
        addq    $6*8, %rsp
 
        /*
-        * Clear "NMI executing".  Set DF first so that we can easily
-        * distinguish the remaining code between here and IRET from
-        * the SYSCALL entry and exit paths.
-        *
-        * We arguably should just inspect RIP instead, but I (Andy) wrote
-        * this code when I had the misapprehension that Xen PV supported
-        * NMIs, and Xen PV would break that approach.
+        * Clear "NMI executing".  It also leaves a window after it before
+        * iret which should be also considered to be "NMI executing" albeit
+        * with "NMI executing" variable being zero.  So we should also check
+        * the RIP after it when checking "NMI executing".  See the code
+        * before nested_nmi.  No code is allowed to be added to between
+        * clearing "NMI executing" and iret unless we check a larger window
+        * with a range of RIPs instead of currently a single-RIP window.
         */
-       std
        movq    $0, 5*8(%rsp)           /* clear "NMI executing" */
+nmi_executing_cleared:
 
        /*
         * iretq reads the "iret" frame and exits the NMI stack in a
-- 
2.19.1.6.gb485710b

Reply via email to