PER_CPU_VAR(kernel_stack) was set up in a way where it points
five stack slots below the top of stack.

Presumably, it was done to avoid one "sub $5*8,%rsp"
in syscall/sysenter code paths, where iret frame needs to be
created by hand.

Ironically, none of them benefits from this optimization,
since all of them need to allocate additional data on stack
(struct pt_regs), so they still have to perform subtraction.

This patch eliminates KERNEL_STACK_OFFSET.

PER_CPU_VAR(kernel_stack) now points directly to top of stack.
pt_regs allocations are adjusted to allocate iret frame as well.
Hopefully we can merge it later with 32-bit specific
PER_CPU_VAR(cpu_current_top_of_stack) variable...

Semi-mysterious expressions THREAD_INFO(%rsp,RIP) - "why RIP??"
are now replaced by more logical THREAD_INFO(%rsp,SIZEOF_PTREGS) -
"calculate thread_info's address using information that
rsp is SIZEOF_PTREGS bytes below the stack top".

Net result in generated code is that constants in several insns
are changed.

This change is necessary for changing struct pt_regs creation
in SYSCALL64 code path from MOV to PUSH instructions.

Signed-off-by: Denys Vlasenko <dvlas...@redhat.com>
CC: Linus Torvalds <torva...@linux-foundation.org>
CC: Steven Rostedt <rost...@goodmis.org>
CC: Ingo Molnar <mi...@kernel.org>
CC: Borislav Petkov <b...@alien8.de>
CC: "H. Peter Anvin" <h...@zytor.com>
CC: Andy Lutomirski <l...@amacapital.net>
CC: Oleg Nesterov <o...@redhat.com>
CC: Frederic Weisbecker <fweis...@gmail.com>
CC: Alexei Starovoitov <a...@plumgrid.com>
CC: Will Drewry <w...@chromium.org>
CC: Kees Cook <keesc...@chromium.org>
CC: x...@kernel.org
CC: linux-kernel@vger.kernel.org
---
 arch/x86/ia32/ia32entry.S          | 34 +++++++++++++++++-----------------
 arch/x86/include/asm/thread_info.h |  5 ++---
 arch/x86/kernel/cpu/common.c       |  2 +-
 arch/x86/kernel/entry_64.S         |  9 ++++-----
 arch/x86/kernel/process_32.c       |  2 +-
 arch/x86/kernel/process_64.c       |  3 +--
 arch/x86/kernel/smpboot.c          |  3 +--
 arch/x86/xen/smp.c                 |  3 +--
 8 files changed, 28 insertions(+), 33 deletions(-)

diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index ad9efef..acbff3f 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -127,7 +127,7 @@ ENTRY(ia32_sysenter_target)
        CFI_REL_OFFSET rsp,0
        pushfq_cfi
        /*CFI_REL_OFFSET rflags,0*/
-       movl    
TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
+       movl    TI_sysenter_return+THREAD_INFO(%rsp,3*8),%r10d
        CFI_REGISTER rip,r10
        pushq_cfi $__USER32_CS
        /*CFI_REL_OFFSET cs,0*/
@@ -159,8 +159,8 @@ ENTRY(ia32_sysenter_target)
        jnz sysenter_fix_flags
 sysenter_flags_fixed:
 
-       orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
-       testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP)
+       orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,SIZEOF_PTREGS)
+       testl   
$_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
        CFI_REMEMBER_STATE
        jnz  sysenter_tracesys
        cmpq    $(IA32_NR_syscalls-1),%rax
@@ -177,10 +177,10 @@ sysenter_dispatch:
        movq    %rax,RAX(%rsp)
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
-       testl   $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP)
+       testl   $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
        jnz     sysexit_audit
 sysexit_from_sys_call:
-       andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
+       andl    $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,SIZEOF_PTREGS)
        /* clear IF, that popfq doesn't enable interrupts early */
        andl    $~0x200,EFLAGS(%rsp)
        movl    RIP(%rsp),%edx          /* User %eip */
@@ -225,7 +225,7 @@ sysexit_from_sys_call:
        .endm
 
        .macro auditsys_exit exit
-       testl $(_TIF_ALLWORK_MASK & 
~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP)
+       testl $(_TIF_ALLWORK_MASK & 
~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
        jnz ia32_ret_from_sys_call
        TRACE_IRQS_ON
        ENABLE_INTERRUPTS(CLBR_NONE)
@@ -240,7 +240,7 @@ sysexit_from_sys_call:
        movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
-       testl %edi,TI_flags+THREAD_INFO(%rsp,RIP)
+       testl %edi,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
        jz \exit
        CLEAR_RREGS
        jmp int_with_check
@@ -262,7 +262,7 @@ sysenter_fix_flags:
 
 sysenter_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-       testl   $(_TIF_WORK_SYSCALL_ENTRY & 
~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP)
+       testl   $(_TIF_WORK_SYSCALL_ENTRY & 
~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
        jz      sysenter_auditsys
 #endif
        SAVE_EXTRA_REGS
@@ -311,7 +311,7 @@ ENDPROC(ia32_sysenter_target)
 ENTRY(ia32_cstar_target)
        CFI_STARTPROC32 simple
        CFI_SIGNAL_FRAME
-       CFI_DEF_CFA     rsp,KERNEL_STACK_OFFSET
+       CFI_DEF_CFA     rsp,0
        CFI_REGISTER    rip,rcx
        /*CFI_REGISTER  rflags,r11*/
        SWAPGS_UNSAFE_STACK
@@ -323,7 +323,7 @@ ENTRY(ia32_cstar_target)
         * disabled irqs and here we enable it straight after entry:
         */
        ENABLE_INTERRUPTS(CLBR_NONE)
-       ALLOC_PT_GPREGS_ON_STACK 8      /* +8: space for orig_ax */
+       ALLOC_PT_GPREGS_ON_STACK 6*8 /* 6*8: space for orig_ax and iret frame */
        SAVE_C_REGS_EXCEPT_RCX_R891011
        movl    %eax,%eax       /* zero extension */
        movq    %rax,ORIG_RAX(%rsp)
@@ -346,8 +346,8 @@ ENTRY(ia32_cstar_target)
 1:     movl    (%r8),%r9d
        _ASM_EXTABLE(1b,ia32_badarg)
        ASM_CLAC
-       orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
-       testl   $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP)
+       orl     $TS_COMPAT,TI_status+THREAD_INFO(%rsp,SIZEOF_PTREGS)
+       testl   
$_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
        CFI_REMEMBER_STATE
        jnz   cstar_tracesys
        cmpq $IA32_NR_syscalls-1,%rax
@@ -364,10 +364,10 @@ cstar_dispatch:
        movq %rax,RAX(%rsp)
        DISABLE_INTERRUPTS(CLBR_NONE)
        TRACE_IRQS_OFF
-       testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP)
+       testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
        jnz sysretl_audit
 sysretl_from_sys_call:
-       andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
+       andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,SIZEOF_PTREGS)
        RESTORE_RSI_RDI_RDX
        movl RIP(%rsp),%ecx
        CFI_REGISTER rip,rcx
@@ -402,7 +402,7 @@ sysretl_audit:
 
 cstar_tracesys:
 #ifdef CONFIG_AUDITSYSCALL
-       testl $(_TIF_WORK_SYSCALL_ENTRY & 
~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP)
+       testl $(_TIF_WORK_SYSCALL_ENTRY & 
~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
        jz cstar_auditsys
 #endif
        xchgl %r9d,%ebp
@@ -469,8 +469,8 @@ ENTRY(ia32_syscall)
           this could be a problem. */
        ALLOC_PT_GPREGS_ON_STACK
        SAVE_C_REGS_EXCEPT_R891011
-       orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP)
-       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP)
+       orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,SIZEOF_PTREGS)
+       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
        jnz ia32_tracesys
        cmpq $(IA32_NR_syscalls-1),%rax
        ja ia32_badsys
diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index ba115eb..5381cb9 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -172,7 +172,6 @@ struct thread_info {
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
 
 #define STACK_WARN             (THREAD_SIZE/8)
-#define KERNEL_STACK_OFFSET    (5*(BITS_PER_LONG/8))
 
 /*
  * macros/functions for gaining access to the thread information structure
@@ -204,13 +203,13 @@ static inline unsigned long current_stack_pointer(void)
 /* how to get the thread information struct from ASM */
 #define GET_THREAD_INFO(reg) \
        _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
-       _ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ;
+       _ASM_SUB $(THREAD_SIZE),reg ;
 
 /*
  * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
  * a certain register (to be used in assembler memory operands).
  */
-#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
+#define THREAD_INFO(reg, off) (off)-THREAD_SIZE(reg)
 
 #endif
 
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index e45ba5e..34719f3 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1173,7 +1173,7 @@ static __init int setup_disablecpuid(char *arg)
 __setup("clearcpuid=", setup_disablecpuid);
 
 DEFINE_PER_CPU(unsigned long, kernel_stack) =
-       (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+       (unsigned long)&init_thread_union + THREAD_SIZE;
 EXPORT_PER_CPU_SYMBOL(kernel_stack);
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 0c91256..ecb68d8 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -225,7 +225,7 @@ ENDPROC(native_usergs_sysret64)
 ENTRY(system_call)
        CFI_STARTPROC   simple
        CFI_SIGNAL_FRAME
-       CFI_DEF_CFA     rsp,KERNEL_STACK_OFFSET
+       CFI_DEF_CFA     rsp,0
        CFI_REGISTER    rip,rcx
        /*CFI_REGISTER  rflags,r11*/
        SWAPGS_UNSAFE_STACK
@@ -242,9 +242,8 @@ GLOBAL(system_call_after_swapgs)
         * so we can enable interrupts only after we're done with using 
rsp_scratch:
         */
        movq    %rsp,PER_CPU_VAR(rsp_scratch)
-       /* kernel_stack is set so that 5 slots (iret frame) are preallocated */
        movq    PER_CPU_VAR(kernel_stack),%rsp
-       ALLOC_PT_GPREGS_ON_STACK 8              /* +8: space for orig_ax */
+       ALLOC_PT_GPREGS_ON_STACK 6*8 /* 6*8: space for orig_ax and iret frame */
        movq    %rcx,RIP(%rsp)
        movq    PER_CPU_VAR(rsp_scratch),%rcx
        movq    %r11,EFLAGS(%rsp)
@@ -258,7 +257,7 @@ GLOBAL(system_call_after_swapgs)
        SAVE_C_REGS_EXCEPT_RAX_RCX_R11
        movq    $-ENOSYS,RAX(%rsp)
        CFI_REL_OFFSET rip,RIP
-       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP)
+       testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
        jnz tracesys
 system_call_fastpath:
 #if __SYSCALL_MASK == ~0
@@ -276,7 +275,7 @@ system_call_fastpath:
  * Has incompletely filled pt_regs, iret frame is also incomplete.
  */
 ret_from_sys_call:
-       testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP)
+       testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
        jnz int_ret_from_sys_call_fixup /* Go the the slow path */
 
        LOCKDEP_SYS_EXIT
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 1b9963f..5a4c2f8 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -312,7 +312,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct 
*next_p)
        load_sp0(tss, next);
        this_cpu_write(kernel_stack,
                       (unsigned long)task_stack_page(next_p) +
-                      THREAD_SIZE - KERNEL_STACK_OFFSET);
+                      THREAD_SIZE);
        this_cpu_write(cpu_current_top_of_stack,
                       (unsigned long)task_stack_page(next_p) +
                       THREAD_SIZE);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 97f5658..db49063 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -409,8 +409,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct 
*next_p)
        load_sp0(tss, next);
 
        this_cpu_write(kernel_stack,
-                 (unsigned long)task_stack_page(next_p) +
-                 THREAD_SIZE - KERNEL_STACK_OFFSET);
+               (unsigned long)task_stack_page(next_p) + THREAD_SIZE);
 
        /*
         * Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 759388c..7b20ffd 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -813,8 +813,7 @@ static int do_boot_cpu(int apicid, int cpu, struct 
task_struct *idle)
        initial_gs = per_cpu_offset(cpu);
 #endif
        per_cpu(kernel_stack, cpu) =
-               (unsigned long)task_stack_page(idle) -
-               KERNEL_STACK_OFFSET + THREAD_SIZE;
+               (unsigned long)task_stack_page(idle) + THREAD_SIZE;
        early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
        initial_code = (unsigned long)start_secondary;
        stack_start  = idle->thread.sp;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 08e8489..765b768 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -452,8 +452,7 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct 
*idle)
        clear_tsk_thread_flag(idle, TIF_FORK);
 #endif
        per_cpu(kernel_stack, cpu) =
-               (unsigned long)task_stack_page(idle) -
-               KERNEL_STACK_OFFSET + THREAD_SIZE;
+               (unsigned long)task_stack_page(idle) + THREAD_SIZE;
 
        xen_setup_runstate_info(cpu);
        xen_setup_timer(cpu);
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to