Move the low-level context switch code to an out-of-line asm stub instead of
using complex inline asm.  This allows constructing a new stack frame for the
child process to make it seamlessly flow to ret_from_fork without an extra
test and branch in __switch_to().  It also improves code generation for
__schedule() by using the C calling convention instead of clobbering all
registers.

Signed-off-by: Brian Gerst <brge...@gmail.com>
---
 arch/x86/entry/entry_32.S          |  37 ++++++++++
 arch/x86/entry/entry_64.S          |  41 ++++++++++-
 arch/x86/include/asm/processor.h   |   3 -
 arch/x86/include/asm/switch_to.h   | 137 ++++++-------------------------------
 arch/x86/include/asm/thread_info.h |   2 -
 arch/x86/kernel/asm-offsets.c      |   6 ++
 arch/x86/kernel/asm-offsets_32.c   |   5 ++
 arch/x86/kernel/asm-offsets_64.c   |   5 ++
 arch/x86/kernel/process_32.c       |   9 ++-
 arch/x86/kernel/process_64.c       |   9 ++-
 arch/x86/kernel/smpboot.c          |   1 -
 11 files changed, 125 insertions(+), 130 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 983e5d3..e8302b5 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -204,6 +204,43 @@
        POP_GS_EX
 .endm
 
+/*
+ * %eax: prev task
+ * %edx: next task
+ */
+ENTRY(__switch_to_asm)
+       /*
+        * Save callee-saved registers
+        * This must match the order in struct inactive_task_frame
+        */
+       pushl   %ebp
+       pushl   %ebx
+       pushl   %edi
+       pushl   %esi
+
+       /* switch stack */
+       movl    %esp, TASK_threadsp(%eax)
+       movl    TASK_threadsp(%edx), %esp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+       movl    TASK_stack_canary(%edx), %ebx
+       movl    %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
+#endif
+
+       /* restore callee-saved registers */
+       popl    %esi
+       popl    %edi
+       popl    %ebx
+       popl    %ebp
+
+       jmp     __switch_to
+END(__switch_to_asm)
+
+/*
+ * A newly forked process directly context switches into this address.
+ *
+ * eax: prev task we switched from
+ */
 ENTRY(ret_from_fork)
        pushl   %eax
        call    schedule_tail
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9ee0da1..7574528 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -365,13 +365,48 @@ END(ptregs_\func)
 #include <asm/syscalls_64.h>
 
 /*
+ * %rdi: prev task
+ * %rsi: next task
+ */
+ENTRY(__switch_to_asm)
+       /*
+        * Save callee-saved registers
+        * This must match the order in inactive_task_frame
+        */
+       pushq   %rbp
+       pushq   %rbx
+       pushq   %r12
+       pushq   %r13
+       pushq   %r14
+       pushq   %r15
+
+       /* switch stack */
+       movq    %rsp, TASK_threadsp(%rdi)
+       movq    TASK_threadsp(%rsi), %rsp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+       movq    TASK_stack_canary(%rsi), %rbx
+       movq    %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+#endif
+
+       /* restore callee-saved registers */
+       popq    %r15
+       popq    %r14
+       popq    %r13
+       popq    %r12
+       popq    %rbx
+       popq    %rbp
+
+       jmp     __switch_to
+END(__switch_to_asm)
+
+/*
  * A newly forked process directly context switches into this address.
  *
- * rdi: prev task we switched from
+ * rax: prev task we switched from
  */
 ENTRY(ret_from_fork)
-       LOCK ; btr $TIF_FORK, TI_flags(%r8)
-
+       movq    %rax, %rdi
        call    schedule_tail                   /* rdi: 'prev' task parameter */
 
        testb   $3, CS(%rsp)                    /* from kernel_thread? */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 965c5d2..1e7d634 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -385,9 +385,6 @@ struct thread_struct {
        unsigned short          fsindex;
        unsigned short          gsindex;
 #endif
-#ifdef CONFIG_X86_32
-       unsigned long           ip;
-#endif
 #ifdef CONFIG_X86_64
        unsigned long           fsbase;
        unsigned long           gsbase;
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 02de86e..bf4e2ec 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -2,135 +2,40 @@
 #define _ASM_X86_SWITCH_TO_H
 
 struct task_struct; /* one of the stranger aspects of C forward declarations */
+
+struct task_struct *__switch_to_asm(struct task_struct *prev,
+                                   struct task_struct *next);
+
 __visible struct task_struct *__switch_to(struct task_struct *prev,
-                                          struct task_struct *next);
+                                         struct task_struct *next);
 struct tss_struct;
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
                      struct tss_struct *tss);
 
 /* data that is pointed to by thread.sp */
 struct inactive_task_frame {
+#ifdef CONFIG_X86_64
+       unsigned long r15;
+       unsigned long r14;
+       unsigned long r13;
+       unsigned long r12;
+#else
+       unsigned long si;
+       unsigned long di;
+#endif
+       unsigned long bx;
        unsigned long bp;
+       unsigned long ret_addr;
 };
 
-#ifdef CONFIG_X86_32
-
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __switch_canary                                                        
\
-       "movl %P[task_canary](%[next]), %%ebx\n\t"                      \
-       "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
-#define __switch_canary_oparam                                         \
-       , [stack_canary] "=m" (stack_canary.canary)
-#define __switch_canary_iparam                                         \
-       , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
-#else  /* CC_STACKPROTECTOR */
-#define __switch_canary
-#define __switch_canary_oparam
-#define __switch_canary_iparam
-#endif /* CC_STACKPROTECTOR */
+struct fork_frame {
+       struct inactive_task_frame frame;
+       struct pt_regs regs;
+};
 
-/*
- * Saving eflags is important. It switches not only IOPL between tasks,
- * it also protects other tasks from NT leaking through sysenter etc.
- */
 #define switch_to(prev, next, last)                                    \
 do {                                                                   \
-       /*                                                              \
-        * Context-switching clobbers all registers, so we clobber      \
-        * them explicitly, via unused output variables.                \
-        * (EAX and EBP is not listed because EBP is saved/restored     \
-        * explicitly for wchan access and EAX is the return value of   \
-        * __switch_to())                                               \
-        */                                                             \
-       unsigned long ebx, ecx, edx, esi, edi;                          \
-                                                                       \
-       asm volatile("pushl %%ebp\n\t"          /* save    EBP   */     \
-                    "movl %%esp,%[prev_sp]\n\t"        /* save    ESP   */ \
-                    "movl %[next_sp],%%esp\n\t"        /* restore ESP   */ \
-                    "movl $1f,%[prev_ip]\n\t"  /* save    EIP   */     \
-                    "pushl %[next_ip]\n\t"     /* restore EIP   */     \
-                    __switch_canary                                    \
-                    "jmp __switch_to\n"        /* regparm call  */     \
-                    "1:\t"                                             \
-                    "popl %%ebp\n\t"           /* restore EBP   */     \
-                                                                       \
-                    /* output parameters */                            \
-                    : [prev_sp] "=m" (prev->thread.sp),                \
-                      [prev_ip] "=m" (prev->thread.ip),                \
-                      "=a" (last),                                     \
-                                                                       \
-                      /* clobbered output registers: */                \
-                      "=b" (ebx), "=c" (ecx), "=d" (edx),              \
-                      "=S" (esi), "=D" (edi)                           \
-                                                                       \
-                      __switch_canary_oparam                           \
-                                                                       \
-                      /* input parameters: */                          \
-                    : [next_sp]  "m" (next->thread.sp),                \
-                      [next_ip]  "m" (next->thread.ip),                \
-                                                                       \
-                      /* regparm parameters for __switch_to(): */      \
-                      [prev]     "a" (prev),                           \
-                      [next]     "d" (next)                            \
-                                                                       \
-                      __switch_canary_iparam                           \
-                                                                       \
-                    : /* reloaded segment registers */                 \
-                       "memory");                                      \
+       ((last) = __switch_to_asm((prev), (next)));                     \
 } while (0)
 
-#else /* CONFIG_X86_32 */
-
-/* frame pointer must be last for get_wchan */
-#define SAVE_CONTEXT    "pushq %%rbp ; movq %%rsi,%%rbp\n\t"
-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t"
-
-#define __EXTRA_CLOBBER  \
-       , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
-         "r12", "r13", "r14", "r15", "flags"
-
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __switch_canary                                                        
  \
-       "movq %P[task_canary](%%rsi),%%r8\n\t"                            \
-       "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
-#define __switch_canary_oparam                                           \
-       , [gs_canary] "=m" (irq_stack_union.stack_canary)
-#define __switch_canary_iparam                                           \
-       , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
-#else  /* CC_STACKPROTECTOR */
-#define __switch_canary
-#define __switch_canary_oparam
-#define __switch_canary_iparam
-#endif /* CC_STACKPROTECTOR */
-
-/*
- * There is no need to save or restore flags, because flags are always
- * clean in kernel mode, with the possible exception of IOPL.  Kernel IOPL
- * has no effect.
- */
-#define switch_to(prev, next, last) \
-       asm volatile(SAVE_CONTEXT                                         \
-            "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */       \
-            "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */    \
-            "call __switch_to\n\t"                                       \
-            "movq "__percpu_arg([current_task])",%%rsi\n\t"              \
-            __switch_canary                                              \
-            "movq %P[thread_info](%%rsi),%%r8\n\t"                       \
-            "movq %%rax,%%rdi\n\t"                                       \
-            "testl  %[_tif_fork],%P[ti_flags](%%r8)\n\t"                 \
-            "jnz   ret_from_fork\n\t"                                    \
-            RESTORE_CONTEXT                                              \
-            : "=a" (last)                                                \
-              __switch_canary_oparam                                     \
-            : [next] "S" (next), [prev] "D" (prev),                      \
-              [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
-              [ti_flags] "i" (offsetof(struct thread_info, flags)),      \
-              [_tif_fork] "i" (_TIF_FORK),                               \
-              [thread_info] "i" (offsetof(struct task_struct, stack)),   \
-              [current_task] "m" (current_task)                          \
-              __switch_canary_iparam                                     \
-            : "memory", "cc" __EXTRA_CLOBBER)
-
-#endif /* CONFIG_X86_32 */
-
 #endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/include/asm/thread_info.h 
b/arch/x86/include/asm/thread_info.h
index 30c133a..20d56ec 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -99,7 +99,6 @@ struct thread_info {
 #define TIF_UPROBE             12      /* breakpointed or singlestepping */
 #define TIF_NOTSC              16      /* TSC is not accessible in userland */
 #define TIF_IA32               17      /* IA32 compatibility process */
-#define TIF_FORK               18      /* ret_from_fork */
 #define TIF_NOHZ               19      /* in adaptive nohz mode */
 #define TIF_MEMDIE             20      /* is terminating due to OOM killer */
 #define TIF_POLLING_NRFLAG     21      /* idle is polling for TIF_NEED_RESCHED 
*/
@@ -123,7 +122,6 @@ struct thread_info {
 #define _TIF_UPROBE            (1 << TIF_UPROBE)
 #define _TIF_NOTSC             (1 << TIF_NOTSC)
 #define _TIF_IA32              (1 << TIF_IA32)
-#define _TIF_FORK              (1 << TIF_FORK)
 #define _TIF_NOHZ              (1 << TIF_NOHZ)
 #define _TIF_POLLING_NRFLAG    (1 << TIF_POLLING_NRFLAG)
 #define _TIF_IO_BITMAP         (1 << TIF_IO_BITMAP)
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 674134e..ec41c79 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -29,6 +29,12 @@
 
 void common(void) {
        BLANK();
+       OFFSET(TASK_threadsp, task_struct, thread.sp);
+#ifdef CONFIG_CC_STACKPROTECTOR
+       OFFSET(TASK_stack_canary, task_struct, stack_canary);
+#endif
+
+       BLANK();
        OFFSET(TI_flags, thread_info, flags);
        OFFSET(TI_status, thread_info, status);
        OFFSET(TI_addr_limit, thread_info, addr_limit);
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index ecdc1d2..880aa09 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -57,6 +57,11 @@ void foo(void)
        /* Size of SYSENTER_stack */
        DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct 
*)0)->SYSENTER_stack));
 
+#ifdef CONFIG_CC_STACKPROTECTOR
+       BLANK();
+       OFFSET(stack_canary_offset, stack_canary, canary);
+#endif
+
 #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || 
defined(CONFIG_LGUEST_MODULE)
        BLANK();
        OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index d875f97..210927e 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -56,6 +56,11 @@ int main(void)
        OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
        BLANK();
 
+#ifdef CONFIG_CC_STACKPROTECTOR
+       DEFINE(stack_canary_offset, offsetof(union irq_stack_union, 
stack_canary));
+       BLANK();
+#endif
+
        DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
        DEFINE(NR_syscalls, sizeof(syscalls_64));
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 9f95091..81a82f5 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -133,17 +133,20 @@ int copy_thread_tls(unsigned long clone_flags, unsigned 
long sp,
        unsigned long arg, struct task_struct *p, unsigned long tls)
 {
        struct pt_regs *childregs = task_pt_regs(p);
+       struct fork_frame *fork_frame = container_of(childregs, struct 
fork_frame, regs);
+       struct inactive_task_frame *frame = &fork_frame->frame;
        struct task_struct *tsk;
        int err;
 
-       p->thread.sp = (unsigned long) childregs;
+       frame->bp = 0;
+       p->thread.sp = (unsigned long) fork_frame;
        p->thread.sp0 = (unsigned long) (childregs+1);
        memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
        if (unlikely(p->flags & PF_KTHREAD)) {
                /* kernel thread */
                memset(childregs, 0, sizeof(struct pt_regs));
-               p->thread.ip = (unsigned long) ret_from_kernel_thread;
+               frame->ret_addr = (unsigned long) ret_from_kernel_thread;
                task_user_gs(p) = __KERNEL_STACK_CANARY;
                childregs->ds = __USER_DS;
                childregs->es = __USER_DS;
@@ -161,7 +164,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned 
long sp,
        if (sp)
                childregs->sp = sp;
 
-       p->thread.ip = (unsigned long) ret_from_fork;
+       frame->ret_addr = (unsigned long) ret_from_fork;
        task_user_gs(p) = get_user_gs(current_pt_regs());
 
        p->thread.io_bitmap_ptr = NULL;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6e789ca..3ac9522 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -141,12 +141,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned 
long sp,
 {
        int err;
        struct pt_regs *childregs;
+       struct fork_frame *fork_frame;
+       struct inactive_task_frame *frame;
        struct task_struct *me = current;
 
        p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
        childregs = task_pt_regs(p);
-       p->thread.sp = (unsigned long) childregs;
-       set_tsk_thread_flag(p, TIF_FORK);
+       fork_frame = container_of(childregs, struct fork_frame, regs);
+       frame = &fork_frame->frame;
+       frame->bp = 0;
+       frame->ret_addr = (unsigned long) ret_from_fork;
+       p->thread.sp = (unsigned long) fork_frame;
        p->thread.io_bitmap_ptr = NULL;
 
        savesegment(gs, p->thread.gsindex);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2ed0ec1..021eb02 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -935,7 +935,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct 
*idle)
        per_cpu(cpu_current_top_of_stack, cpu) =
                (unsigned long)task_stack_page(idle) + THREAD_SIZE;
 #else
-       clear_tsk_thread_flag(idle, TIF_FORK);
        initial_gs = per_cpu_offset(cpu);
 #endif
 }
-- 
2.5.5

Reply via email to