On 02/25/2015 01:48 PM, Denys Vlasenko wrote:
> On Wed, Feb 25, 2015 at 9:53 AM, Ingo Molnar <mi...@kernel.org> wrote:
>> But the fix should be to not touch RSP in SAVE_ARGS, to
>> keep percpu::kernel_stack as an optimized entry point -
>> with KERNEL_STACK_OFFSET pointing to.
>>
>> So NAK - this should be fixed for real.
> 
> IOW, the proposal is to set KERNEL_STACK_OFFSET
> to SIZEOF_PTREGS. I can do that.
> 
> However.
> 
> There is an orthogonal idea we were discussing: to save
> registers and construct iret frame using PUSH insns, not MOVs.
> IIRC Andy and Linus liked it. I am ambivalent: the code will be smaller,
> but might get slower (at least on some CPUs).
> If we go that way, we will require KERNEL_STACK_OFFSET = 0
> (IOW: the current patch).
> 
> The decision on how exactly we should fix KERNEL_STACK_OFFSET
> (set it to SIZEOF_PTREGS or to zero) depends on whether
> we switch to using PUSHes, or not. What do you think?

A data point. I implemented push-based creation of pt_regs
and benchmarked it. The patch is on top of all my latest
patches sent to ML.

On a Sandy Bridge CPU, it does not get slower: it seems to be 1 cycle
faster per syscall.

We lose a number of large insns there:

    text           data     bss     dec     hex filename
-   9863              0       0    9863    2687 entry_64.o
+   9671              0       0    9671    25c7 entry_64.o


diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index f505cb6..d97bd92 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -128,8 +128,6 @@ ENDPROC(native_usergs_sysret64)
  * manipulation.
  */
        .macro FIXUP_TOP_OF_STACK tmp offset=0
-       movq $__USER_DS,SS+\offset(%rsp)
-       movq $__USER_CS,CS+\offset(%rsp)
        movq RIP+\offset(%rsp),\tmp  /* get rip */
        movq \tmp,RCX+\offset(%rsp)  /* copy it to rcx as sysret would do */
        movq EFLAGS+\offset(%rsp),\tmp /* ditto for rflags->r11 */
@@ -245,14 +243,22 @@ GLOBAL(system_call_after_swapgs)
         * and short:
         */
        ENABLE_INTERRUPTS(CLBR_NONE)
-       ALLOC_PT_GPREGS_ON_STACK 6*8 /* 6*8: space for orig_ax and iret frame */
-       movq    %rcx,RIP(%rsp)
-       movq    %r11,EFLAGS(%rsp)
-       movq    PER_CPU_VAR(old_rsp),%rcx
-       movq    %rcx,RSP(%rsp)
-       movq_cfi rax,ORIG_RAX
-       SAVE_C_REGS_EXCEPT_RAX_RCX_R11
-       movq    $-ENOSYS,RAX(%rsp)
+       /* Construct struct pt_regs on stack */
+       pushq   $__USER_DS              /* pt_regs->ss */
+       pushq   PER_CPU_VAR(old_rsp)    /* pt_regs->sp */
+       pushq   %r11                    /* pt_regs->flags */
+       pushq   $__USER_CS              /* pt_regs->cs */
+       pushq   %rcx                    /* pt_regs->ip */
+       pushq   %rax                    /* pt_regs->orig_ax */
+       pushq   %rdi                    /* pt_regs->di */
+       pushq   %rsi                    /* pt_regs->si */
+       pushq   %rdx                    /* pt_regs->dx */
+       pushq   %rcx                    /* pt_regs->cx */
+       pushq   $-ENOSYS                /* pt_regs->ax */
+       pushq   %r8                     /* pt_regs->r8 */
+       pushq   %r9                     /* pt_regs->r9 */
+       pushq   %r10                    /* pt_regs->r10 */
+       sub     $(7*8),%rsp /* pt_regs->r11,bp,bx,r12-15 */
        CFI_REL_OFFSET rip,RIP
        testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,SIZEOF_PTREGS)
        jnz tracesys


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to