On Thu, 2012-09-13 at 18:36 +0800, tiejun.chen wrote:
> On 09/12/2012 06:38 PM, Benjamin Herrenschmidt wrote:
> > On Wed, 2012-09-12 at 16:55 +0800, tiejun.chen wrote:
> >>> to worry about nor stack frame to create etc...
> >>
> >> If you don't like this v4, let me know and then I can go back to memcpy
> >> for the next version.
> >
> > Just open code the whole copy. It should be easy really. As I said, you
> > have the src and dst already in registers and you know they are aligned,
> > so just put the size of the frame in a register (divided by 4), do an
> > mtctr and do a little load_update/store_update loop to do the copy, all
> > in the asm.
>
> Is the following Okay?
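The loop suggested above is only a handful of instructions. A rough sketch
-- the register choices and the use of INT_FRAME_SIZE here are illustrative
only, not lifted from the patch, and untested:

	/* r4 = src frame, r3 = dst frame, both word-aligned */
	li	r5,INT_FRAME_SIZE/4	/* frame size in words */
	mtctr	r5
	addi	r4,r4,-4		/* bias pointers for the update forms */
	addi	r3,r3,-4
1:	lwzu	r0,4(r4)		/* load a word, advance src */
	stwu	r0,4(r3)		/* store it, advance dst */
	bdnz	1b

Everything stays inline in the exception return path: no call out of line
and no extra stack frame to set up.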
Well, why did you bother with the flushes ? One of the main reasons I
wasn't too happy with hijacking copy_and_flush is that ... you really
don't need to bother about flushing the cache :-)

The flush in that routine is about copying kernel code around and
making sure the I/D caches stay in sync (a flush-less sketch of the
copy follows the quoted patch below).

Cheers,
Ben.

> ---
>  arch/powerpc/kernel/entry_32.S |   55 +++++++++++++++++++++++++++++++++++-----
>  arch/powerpc/kernel/entry_64.S |   45 ++++++++++++++++++++++++++++++++
>  2 files changed, 94 insertions(+), 6 deletions(-)
>
> diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
> index ead5016..3b56bba 100644
> --- a/arch/powerpc/kernel/entry_32.S
> +++ b/arch/powerpc/kernel/entry_32.S
> @@ -32,6 +32,7 @@
>  #include <asm/unistd.h>
>  #include <asm/ftrace.h>
>  #include <asm/ptrace.h>
> +#include <asm/cache.h>
>
>  #undef SHOW_SYSCALLS
>  #undef SHOW_SYSCALLS_TASK
> @@ -831,19 +832,63 @@ restore_user:
>  	bnel-	load_dbcr0
>  #endif
>
> -#ifdef CONFIG_PREEMPT
>  	b	restore
>
>  /* N.B. the only way to get here is from the beq following ret_from_except. */
>  resume_kernel:
> -	/* check current_thread_info->preempt_count */
> +	/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
>  	CURRENT_THREAD_INFO(r9, r1)
> +	lwz	r8,TI_FLAGS(r9)
> +	andis.	r8,r8,_TIF_EMULATE_STACK_STORE@h
> +	beq+	1f
> +
> +	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */
> +
> +	lwz	r3,GPR1(r1)
> +	subi	r3,r3,INT_FRAME_SIZE	/* dst: Allocate a trampoline exception frame */
> +	mr	r4,r1			/* src: current exception frame */
> +	li	r5,INT_FRAME_SIZE	/* size: INT_FRAME_SIZE */
> +	li	r6,0			/* start offset: 0 */
> +	mr	r1,r3			/* Reroute the trampoline frame to r1 */
> +
> +	/* Copy from the original to the trampoline. */
> +	addi	r5,r5,-4
> +	addi	r6,r6,-4
> +4:	li	r0,L1_CACHE_BYTES/4
> +	mtctr	r0
> +3:	addi	r6,r6,4		/* copy a cache line */
> +	lwzx	r0,r6,r4
> +	stwx	r0,r6,r3
> +	bdnz	3b
> +	dcbst	r6,r3		/* write it to memory */
> +	sync
> +	cmplw	0,r6,r5
> +	blt	4b
> +
> +	/* Do real store operation to complete stwu */
> +	lwz	r5,GPR1(r1)
> +	stw	r8,0(r5)
> +
> +	/* Clear _TIF_EMULATE_STACK_STORE flag */
> +	lis	r11,_TIF_EMULATE_STACK_STORE@h
> +	addi	r5,r9,TI_FLAGS
> +0:	lwarx	r8,0,r5
> +	andc	r8,r8,r11
> +#ifdef CONFIG_IBM405_ERR77
> +	dcbt	0,r5
> +#endif
> +	stwcx.	r8,0,r5
> +	bne-	0b
> +1:
> +
> +#ifdef CONFIG_PREEMPT
> +	/* check current_thread_info->preempt_count */
>  	lwz	r0,TI_PREEMPT(r9)
>  	cmpwi	0,r0,0		/* if non-zero, just restore regs and return */
>  	bne	restore
> -	lwz	r0,TI_FLAGS(r9)
> -	andi.	r0,r0,_TIF_NEED_RESCHED
> +	andi.	r8,r8,_TIF_NEED_RESCHED
>  	beq+	restore
> +	lwz	r3,_MSR(r1)
>  	andi.	r0,r3,MSR_EE	/* interrupts off? */
>  	beq	restore		/* don't schedule if so */
>  #ifdef CONFIG_TRACE_IRQFLAGS
> @@ -864,8 +909,6 @@ resume_kernel:
>  	 */
>  	bl	trace_hardirqs_on
>  #endif
> -#else
> -resume_kernel:
>  #endif /* CONFIG_PREEMPT */
>
>  	/* interrupts are hard-disabled at this point */
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index b40e0b4..cc43b64 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -593,6 +593,51 @@ _GLOBAL(ret_from_except_lite)
>  	b	.ret_from_except
>
>  resume_kernel:
> +	/* check current_thread_info, _TIF_EMULATE_STACK_STORE */
> +	CURRENT_THREAD_INFO(r9, r1)
> +	ld	r8,TI_FLAGS(r9)
> +	andis.	r8,r8,_TIF_EMULATE_STACK_STORE@h
> +	beq+	1f
> +
> +	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */
> +
> +	lwz	r3,GPR1(r1)
> +	subi	r3,r3,INT_FRAME_SIZE	/* dst: Allocate a trampoline exception frame */
> +	mr	r4,r1			/* src: current exception frame */
> +	li	r5,INT_FRAME_SIZE	/* size: INT_FRAME_SIZE */
> +	li	r6,0			/* start offset: 0 */
> +	mr	r1,r3			/* Reroute the trampoline frame to r1 */
> +
> +	/* Copy from the original to the trampoline. */
> +	addi	r5,r5,-8
> +	addi	r6,r6,-8
> +4:	li	r0,8
> +	mtctr	r0
> +3:	addi	r6,r6,8		/* copy a cache line */
> +	ldx	r0,r6,r4
> +	stdx	r0,r6,r3
> +	bdnz	3b
> +	dcbst	r6,r3		/* write it to memory */
> +	sync
> +	cmpld	0,r6,r5
> +	blt	4b
> +	sync
> +
> +	bl	.copy_and_flush
> +
> +	/* Do real store operation to complete stwu */
> +	lwz	r5,GPR1(r1)
> +	std	r8,0(r5)
> +
> +	/* Clear _TIF_EMULATE_STACK_STORE flag */
> +	lis	r11,_TIF_EMULATE_STACK_STORE@h
> +	addi	r5,r9,TI_FLAGS
> +	ldarx	r4,0,r5
> +	andc	r4,r4,r11
> +	stdcx.	r4,0,r5
> +	bne-	0b
> +1:
> +
>  #ifdef CONFIG_PREEMPT
>  	/* Check if we need to preempt */
>  	andi.	r0,r4,_TIF_NEED_RESCHED
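For comparison, without the cache flushing the 64-bit copy above collapses
into a plain counted loop. This is only an untested sketch -- it assumes the
same r3 = dst / r4 = src setup as the quoted hunk and uses the update forms
instead of the indexed loads/stores:

	li	r5,INT_FRAME_SIZE/8	/* frame size in doublewords */
	mtctr	r5
	addi	r4,r4,-8		/* bias pointers for ldu/stdu */
	addi	r3,r3,-8
2:	ldu	r0,8(r4)		/* load a doubleword, advance src */
	stdu	r0,8(r3)		/* store it, advance dst */
	bdnz	2b
	/* no dcbst/sync and no copy_and_flush: the frame is data, so
	   there is no I-cache to keep coherent with it */

The dcbst/sync pair and the copy_and_flush call only matter when the bytes
being copied are going to be executed.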