Jimi Xenidis <ji...@pobox.com> wrote: > > On Dec 7, 2012, at 7:38 AM, Jimi Xenidis <ji...@pobox.com> wrote: > > > > > On Dec 6, 2012, at 11:54 PM, Michael Neuling <mi...@neuling.org> wrote: > > > >>> commit 279c0615917b959a652e81f4ad0d886e2d426d85 > >>> Author: Jimi Xenidis <ji...@pobox.com> > >>> Date: Wed Dec 5 13:43:22 2012 -0500 > >>> > >>> powerpc/book3e: IBM Blue Gene/Q Quad Processing eXtention (QPX) > >>> > >>> This enables kernel support for the QPX extention and is intended for > >>> processors that support it, usually an IBM Blue Gene processor. > >>> Turning it on does not effect other processors but it does add code > >>> and will quadruple the per thread save and restore area for the FPU > >>> (hense the name). If you have enabled VSX it will only double the > >>> space. > >>> > >>> Signed-off-by: Jimi Xenidis <ji...@pobox.com> > >> > > <<snip>> > >> > >> > >> > >> +BEGIN_FTR_SECTION \ > >> + SAVE_32VSRS(n,c,base); \ > >> +END_FTR_SECTION_IFSET(CPU_FTR_VSX); > >> \ > >> +BEGIN_FTR_SECTION \ > >> + SAVE_32QRS(n,c,base); \ > >> +END_FTR_SECTION_IFSET(CPU_FTR_QPX); > >> > >> I don't think we want to do this. We are going to end up with 64 > >> NOPS here somewhere. > > > > Excellent point, NOPs are cheap on most processors but not A2 and a lot of > > embedded, I can wrap some branches with the FTR instead. > > Do you have a concern on the code size? > > Thought about it a bit and came up with this solution for > arch/powerpc/kernel/fpu.S. > This should address the following issues > - MSR_VSX vs MSR_VEC > - Big chunks of NOPs in the code path > - Less FTR space fixups at boot time. > - IMNHSO easier to read especially when disassembled
Indeed, I think it looks better. I was going to mention that it was already pretty complex to read, so a rewrite like this was probably needed. So thanks!! That being said, there is a pretty complex testing matrix of CONFIG_VSX/VMX/FPU/QPX/SMP/64/32 CPU_FTR/VSX/FPU/QPX/VMX so I'd need to look/test more carefully to make sure all of these are covered. Also, transactional memory (see http://lists.ozlabs.org/pipermail/linuxppc-dev/2012-November/102216.html) will change this code. You should rebase on top of that if you really want it considered for upstream. Mikey > > I did consider using the LR and BLR, but the !CONFIG_SMP case only adds one > more special block and uses a different register set. > Also if this is agreeable I would like us to consider removing the > *_32FPVSRS* macros entirely and put the FTR tests in the actual code. > This would allow us to use #ifdefs and reduce the amount of code that > actually gets compiled. > > Thoughts? > > diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S > index e0ada05..5964067 100644 > --- a/arch/powerpc/kernel/fpu.S > +++ b/arch/powerpc/kernel/fpu.S > @@ -25,30 +25,81 @@ > #include <asm/asm-offsets.h> > #include <asm/ptrace.h> > > -#ifdef CONFIG_VSX > -#define __REST_32FPVSRS(n,c,base) \ > -BEGIN_FTR_SECTION \ > - b 2f; \ > -END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ > - REST_32FPRS(n,base); \ > - b 3f; \ > -2: REST_32VSRS(n,c,base); \ > -3: > - > -#define __SAVE_32FPVSRS(n,c,base) \ > -BEGIN_FTR_SECTION \ > - b 2f; \ > -END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ > - SAVE_32FPRS(n,base); \ > - b 3f; \ > -2: SAVE_32VSRS(n,c,base); \ > -3: > -#else > -#define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) > -#define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) > -#endif > -#define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) > -#define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) > + > +/* > + * Restore subroutines, R4 is scratch and R5 is base > + */ > +vsx_restore: > + REST_32VSRS(0, __REG_R4, __REG_R5) > + b after_restore > +qpx_restore: > + REST_32QRS(0, __REG_R4, __REG_R5) > + b after_restore > +fpu_restore: > + REST_32FPRS(0, __REG_R5) > + b after_restore > + > +#define REST_32FPVSRS(n, c, base) \ > +BEGIN_FTR_SECTION \ > + b vsx_restore; \ > +END_FTR_SECTION_IFSET(CPU_FTR_VSX) \ > +BEGIN_FTR_SECTION \ > + b qpx_restore; \ > +END_FTR_SECTION_IFSET(CPU_FTR_QPX) \ > + b fpu_restore; \ > +after_restore: > + > +/* > + * Save subroutines, R4 is scratch and R3 is base > + */ > +vsx_save: > + SAVE_32VSRS(0, __REG_R4, __REG_R3) > + b after_save > +qpx_save: > + SAVE_32QRS(0, __REG_R4, __REG_R3) > + b after_save > +fpu_save: > + SAVE_32FPRS(0, __REG_R3) > + b after_save > + > +#define SAVE_32FPVSRS(n, c, base) \ > +BEGIN_FTR_SECTION \ > + b vsx_save; \ > +END_FTR_SECTION_IFSET(CPU_FTR_VSX) \ > +BEGIN_FTR_SECTION \ > + b qpx_save; \ > +END_FTR_SECTION_IFSET(CPU_FTR_QPX) \ > + b fpu_save; \ > +after_save: > + > +#ifndef CONFIG_SMP > +/* > + * we need an extra save set for the !CONFIG_SMP case, see below > + * Scratch it R5 and base is R4 > + */ > +vsx_save_nosmp: > + SAVE_32VSRS(0,R5,R4) > + b after_save_nosmp > + > +qpx_save_nosmp: > + SAVE_32QRS(0,R5,R4) > + b after_save_nosmp > + > +fpu_save_nosmp: > + SAVE_32FPRS(0,R4) > + b after_save_nosmp > + > +#define SAVE_32FPVSRS_NOSMP(n,c,base) \ > +BEGIN_FTR_SECTION \ > + b vsx_save_nosmp; \ > +END_FTR_SECTION_IFSET(CPU_FTR_VSX) \ > +BEGIN_FTR_SECTION \ > + b qpx_save_nosmp; \ > +END_FTR_SECTION_IFSET(CPU_FTR_QPX) \ > + b fpu_save_nosmp; \ > +after_save_nosmp: > + > +#endif /* !CONFIG_SMP */ > > /* > * This task wants to use the FPU now. > @@ -65,6 +116,11 @@ BEGIN_FTR_SECTION > oris r5,r5,MSR_VSX@h > END_FTR_SECTION_IFSET(CPU_FTR_VSX) > #endif > +#ifdef CONFIG_PPC_QPX > +BEGIN_FTR_SECTION > + oris r5,r5,MSR_VEC@h > +END_FTR_SECTION_IFSET(CPU_FTR_QPX) > +#endif > SYNC > MTMSRD(r5) /* enable use of fpu now */ > isync > @@ -81,7 +137,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) > beq 1f > toreal(r4) > addi r4,r4,THREAD /* want last_task_used_math->thread */ > - SAVE_32FPVSRS(0, R5, R4) > + SAVE_32FPVSRS_NOSMP(0, R5, R4) > mffs fr0 > stfd fr0,THREAD_FPSCR(r4) > PPC_LL r5,PT_REGS(r4) > @@ -94,7 +150,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) > #endif /* CONFIG_SMP */ > /* enable use of FP after return */ > #ifdef CONFIG_PPC32 > - mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) > */ > + mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */ > lwz r4,THREAD_FPEXC_MODE(r5) > ori r9,r9,MSR_FP /* enable FP for current */ > or r9,r9,r4 > @@ -132,6 +188,11 @@ BEGIN_FTR_SECTION > oris r5,r5,MSR_VSX@h > END_FTR_SECTION_IFSET(CPU_FTR_VSX) > #endif > +#ifdef CONFIG_PPC_QPX > +BEGIN_FTR_SECTION > + oris r5,r5,MSR_VEC@h > +END_FTR_SECTION_IFSET(CPU_FTR_QPX) > +#endif > SYNC_601 > ISYNC_601 > MTMSRD(r5) /* enable use of fpu now */ > @@ -148,10 +209,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) > beq 1f > PPC_LL r4,_MSR-STACK_FRAME_OVERHEAD(r5) > li r3,MSR_FP|MSR_FE0|MSR_FE1 > -#ifdef CONFIG_VSX > +#if defined(CONFIG_VSX) || defined(CONFIG_PPC_QPX) > BEGIN_FTR_SECTION > oris r3,r3,MSR_VSX@h > -END_FTR_SECTION_IFSET(CPU_FTR_VSX) > +END_FTR_SECTION_IFSET(CPU_FTR_VSX | CPU_FTR_QPX) > #endif > andc r4,r4,r3 /* disable FP for previous task */ > PPC_STL r4,_MSR-STACK_FRAME_OVERHEAD(r5) > > -jx > > > > > >> > >> I'd like to see this patch broken into different parts. > > > > I'm not sure how _this_ patch: > > > > <https://github.com/jimix/linux-bgq/commit/279c0615917b959a652e81f4ad0d886e2d426d85> > > could be broken up, please advise. > > > >> > >> Also, have you boot tested this change on a VSX enabled box? > > > > I can try, I may bug you for help. > > Is there a commonly test (or apps) I should run? > > > > -jx > > > > > >> > >> Mikey > > > _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev