Hi. While looking at traces of kernel workloads, I noticed places where gcc used a large number of non-volatile registers. Some of these functions did very little work, and we spent most of our time saving the non-volatiles to the stack and reading them back.
It made me wonder whether we have the right ratio of volatile to non-volatile GPRs. Since the kernel is completely self-contained, we could potentially change that ratio. Attached is a quick hack to gcc and the kernel to decrease the number of non-volatile GPRs to 8. I'm not sure if this is a good idea (or whether the resulting volatile to non-volatile ratio is right), but it gives us something to play with. Anton
powerpc: Reduce the number of non volatiles GPRs to 8 This requires a hacked gcc. Signed-off-by: Anton Blanchard <an...@samba.org> -- Index: linux.junk/arch/powerpc/include/asm/exception-64s.h =================================================================== --- linux.junk.orig/arch/powerpc/include/asm/exception-64s.h +++ linux.junk/arch/powerpc/include/asm/exception-64s.h @@ -336,6 +336,7 @@ do_kvm_##n: \ std r2,GPR2(r1); /* save r2 in stackframe */ \ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ + SAVE_10GPRS(14, r1); \ mflr r9; /* Get LR, later save to stack */ \ ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ std r9,_LINK(r1); \ Index: linux.junk/arch/powerpc/include/asm/ppc_asm.h =================================================================== --- linux.junk.orig/arch/powerpc/include/asm/ppc_asm.h +++ linux.junk/arch/powerpc/include/asm/ppc_asm.h @@ -77,8 +77,8 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLP #ifdef __powerpc64__ #define SAVE_GPR(n, base) std n,GPR0+8*(n)(base) #define REST_GPR(n, base) ld n,GPR0+8*(n)(base) -#define SAVE_NVGPRS(base) SAVE_8GPRS(14, base); SAVE_10GPRS(22, base) -#define REST_NVGPRS(base) REST_8GPRS(14, base); REST_10GPRS(22, base) +#define SAVE_NVGPRS(base) SAVE_8GPRS(24, base) +#define REST_NVGPRS(base) REST_8GPRS(24, base) #else #define SAVE_GPR(n, base) stw n,GPR0+4*(n)(base) #define REST_GPR(n, base) lwz n,GPR0+4*(n)(base) Index: linux.junk/arch/powerpc/kernel/asm-offsets.c =================================================================== --- linux.junk.orig/arch/powerpc/kernel/asm-offsets.c +++ linux.junk/arch/powerpc/kernel/asm-offsets.c @@ -289,7 +289,6 @@ int main(void) DEFINE(GPR11, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[11])); DEFINE(GPR12, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[12])); DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13])); -#ifndef CONFIG_PPC64 DEFINE(GPR14, STACK_FRAME_OVERHEAD+offsetof(struct 
pt_regs, gpr[14])); DEFINE(GPR15, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[15])); DEFINE(GPR16, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[16])); @@ -308,7 +307,6 @@ int main(void) DEFINE(GPR29, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[29])); DEFINE(GPR30, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[30])); DEFINE(GPR31, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[31])); -#endif /* CONFIG_PPC64 */ /* * Note: these symbols include _ because they overlap with special * register names Index: linux.junk/arch/powerpc/kernel/entry_64.S =================================================================== --- linux.junk.orig/arch/powerpc/kernel/entry_64.S +++ linux.junk/arch/powerpc/kernel/entry_64.S @@ -86,6 +86,18 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) std r11,_XER(r1) std r11,_CTR(r1) std r9,GPR13(r1) + + std r14,GPR14(r1) + std r15,GPR15(r1) + std r16,GPR16(r1) + std r17,GPR17(r1) + std r18,GPR18(r1) + std r19,GPR19(r1) + std r20,GPR20(r1) + std r21,GPR21(r1) + std r22,GPR22(r1) + std r23,GPR23(r1) + mflr r10 /* * This clears CR0.SO (bit 28), which is the error indication on @@ -112,6 +124,7 @@ BEGIN_FW_FTR_SECTION cmpd cr1,r11,r10 beq+ cr1,33f bl accumulate_stolen_time + trap REST_GPR(0,r1) REST_4GPRS(3,r1) REST_2GPRS(7,r1) @@ -225,7 +238,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECK ACCOUNT_CPU_USER_EXIT(r11, r12) HMT_MEDIUM_LOW_HAS_PPR ld r13,GPR13(r1) /* only restore r13 if returning to usermode */ -1: ld r2,GPR2(r1) +1: + REST_10GPRS(14, r1) + ld r2,GPR2(r1) ld r1,GPR1(r1) mtlr r4 mtcr r5 @@ -405,10 +420,10 @@ _GLOBAL(ret_from_fork) _GLOBAL(ret_from_kernel_thread) bl schedule_tail REST_NVGPRS(r1) - mtlr r14 - mr r3,r15 + mtlr r24 + mr r3,r25 #if defined(_CALL_ELF) && _CALL_ELF == 2 - mr r12,r14 + mr r12,r24 #endif blrl li r3,0 @@ -540,8 +555,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEG mtcrf 0xFF,r6 /* r3-r13 are destroyed -- Cort */ - REST_8GPRS(14, r1) - REST_10GPRS(22, r1) + REST_8GPRS(24, r1) /* convert old thread to its 
task_struct for return value */ addi r3,r3,-THREAD @@ -771,6 +785,7 @@ fast_exception_return: mtspr SPRN_XER,r4 REST_8GPRS(5, r1) + REST_10GPRS(14, r1) andi. r0,r3,MSR_RI beq- unrecov_restore Index: linux.junk/arch/powerpc/kernel/process.c =================================================================== --- linux.junk.orig/arch/powerpc/kernel/process.c +++ linux.junk/arch/powerpc/kernel/process.c @@ -1207,12 +1207,12 @@ int copy_thread(unsigned long clone_flag childregs->gpr[1] = sp + sizeof(struct pt_regs); /* function */ if (usp) - childregs->gpr[14] = ppc_function_entry((void *)usp); + childregs->gpr[24] = ppc_function_entry((void *)usp); #ifdef CONFIG_PPC64 clear_tsk_thread_flag(p, TIF_32BIT); childregs->softe = 1; #endif - childregs->gpr[15] = kthread_arg; + childregs->gpr[25] = kthread_arg; p->thread.regs = NULL; /* no user register state */ ti->flags |= _TIF_RESTOREALL; f = ret_from_kernel_thread;
powerpc: Reduce the number of non volatiles GPRs to 8 A quick hack to test this change on the Linux kernel. Signed-off-by: Anton Blanchard <an...@samba.org> -- Index: gcc/gcc/config/rs6000/rs6000.h =================================================================== --- gcc.orig/gcc/config/rs6000/rs6000.h +++ gcc/gcc/config/rs6000/rs6000.h @@ -1017,8 +1017,8 @@ enum data_align { align_abi, align_opt, Aside from that, you can include as many other registers as you like. */ #define CALL_USED_REGISTERS \ - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, FIXED_R13, 0, 0, \ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, FIXED_R13, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, \ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, \ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, \ @@ -1039,8 +1039,8 @@ enum data_align { align_abi, align_opt, of `CALL_USED_REGISTERS'. */ #define CALL_REALLY_USED_REGISTERS \ - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, FIXED_R13, 0, 0, \ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, FIXED_R13, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, \ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, \ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, \ @@ -1058,7 +1058,7 @@ enum data_align { align_abi, align_opt, #define FIRST_SAVED_ALTIVEC_REGNO (FIRST_ALTIVEC_REGNO+20) #define FIRST_SAVED_FP_REGNO (14+32) -#define FIRST_SAVED_GP_REGNO (FIXED_R13 ? 14 : 13) +#define FIRST_SAVED_GP_REGNO 24 /* List the order in which to allocate registers. Each register must be listed once, even those in FIXED_REGISTERS. 
@@ -1124,8 +1124,8 @@ enum data_align { align_abi, align_opt, MAYBE_R2_AVAILABLE \ 9, 10, 8, 7, 6, 5, 4, \ 3, EARLY_R12 11, 0, \ - 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, \ - 18, 17, 16, 15, 14, 13, LATE_R12 \ + 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, \ + 31, 30, 29, 28, 27, 26, 25, 24, 13, LATE_R12 \ 66, 65, \ 1, MAYBE_R2_FIXED 67, 76, \ /* AltiVec registers. */ \
_______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev