On Thu, 2014-11-27 at 17:48 +0530, Madhavan Srinivasan wrote:
> This patch creates the infrastructure to handle CR-based
> local_* atomic operations. Local atomic operations are fast
> and highly reentrant per-CPU counters, used for per-CPU
> variable updates. Local atomic operations only guarantee
> variable modification atomicity wrt the CPU which owns the
> data, and they need to be executed in a preemption-safe way.
>
> Here is the design of this patch. Since local_* operations
> only need to be atomic with respect to interrupts (IIUC), the
> patch uses one of the Condition Register (CR) fields as a flag
> variable. On entry to a local_* operation, a specific bit in
> the CR5 field is set, and on exit it is cleared. The CR bit is
> checked in the interrupt return path: if CR5[EQ] is set and we
> are returning to the kernel, we rewind the NIP to the start of
> the local_* operation.
>
> The reason for this approach is that local_* operations are
> currently implemented with the l[w/d]arx/st[w/d]cx. instruction
> pair, which is heavy on cycle count and does not have a local
> variant. To see whether the new implementation helps, I used a
> modified version of Rusty's benchmark code for local_t:
>
> https://lkml.org/lkml/2008/12/16/450
>
> Modifications:
> - increased the working set size from 1MB to 8MB,
> - removed the cpu_local_inc test.
>
> Tests were run
> - on a POWER8 1S Scale-out System at 2.0GHz
> - on OPAL v3 with a v3.18-rc4 patched kernel as host
>
> Here are the values with the patch.
>
> Time in ns per iteration:
>
>                inc   add   read  add_return
> atomic_long     67    67    18       69
> irqsave/rest    39    39    23       39
> trivalue        39    39    29       49
> local_t         26    26    24       26
>
> Since CR5 is used as a flag, CFLAGS have been added to keep the
> compiler away from CR5, and CR5 is zeroed at kernel entry.
>
> The patch was tested in
> - a pSeries LPAR,
> - a host with a patched/unmodified guest kernel.
>
> To check whether userspace sees any CR5 corruption, I ran a
> simple test which does:
> - set a CR5 field bit,
> - while(1)
>   - sleep or gettimeofday
>   - check that the bit is still set
>
> Signed-off-by: Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
> ---
> - I really appreciate feedback on the patchset.
> - Kindly comment if I should try any other benchmark or
>   workload to check the numbers.
> - Also, kindly recommend any known stress test for CR.
>
>  Makefile                                 |   6 ++
>  arch/powerpc/include/asm/exception-64s.h |  21 +++++-
>  arch/powerpc/kernel/entry_64.S           | 106 ++++++++++++++++++++++++++++-
>  arch/powerpc/kernel/exceptions-64s.S     |   2 +-
>  arch/powerpc/kernel/head_64.S            |   8 +++
>  5 files changed, 138 insertions(+), 5 deletions(-)
>
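The mechanism is easiest to see from the consumer side, so here is a rough
sketch of what a CR5-based local_add() could look like on top of this
infrastructure. This is not part of the patch below, and the per-instruction
__ex_table entries (.llong insn,fixup pairs, as powerpc uses at this point)
are my assumption about how the fixup lookup in fast_exception_return would
be fed. local_t is the existing powerpc type wrapping atomic_long_t, and
-ffixed-cr5 keeps the compiler itself away from CR5, so no clobber is needed:

static __inline__ void local_add(long i, local_t *l)
{
	long t;

	__asm__ __volatile__(
"	crset	22\n"		/* enter window: set CR5[EQ] (CR bit 22) */
"1:	ld	%0,0(%2)\n"	/* load the counter */
"2:	add	%0,%1,%0\n"	/* add the increment */
"3:	std	%0,0(%2)\n"	/* store the result */
"4:	crclr	22\n"		/* leave window: clear CR5[EQ] */
"	.section __ex_table,\"a\"\n"
"	.align	3\n"
"	.llong	1b,1b\n"	/* store not done yet: */
"	.llong	2b,1b\n"	/*   restart the whole sequence */
"	.llong	3b,1b\n"
"	.llong	4b,4b\n"	/* store already done: just redo the clear */
"	.previous\n"
	: "=&r" (t)
	: "r" (i), "r" (&(l->a.counter))
	: "memory");
}

Since search_exception_tables() matches the exact interrupted NIP, each
instruction in the window needs its own entry: an interrupt before the store
rewinds to 1b and redoes the whole read-modify-write, while one landing on
the crclr (store already executed) must only redo the flag clear, or the
counter would be incremented twice.
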
> diff --git a/Makefile b/Makefile
> index 00d618b..2e271ad 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -706,6 +706,12 @@ endif
>
>  KBUILD_CFLAGS += $(call cc-option, -fno-var-tracking-assignments)
>
> +ifdef CONFIG_PPC64
> +# We need this flag to force the compiler not to use CR5, since
> +# the local_t code relies on that.
> +KBUILD_CFLAGS += -ffixed-cr5
> +endif
> +
>  ifdef CONFIG_DEBUG_INFO
>  ifdef CONFIG_DEBUG_INFO_SPLIT
>  KBUILD_CFLAGS += $(call cc-option, -gsplit-dwarf, -g)
> diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
> index 77f52b2..c42919a 100644
> --- a/arch/powerpc/include/asm/exception-64s.h
> +++ b/arch/powerpc/include/asm/exception-64s.h
> @@ -306,7 +306,26 @@ do_kvm_##n:								\
> 	std	r10,0(r1);		/* make stack chain pointer	*/ \
> 	std	r0,GPR0(r1);		/* save r0 in stackframe	*/ \
> 	std	r10,GPR1(r1);		/* save r1 in stackframe	*/ \
> -	beq	4f;			/* if from kernel mode		*/ \
> +BEGIN_FTR_SECTION;						   \
> +	lis	r9,4096;		/* Create a mask with HV and PR	*/ \
> +	rldicr	r9,r9,32,31;		/* bits, AND with the MSR	*/ \
> +	mr	r10,r9;			/* to check for Hyp state	*/ \
> +	ori	r9,r9,16384;						   \
> +	and	r9,r12,r9;						   \
> +	cmpd	cr3,r10,r9;						   \
> +	beq	cr3,66f;		/* Jump if we come from Hyp mode*/ \
> +	mtcrf	0x04,r10;		/* Clear CR5 if coming from usr	*/ \
> +FTR_SECTION_ELSE;						   \
Can't we just unconditionally clear it, as long as we do that after we've
saved it? In that case, it's just a matter for the fixup code to check the
saved version rather than the actual CR.

> +	beq	4f;			/* if kernel mode branch	*/ \
> +	li	r10,0;			/* Clear CR5 in case of coming	*/ \
> +	mtcrf	0x04,r10;		/* from user.			*/ \
> +	nop;				/* This part of code is for	*/ \
> +	nop;				/* kernel with MSR[HV]=0,	*/ \
> +	nop;				/* MSR[PR]=0, so just check for	*/ \
> +	nop;				/* MSR[PR]			*/ \
> +	nop;								   \
> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE);			   \
> +66:	beq	4f;			/* if from kernel mode		*/ \
> 	ACCOUNT_CPU_USER_ENTRY(r9, r10);				   \
> 	SAVE_PPR(area, r9, r10);					   \
>  4:	EXCEPTION_PROLOG_COMMON_2(area)					   \
> diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
> index 0905c8d..e42bb99 100644
> --- a/arch/powerpc/kernel/entry_64.S
> +++ b/arch/powerpc/kernel/entry_64.S
> @@ -68,7 +68,26 @@ system_call_common:
>  2:	std	r2,GPR2(r1)
> 	std	r3,GPR3(r1)
> 	mfcr	r2
> -	std	r4,GPR4(r1)
> +BEGIN_FTR_SECTION
> +	lis	r10,4096
> +	rldicr	r10,r10,32,31
> +	mr	r11,r10
> +	ori	r10,r10,16384
> +	and	r10,r12,r10
> +	cmpd	r11,r10
> +	beq	67f
> +	mtcrf	0x04,r11
> +FTR_SECTION_ELSE
> +	beq	67f
> +	li	r11,0
> +	mtcrf	0x04,r11
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop
> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
> +67:	std	r4,GPR4(r1)
> 	std	r5,GPR5(r1)
> 	std	r6,GPR6(r1)
> 	std	r7,GPR7(r1)
> @@ -224,8 +243,26 @@ syscall_exit:
>  BEGIN_FTR_SECTION
> 	stdcx.	r0,0,r1			/* to clear the reservation */
>  END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
> +BEGIN_FTR_SECTION
> +	lis	r4,4096
> +	rldicr	r4,r4,32,31
> +	mr	r6,r4
> +	ori	r4,r4,16384
> +	and	r4,r8,r4
> +	cmpd	cr3,r6,r4
> +	beq	cr3,65f
> +	mtcr	r5
> +FTR_SECTION_ELSE
> 	andi.	r6,r8,MSR_PR
> -	ld	r4,_LINK(r1)
> +	beq	65f
> +	mtcr	r5
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop
> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
> +65:	ld	r4,_LINK(r1)
>
> 	beq-	1f
> 	ACCOUNT_CPU_USER_EXIT(r11, r12)
>
> @@ -234,7 +271,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
>  1:	ld	r2,GPR2(r1)
> 	ld	r1,GPR1(r1)
> 	mtlr	r4
> +#ifdef CONFIG_PPC64
> +	mtcrf	0xFB,r5
> +#else
> 	mtcr	r5
> +#endif
> 	mtspr	SPRN_SRR0,r7
> 	mtspr	SPRN_SRR1,r8
> 	RFI
> @@ -804,7 +845,66 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
>  */
>  .globl fast_exception_return
>  fast_exception_return:
> -	ld	r3,_MSR(r1)
> +
> +	/*
> +	 * Now that we are about to exit from interrupt, let's check for
> +	 * the cr5 eq bit. If it is set, then we may be in the middle of
> +	 * a local_t update. In this case, we should rewind the NIP
> +	 * accordingly.
> +	 */
> +	mfcr	r3
> +	andi.	r4,r3,0x200
> +	beq	63f
> +
> +	/*
> +	 * Now that the bit is set, let's check for return to User
> +	 */
> +	ld	r4,_MSR(r1)
> +BEGIN_FTR_SECTION
> +	li	r3,4096
> +	rldicr	r3,r3,32,31
> +	mr	r5,r3
> +	ori	r3,r3,16384
> +	and	r3,r4,r3
> +	cmpd	r5,r3
> +	bne	63f
> +FTR_SECTION_ELSE
> +	andi.	r3,r4,MSR_PR
> +	bne	63f
> +	nop
> +	nop
> +	nop
> +	nop
> +	nop
> +ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
> +
> +	/*
> +	 * Looks like we are returning to Kernel, so
> +	 * let's get the NIP and search the ex_table.
> +	 * Change the NIP based on the return value
> +	 */
> +lookup_ex_table:
> +	ld	r3,_NIP(r1)
> +	bl	search_exception_tables
> +	cmpli	0,1,r3,0
> +	bne	62f
> +
> +	/*
> +	 * This is a panic case. The reason is that we
> +	 * have the CR5 bit set, but we are not in
> +	 * local_* code and we are returning to Kernel.
> +	 */
> +	ld	r3,_NIP(r1)
> +	mfcr	r4
> +	EMIT_BUG_ENTRY lookup_ex_table, __FILE__,__LINE__,BUGFLAG_WARNING
> +
> +	/*
> +	 * Now save the return fixup address as NIP
> +	 */
> +62:	ld	r4,8(r3)
> +	std	r4,_NIP(r1)
> +	crclr	22
> +63:	ld	r3,_MSR(r1)
> 	ld	r4,_CTR(r1)
> 	ld	r0,_LINK(r1)
> 	mtctr	r4
> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> index 72e783e..edb75a9 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -637,7 +637,7 @@ masked_##_H##interrupt:					\
> 	rldicl	r10,r10,48,1;		/* clear MSR_EE */	\
> 	rotldi	r10,r10,16;					\
> 	mtspr	SPRN_##_H##SRR1,r10;				\
> -2:	mtcrf	0x80,r9;					\
> +2:	mtcrf	0x90,r9;					\
> 	ld	r9,PACA_EXGEN+EX_R9(r13);			\
> 	ld	r10,PACA_EXGEN+EX_R10(r13);			\
> 	ld	r11,PACA_EXGEN+EX_R11(r13);			\
> diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
> index d48125d..02e49b3 100644
> --- a/arch/powerpc/kernel/head_64.S
> +++ b/arch/powerpc/kernel/head_64.S
> @@ -347,6 +347,14 @@ __mmu_off:
>  *
>  */
>  __start_initialization_multiplatform:
> +
> +	/*
> +	 * Before we do anything, let's clear the CR5 field,
> +	 * so that we will have a clean start at entry
> +	 */
> +	li	r11,0
> +	mtcrf	0x04,r11
> +
> 	/* Make sure we are running in 64 bits mode */
> 	bl	enable_64b_mode
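
As for the userspace CR5 check described in the changelog, a reconstruction
could look like the program below (this is my sketch, not the author's actual
test). One caveat: CR5 is a volatile field in the 64-bit PowerPC ABI, so the
compiler and libc are in principle free to clobber it between the asm
statements; in practice they don't, which is what makes this usable for
spotting kernel-induced corruption:

#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <unistd.h>

/* CR5[EQ] is CR bit 22; mfcr presents it as mask 0x200 */
static inline void set_cr5_eq(void)
{
	__asm__ __volatile__("crset 22");
}

static inline int cr5_eq_set(void)
{
	unsigned long cr;

	__asm__ __volatile__("mfcr %0" : "=r" (cr));
	return (cr & 0x200) != 0;
}

int main(void)
{
	struct timeval tv;

	set_cr5_eq();
	for (;;) {
		sleep(1);			/* enter the kernel ... */
		gettimeofday(&tv, NULL);	/* ... a couple of ways */
		if (!cr5_eq_set()) {
			fprintf(stderr, "CR5[EQ] lost across a syscall\n");
			exit(1);
		}
	}
	return 0;
}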