Hi Shreyas, On Tue, May 03, 2016 at 01:54:36PM +0530, Shreyas B. Prabhu wrote: > POWER ISA v3 defines a new idle processor core mechanism. In summary, > a) new instruction named stop is added. This instruction replaces > instructions like nap, sleep, rvwinkle. > b) new per thread SPR named PSSCR is added which controls the behavior > of stop instruction. > > PSSCR has following key fields > Bits 0:3 - Power-Saving Level Status. This field indicates the lowest > power-saving state the thread entered since stop instruction was last > executed. > > Bit 42 - Enable State Loss > 0 - No state is lost irrespective of other fields > 1 - Allows state loss > > Bits 44:47 - Power-Saving Level Limit > This limits the power-saving level that can be entered into. > > Bits 60:63 - Requested Level > Used to specify which power-saving level must be entered on executing > stop instruction > > This patch adds support for stop instruction and PSSCR handling. > > Signed-off-by: Shreyas B. Prabhu <shre...@linux.vnet.ibm.com>
[..snip..] > diff --git a/arch/powerpc/kernel/idle_power7.S > b/arch/powerpc/kernel/idle_power7.S > index 6a24769..d85f834 100644 > --- a/arch/powerpc/kernel/idle_power7.S > +++ b/arch/powerpc/kernel/idle_power7.S > @@ -46,7 +46,7 @@ core_idle_lock_held: > power7_enter_nap_mode: > #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE > /* Tell KVM we're napping */ > - li r4,KVM_HWTHREAD_IN_NAP > + li r4,KVM_HWTHREAD_IN_IDLE > stb r4,HSTATE_HWTHREAD_STATE(r13) > #endif > stb r3,PACA_THREAD_IDLE_STATE(r13) > diff --git a/arch/powerpc/kernel/idle_power_common.S > b/arch/powerpc/kernel/idle_power_common.S > index ff7a541..f260fa8 100644 > --- a/arch/powerpc/kernel/idle_power_common.S > +++ b/arch/powerpc/kernel/idle_power_common.S > @@ -96,11 +96,35 @@ _GLOBAL(power_powersave_common) > * back to reset vector. > */ > _GLOBAL(power7_restore_hyp_resource) > + GET_PACA(r13) > +BEGIN_FTR_SECTION_NESTED(888) > + /* > + * POWER ISA 3. Use PSSCR to determine if we > + * are waking up from deep idle state > + */ > + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) > + ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) > + > + mfspr r5,SPRN_PSSCR > + /* > + * 0-4 bits correspond to Power-Saving Level Status > + * which indicates the idle state we are waking up from > + */ > + rldicl r5,r5,4,60 > + cmpd r5,r4 > + bge power_stop_wakeup_hyp_loss > /* > + * Waking up without hypervisor state loss. Return to > + * reset vector > + */ > + blr > + > +END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300,CPU_FTR_ARCH_300,888) > + /* > + * POWER ISA 2.07 or less. > * Check if last bit of HSPGR0 is set. This indicates whether we are > * waking up from winkle. > */ > - GET_PACA(r13) > clrldi r5,r13,63 > clrrdi r13,r13,1 > cmpwi cr4,r5,1 > diff --git a/arch/powerpc/kernel/idle_power_stop.S > b/arch/powerpc/kernel/idle_power_stop.S > new file mode 100644 > index 0000000..6c86c56 > --- /dev/null > +++ b/arch/powerpc/kernel/idle_power_stop.S > @@ -0,0 +1,221 @@ > +#include <linux/threads.h> > + > +#include <asm/processor.h> > +#include <asm/cputable.h> > +#include <asm/thread_info.h> > +#include <asm/ppc_asm.h> > +#include <asm/asm-offsets.h> > +#include <asm/ppc-opcode.h> > +#include <asm/hw_irq.h> > +#include <asm/kvm_book3s_asm.h> > +#include <asm/opal.h> > +#include <asm/cpuidle.h> > +#include <asm/book3s/64/mmu-hash.h> > +#include <asm/exception-64s.h> > + > +#undef DEBUG > + > +/* > + * rA - Requested stop state > + * rB - Spare reg that can be used > + */ > +#define PSSCR_REQUEST_STATE(rA, rB) \ > + ld rB, PACA_THREAD_PSSCR(r13); \ > + or rB,rB,rA; \ > + mtspr SPRN_PSSCR, rB; \ > + > + .text > + > + .globl power_enter_stop > +power_enter_stop: > +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE > + /* Tell KVM we're napping */ > + li r4,KVM_HWTHREAD_IN_IDLE > + stb r4,HSTATE_HWTHREAD_STATE(r13) > +#endif > + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) > + ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) > + cmpd cr3,r3,r4 It is not clear what r3 is supposed to contain at this point. I think it should contain the requested stop state. But I might be wrong! Perhaps a comment above power_enter_stop can clarify that. > + bge 2f > + IDLE_STATE_ENTER_SEQ(PPC_STOP) > +2: > + lbz r7,PACA_THREAD_MASK(r13) > + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) > + > +lwarx_loop1: > + lwarx r15,0,r14 > + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT > + bnel core_idle_lock_held The definition of core_idle_lock_held below jumps to lwarx_loop2 instead of doing a blr once it observed that the LOCK_BIT is no longer set. This doesn't seem correct since the purpose of core_idle_lock_held is to spin until the LOCK_BIT is cleared and then resume whatever we were supposed to do next. Can you clarify this part ? > + andc r15,r15,r7 /* Clear thread bit */ > + > + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS > + stwcx. r15,0,r14 > + bne- lwarx_loop1 > + > + /* > + * Note all register i.e per-core, per-subcore or per-thread is saved > + * here since any thread in the core might wake up first > + */ > + mfspr r3,SPRN_RPR > + std r3,_RPR(r1) > + mfspr r3,SPRN_SPURR > + std r3,_SPURR(r1) > + mfspr r3,SPRN_PURR > + std r3,_PURR(r1) > + mfspr r3,SPRN_TSCR > + std r3,_TSCR(r1) > + mfspr r3,SPRN_DSCR > + std r3,_DSCR(r1) > + mfspr r3,SPRN_AMOR > + std r3,_AMOR(r1) > + > + IDLE_STATE_ENTER_SEQ(PPC_STOP) > + > + > +_GLOBAL(power_stop) > + PSSCR_REQUEST_STATE(r3,r4) > + li r4, 1 > + LOAD_REG_ADDR(r5,power_enter_stop) > + b power_powersave_common > + > +_GLOBAL(power_stop0) > + li r3,0 > + li r4,1 > + LOAD_REG_ADDR(r5,power_enter_stop) > + PSSCR_REQUEST_STATE(r3,r4) r4 will get clobbered at this point. Move PSSCR_REQUEST_STATE before "li r4,1". Also why cant this simply call "power_stop" having set r3 to 0 ? > + b power_powersave_common > + > +_GLOBAL(power_stop_wakeup_hyp_loss) > + ld r2,PACATOC(r13); > + ld r1,PACAR1(r13) > + /* > + * Before entering any idle state, the NVGPRs are saved in the stack > + * and they are restored before switching to the process context. Hence > + * until they are restored, they are free to be used. > + * > + * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode > + * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the > + * wakeup reason if we branch to kvm_start_guest. > + */ Retain the comment from an earlier patch explaning why LR is being cached in r17. > + mflr r17 > + mfspr r16,SPRN_SRR1 > +BEGIN_FTR_SECTION > + CHECK_HMI_INTERRUPT > +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) > + > + lbz r7,PACA_THREAD_MASK(r13) > + ld r14,PACA_CORE_IDLE_STATE_PTR(r13) > +lwarx_loop2: > + lwarx r15,0,r14 > + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT > + /* > + * Lock bit is set in one of the 2 cases- > + * a. In the stop enter path, the last thread is executing > + * fastsleep workaround code. > + * b. In the wake up path, another thread is resyncing timebase or > + * restoring context > + * In either case loop until the lock bit is cleared. > + */ > + bne core_idle_lock_held > + > + cmpwi cr2,r15,0 > + lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) > + and r4,r4,r15 > + cmpwi cr1,r4,0 /* Check if first in subcore */ > + > + or r15,r15,r7 /* Set thread bit */ > + > + beq cr1,first_thread_in_subcore > + > + /* Not first thread in subcore to wake up */ > + stwcx. r15,0,r14 > + bne- lwarx_loop2 > + isync > + b common_exit The code from lwarx_loop2 till the end of the definition of common_exit is the same as the lwarx_loop2 to common_exit in idle_power7.S. Well, except for a minor bit in the manner in which return from core_idle_lock_held is handled and the fact that we're not defining pnv_fastsleep_workaround_at_exit immediately in first_thread_in_core. I prefer the original version where core_idle_lock_held does a blr instead of explicitly jumping back to lwarx_loop2 since it can be invoked safely from multiple places. Can we move this to a common place and invoke it from these two places instead of duplicating the code ? > + > +core_idle_lock_held: > + HMT_LOW > +core_idle_lock_loop: > + lwz r15,0(14) > + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT > + bne core_idle_lock_loop > + HMT_MEDIUM > + b lwarx_loop2 > + > +first_thread_in_subcore: > + /* First thread in subcore to wakeup */ > + ori r15,r15,PNV_CORE_IDLE_LOCK_BIT > + stwcx. r15,0,r14 > + bne- lwarx_loop2 > + isync > + > + /* > + * If waking up from sleep, subcore state is not lost. Hence > + * skip subcore state restore > + */ > + bne cr4,subcore_state_restored > + > + /* Restore per-subcore state */ > + ld r4,_RPR(r1) > + mtspr SPRN_RPR,r4 > + ld r4,_AMOR(r1) > + mtspr SPRN_AMOR,r4 > + > +subcore_state_restored: > + /* > + * Check if the thread is also the first thread in the core. If not, > + * skip to clear_lock. > + */ > + bne cr2,clear_lock > + > +first_thread_in_core: I suppose we don't need the pnv_fastsleep_workaround_at_exit at this point anymore. > + > +timebase_resync: > + /* Do timebase resync if we are waking up from sleep. Use cr3 value > + * set in exceptions-64s.S */ > + ble cr3,clear_lock > + /* Time base re-sync */ > + li r0,OPAL_RESYNC_TIMEBASE > + bl opal_call_realmode; > + > + /* > + * If waking up from sleep, per core state is not lost, skip to > + * clear_lock. > + */ > + bne cr4,clear_lock > + > + /* Restore per core state */ > + ld r4,_TSCR(r1) > + mtspr SPRN_TSCR,r4 > + > +clear_lock: > + andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS > + lwsync > + stw r15,0(r14) > + > +common_exit: > + /* > + * Common to all threads. > + * > + * If waking up from sleep, hypervisor state is not lost. Hence > + * skip hypervisor state restore. > + */ > + bne cr4,hypervisor_state_restored > + > + /* Waking up from deep idle state */ > + > + /* Restore per thread state */ > + bl __restore_cpu_power8 > + > + ld r4,_SPURR(r1) > + mtspr SPRN_SPURR,r4 > + ld r4,_PURR(r1) > + mtspr SPRN_PURR,r4 > + ld r4,_DSCR(r1) > + mtspr SPRN_DSCR,r4 > + > +hypervisor_state_restored: > + > + mtspr SPRN_SRR1,r16 > + mtlr r17 > + blr [..snip..] > @@ -264,6 +275,30 @@ static int __init pnv_init_idle_states(void) > goto out_free; > } > > + if (cpu_has_feature(CPU_FTR_ARCH_300)) { > + psscr_val = kcalloc(dt_idle_states, sizeof(*psscr_val), > + GFP_KERNEL); Need to handle the case whe the kcalloc fails to allocate memory for psscr_val here. > + if (of_property_read_u64_array(power_mgt, > + "ibm,cpu-idle-state-psscr", > + psscr_val, dt_idle_states)) { > + pr_warn("cpuidle-powernv: missing > ibm,cpu-idle-states-psscr in DT\n"); > + goto out_free_psscr; > + } The remainder of the patch looks ok. -- Thanks and Regards gautham. _______________________________________________ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev