Hi,

Hit the kernel crash below during POWER8 host boot with this patch series
applied on top of powerpc merge branch commit:
https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/commit/?h=merge&id=6a821ffee18a6e6c0027c523fa8c958df98ca361

Built with ppc64le_defconfig.

Host Console log:
[    0.454666] EEH: PCI Enhanced I/O Error Handling Enabled
[    0.456524] create_dump_obj: New platform dump. ID = 0x4 Size 7457968
[    0.457627] opal-power: OPAL EPOW, DPO support detected.
[    0.457722] BUG: Unable to handle kernel data access at 0xffffffffff76184a
[    0.457733] Faulting instruction address: 0xc00000000001a94c
[    0.457740] Oops: Kernel access of bad area, sig: 11 [#1]
[    0.457745] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
[    0.457750] Modules linked in:
[    0.457756] CPU: 58 PID: 0 Comm: swapper/58 Not tainted 5.1.0-rc2-gd0ae6c548 #1
[    0.457762] NIP:  c00000000001a94c LR: c0000000000a6e9c CTR: c000000000008000
[    0.457768] REGS: c000000f272b7b50 TRAP: 0380   Not tainted  (5.1.0-rc2-gd0ae6c548)
[    0.457773] MSR:  9000000000001033 <SF,HV,ME,IR,DR,RI,LE>  CR: 24004222  XER: 00000000
[    0.457781] CFAR: c0000000000a6e98 IRQMASK: 1 
[    0.457781] GPR00: c0000000000a6e9c c000000f272b7de0 0000000000000004 0000000000000006
[    0.457781] GPR04: c0000000000a5dd4 0000000024004222 c000000f272b7d48 0000000000000001
[    0.457781] GPR08: 0000000000000002 ffffffffff761844 c000000f27250c00 0000c3feb1676be1
[    0.457781] GPR12: 0000000000004400 c000000ffff9d380 c000000ffe60ff90 0000000000000000
[    0.457781] GPR16: 0000000000000000 0000000000000000 c00000000004b4d0 c00000000004b4a0
[    0.457781] GPR20: c000000001526214 0000000000000800 0000000000000001 c000000001521b78
[    0.457781] GPR24: 000000000000003a 0000000000000000 0000000000080000 0000000000000000
[    0.457781] GPR28: c000000001526140 0000000000000001 0400000000000000 c000000001525ce0
[    0.457829] NIP [c00000000001a94c] irq_set_pending_from_srr1+0x1c/0x50
[    0.457835] LR [c0000000000a6e9c] power7_idle+0x3c/0x50
[    0.457839] Call Trace:
[    0.457843] [c000000f272b7de0] [c0000000000a6e98] power7_idle+0x38/0x50 (unreliable)
[    0.457849] [c000000f272b7e00] [c0000000000210f4] arch_cpu_idle+0x54/0x160
[    0.457856] [c000000f272b7e30] [c000000000c47bc4] default_idle_call+0x74/0x88
[    0.457862] [c000000f272b7e50] [c000000000158f54] do_idle+0x2f4/0x3d0
[    0.457868] [c000000f272b7ec0] [c000000000159288] cpu_startup_entry+0x38/0x40
[    0.457874] [c000000f272b7ef0] [c00000000004dae4] start_secondary+0x654/0x680
[    0.457881] [c000000f272b7f90] [c00000000000b25c] start_secondary_prolog+0x10/0x14
[    0.457886] Instruction dump:
[    0.457890] 992d098b 7c630034 5463d97e 4e800020 60000000 3c4c014d 38424dd0 7c0802a6
[    0.457898] 60000000 3d22ff76 78637722 39291840 
[    0.457900] BUG: Unable to handle kernel data access at 0xffffffffff76184a
[    0.457901] <7d4918ae> 2b8a00ff 419e001c 892d098b 
[    0.457907] Faulting instruction address: 0xc00000000001a94c
[    0.457910] BUG: Unable to handle kernel data access at 0xffffffffff76184a
[    0.457915] ---[ end trace fa7343cfd21c8798 ]---
[    0.457919] Faulting instruction address: 0xc00000000001a94c
[    0.458961] BUG: Unable to handle kernel data access at 0xffffffffff76184a
[    0.458963] BUG: Unable to handle kernel data access at 0xffffffffff76184a
[    0.458964] BUG: Unable to handle kernel data access at 0xffffffffff76184a
[    0.458966] BUG: Unable to handle kernel data access at 0xffffffffff76184a
[    0.458968] BUG: Unable to handle kernel data access at 0xffffffffff76184a
[    0.458970] BUG: Unable to handle kernel data access at 0xffffffffff76184a
[    0.458972] Faulting instruction address: 0xc00000000001a94c
[    0.458973] Faulting instruction address: 0xc00000000001a94c
[    0.458974] Faulting instruction address: 0xc00000000001a94c
[    0.458975] Faulting instruction address: 0xc00000000001a94c
[    0.458976] Faulting instruction address: 0xc00000000001a94c
[    0.458978] initcall __machine_initcall_powernv_pnv_init_idle_states+0x0/0xb30 returned 0 after 0 usecs
[    0.458981] calling  __machine_initcall_powernv_opal_time_init+0x0/0x150 @ 1
[    0.458982] Faulting instruction address: 0xc00000000001a94c
[    0.459022] BUG: Unable to handle kernel data access at 0xffffffffff76184a
[    0.459040] Faulting instruction address: 0xc00000000001a94c
[    0.459043] initcall __machine_initcall_powernv_opal_time_init+0x0/0x150 returned 0 after 0 usecs
[    0.459044] BUG: Unable to handle kernel data access at 0xffffffffff76184c
[    0.459045] Faulting instruction address: 0xc00000000001a94c
[    0.459060] calling  __machine_initcall_powernv_rng_init+0x0/0x334 @ 1
[    0.459084] powernv-rng: Registering arch random hook.
[    0.459141] BUG: Unable to handle kernel data access at 0xffffffffff76184a
[    0.459147] Faulting instruction address: 0xc00000000001a94c
[    0.459191] BUG: Unable to handle kernel data access at 0xffffffffff76184a
[    0.459199] Faulting instruction address: 0xc00000000001a94c
[    0.459216] BUG: Unable to handle kernel data access at 0xffffffffff76184a
[    0.459224] Faulting instruction address: 0xc00000000001a94c
[    0.459228] BUG: Unable to handle kernel data access at 0xffffffffff76184a
[    0.459234] Faulting instruction address: 0xc00000000001a94c
[    0.459268] BUG: Unable to handle kernel data access at 0xffffffffff76184a
[    0.459275] Faulting instruction address: 0xc00000000001a94c
[    0.459375] 
[    0.459380] Oops: Kernel access of bad area, sig: 11 [#2]
[    0.459385] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA PowerNV
[    0.459390] Modules linked in:
[    0.459395] CPU: 63 PID: 0 Comm: swapper/63 Tainted: G      D           5.1.0-rc2-gd0ae6c548 #1
[    0.459401] NIP:  c00000000001a94c LR: c0000000000a6e9c CTR: c000000000008000
[    0.459407] REGS: c000000f272a3b50 TRAP: 0380   Tainted: G      D            (5.1.0-rc2-gd0ae6c548)
[    0.459414] MSR:  9000000000001033 <SF,HV,ME,IR,DR,RI,LE>  CR: 24004222  XER: 00000000
[    0.459419] BUG: Unable to handle kernel data access at 0xffffffffff76184c
[    0.459422] CFAR: c0000000000a6e98 IRQMASK: 1 
[    0.459422] GPR00: c0000000000a6e9c c000000f272a3de0 0000000000000004 0000000000000006
[    0.459422] GPR04: c0000000000a5dd4 0000000024004222 c000000f272a3d48 0000000000000001
[    0.459422] GPR08: 0000000000000007 ffffffffff761844 c000000f27244e00 0000c3feb18a5128
[    0.459422] GPR12: 0000000000004400 c000000ffff99080 c000000ffe623f90 0000000000000000
[    0.459422] GPR16: 0000000000000000 0000000000000000 c00000000004b4d0 c00000000004b4a0
[    0.459422] GPR20: c000000001526214 0000000000000800 0000000000000001 c000000001521b78
[    0.459422] GPR24: 000000000000003f 0000000000000000 0000000000080000 0000000000000000
[    0.459422] GPR28: c000000001526140 0000000000000001 8000000000000000 c000000001525ce0
[    0.459443] NIP [c00000000001a94c] irq_set_pending_from_srr1+0x1c/0x50
[    0.459449] Faulting instruction address: 0xc00000000001a94c
[    0.459483] LR [c0000000000a6e9c] power7_idle+0x3c/0x50
[    0.459485] Call Trace:
[    0.459490] initcall __machine_initcall_powernv_rng_init+0x0/0x334 returned 0 after 0 usecs
[    0.459493] calling  __machine_initcall_pseries_init_ras_IRQ+0x0/0xf4 @ 1
[    0.459497] [c000000f272a3de0] [c0000000000a6e98] power7_idle+0x38/0x50 (unreliable)
[    0.459500] [c000000f272a3e00] [c0000000000210f4] arch_cpu_idle+0x54/0x160
[    0.459503] [c000000f272a3e30] [c000000000c47bc4] default_idle_call+0x74/0x88
[    0.459507] initcall __machine_initcall_pseries_init_ras_IRQ+0x0/0xf4 returned 0 after 0 usecs
[    0.459510] calling  __machine_initcall_pseries_rng_init+0x0/0xa4 @ 1
[    0.459514] [c000000f272a3e50] [c000000000158f54] do_idle+0x2f4/0x3d0
[    0.459518] [c000000f272a3ec0] [c000000000159288] cpu_startup_entry+0x38/0x40
[    0.459523] initcall __machine_initcall_pseries_rng_init+0x0/0xa4 returned 0 after 0 usecs
[    0.459527] [c000000f272a3ef0] [c00000000004dae4] start_secondary+0x654/0x680
[    0.459531] [c000000f272a3f90] [c00000000000b25c] start_secondary_prolog+0x10/0x14
[    0.459535] calling  __machine_initcall_pseries_ioei_init+0x0/0xd8 @ 1
[    0.459539] Instruction dump:
[    0.459542] 992d098b 7c630034 5463d97e 4e800020 60000000 3c4c014d 38424dd0 7c0802a6
[    0.459549] initcall __machine_initcall_pseries_ioei_init+0x0/0xd8 returned 0 after 0 usecs
[    0.459553] 60000000 3d22ff76 78637722 39291840 <7d4918ae> 2b8a00ff 419e001c 892d098b
[    0.459559] calling  uid_cache_init+0x0/0x108 @ 1
[    0.459564] ---[ end trace fa7343cfd21c8799 ]---
[    0.459574] initcall uid_cache_init+0x0/0x108 returned 0 after 0 usecs
[    0.459576] calling  param_sysfs_init+0x0/0x248 @ 1

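FWIW, decoding the oops by hand (standard Book3S instruction encodings; my
reading, so take it with a grain of salt): GPR02, the TOC pointer, is
0x0000000000000004, and the faulting sequence from the instruction dump

  3d22ff76    addis   r9,r2,-138
  39291840    addi    r9,r9,0x1840
  7d4918ae    lbzx    r10,r9,r3

with r2 = 0x4 and r3 = 0x6 computes exactly the faulting address
0xffffffffff76184a. So this looks like a TOC-relative load in
irq_set_pending_from_srr1() executed with a corrupted r2.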

Regards,
-Satheesh.
 
On Mon, Apr 08, 2019 at 04:34:30PM +1000, Nicholas Piggin wrote:
> Reimplement Book3S idle code in C, moving POWER7/8/9
> implementation-specific HV idle code to the powernv platform code.
> 
> Book3S assembly stubs are kept in common code and used only to save
> the stack frame and non-volatile GPRs before executing architected
> idle instructions, then to restore the stack, reload GPRs, and return
> to C after waking from idle.
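
(As I understand the contract: the stub saves the stack pointer, NVGPRs, LR
and CR, executes the idle instruction, and a state-loss wakeup returns to
the caller with those restored and the SRR1 wakeup reason as the return
value. A minimal sketch of a C caller, assuming the declarations from the
processor.h hunk below; the function name and SPR handling are mine, not
the patch's:)

        static unsigned long example_enter_state_loss_idle(u64 psscr)
        {
                unsigned long srr1;

                /* save any SPRs the requested state can lose ... */
                srr1 = isa300_idle_stop_mayloss(psscr);
                /* ... NVGPRs/stack are intact again here; restore SPRs
                 * according to srr1 and how deep we actually slept */

                return srr1;
        }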
> 
> The complex logic dealing with threads and subcores, locking, SPRs,
> HMIs, timebase resync, etc., is all done in C which makes it more
> maintainable.
> 
> This is not a strict translation to C code, there are some
> significant differences:
> 
> - Idle wakeup no longer uses the ->cpu_restore call to reinit SPRs,
>   but saves and restores them itself.
> 
> - The optimisation where EC=ESL=0 idle modes did not have to save GPRs
>   or change MSR is restored, because it's now simple to do. ESL=1
>   sleeps that do not lose GPRs can use this optimisation too.
> 
> - KVM secondary entry and cede are now more call/return style rather
>   than branchy. nap_state_lost is not required because KVM always
>   returns via the NVGPR-restoring path.
> 
> - KVM secondary wakeup from offline sequence is moved entirely into
>   the offline wakeup, which avoids a hwsync in the normal idle wakeup
>   path.
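
(To make the restored EC=ESL=0 optimisation concrete, the stub selection
presumably reduces to something like the sketch below, using PSSCR_EC /
PSSCR_ESL from reg.h and the stubs declared in the processor.h hunk; this
is my illustration, not the patch's literal code:)

        static unsigned long example_idle_stop(u64 psscr)
        {
                /* EC=ESL=0: wakeup is not taken as an interrupt and no
                 * GPRs are lost, so no GPR save or MSR change is needed */
                if (!(psscr & (PSSCR_EC | PSSCR_ESL)))
                        return isa300_idle_stop_noloss(psscr); /* returns 0 */

                /* otherwise GPRs may be lost; returns SRR1 on wakeup */
                return isa300_idle_stop_mayloss(psscr);
        }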
> 
> Reviewed-by: Gautham R. Shenoy <e...@linux.vnet.ibm.com>
> Signed-off-by: Nicholas Piggin <npig...@gmail.com>
> 
> Notes:
> - The KVM code has been significantly changed and now actually boots an
>   HPT-on-radix guest in dependent threads mode with >0 secondaries.
>   With previous iterations my test wasn't actually catching this case
>   and there were some obvious bugs.
> 
>   I've broken the KVM code into the second patch just for review. The
>   first patch makes KVM kind-of work following its existing design.
>   The main thing missing from it is that deep idle states which lose
>   SPRs on the secondaries don't restore them on a KVM-requested
>   wakeup. But you can run guests with deep idle states disabled.
>   Rather than significantly rework the code to make that case work with
>   the new idle code (which would need testing, only to be undone), I
>   have just broken it up like this for hopefully easier review of
>   the KVM parts. Patches can be squashed together before upstream merge.
> 
> - There are so many combinations of KVM modes and options that I could
>   use more help with review and testing.
> 
> - This is not ported up to powerpc next yet.
> 
> - P9 restores some of the PMU SPRs, but not others, and P8 only zeroes
>   them. There are improvements to be made to SPR save/restore policies
>   and documentation, but this first pass tries to keep things as they were.
> 
> Left to do:
> - Test actual POWER7 hardware.
> 
> - More KVM testing and review.
> 
> - Port to powerpc next.
> 
> Since RFC v1:
> - Now tested and working with POWER9 hash and radix.
> - KVM support added. This took a bit of work to untangle and might
>   still have some issues, but POWER9 seems to work including hash on
>   radix with dependent threads mode.
> - This snowballed a bit because of KVM and other details making it
>   not feasible to leave POWER7/8 code alone. That's only half done
>   at the moment.
> - So far this trades about 800 lines of asm for 500 of C. With POWER7/8
>   support done it might be another hundred or so lines of C.
> 
> Since RFC v2:
> - Fixed deep state SLB reloading
> - Now tested and working with POWER8.
> - Accounted for most feedback.
> 
> Since RFC v3:
> - Rebased to powerpc merge + idle state bugfix
> - Split SLB flush/restore code out and shared it with the MCE code
>   (pseries MCE patches can also use it).
> - More testing on POWER8 including KVM with secondaries.
> - Performance testing looks good. EC=ESL=0 is about 5% faster, other
>   stop states look a little faster too.
> - Adjusted SPR saving to handle POWER7; haven't tested it.
> 
> Since v1:
> - More review comments from Gautham.
> - Rename isa3_ to isa300_ prefix.
> - Tinkered with some comments, copyright notice, changelog.
> - Cede and regular idle do not go via KVM secondary wakeup code path,
>   so hwthread_state stores and barriers can be simplified, and some
>   KVM code paths simplified a little.
> 
> Since v2:
> - Rebase, SLB reload patch has been merged.
> - More testing. Tested machine check idle wakeup path with mambo stepping
>   through instructions.
> 
> Since v3:
> - Build fixes caught by CI
> 
> Since v4:
> - PSSCR test PLS rather than RL (Akshay)
> 
> Since v5:
> - Fix TB loss test to use PLS instead of RL as well
> - Rename hv_loss variable to spr_loss to better describe its usage
> - Clamp the SPR loss level to the shallower of SPR loss or TB loss in
>   case a future CPU has that behaviour (P8-type behaviour).
> - Added a few more comments.
> 
> Since v6:
> - Comment improvements
> - Remove the restore_cpu() simplification. Now that restore_cpu is not
>   called from idle it could be simplified, but that is not required, so
>   leave it to a future patch to avoid risking changes to the boot/kexec
>   paths.
> - Actually use the stack red zone rather than a pt_regs frame beyond it
>   to save GPRs. An MCE or SRESET while saving regs (which runs with
>   MSR[RI]=1 on P9) would have trashed our saved regs.
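
(If I am reading the stub below right, it saves r2, r14-r31 plus the LR
and CR images, i.e. 21 x 8 = 168 bytes, matching the "/* 168 bytes */"
comment, which fits comfortably in the 288-byte red zone the ppc64 ELF
ABI provides below r1. That is why no new stack frame is needed and why
saving there is safe against an MCE/SRESET arriving mid-save.)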
> 
> Since v7:
> - Hopefully fix KVM dependent threads mode.
> - Split KVM patch out.
> ---
>  arch/powerpc/include/asm/cpuidle.h       |   19 +-
>  arch/powerpc/include/asm/paca.h          |   41 +-
>  arch/powerpc/include/asm/processor.h     |    9 +-
>  arch/powerpc/include/asm/reg.h           |    8 +-
>  arch/powerpc/kernel/asm-offsets.c        |   17 -
>  arch/powerpc/kernel/exceptions-64s.S     |   21 +-
>  arch/powerpc/kernel/idle_book3s.S        | 1053 +++-------------------
>  arch/powerpc/kernel/setup-common.c       |    4 +-
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S  |    6 +-
>  arch/powerpc/platforms/powernv/idle.c    |  843 +++++++++++++----
>  arch/powerpc/platforms/powernv/subcore.c |    2 +-
>  arch/powerpc/xmon/xmon.c                 |   25 +-
>  12 files changed, 902 insertions(+), 1146 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
> index 43e5f31fe64d..9844b3ded187 100644
> --- a/arch/powerpc/include/asm/cpuidle.h
> +++ b/arch/powerpc/include/asm/cpuidle.h
> @@ -27,10 +27,11 @@
>   * the THREAD_WINKLE_BITS are set, which indicate which threads have not
>   * yet woken from the winkle state.
>   */
> -#define PNV_CORE_IDLE_LOCK_BIT                       0x10000000
> +#define NR_PNV_CORE_IDLE_LOCK_BIT            28
> +#define PNV_CORE_IDLE_LOCK_BIT                       (1ULL << NR_PNV_CORE_IDLE_LOCK_BIT)
> 
> +#define PNV_CORE_IDLE_WINKLE_COUNT_SHIFT     16
>  #define PNV_CORE_IDLE_WINKLE_COUNT           0x00010000
> -#define PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT   0x00080000
>  #define PNV_CORE_IDLE_WINKLE_COUNT_BITS              0x000F0000
>  #define PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT       8
>  #define PNV_CORE_IDLE_THREAD_WINKLE_BITS     0x0000FF00
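
(Exposing the bit number as NR_PNV_CORE_IDLE_LOCK_BIT, not just the mask,
suggests the C side can take the per-core lock with the generic bitops; a
hedged sketch of that usage, helper names mine, not code from this patch:)

        static void example_core_idle_lock(unsigned long *state)
        {
                /* spin until the lock bit is acquired */
                while (test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state))
                        cpu_relax();
        }

        static void example_core_idle_unlock(unsigned long *state)
        {
                clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
        }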
> @@ -68,16 +69,6 @@
>  #define ERR_DEEP_STATE_ESL_MISMATCH  -2
> 
>  #ifndef __ASSEMBLY__
> -/* Additional SPRs that need to be saved/restored during stop */
> -struct stop_sprs {
> -     u64 pid;
> -     u64 ldbar;
> -     u64 fscr;
> -     u64 hfscr;
> -     u64 mmcr1;
> -     u64 mmcr2;
> -     u64 mmcra;
> -};
> 
>  #define PNV_IDLE_NAME_LEN    16
>  struct pnv_idle_states_t {
> @@ -92,10 +83,6 @@ struct pnv_idle_states_t {
> 
>  extern struct pnv_idle_states_t *pnv_idle_states;
>  extern int nr_pnv_idle_states;
> -extern u32 pnv_fastsleep_workaround_at_entry[];
> -extern u32 pnv_fastsleep_workaround_at_exit[];
> -
> -extern u64 pnv_first_deep_stop_state;
> 
>  unsigned long pnv_cpu_offline(unsigned int cpu);
>  int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags);
> diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
> index e843bc5d1a0f..e55dedd7ee3e 100644
> --- a/arch/powerpc/include/asm/paca.h
> +++ b/arch/powerpc/include/asm/paca.h
> @@ -173,8 +173,8 @@ struct paca_struct {
>       u8 irq_happened;                /* irq happened while soft-disabled */
>       u8 io_sync;                     /* writel() needs spin_unlock sync */
>       u8 irq_work_pending;            /* IRQ_WORK interrupt while soft-disable */
> -     u8 nap_state_lost;              /* NV GPR values lost in power7_idle */
>  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +     u8 nap_state_lost;              /* NV GPR values lost in power7_idle */
>       u8 pmcregs_in_use;              /* pseries puts this in lppaca */
>  #endif
>       u64 sprg_vdso;                  /* Saved user-visible sprg */
> @@ -183,23 +183,28 @@ struct paca_struct {
>  #endif
> 
>  #ifdef CONFIG_PPC_POWERNV
> -     /* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */
> -     u32 *core_idle_state_ptr;
> -     u8 thread_idle_state;           /* PNV_THREAD_RUNNING/NAP/SLEEP */
> -     /* Mask to indicate thread id in core */
> -     u8 thread_mask;
> -     /* Mask to denote subcore sibling threads */
> -     u8 subcore_sibling_mask;
> -     /* Flag to request this thread not to stop */
> -     atomic_t dont_stop;
> -     /* The PSSCR value that the kernel requested before going to stop */
> -     u64 requested_psscr;
> -
> -     /*
> -      * Save area for additional SPRs that need to be
> -      * saved/restored during cpuidle stop.
> -      */
> -     struct stop_sprs stop_sprs;
> +     /* PowerNV idle fields */
> +     /* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */
> +     unsigned long idle_state;
> +     union {
> +             /* P7/P8 specific fields */
> +             struct {
> +                     /* PNV_THREAD_RUNNING/NAP/SLEEP */
> +                     u8 thread_idle_state;
> +                     /* Mask to denote subcore sibling threads */
> +                     u8 subcore_sibling_mask;
> +             };
> +
> +             /* P9 specific fields */
> +             struct {
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +                     /* The PSSCR value that the kernel requested before going to stop */
> +                     u64 requested_psscr;
> +                     /* Flag to request this thread not to stop */
> +                     atomic_t dont_stop;
> +#endif
> +             };
> +     };
>  #endif
> 
>  #ifdef CONFIG_PPC_BOOK3S_64
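
(Per the "all siblings work on thread 0 paca" comment in this hunk, I take
it accesses go through something like the following; paca_ptrs and
cpu_first_thread_sibling() are existing kernel symbols, the helper name is
mine:)

        static unsigned long *example_core_idle_state(int cpu)
        {
                /* idle_state lives in the first sibling thread's paca */
                return &paca_ptrs[cpu_first_thread_sibling(cpu)]->idle_state;
        }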
> diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
> index 3351bcf42f2d..3120cca72e1f 100644
> --- a/arch/powerpc/include/asm/processor.h
> +++ b/arch/powerpc/include/asm/processor.h
> @@ -411,14 +411,17 @@ static inline unsigned long get_clean_sp(unsigned long sp, int is_32)
>  }
>  #endif
> 
> +/* asm stubs */
> +extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
> +extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
> +extern unsigned long isa206_idle_insn_mayloss(unsigned long type);
> +
>  extern unsigned long cpuidle_disable;
>  enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
> 
>  extern int powersave_nap;    /* set if nap mode can be used in idle loop */
> -extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/
> +
>  extern void power7_idle_type(unsigned long type);
> -extern unsigned long power9_idle_stop(unsigned long psscr_val);
> -extern unsigned long power9_offline_stop(unsigned long psscr_val);
>  extern void power9_idle_type(unsigned long stop_psscr_val,
>                             unsigned long stop_psscr_mask);
> 
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index c5b2aff0ce8e..10caa145f98b 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -168,6 +168,7 @@
>  #define PSSCR_ESL            0x00200000 /* Enable State Loss */
>  #define PSSCR_SD             0x00400000 /* Status Disable */
>  #define PSSCR_PLS    0xf000000000000000 /* Power-saving Level Status */
> +#define PSSCR_PLS_SHIFT      60
>  #define PSSCR_GUEST_VIS      0xf0000000000003ffUL /* Guest-visible PSSCR fields */
>  #define PSSCR_FAKE_SUSPEND   0x00000400 /* Fake-suspend bit (P9 DD2.2) */
>  #define PSSCR_FAKE_SUSPEND_LG        10         /* Fake-suspend bit position */
> @@ -758,10 +759,9 @@
>  #define        SRR1_WAKERESET        0x00100000 /* System reset */
>  #define   SRR1_WAKEHDBELL    0x000c0000 /* Hypervisor doorbell on P8 */
>  #define        SRR1_WAKESTATE        0x00030000 /* Powersave exit mask [46:47] */
> -#define        SRR1_WS_DEEPEST       0x00030000 /* Some resources not maintained,
> -                                       * may not be recoverable */
> -#define        SRR1_WS_DEEPER        0x00020000 /* Some resources not maintained */
> -#define        SRR1_WS_DEEP          0x00010000 /* All resources maintained */
> +#define        SRR1_WS_HVLOSS        0x00030000 /* HV resources not maintained */
> +#define        SRR1_WS_GPRLOSS       0x00020000 /* GPRs not maintained */
> +#define        SRR1_WS_NOLOSS        0x00010000 /* All resources maintained */
>  #define   SRR1_PROGTM                0x00200000 /* TM Bad Thing */
>  #define   SRR1_PROGFPE               0x00100000 /* Floating Point Enabled */
>  #define   SRR1_PROGILL               0x00080000 /* Illegal instruction */
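
(The new PSSCR_PLS_SHIFT pairs with the existing PSSCR_PLS mask, so wakeup
code can read the Power-saving Level Status, i.e. how deep the core
actually slept, along these lines; sketch only, the name is mine:)

        static unsigned long example_psscr_pls(u64 psscr)
        {
                /* PLS is PSSCR[0:3]; compare against the requested RL */
                return (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
        }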
> diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
> index 86a61e5f8285..167a59fda12e 100644
> --- a/arch/powerpc/kernel/asm-offsets.c
> +++ b/arch/powerpc/kernel/asm-offsets.c
> @@ -766,23 +766,6 @@ int main(void)
>       OFFSET(VCPU_TIMING_LAST_ENTER_TBL, kvm_vcpu, arch.timing_last_enter.tv32.tbl);
>  #endif
> 
> -#ifdef CONFIG_PPC_POWERNV
> -     OFFSET(PACA_CORE_IDLE_STATE_PTR, paca_struct, core_idle_state_ptr);
> -     OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state);
> -     OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask);
> -     OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
> -     OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
> -     OFFSET(PACA_DONT_STOP, paca_struct, dont_stop);
> -#define STOP_SPR(x, f)       OFFSET(x, paca_struct, stop_sprs.f)
> -     STOP_SPR(STOP_PID, pid);
> -     STOP_SPR(STOP_LDBAR, ldbar);
> -     STOP_SPR(STOP_FSCR, fscr);
> -     STOP_SPR(STOP_HFSCR, hfscr);
> -     STOP_SPR(STOP_MMCR1, mmcr1);
> -     STOP_SPR(STOP_MMCR2, mmcr2);
> -     STOP_SPR(STOP_MMCRA, mmcra);
> -#endif
> -
>       DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
>       DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE);
> 
> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
> index a5b8fbae56a0..7d54cfa5ca84 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -144,8 +144,11 @@ TRAMP_KVM(PACA_EXNMI, 0x100)
> 
>  #ifdef CONFIG_PPC_P7_NAP
>  EXC_COMMON_BEGIN(system_reset_idle_common)
> -     mfspr   r12,SPRN_SRR1
> -     b       pnv_powersave_wakeup
> +     mfspr   r3,SPRN_SRR1
> +#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +     bltlr   cr3     /* no state loss, return to idle caller */
> +#endif
> +     b       idle_return_gpr_loss
>  #endif
> 
>  /*
> @@ -427,17 +430,19 @@ EXC_COMMON_BEGIN(machine_check_idle_common)
>        * Then decrement MCE nesting after finishing with the stack.
>        */
>       ld      r3,_MSR(r1)
> +     ld      r4,_LINK(r1)
> 
>       lhz     r11,PACA_IN_MCE(r13)
>       subi    r11,r11,1
>       sth     r11,PACA_IN_MCE(r13)
> 
> -     /* Turn off the RI bit because SRR1 is used by idle wakeup code. */
> -     /* Recoverability could be improved by reducing the use of SRR1. */
> -     li      r11,0
> -     mtmsrd  r11,1
> -
> -     b       pnv_powersave_wakeup_mce
> +     mtlr    r4
> +     rlwinm  r10,r3,47-31,30,31
> +     cmpwi   cr3,r10,2
> +#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +     bltlr   cr3     /* no state loss, return to idle caller */
> +#endif
> +     b       idle_return_gpr_loss
>  #endif
>       /*
>        * Handle machine check early in real mode. We come here with
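
(For readers not fluent in rlwinm: the rlwinm/cmpwi pair above extracts
SRR1[46:47], the powersave exit field, into cr3, and the bltlr returns
straight to the idle caller when no state was lost. Roughly equivalent C,
using the SRR1_WS_* values from the reg.h hunk; function name mine:)

        static bool example_wake_state_lost(unsigned long srr1)
        {
                /* field value 1 = no loss, 2 = GPR loss, 3 = HV loss */
                return ((srr1 & SRR1_WAKESTATE) >> 16) >= 2;
        }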
> diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
> index 7f5ac2e8581b..af002b82145d 100644
> --- a/arch/powerpc/kernel/idle_book3s.S
> +++ b/arch/powerpc/kernel/idle_book3s.S
> @@ -1,519 +1,98 @@
>  /*
> - *  This file contains idle entry/exit functions for POWER7,
> - *  POWER8 and POWER9 CPUs.
> + *  Copyright 2018, IBM Corporation.
>   *
>   *  This program is free software; you can redistribute it and/or
>   *  modify it under the terms of the GNU General Public License
>   *  as published by the Free Software Foundation; either version
>   *  2 of the License, or (at your option) any later version.
> + *
> + *  This file contains general idle entry/exit functions to save
> + *  and restore stack and NVGPRs which allows C code to call idle
> + *  states that lose GPRs, and it will return transparently with
> + *  SRR1 wakeup reason return value.
> + *
> + *  The platform / CPU caller must ensure SPRs and any other non-GPR
> + *  state is saved and restored correctly, handle KVM, interrupts, etc.
>   */
> 
> -#include <linux/threads.h>
> -#include <asm/processor.h>
> -#include <asm/page.h>
> -#include <asm/cputable.h>
> -#include <asm/thread_info.h>
>  #include <asm/ppc_asm.h>
>  #include <asm/asm-offsets.h>
>  #include <asm/ppc-opcode.h>
> -#include <asm/hw_irq.h>
> -#include <asm/kvm_book3s_asm.h>
> -#include <asm/opal.h>
>  #include <asm/cpuidle.h>
> -#include <asm/exception-64s.h>
> -#include <asm/book3s/64/mmu-hash.h>
> -#include <asm/mmu.h>
> -#include <asm/asm-compat.h>
> -#include <asm/feature-fixups.h>
> -
> -#undef DEBUG
> -
> -/*
> - * Use unused space in the interrupt stack to save and restore
> - * registers for winkle support.
> - */
> -#define _MMCR0       GPR0
> -#define _SDR1        GPR3
> -#define _PTCR        GPR3
> -#define _RPR GPR4
> -#define _SPURR       GPR5
> -#define _PURR        GPR6
> -#define _TSCR        GPR7
> -#define _DSCR        GPR8
> -#define _AMOR        GPR9
> -#define _WORT        GPR10
> -#define _WORC        GPR11
> -#define _LPCR        GPR12
> -
> -#define PSSCR_EC_ESL_MASK_SHIFTED          (PSSCR_EC | PSSCR_ESL) >> 16
> -
> -     .text
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +#include <asm/kvm_book3s_asm.h>
> +#endif
> 
>  /*
> - * Used by threads before entering deep idle states. Saves SPRs
> - * in interrupt stack frame
> - */
> -save_sprs_to_stack:
> -     /*
> -      * Note all register i.e per-core, per-subcore or per-thread is saved
> -      * here since any thread in the core might wake up first
> -      */
> -BEGIN_FTR_SECTION
> -     /*
> -      * Note - SDR1 is dropped in Power ISA v3. Hence not restoring
> -      * SDR1 here
> -      */
> -     mfspr   r3,SPRN_PTCR
> -     std     r3,_PTCR(r1)
> -     mfspr   r3,SPRN_LPCR
> -     std     r3,_LPCR(r1)
> -FTR_SECTION_ELSE
> -     mfspr   r3,SPRN_SDR1
> -     std     r3,_SDR1(r1)
> -ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
> -     mfspr   r3,SPRN_RPR
> -     std     r3,_RPR(r1)
> -     mfspr   r3,SPRN_SPURR
> -     std     r3,_SPURR(r1)
> -     mfspr   r3,SPRN_PURR
> -     std     r3,_PURR(r1)
> -     mfspr   r3,SPRN_TSCR
> -     std     r3,_TSCR(r1)
> -     mfspr   r3,SPRN_DSCR
> -     std     r3,_DSCR(r1)
> -     mfspr   r3,SPRN_AMOR
> -     std     r3,_AMOR(r1)
> -     mfspr   r3,SPRN_WORT
> -     std     r3,_WORT(r1)
> -     mfspr   r3,SPRN_WORC
> -     std     r3,_WORC(r1)
> -/*
> - * On POWER9, there are idle states such as stop4, invoked via cpuidle,
> - * that lose hypervisor resources. In such cases, we need to save
> - * additional SPRs before entering those idle states so that they can
> - * be restored to their older values on wakeup from the idle state.
> + * Desired PSSCR in r3
>   *
> - * On POWER8, the only such deep idle state is winkle which is used
> - * only in the context of CPU-Hotplug, where these additional SPRs are
> - * reinitiazed to a sane value. Hence there is no need to save/restore
> - * these SPRs.
> - */
> -BEGIN_FTR_SECTION
> -     blr
> -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
> -
> -power9_save_additional_sprs:
> -     mfspr   r3, SPRN_PID
> -     mfspr   r4, SPRN_LDBAR
> -     std     r3, STOP_PID(r13)
> -     std     r4, STOP_LDBAR(r13)
> -
> -     mfspr   r3, SPRN_FSCR
> -     mfspr   r4, SPRN_HFSCR
> -     std     r3, STOP_FSCR(r13)
> -     std     r4, STOP_HFSCR(r13)
> -
> -     mfspr   r3, SPRN_MMCRA
> -     mfspr   r4, SPRN_MMCR0
> -     std     r3, STOP_MMCRA(r13)
> -     std     r4, _MMCR0(r1)
> -
> -     mfspr   r3, SPRN_MMCR1
> -     mfspr   r4, SPRN_MMCR2
> -     std     r3, STOP_MMCR1(r13)
> -     std     r4, STOP_MMCR2(r13)
> -     blr
> -
> -power9_restore_additional_sprs:
> -     ld      r3,_LPCR(r1)
> -     ld      r4, STOP_PID(r13)
> -     mtspr   SPRN_LPCR,r3
> -     mtspr   SPRN_PID, r4
> -
> -     ld      r3, STOP_LDBAR(r13)
> -     ld      r4, STOP_FSCR(r13)
> -     mtspr   SPRN_LDBAR, r3
> -     mtspr   SPRN_FSCR, r4
> -
> -     ld      r3, STOP_HFSCR(r13)
> -     ld      r4, STOP_MMCRA(r13)
> -     mtspr   SPRN_HFSCR, r3
> -     mtspr   SPRN_MMCRA, r4
> -
> -     ld      r3, _MMCR0(r1)
> -     ld      r4, STOP_MMCR1(r13)
> -     mtspr   SPRN_MMCR0, r3
> -     mtspr   SPRN_MMCR1, r4
> -
> -     ld      r3, STOP_MMCR2(r13)
> -     ld      r4, PACA_SPRG_VDSO(r13)
> -     mtspr   SPRN_MMCR2, r3
> -     mtspr   SPRN_SPRG3, r4
> -     blr
> -
> -/*
> - * Used by threads when the lock bit of core_idle_state is set.
> - * Threads will spin in HMT_LOW until the lock bit is cleared.
> - * r14 - pointer to core_idle_state
> - * r15 - used to load contents of core_idle_state
> - * r9  - used as a temporary variable
> + * No state will be lost regardless of wakeup mechanism (interrupt or NIA).
> + *
> + * An EC=0 type wakeup will return with a value of 0. SRESET wakeup (which can
> + * happen with xscom SRESET and possibly MCE) may clobber volatiles except LR,
> + * and must blr, to return to caller with r3 set according to caller's expected
> + * return code (for Book3S/64 that is SRR1).
>   */
> -
> -core_idle_lock_held:
> -     HMT_LOW
> -3:   lwz     r15,0(r14)
> -     andis.  r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
> -     bne     3b
> -     HMT_MEDIUM
> -     lwarx   r15,0,r14
> -     andis.  r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
> -     bne-    core_idle_lock_held
> +_GLOBAL(isa300_idle_stop_noloss)
> +     mtspr   SPRN_PSSCR,r3
> +     PPC_STOP
> +     li      r3,0
>       blr
> 
>  /*
> - * Pass requested state in r3:
> - *   r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8
> - *      - Requested PSSCR value in POWER9
> + * Desired PSSCR in r3
> + *
> + * GPRs may be lost, so they are saved here. Wakeup is by interrupt only.
> + * The SRESET wakeup returns to this function's caller by calling
> + * idle_return_gpr_loss with r3 set to desired return value.
> + *
> + * A wakeup without GPR loss may alternatively be handled as in
> + * isa300_idle_stop_noloss and blr directly, as an optimisation.
>   *
> - * Address of idle handler to branch to in realmode in r4
> + * The caller is responsible for saving/restoring SPRs, MSR, timebase,
> + * etc.
>   */
> -pnv_powersave_common:
> -     /* Use r3 to pass state nap/sleep/winkle */
> -     /* NAP is a state loss, we create a regs frame on the
> -      * stack, fill it up with the state we care about and
> -      * stick a pointer to it in PACAR1. We really only
> -      * need to save PC, some CR bits and the NV GPRs,
> -      * but for now an interrupt frame will do.
> -      */
> -     mtctr   r4
> -
> -     mflr    r0
> -     std     r0,16(r1)
> -     stdu    r1,-INT_FRAME_SIZE(r1)
> -     std     r0,_LINK(r1)
> -     std     r0,_NIP(r1)
> -
> -     /* We haven't lost state ... yet */
> -     li      r0,0
> -     stb     r0,PACA_NAPSTATELOST(r13)
> -
> -     /* Continue saving state */
> -     SAVE_GPR(2, r1)
> -     SAVE_NVGPRS(r1)
> -     mfcr    r5
> -     std     r5,_CCR(r1)
> +_GLOBAL(isa300_idle_stop_mayloss)
> +     mtspr   SPRN_PSSCR,r3
>       std     r1,PACAR1(r13)
> -
> -BEGIN_FTR_SECTION
> -     /*
> -      * POWER9 does not require real mode to stop, and presently does not
> -      * set hwthread_state for KVM (threads don't share MMU context), so
> -      * we can remain in virtual mode for this.
> -      */
> -     bctr
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
> -     /*
> -      * POWER8
> -      * Go to real mode to do the nap, as required by the architecture.
> -      * Also, we need to be in real mode before setting hwthread_state,
> -      * because as soon as we do that, another thread can switch
> -      * the MMU context to the guest.
> -      */
> -     LOAD_REG_IMMEDIATE(r7, MSR_IDLE)
> -     mtmsrd  r7,0
> -     bctr
> -
> -/*
> - * This is the sequence required to execute idle instructions, as
> - * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
> - */
> -#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST)                        \
> -     /* Magic NAP/SLEEP/WINKLE mode enter sequence */        \
> -     std     r0,0(r1);                                       \
> -     ptesync;                                                \
> -     ld      r0,0(r1);                                       \
> -236: cmpd    cr0,r0,r0;                                      \
> -     bne     236b;                                           \
> -     IDLE_INST;
> -
> -
> -     .globl pnv_enter_arch207_idle_mode
> -pnv_enter_arch207_idle_mode:
> -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> -     /* Tell KVM we're entering idle */
> -     li      r4,KVM_HWTHREAD_IN_IDLE
> -     /******************************************************/
> -     /*  N O T E   W E L L    ! ! !    N O T E   W E L L   */
> -     /* The following store to HSTATE_HWTHREAD_STATE(r13)  */
> -     /* MUST occur in real mode, i.e. with the MMU off,    */
> -     /* and the MMU must stay off until we clear this flag */
> -     /* and test HSTATE_HWTHREAD_REQ(r13) in               */
> -     /* pnv_powersave_wakeup in this file.                 */
> -     /* The reason is that another thread can switch the   */
> -     /* MMU to a guest context whenever this flag is set   */
> -     /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
> -     /* that would potentially cause this thread to start  */
> -     /* executing instructions from guest memory in        */
> -     /* hypervisor mode, leading to a host crash or data   */
> -     /* corruption, or worse.                              */
> -     /******************************************************/
> -     stb     r4,HSTATE_HWTHREAD_STATE(r13)
> -#endif
> -     stb     r3,PACA_THREAD_IDLE_STATE(r13)
> -     cmpwi   cr3,r3,PNV_THREAD_SLEEP
> -     bge     cr3,2f
> -     IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
> -     /* No return */
> -2:
> -     /* Sleep or winkle */
> -     lbz     r7,PACA_THREAD_MASK(r13)
> -     ld      r14,PACA_CORE_IDLE_STATE_PTR(r13)
> -     li      r5,0
> -     beq     cr3,3f
> -     lis     r5,PNV_CORE_IDLE_WINKLE_COUNT@h
> -3:
> -lwarx_loop1:
> -     lwarx   r15,0,r14
> -
> -     andis.  r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
> -     bnel-   core_idle_lock_held
> -
> -     add     r15,r15,r5                      /* Add if winkle */
> -     andc    r15,r15,r7                      /* Clear thread bit */
> -
> -     andi.   r9,r15,PNV_CORE_IDLE_THREAD_BITS
> -
> -/*
> - * If cr0 = 0, then current thread is the last thread of the core entering
> - * sleep. Last thread needs to execute the hardware bug workaround code if
> - * required by the platform.
> - * Make the workaround call unconditionally here. The below branch call is
> - * patched out when the idle states are discovered if the platform does not
> - * require it.
> - */
> -.global pnv_fastsleep_workaround_at_entry
> -pnv_fastsleep_workaround_at_entry:
> -     beq     fastsleep_workaround_at_entry
> -
> -     stwcx.  r15,0,r14
> -     bne-    lwarx_loop1
> -     isync
> -
> -common_enter: /* common code for all the threads entering sleep or winkle */
> -     bgt     cr3,enter_winkle
> -     IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
> -
> -fastsleep_workaround_at_entry:
> -     oris    r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
> -     stwcx.  r15,0,r14
> -     bne-    lwarx_loop1
> -     isync
> -
> -     /* Fast sleep workaround */
> -     li      r3,1
> -     li      r4,1
> -     bl      opal_config_cpu_idle_state
> -
> -     /* Unlock */
> -     xoris   r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
> -     lwsync
> -     stw     r15,0(r14)
> -     b       common_enter
> -
> -enter_winkle:
> -     bl      save_sprs_to_stack
> -
> -     IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
> -
> -/*
> - * r3 - PSSCR value corresponding to the requested stop state.
> - */
> -power_enter_stop:
> -/*
> - * Check if we are executing the lite variant with ESL=EC=0
> - */
> -     andis.   r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
> -     clrldi   r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
> -     bne      .Lhandle_esl_ec_set
> +     mflr    r4
> +     mfcr    r5
> +     /* use stack red zone rather than a new frame for saving regs */
> +     std     r2,-8*0(r1)
> +     std     r14,-8*1(r1)
> +     std     r15,-8*2(r1)
> +     std     r16,-8*3(r1)
> +     std     r17,-8*4(r1)
> +     std     r18,-8*5(r1)
> +     std     r19,-8*6(r1)
> +     std     r20,-8*7(r1)
> +     std     r21,-8*8(r1)
> +     std     r22,-8*9(r1)
> +     std     r23,-8*10(r1)
> +     std     r24,-8*11(r1)
> +     std     r25,-8*12(r1)
> +     std     r26,-8*13(r1)
> +     std     r27,-8*14(r1)
> +     std     r28,-8*15(r1)
> +     std     r29,-8*16(r1)
> +     std     r30,-8*17(r1)
> +     std     r31,-8*18(r1)
> +     std     r4,-8*19(r1)
> +     std     r5,-8*20(r1)
> +     /* 168 bytes */
>       PPC_STOP
> -     li      r3,0  /* Since we didn't lose state, return 0 */
> -     std     r3, PACA_REQ_PSSCR(r13)
> -
> -     /*
> -      * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
> -      * it can determine if the wakeup reason is an HMI in
> -      * CHECK_HMI_INTERRUPT.
> -      *
> -      * However, when we wakeup with ESL=0, SRR1 will not contain the wakeup
> -      * reason, so there is no point setting r12 to SRR1.
> -      *
> -      * Further, we clear r12 here, so that we don't accidentally enter the
> -      * HMI in pnv_wakeup_noloss() if the value of r12[42:45] == WAKE_HMI.
> -      */
> -     li      r12, 0
> -     b       pnv_wakeup_noloss
> -
> -.Lhandle_esl_ec_set:
> -BEGIN_FTR_SECTION
> -     /*
> -      * POWER9 DD2.0 or earlier can incorrectly set PMAO when waking up after
> -      * a state-loss idle. Saving and restoring MMCR0 over idle is a
> -      * workaround.
> -      */
> -     mfspr   r4,SPRN_MMCR0
> -     std     r4,_MMCR0(r1)
> -END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
> -
> -/*
> - * Check if the requested state is a deep idle state.
> - */
> -     LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
> -     ld      r4,ADDROFF(pnv_first_deep_stop_state)(r5)
> -     cmpd    r3,r4
> -     bge     .Lhandle_deep_stop
> -     PPC_STOP        /* Does not return (system reset interrupt) */
> -
> -.Lhandle_deep_stop:
> -/*
> - * Entering deep idle state.
> - * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to
> - * stack and enter stop
> - */
> -     lbz     r7,PACA_THREAD_MASK(r13)
> -     ld      r14,PACA_CORE_IDLE_STATE_PTR(r13)
> -
> -lwarx_loop_stop:
> -     lwarx   r15,0,r14
> -     andis.  r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
> -     bnel-   core_idle_lock_held
> -     andc    r15,r15,r7                      /* Clear thread bit */
> -
> -     stwcx.  r15,0,r14
> -     bne-    lwarx_loop_stop
> -     isync
> -
> -     bl      save_sprs_to_stack
> -
> -     PPC_STOP        /* Does not return (system reset interrupt) */
> -
> -/*
> - * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
> - * r3 contains desired idle state (PNV_THREAD_NAP/SLEEP/WINKLE).
> - */
> -_GLOBAL(power7_idle_insn)
> -     /* Now check if user or arch enabled NAP mode */
> -     LOAD_REG_ADDR(r4, pnv_enter_arch207_idle_mode)
> -     b       pnv_powersave_common
> -
> -#define CHECK_HMI_INTERRUPT                                          \
> -BEGIN_FTR_SECTION_NESTED(66);                                                \
> -     rlwinm  r0,r12,45-31,0xf;  /* extract wake reason field (P8) */ \
> -FTR_SECTION_ELSE_NESTED(66);                                         \
> -     rlwinm  r0,r12,45-31,0xe;  /* P7 wake reason field is 3 bits */ \
> -ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66);             \
> -     cmpwi   r0,0xa;                 /* Hypervisor maintenance ? */  \
> -     bne+    20f;                                                    \
> -     /* Invoke opal call to handle hmi */                            \
> -     ld      r2,PACATOC(r13);                                        \
> -     ld      r1,PACAR1(r13);                                         \
> -     std     r3,ORIG_GPR3(r1);       /* Save original r3 */          \
> -     li      r3,0;                   /* NULL argument */             \
> -     bl      hmi_exception_realmode;                                 \
> -     nop;                                                            \
> -     ld      r3,ORIG_GPR3(r1);       /* Restore original r3 */       \
> -20:  nop;
> +     b       .       /* catch bugs */
> 
>  /*
> - * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
> - * r3 contains desired PSSCR register value.
> + * Desired return value in r3
>   *
> - * Offline (CPU unplug) case also must notify KVM that the CPU is
> - * idle.
> - */
> -_GLOBAL(power9_offline_stop)
> -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> -     /*
> -      * Tell KVM we're entering idle.
> -      * This does not have to be done in real mode because the P9 MMU
> -      * is independent per-thread. Some steppings share radix/hash mode
> -      * between threads, but in that case KVM has a barrier sync in real
> -      * mode before and after switching between radix and hash.
> -      */
> -     li      r4,KVM_HWTHREAD_IN_IDLE
> -     stb     r4,HSTATE_HWTHREAD_STATE(r13)
> -#endif
> -     /* fall through */
> -
> -_GLOBAL(power9_idle_stop)
> -     std     r3, PACA_REQ_PSSCR(r13)
> -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> -BEGIN_FTR_SECTION
> -     sync
> -     lwz     r5, PACA_DONT_STOP(r13)
> -     cmpwi   r5, 0
> -     bne     1f
> -END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
> -#endif
> -     mtspr   SPRN_PSSCR,r3
> -     LOAD_REG_ADDR(r4,power_enter_stop)
> -     b       pnv_powersave_common
> -     /* No return */
> -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> -1:
> -     /*
> -      * We get here when TM / thread reconfiguration bug workaround
> -      * code wants to get the CPU into SMT4 mode, and therefore
> -      * we are being asked not to stop.
> -      */
> -     li      r3, 0
> -     std     r3, PACA_REQ_PSSCR(r13)
> -     blr             /* return 0 for wakeup cause / SRR1 value */
> -#endif
> -
> -/*
> - * Called from machine check handler for powersave wakeups.
> - * Low level machine check processing has already been done. Now just
> - * go through the wake up path to get everything in order.
> + * The idle wakeup SRESET interrupt can call this after calling
> + * to return to the idle sleep function caller with r3 as the return code.
>   *
> - * r3 - The original SRR1 value.
> - * Original SRR[01] have been clobbered.
> - * MSR_RI is clear.
> + * This must not be used if idle was entered via a _noloss function (use
> + * a simple blr instead).
>   */
> -.global pnv_powersave_wakeup_mce
> -pnv_powersave_wakeup_mce:
> -     /* Set cr3 for pnv_powersave_wakeup */
> -     rlwinm  r11,r3,47-31,30,31
> -     cmpwi   cr3,r11,2
> -
> -     /*
> -      * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake
> -      * reason into r12, which allows reuse of the system reset wakeup
> -      * code without being mistaken for another type of wakeup.
> -      */
> -     oris    r12,r3,SRR1_WAKEMCE_RESVD@h
> -
> -     b       pnv_powersave_wakeup
> -
> -/*
> - * Called from reset vector for powersave wakeups.
> - * cr3 - set to gt if waking up with partial/complete hypervisor state loss
> - * r12 - SRR1
> - */
> -.global pnv_powersave_wakeup
> -pnv_powersave_wakeup:
> -     ld      r2, PACATOC(r13)
> -
> -BEGIN_FTR_SECTION
> -     bl      pnv_restore_hyp_resource_arch300
> -FTR_SECTION_ELSE
> -     bl      pnv_restore_hyp_resource_arch207
> -ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
> -
> -     li      r0,PNV_THREAD_RUNNING
> -     stb     r0,PACA_THREAD_IDLE_STATE(r13)  /* Clear thread state */
> -
> -     mr      r3,r12
> -
> +_GLOBAL(idle_return_gpr_loss)
>  #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
>       lbz     r0,HSTATE_HWTHREAD_STATE(r13)
>       cmpwi   r0,KVM_HWTHREAD_IN_KERNEL
> @@ -527,430 +106,98 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
>       beq     1f
>       b       kvm_start_guest
>  1:
> +     lbz     r0,PACA_NAPSTATELOST(r13)
> +     cmpwi   r0,0
> +     bne     2f
> +     bltlr   cr3
> +2:
>  #endif
> -
> -     /* Return SRR1 from power7_nap() */
> -     blt     cr3,pnv_wakeup_noloss
> -     b       pnv_wakeup_loss
> +     ld      r1,PACAR1(r13)
> +     ld      r4,-8*19(r1)
> +     ld      r5,-8*20(r1)
> +     mtlr    r4
> +     mtcr    r5
> +     ld      r2,-8*0(r1)
> +     ld      r14,-8*1(r1)
> +     ld      r15,-8*2(r1)
> +     ld      r16,-8*3(r1)
> +     ld      r17,-8*4(r1)
> +     ld      r18,-8*5(r1)
> +     ld      r19,-8*6(r1)
> +     ld      r20,-8*7(r1)
> +     ld      r21,-8*8(r1)
> +     ld      r22,-8*9(r1)
> +     ld      r23,-8*10(r1)
> +     ld      r24,-8*11(r1)
> +     ld      r25,-8*12(r1)
> +     ld      r26,-8*13(r1)
> +     ld      r27,-8*14(r1)
> +     ld      r28,-8*15(r1)
> +     ld      r29,-8*16(r1)
> +     ld      r30,-8*17(r1)
> +     ld      r31,-8*18(r1)
> +     blr
> 
>  /*
> - * Check whether we have woken up with hypervisor state loss.
> - * If yes, restore hypervisor state and return back to link.
> - *
> - * cr3 - set to gt if waking up with partial/complete hypervisor state loss
> + * This is the sequence required to execute idle instructions, as
> + * specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
>   */
> -pnv_restore_hyp_resource_arch300:
> -     /*
> -      * Workaround for POWER9, if we lost resources, the ERAT
> -      * might have been mixed up and needs flushing. We also need
> -      * to reload MMCR0 (see comment above). We also need to set
> -      * then clear bit 60 in MMCRA to ensure the PMU starts running.
> -      */
> -     blt     cr3,1f
> -BEGIN_FTR_SECTION
> -     PPC_INVALIDATE_ERAT
> -     ld      r1,PACAR1(r13)
> -     ld      r4,_MMCR0(r1)
> -     mtspr   SPRN_MMCR0,r4
> -END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
> -     mfspr   r4,SPRN_MMCRA
> -     ori     r4,r4,(1 << (63-60))
> -     mtspr   SPRN_MMCRA,r4
> -     xori    r4,r4,(1 << (63-60))
> -     mtspr   SPRN_MMCRA,r4
> -1:
> -     /*
> -      * POWER ISA 3. Use PSSCR to determine if we
> -      * are waking up from deep idle state
> -      */
> -     LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
> -     ld      r4,ADDROFF(pnv_first_deep_stop_state)(r5)
> -
> -     /*
> -      * 0-3 bits correspond to Power-Saving Level Status
> -      * which indicates the idle state we are waking up from
> -      */
> -     mfspr   r5, SPRN_PSSCR
> -     rldicl  r5,r5,4,60
> -     li      r0, 0           /* clear requested_psscr to say we're awake */
> -     std     r0, PACA_REQ_PSSCR(r13)
> -     cmpd    cr4,r5,r4
> -     bge     cr4,pnv_wakeup_tb_loss /* returns to caller */
> -
> -     blr     /* Waking up without hypervisor state loss. */
> -
> -/* Same calling convention as arch300 */
> -pnv_restore_hyp_resource_arch207:
> -     /*
> -      * POWER ISA 2.07 or less.
> -      * Check if we slept with sleep or winkle.
> -      */
> -     lbz     r4,PACA_THREAD_IDLE_STATE(r13)
> -     cmpwi   cr2,r4,PNV_THREAD_NAP
> -     bgt     cr2,pnv_wakeup_tb_loss  /* Either sleep or Winkle */
> -
> -     /*
> -      * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
> -      * up from nap. At this stage CR3 shouldn't contains 'gt' since that
> -      * indicates we are waking with hypervisor state loss from nap.
> -      */
> -     bgt     cr3,.
> -
> -     blr     /* Waking up without hypervisor state loss */
> +#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST)                        \
> +     /* Magic NAP/SLEEP/WINKLE mode enter sequence */        \
> +     std     r0,0(r1);                                       \
> +     ptesync;                                                \
> +     ld      r0,0(r1);                                       \
> +236: cmpd    cr0,r0,r0;                                      \
> +     bne     236b;                                           \
> +     IDLE_INST;                                              \
> +     b       .       /* catch bugs */
> 
>  /*
> - * Called if waking up from idle state which can cause either partial or
> - * complete hyp state loss.
> - * In POWER8, called if waking up from fastsleep or winkle
> - * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state
> + * Desired instruction type in r3
>   *
> - * r13 - PACA
> - * cr3 - gt if waking up with partial/complete hypervisor state loss
> + * GPRs may be lost, so they are saved here. Wakeup is by interrupt only.
> + * The SRESET wakeup returns to this function's caller by calling
> + * idle_return_gpr_loss with r3 set to desired return value.
>   *
> - * If ISA300:
> - * cr4 - gt or eq if waking up from complete hypervisor state loss.
> + * A wakeup without GPR loss may alternatively be handled as in
> + * isa300_idle_stop_noloss and blr directly, as an optimisation.
>   *
> - * If ISA207:
> - * r4 - PACA_THREAD_IDLE_STATE
> - */
> -pnv_wakeup_tb_loss:
> -     ld      r1,PACAR1(r13)
> -     /*
> -      * Before entering any idle state, the NVGPRs are saved in the stack.
> -      * If there was a state loss, or PACA_NAPSTATELOST was set, then the
> -      * NVGPRs are restored. If we are here, it is likely that state is lost,
> -      * but not guaranteed -- neither ISA207 nor ISA300 tests to reach
> -      * here are the same as the test to restore NVGPRS:
> -      * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300,
> -      * and SRR1 test for restoring NVGPRs.
> -      *
> -      * We are about to clobber NVGPRs now, so set NAPSTATELOST to
> -      * guarantee they will always be restored. This might be tightened
> -      * with careful reading of specs (particularly for ISA300) but this
> -      * is already a slow wakeup path and it's simpler to be safe.
> -      */
> -     li      r0,1
> -     stb     r0,PACA_NAPSTATELOST(r13)
> -
> -     /*
> -      *
> -      * Save SRR1 and LR in NVGPRs as they might be clobbered in
> -      * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required
> -      * to determine the wakeup reason if we branch to kvm_start_guest. LR
> -      * is required to return back to reset vector after hypervisor state
> -      * restore is complete.
> -      */
> -     mr      r19,r12
> -     mr      r18,r4
> -     mflr    r17
> -BEGIN_FTR_SECTION
> -     CHECK_HMI_INTERRUPT
> -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> -
> -     ld      r14,PACA_CORE_IDLE_STATE_PTR(r13)
> -     lbz     r7,PACA_THREAD_MASK(r13)
> -
> -     /*
> -      * Take the core lock to synchronize against other threads.
> -      *
> -      * Lock bit is set in one of the 2 cases-
> -      * a. In the sleep/winkle enter path, the last thread is executing
> -      * fastsleep workaround code.
> -      * b. In the wake up path, another thread is executing fastsleep
> -      * workaround undo code or resyncing timebase or restoring context
> -      * In either case loop until the lock bit is cleared.
> -      */
> -1:
> -     lwarx   r15,0,r14
> -     andis.  r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
> -     bnel-   core_idle_lock_held
> -     oris    r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
> -     stwcx.  r15,0,r14
> -     bne-    1b
> -     isync
> -
> -     andi.   r9,r15,PNV_CORE_IDLE_THREAD_BITS
> -     cmpwi   cr2,r9,0
> -
> -     /*
> -      * At this stage
> -      * cr2 - eq if first thread to wakeup in core
> -      * cr3-  gt if waking up with partial/complete hypervisor state loss
> -      * ISA300:
> -      * cr4 - gt or eq if waking up from complete hypervisor state loss.
> -      */
> -
> -BEGIN_FTR_SECTION
> -     /*
> -      * Were we in winkle?
> -      * If yes, check if all threads were in winkle, decrement our
> -      * winkle count, set all thread winkle bits if all were in winkle.
> -      * Check if our thread has a winkle bit set, and set cr4 accordingly
> -      * (to match ISA300, above). Pseudo-code for core idle state
> -      * transitions for ISA207 is as follows (everything happens atomically
> -      * due to store conditional and/or lock bit):
> -      *
> -      * nap_idle() { }
> -      * nap_wake() { }
> -      *
> -      * sleep_idle()
> -      * {
> -      *      core_idle_state &= ~thread_in_core
> -      * }
> -      *
> -      * sleep_wake()
> -      * {
> -      *     bool first_in_core, first_in_subcore;
> -      *
> -      *     first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
> -      *     first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
> -      *
> -      *     core_idle_state |= thread_in_core;
> -      * }
> -      *
> -      * winkle_idle()
> -      * {
> -      *      core_idle_state &= ~thread_in_core;
> -      *      core_idle_state += 1 << WINKLE_COUNT_SHIFT;
> -      * }
> -      *
> -      * winkle_wake()
> -      * {
> -      *     bool first_in_core, first_in_subcore, winkle_state_lost;
> -      *
> -      *     first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
> -      *     first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
> -      *
> -      *     core_idle_state |= thread_in_core;
> -      *
> -      *     if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SIHFT))
> -      *         core_idle_state |= THREAD_WINKLE_BITS;
> -      *     core_idle_state -= 1 << WINKLE_COUNT_SHIFT;
> -      *
> -      *     winkle_state_lost = core_idle_state &
> -      *                              (thread_in_core << WINKLE_THREAD_SHIFT);
> -      *     core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT);
> -      * }
> -      *
> -      */
> -     cmpwi   r18,PNV_THREAD_WINKLE
> -     bne     2f
> -     andis.  r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h
> -     subis   r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h
> -     beq     2f
> -     ori     r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */
> -2:
> -     /* Shift thread bit to winkle mask, then test if this thread is set,
> -      * and remove it from the winkle bits */
> -     slwi    r8,r7,8
> -     and     r8,r8,r15
> -     andc    r15,r15,r8
> -     cmpwi   cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */
> -
> -     lbz     r4,PACA_SUBCORE_SIBLING_MASK(r13)
> -     and     r4,r4,r15
> -     cmpwi   r4,0    /* Check if first in subcore */
> -
> -     or      r15,r15,r7              /* Set thread bit */
> -     beq     first_thread_in_subcore
> -END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
> -
> -     or      r15,r15,r7              /* Set thread bit */
> -     beq     cr2,first_thread_in_core
> -
> -     /* Not first thread in core or subcore to wake up */
> -     b       clear_lock
> -
> -first_thread_in_subcore:
> -     /*
> -      * If waking up from sleep, subcore state is not lost. Hence
> -      * skip subcore state restore
> -      */
> -     blt     cr4,subcore_state_restored
> -
> -     /* Restore per-subcore state */
> -     ld      r4,_SDR1(r1)
> -     mtspr   SPRN_SDR1,r4
> -
> -     ld      r4,_RPR(r1)
> -     mtspr   SPRN_RPR,r4
> -     ld      r4,_AMOR(r1)
> -     mtspr   SPRN_AMOR,r4
> -
> -subcore_state_restored:
> -     /*
> -      * Check if the thread is also the first thread in the core. If not,
> -      * skip to clear_lock.
> -      */
> -     bne     cr2,clear_lock
> -
> -first_thread_in_core:
> -
> -     /*
> -      * First thread in the core waking up from any state which can cause
> -      * partial or complete hypervisor state loss. It needs to
> -      * call the fastsleep workaround code if the platform requires it.
> -      * Call it unconditionally here. The below branch instruction will
> -      * be patched out if the platform does not have fastsleep or does not
> -      * require the workaround. Patching will be performed during the
> -      * discovery of idle-states.
> -      */
> -.global pnv_fastsleep_workaround_at_exit
> -pnv_fastsleep_workaround_at_exit:
> -     b       fastsleep_workaround_at_exit
> -
> -timebase_resync:
> -     /*
> -      * Use cr3 which indicates that we are waking up with at least partial
> -      * hypervisor state loss to determine if TIMEBASE RESYNC is needed.
> -      */
> -     ble     cr3,.Ltb_resynced
> -     /* Time base re-sync */
> -     bl      opal_resync_timebase;
> -     /*
> -      * If waking up from sleep (POWER8), per core state
> -      * is not lost, skip to clear_lock.
> -      */
> -.Ltb_resynced:
> -     blt     cr4,clear_lock
> -
> -     /*
> -      * First thread in the core to wake up and its waking up with
> -      * complete hypervisor state loss. Restore per core hypervisor
> -      * state.
> -      */
> -BEGIN_FTR_SECTION
> -     ld      r4,_PTCR(r1)
> -     mtspr   SPRN_PTCR,r4
> -     ld      r4,_RPR(r1)
> -     mtspr   SPRN_RPR,r4
> -     ld      r4,_AMOR(r1)
> -     mtspr   SPRN_AMOR,r4
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
> -
> -     ld      r4,_TSCR(r1)
> -     mtspr   SPRN_TSCR,r4
> -     ld      r4,_WORC(r1)
> -     mtspr   SPRN_WORC,r4
> -
> -clear_lock:
> -     xoris   r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
> -     lwsync
> -     stw     r15,0(r14)
> -
> -common_exit:
> -     /*
> -      * Common to all threads.
> -      *
> -      * If waking up from sleep, hypervisor state is not lost. Hence
> -      * skip hypervisor state restore.
> -      */
> -     blt     cr4,hypervisor_state_restored
> -
> -     /* Waking up from winkle */
> -
> -BEGIN_MMU_FTR_SECTION
> -     b       no_segments
> -END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
> -     /* Restore SLB  from PACA */
> -     ld      r8,PACA_SLBSHADOWPTR(r13)
> -
> -     .rept   SLB_NUM_BOLTED
> -     li      r3, SLBSHADOW_SAVEAREA
> -     LDX_BE  r5, r8, r3
> -     addi    r3, r3, 8
> -     LDX_BE  r6, r8, r3
> -     andis.  r7,r5,SLB_ESID_V@h
> -     beq     1f
> -     slbmte  r6,r5
> -1:   addi    r8,r8,16
> -     .endr
> -no_segments:
> -
> -     /* Restore per thread state */
> -
> -     ld      r4,_SPURR(r1)
> -     mtspr   SPRN_SPURR,r4
> -     ld      r4,_PURR(r1)
> -     mtspr   SPRN_PURR,r4
> -     ld      r4,_DSCR(r1)
> -     mtspr   SPRN_DSCR,r4
> -     ld      r4,_WORT(r1)
> -     mtspr   SPRN_WORT,r4
> -
> -     /* Call cur_cpu_spec->cpu_restore() */
> -     LOAD_REG_ADDR(r4, cur_cpu_spec)
> -     ld      r4,0(r4)
> -     ld      r12,CPU_SPEC_RESTORE(r4)
> -#ifdef PPC64_ELF_ABI_v1
> -     ld      r12,0(r12)
> -#endif
> -     mtctr   r12
> -     bctrl
> -
> -/*
> - * On POWER9, we can come here on wakeup from a cpuidle stop state.
> - * Hence restore the additional SPRs to the saved value.
> + * The caller is responsible for saving/restoring SPRs, MSR, timebase,
> + * etc.
>   *
> - * On POWER8, we come here only on winkle. Since winkle is used
> - * only in the case of CPU-Hotplug, we don't need to restore
> - * the additional SPRs.
> - */
> -BEGIN_FTR_SECTION
> -     bl      power9_restore_additional_sprs
> -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
> -hypervisor_state_restored:
> -
> -     mr      r12,r19
> -     mtlr    r17
> -     blr             /* return to pnv_powersave_wakeup */
> -
> -fastsleep_workaround_at_exit:
> -     li      r3,1
> -     li      r4,0
> -     bl      opal_config_cpu_idle_state
> -     b       timebase_resync
> -
> -/*
> - * R3 here contains the value that will be returned to the caller
> - * of power7_nap.
> - * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
> + * This must be called in real-mode (MSR_IDLE).
>   */
> -.global pnv_wakeup_loss
> -pnv_wakeup_loss:
> -     ld      r1,PACAR1(r13)
> -BEGIN_FTR_SECTION
> -     CHECK_HMI_INTERRUPT
> -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> -     REST_NVGPRS(r1)
> -     REST_GPR(2, r1)
> -     ld      r4,PACAKMSR(r13)
> -     ld      r5,_LINK(r1)
> -     ld      r6,_CCR(r1)
> -     addi    r1,r1,INT_FRAME_SIZE
> -     mtlr    r5
> -     mtcr    r6
> -     mtmsrd  r4
> -     blr
> +_GLOBAL(isa206_idle_insn_mayloss)
> +     std     r1,PACAR1(r13)
> +     mflr    r4
> +     mfcr    r5
> +     /* use stack red zone rather than a new frame for saving regs */
> +     std     r2,-8*0(r1)
> +     std     r14,-8*1(r1)
> +     std     r15,-8*2(r1)
> +     std     r16,-8*3(r1)
> +     std     r17,-8*4(r1)
> +     std     r18,-8*5(r1)
> +     std     r19,-8*6(r1)
> +     std     r20,-8*7(r1)
> +     std     r21,-8*8(r1)
> +     std     r22,-8*9(r1)
> +     std     r23,-8*10(r1)
> +     std     r24,-8*11(r1)
> +     std     r25,-8*12(r1)
> +     std     r26,-8*13(r1)
> +     std     r27,-8*14(r1)
> +     std     r28,-8*15(r1)
> +     std     r29,-8*16(r1)
> +     std     r30,-8*17(r1)
> +     std     r31,-8*18(r1)
> +     std     r4,-8*19(r1)
> +     std     r5,-8*20(r1)
> +     cmpwi   r3,PNV_THREAD_NAP
> +     bne     1f
> +     IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
> +1:   cmpwi   r3,PNV_THREAD_SLEEP
> +     bne     2f
> +     IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
> +2:   IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
> 
> -/*
> - * R3 here contains the value that will be returned to the caller
> - * of power7_nap.
> - * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
> - */
> -pnv_wakeup_noloss:
> -     lbz     r0,PACA_NAPSTATELOST(r13)
> -     cmpwi   r0,0
> -     bne     pnv_wakeup_loss
> -     ld      r1,PACAR1(r13)
> -BEGIN_FTR_SECTION
> -     CHECK_HMI_INTERRUPT
> -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
> -     ld      r4,PACAKMSR(r13)
> -     ld      r5,_NIP(r1)
> -     ld      r6,_CCR(r1)
> -     addi    r1,r1,INT_FRAME_SIZE
> -     mtlr    r5
> -     mtcr    r6
> -     mtmsrd  r4
> -     blr
> diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
> index 2e5dfb6e0823..8b4858f82229 100644
> --- a/arch/powerpc/kernel/setup-common.c
> +++ b/arch/powerpc/kernel/setup-common.c
> @@ -401,8 +401,8 @@ void __init check_for_initrd(void)
> 
>  #ifdef CONFIG_SMP
> 
> -int threads_per_core, threads_per_subcore, threads_shift;
> -cpumask_t threads_core_mask;
> +int threads_per_core, threads_per_subcore, threads_shift __read_mostly;
> +cpumask_t threads_core_mask __read_mostly;
>  EXPORT_SYMBOL_GPL(threads_per_core);
>  EXPORT_SYMBOL_GPL(threads_per_subcore);
>  EXPORT_SYMBOL_GPL(threads_shift);
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 3a5e719ef032..58d0f1ba845d 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -448,8 +448,10 @@ kvm_no_guest:
>       rlwimi  r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
>       mtspr   SPRN_LPCR, r4
>       li      r3, 0
> -     mfspr   r12,SPRN_SRR1
> -     b       pnv_wakeup_loss
> +     /* set up cr3 and r3 for return */
> +     cmpdi   cr3, r3, 0
> +     mfspr   r3,SPRN_SRR1
> +     b       idle_return_gpr_loss
> 
>  53:  HMT_LOW
>       ld      r5, HSTATE_KVM_VCORE(r13)
> diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
> index e52f9b06dd9c..6ea1543c2d6d 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -16,6 +16,7 @@
>  #include <linux/device.h>
>  #include <linux/cpu.h>
> 
> +#include <asm/asm-prototypes.h>
>  #include <asm/firmware.h>
>  #include <asm/machdep.h>
>  #include <asm/opal.h>
> @@ -48,10 +49,10 @@ static u64 pnv_default_stop_mask;
>  static bool default_stop_found;
> 
>  /*
> - * First deep stop state. Used to figure out when to save/restore
> - * hypervisor context.
> + * First stop state levels when SPR and TB loss can occur.
>   */
> -u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
> +static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
> +static u64 pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
> 
>  /*
>   * psscr value and mask of the deepest stop idle state.
> @@ -62,6 +63,8 @@ static u64 pnv_deepest_stop_psscr_mask;
>  static u64 pnv_deepest_stop_flag;
>  static bool deepest_stop_found;
> 
> +static unsigned long power7_offline_type;
> +
>  static int pnv_save_sprs_for_deep_states(void)
>  {
>       int cpu;
> @@ -72,12 +75,12 @@ static int pnv_save_sprs_for_deep_states(void)
>        * all cpus at boot. Get these reg values of current cpu and use the
>        * same across all cpus.
>        */
> -     uint64_t lpcr_val = mfspr(SPRN_LPCR);
> -     uint64_t hid0_val = mfspr(SPRN_HID0);
> -     uint64_t hid1_val = mfspr(SPRN_HID1);
> -     uint64_t hid4_val = mfspr(SPRN_HID4);
> -     uint64_t hid5_val = mfspr(SPRN_HID5);
> -     uint64_t hmeer_val = mfspr(SPRN_HMEER);
> +     uint64_t lpcr_val       = mfspr(SPRN_LPCR);
> +     uint64_t hid0_val       = mfspr(SPRN_HID0);
> +     uint64_t hid1_val       = mfspr(SPRN_HID1);
> +     uint64_t hid4_val       = mfspr(SPRN_HID4);
> +     uint64_t hid5_val       = mfspr(SPRN_HID5);
> +     uint64_t hmeer_val      = mfspr(SPRN_HMEER);
>       uint64_t msr_val = MSR_IDLE;
>       uint64_t psscr_val = pnv_deepest_stop_psscr_val;
> 
> @@ -137,89 +140,6 @@ static int pnv_save_sprs_for_deep_states(void)
>       return 0;
>  }
> 
> -static void pnv_alloc_idle_core_states(void)
> -{
> -     int i, j;
> -     int nr_cores = cpu_nr_cores();
> -     u32 *core_idle_state;
> -
> -     /*
> -      * core_idle_state - The lower 8 bits track the idle state of
> -      * each thread of the core.
> -      *
> -      * The most significant bit is the lock bit.
> -      *
> -      * Initially all the bits corresponding to threads_per_core
> -      * are set. They are cleared when the thread enters deep idle
> -      * state like sleep and winkle/stop.
> -      *
> -      * Initially the lock bit is cleared.  The lock bit has 2
> -      * purposes:
> -      *      a. While the first thread in the core waking up from
> -      *         idle is restoring core state, it prevents other
> -      *         threads in the core from switching to process
> -      *         context.
> -      *      b. While the last thread in the core is saving the
> -      *         core state, it prevents a different thread from
> -      *         waking up.
> -      */
> -     for (i = 0; i < nr_cores; i++) {
> -             int first_cpu = i * threads_per_core;
> -             int node = cpu_to_node(first_cpu);
> -             size_t paca_ptr_array_size;
> -
> -             core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
> -             *core_idle_state = (1 << threads_per_core) - 1;
> -             paca_ptr_array_size = (threads_per_core *
> -                                    sizeof(struct paca_struct *));
> -
> -             for (j = 0; j < threads_per_core; j++) {
> -                     int cpu = first_cpu + j;
> -
> -                     paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state;
> -                     paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING;
> -                     paca_ptrs[cpu]->thread_mask = 1 << j;
> -             }
> -     }
> -
> -     update_subcore_sibling_mask();
> -
> -     if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
> -             int rc = pnv_save_sprs_for_deep_states();
> -
> -             if (likely(!rc))
> -                     return;
> -
> -             /*
> -              * The stop-api is unable to restore hypervisor
> -              * resources on wakeup from platform idle states which
> -              * lose full context. So disable such states.
> -              */
> -             supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
> -             pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
> -             pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
> -
> -             if (cpu_has_feature(CPU_FTR_ARCH_300) &&
> -                 (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
> -                     /*
> -                      * Use the default stop state for CPU-Hotplug
> -                      * if available.
> -                      */
> -                     if (default_stop_found) {
> -                             pnv_deepest_stop_psscr_val =
> -                                     pnv_default_stop_val;
> -                             pnv_deepest_stop_psscr_mask =
> -                                     pnv_default_stop_mask;
> -                             pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
> -                                     pnv_deepest_stop_psscr_val);
> -                     } else { /* Fallback to snooze loop for CPU-Hotplug */
> -                             deepest_stop_found = false;
> -                             pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
> -                     }
> -             }
> -     }
> -}
> -
>  u32 pnv_get_supported_cpuidle_states(void)
>  {
>       return supported_cpuidle_states;
> @@ -238,6 +158,9 @@ static void pnv_fastsleep_workaround_apply(void *info)
>               *err = 1;
>  }
> 
> +static bool power7_fastsleep_workaround_entry = true;
> +static bool power7_fastsleep_workaround_exit = true;
> +
>  /*
>   * Used to store fastsleep workaround state
>   * 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
> @@ -269,21 +192,15 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
>        * fastsleep_workaround_applyonce = 1 implies
>        * fastsleep workaround needs to be left in 'applied' state on all
>        * the cores. Do this by-
> -      * 1. Patching out the call to 'undo' workaround in fastsleep exit path
> -      * 2. Sending ipi to all the cores which have at least one online thread
> -      * 3. Patching out the call to 'apply' workaround in fastsleep entry
> -      * path
> +      * 1. Disable the 'undo' workaround in fastsleep exit path
> +      * 2. Send IPIs to all the cores which have at least one online thread
> +      * 3. Disable the 'apply' workaround in fastsleep entry path
> +      *
>        * There is no need to send ipi to cores which have all threads
>        * offlined, as last thread of the core entering fastsleep or deeper
>        * state would have applied workaround.
>        */
> -     err = patch_instruction(
> -             (unsigned int *)pnv_fastsleep_workaround_at_exit,
> -             PPC_INST_NOP);
> -     if (err) {
> -             pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit");
> -             goto fail;
> -     }
> +     power7_fastsleep_workaround_exit = false;
> 
>       get_online_cpus();
>       primary_thread_mask = cpu_online_cores_map();
> @@ -296,13 +213,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
>               goto fail;
>       }
> 
> -     err = patch_instruction(
> -             (unsigned int *)pnv_fastsleep_workaround_at_entry,
> -             PPC_INST_NOP);
> -     if (err) {
> -             pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry");
> -             goto fail;
> -     }
> +     power7_fastsleep_workaround_entry = false;
> 
>       fastsleep_workaround_applyonce = 1;
> 
> @@ -315,6 +226,301 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
>                       show_fastsleep_workaround_applyonce,
>                       store_fastsleep_workaround_applyonce);
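
A note for readers following along: with the runtime code patching gone, the
apply-once knob above reduces to two flag flips around one IPI round. Below is
a compressed userspace model of that sequence -- the function and stub names
are mine, and the OPAL call plus cpumask handling are elided:

#include <stdbool.h>
#include <stdio.h>

static bool fastsleep_workaround_entry = true;
static bool fastsleep_workaround_exit = true;

/* stand-in for the IPI that applies the workaround on live cores */
static int apply_workaround_on_online_cores(void)
{
	printf("workaround applied on all online cores\n");
	return 0;
}

static int store_applyonce(void)
{
	/* 1. stop undoing the workaround at fastsleep exit */
	fastsleep_workaround_exit = false;
	/* 2. apply it once on every core with an online thread */
	if (apply_workaround_on_online_cores())
		return -1;
	/* 3. stop applying it at fastsleep entry */
	fastsleep_workaround_entry = false;
	return 0;
}

int main(void)
{
	return store_applyonce();
}
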
> 
> +static inline void atomic_start_thread_idle(void)
> +{
> +     int cpu = raw_smp_processor_id();
> +     int first = cpu_first_thread_sibling(cpu);
> +     int thread_nr = cpu_thread_in_core(cpu);
> +     unsigned long *state = &paca_ptrs[first]->idle_state;
> +
> +     clear_bit(thread_nr, state);
> +}
> +
> +static inline void atomic_stop_thread_idle(void)
> +{
> +     int cpu = raw_smp_processor_id();
> +     int first = cpu_first_thread_sibling(cpu);
> +     int thread_nr = cpu_thread_in_core(cpu);
> +     unsigned long *state = &paca_ptrs[first]->idle_state;
> +
> +     set_bit(thread_nr, state);
> +}
> +
> +static inline void atomic_lock_thread_idle(void)
> +{
> +     int cpu = raw_smp_processor_id();
> +     int first = cpu_first_thread_sibling(cpu);
> +     unsigned long *state = &paca_ptrs[first]->idle_state;
> +
> +     while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state)))
> +             barrier();
> +}
> +
> +static inline void atomic_unlock_and_stop_thread_idle(void)
> +{
> +     int cpu = raw_smp_processor_id();
> +     int first = cpu_first_thread_sibling(cpu);
> +     unsigned long thread = 1UL << cpu_thread_in_core(cpu);
> +     unsigned long *state = &paca_ptrs[first]->idle_state;
> +     u64 s = READ_ONCE(*state);
> +     u64 new, tmp;
> +
> +     BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT));
> +     BUG_ON(s & thread);
> +
> +again:
> +     new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT;
> +     tmp = cmpxchg(state, s, new);
> +     if (unlikely(tmp != s)) {
> +             s = tmp;
> +             goto again;
> +     }
> +}
> +
> +static inline void atomic_unlock_thread_idle(void)
> +{
> +     int cpu = raw_smp_processor_id();
> +     int first = cpu_first_thread_sibling(cpu);
> +     unsigned long *state = &paca_ptrs[first]->idle_state;
> +
> +     BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
> +     clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
> +}
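
The five helpers above form a small hand-rolled lock/bitmask protocol on the
per-core idle_state word. Here is a minimal userspace model using C11 atomics,
assuming eight threads per core -- the names are mine, the kernel bitops are
replaced by a fetch-or spin and a compare-exchange loop, and acquire/release
orders stand in for the kernel's lock/unlock ordering:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define LOCK_BIT	(1ULL << 63)

/* 8 threads per core, all initially running (bits set) */
static _Atomic uint64_t idle_state = (1ULL << 8) - 1;

static void lock_thread_idle(void)
{
	/* spin until we are the one flipping the lock bit 0 -> 1 */
	while (atomic_fetch_or_explicit(&idle_state, LOCK_BIT,
					memory_order_acquire) & LOCK_BIT)
		;
}

static void unlock_and_stop_thread_idle(unsigned int thread_nr)
{
	uint64_t thread = 1ULL << thread_nr;
	uint64_t s = atomic_load_explicit(&idle_state, memory_order_relaxed);
	uint64_t new;

	/* set our thread bit and drop the lock in a single atomic update */
	do {
		new = (s | thread) & ~LOCK_BIT;
	} while (!atomic_compare_exchange_weak_explicit(&idle_state, &s, new,
							memory_order_release,
							memory_order_relaxed));
}

int main(void)
{
	/* thread 0 goes idle: clear its bit (atomic_start_thread_idle) */
	atomic_fetch_and_explicit(&idle_state, ~1ULL, memory_order_relaxed);

	/* wake with state loss: take the lock, restore, set bit + unlock */
	lock_thread_idle();
	/* ...per-core SPR restore would run here, lock held... */
	unlock_and_stop_thread_idle(0);

	printf("idle_state = 0x%llx\n",
	       (unsigned long long)atomic_load(&idle_state));
	return 0;
}
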
> +
> +/* P7 and P8 */
> +struct p7_sprs {
> +     /* per core */
> +     u64 tscr;
> +     u64 worc;
> +
> +     /* per subcore */
> +     u64 sdr1;
> +     u64 rpr;
> +     u64 amor;
> +
> +     /* per thread */
> +     u64 lpcr;
> +     u64 hfscr;
> +     u64 fscr;
> +     u64 purr;
> +     u64 spurr;
> +     u64 dscr;
> +     u64 wort;
> +};
> +
> +static unsigned long power7_idle_insn(unsigned long type)
> +{
> +     int cpu = raw_smp_processor_id();
> +     int first = cpu_first_thread_sibling(cpu);
> +     unsigned long *state = &paca_ptrs[first]->idle_state;
> +     unsigned long thread = 1UL << cpu_thread_in_core(cpu);
> +     unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
> +     unsigned long srr1;
> +     bool full_winkle;
> +     struct p7_sprs sprs;
> +     bool sprs_saved = false;
> +     int rc;
> +
> +     memset(&sprs, 0, sizeof(sprs));
> +
> +     if (unlikely(type != PNV_THREAD_NAP)) {
> +             atomic_lock_thread_idle();
> +
> +             BUG_ON(!(*state & thread));
> +             *state &= ~thread;
> +
> +             if (power7_fastsleep_workaround_entry) {
> +                     if ((*state & core_thread_mask) == 0) {
> +                             rc = opal_config_cpu_idle_state(
> +                                             OPAL_CONFIG_IDLE_FASTSLEEP,
> +                                             OPAL_CONFIG_IDLE_APPLY);
> +                             BUG_ON(rc);
> +                     }
> +             }
> +
> +             if (type == PNV_THREAD_WINKLE) {
> +                     sprs.tscr       = mfspr(SPRN_TSCR);
> +                     sprs.worc       = mfspr(SPRN_WORC);
> +
> +                     sprs.sdr1       = mfspr(SPRN_SDR1);
> +                     sprs.rpr        = mfspr(SPRN_RPR);
> +                     sprs.amor       = mfspr(SPRN_AMOR);
> +
> +                     sprs.lpcr       = mfspr(SPRN_LPCR);
> +                     if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
> +                             sprs.hfscr      = mfspr(SPRN_HFSCR);
> +                             sprs.fscr       = mfspr(SPRN_FSCR);
> +                     }
> +                     sprs.purr       = mfspr(SPRN_PURR);
> +                     sprs.spurr      = mfspr(SPRN_SPURR);
> +                     sprs.dscr       = mfspr(SPRN_DSCR);
> +                     sprs.wort       = mfspr(SPRN_WORT);
> +
> +                     sprs_saved = true;
> +
> +                     /*
> +                      * Increment winkle counter and set all winkle bits if
> +                      * all threads are winkling. This allows wakeup side to
> +                      * distinguish between fast sleep and winkle state
> +                      * loss. Fast sleep still has to resync the timebase so
> +                      * this may not be a really big win.
> +                      */
> +                     *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
> +                     if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
> +                                     >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
> +                                     == threads_per_core)
> +                             *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
> +                     WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
> +             }
> +
> +             atomic_unlock_thread_idle();
> +     }
> +
> +     local_paca->thread_idle_state = type;
> +     srr1 = isa206_idle_insn_mayloss(type);          /* go idle */
> +     local_paca->thread_idle_state = PNV_THREAD_RUNNING;
> +
> +     WARN_ON_ONCE(!srr1);
> +     WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
> +
> +     if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
> +             hmi_exception_realmode(NULL);
> +
> +     if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
> +             if (unlikely(type != PNV_THREAD_NAP)) {
> +                     atomic_lock_thread_idle();
> +                     if (type == PNV_THREAD_WINKLE) {
> +                             WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
> +                             *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
> +                             *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
> +                     }
> +                     atomic_unlock_and_stop_thread_idle();
> +             }
> +             return srr1;
> +     }
> +
> +     /* HV state loss */
> +     BUG_ON(type == PNV_THREAD_NAP);
> +
> +     atomic_lock_thread_idle();
> +
> +     full_winkle = false;
> +     if (type == PNV_THREAD_WINKLE) {
> +             WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
> +             *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
> +             if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
> +                     *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
> +                     full_winkle = true;
> +                     BUG_ON(!sprs_saved);
> +             }
> +     }
> +
> +     WARN_ON(*state & thread);
> +
> +     if ((*state & core_thread_mask) != 0)
> +             goto core_woken;
> +
> +     /* Per-core SPRs */
> +     if (full_winkle) {
> +             mtspr(SPRN_TSCR,        sprs.tscr);
> +             mtspr(SPRN_WORC,        sprs.worc);
> +     }
> +
> +     if (power7_fastsleep_workaround_exit) {
> +             rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
> +                                             OPAL_CONFIG_IDLE_UNDO);
> +             BUG_ON(rc);
> +     }
> +
> +     /* TB */
> +     if (opal_resync_timebase() != OPAL_SUCCESS)
> +             BUG();
> +
> +core_woken:
> +     if (!full_winkle)
> +             goto subcore_woken;
> +
> +     if ((*state & local_paca->subcore_sibling_mask) != 0)
> +             goto subcore_woken;
> +
> +     /* Per-subcore SPRs */
> +     mtspr(SPRN_SDR1,        sprs.sdr1);
> +     mtspr(SPRN_RPR,         sprs.rpr);
> +     mtspr(SPRN_AMOR,        sprs.amor);
> +
> +subcore_woken:
> +     /*
> +      * isync after restoring shared SPRs and before unlocking. Unlock
> +      * only contains hwsync which does not necessarily do the right
> +      * thing for SPRs.
> +      */
> +     isync();
> +     atomic_unlock_and_stop_thread_idle();
> +
> +     /* Fast sleep does not lose SPRs */
> +     if (!full_winkle)
> +             return srr1;
> +
> +     /* Per-thread SPRs */
> +     mtspr(SPRN_LPCR,        sprs.lpcr);
> +     if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
> +             mtspr(SPRN_HFSCR,       sprs.hfscr);
> +             mtspr(SPRN_FSCR,        sprs.fscr);
> +     }
> +     mtspr(SPRN_PURR,        sprs.purr);
> +     mtspr(SPRN_SPURR,       sprs.spurr);
> +     mtspr(SPRN_DSCR,        sprs.dscr);
> +     mtspr(SPRN_WORT,        sprs.wort);
> +
> +     mtspr(SPRN_SPRG3,       local_paca->sprg_vdso);
> +
> +     /*
> +      * The SLB has to be restored here, but it sometimes still
> +      * contains entries, so the __ variant must be used to prevent
> +      * multi hits.
> +      */
> +     __slb_restore_bolted_realmode();
> +
> +     return srr1;
> +}
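
The winkle-count bookkeeping in this function (increment on entry, all-winkled
check, decrement plus per-thread bit test on wake) is easiest to see in
isolation. A userspace sketch follows, assuming the lock is held as in the
code above; the bit positions are illustrative stand-ins for the
PNV_CORE_IDLE_* layout, not the real header values:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define THREADS_PER_CORE	8
#define WINKLE_COUNT_SHIFT	16	/* illustrative */
#define WINKLE_COUNT_BITS	(0xFULL << WINKLE_COUNT_SHIFT)
#define THREAD_WINKLE_SHIFT	8	/* illustrative */

static unsigned long long state;	/* the core's idle_state, lock held */

static void winkle_enter(void)
{
	state += 1ULL << WINKLE_COUNT_SHIFT;
	/* everyone winkled: mark all threads as having lost full state */
	if ((state & WINKLE_COUNT_BITS) >> WINKLE_COUNT_SHIFT == THREADS_PER_CORE)
		state |= (unsigned long long)0xFF << THREAD_WINKLE_SHIFT;
}

static bool winkle_wake(unsigned int thread_nr)
{
	unsigned long long thread = 1ULL << thread_nr;
	bool full_winkle = false;

	state -= 1ULL << WINKLE_COUNT_SHIFT;
	/* our winkle bit set => the core fully winkled while we were down */
	if (state & (thread << THREAD_WINKLE_SHIFT)) {
		state &= ~(thread << THREAD_WINKLE_SHIFT);
		full_winkle = true;
	}
	return full_winkle;
}

int main(void)
{
	for (int t = 0; t < THREADS_PER_CORE; t++)
		winkle_enter();
	assert(winkle_wake(0));		/* full state loss: restore SPRs */
	assert(!winkle_wake(0));	/* bit was consumed on first wake */
	printf("ok\n");
	return 0;
}
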
> +
> +#ifdef CONFIG_HOTPLUG_CPU
> +static unsigned long power7_offline(void)
> +{
> +     unsigned long srr1;
> +
> +     mtmsr(MSR_IDLE);
> +
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +     /* Tell KVM we're entering idle. */
> +     /******************************************************/
> +     /*  N O T E   W E L L    ! ! !    N O T E   W E L L   */
> +     /* The following store to HSTATE_HWTHREAD_STATE(r13)  */
> +     /* MUST occur in real mode, i.e. with the MMU off,    */
> +     /* and the MMU must stay off until we clear this flag */
> +     /* and test HSTATE_HWTHREAD_REQ(r13) in               */
> +     /* pnv_powersave_wakeup in this file.                 */
> +     /* The reason is that another thread can switch the   */
> +     /* MMU to a guest context whenever this flag is set   */
> +     /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on,    */
> +     /* that would potentially cause this thread to start  */
> +     /* executing instructions from guest memory in        */
> +     /* hypervisor mode, leading to a host crash or data   */
> +     /* corruption, or worse.                              */
> +     /******************************************************/
> +     local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
> +#endif
> +
> +     __ppc64_runlatch_off();
> +     srr1 = power7_idle_insn(power7_offline_type);
> +     __ppc64_runlatch_on();
> +
> +     mtmsr(MSR_KERNEL);
> +
> +     return srr1;
> +}
> +#endif
> +
>  static unsigned long __power7_idle_type(unsigned long type)
>  {
>       unsigned long srr1;
> @@ -322,9 +528,11 @@ static unsigned long __power7_idle_type(unsigned long type)
>       if (!prep_irq_for_idle_irqsoff())
>               return 0;
> 
> +     mtmsr(MSR_IDLE);
>       __ppc64_runlatch_off();
>       srr1 = power7_idle_insn(type);
>       __ppc64_runlatch_on();
> +     mtmsr(MSR_KERNEL);
> 
>       fini_irq_for_idle_irqsoff();
> 
> @@ -347,6 +555,256 @@ void power7_idle(void)
>       power7_idle_type(PNV_THREAD_NAP);
>  }
> 
> +struct p9_sprs {
> +     /* per core */
> +     u64 ptcr;
> +     u64 rpr;
> +     u64 tscr;
> +     u64 ldbar;
> +     u64 amor;
> +
> +     /* per thread */
> +     u64 lpcr;
> +     u64 hfscr;
> +     u64 fscr;
> +     u64 pid;
> +     u64 purr;
> +     u64 spurr;
> +     u64 dscr;
> +     u64 wort;
> +
> +     u64 mmcra;
> +     u32 mmcr0;
> +     u32 mmcr1;
> +     u64 mmcr2;
> +};
> +
> +static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
> +{
> +     int cpu = raw_smp_processor_id();
> +     int first = cpu_first_thread_sibling(cpu);
> +     unsigned long *state = &paca_ptrs[first]->idle_state;
> +     unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
> +     unsigned long srr1;
> +     unsigned long pls;
> +     unsigned long mmcr0 = 0;
> +     struct p9_sprs sprs;
> +     bool sprs_saved = false;
> +
> +     /* This should not be required, but GCC warns it may be used uninitialized */
> +     memset(&sprs, 0, sizeof(sprs));
> +
> +     if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
> +             /* EC=ESL=0 case */
> +
> +             BUG_ON(!mmu_on);
> +
> +             /*
> +              * Wake synchronously. SRESET via xscom may still cause
> +              * a 0x100 powersave wakeup with SRR1 reason!
> +              */
> +             srr1 = isa300_idle_stop_noloss(psscr);          /* go idle */
> +             if (likely(!srr1))
> +                     return 0;
> +
> +             /*
> +              * Registers not saved, can't recover!
> +              * This would be a hardware bug
> +              */
> +             BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
> +
> +             goto out;
> +     }
> +
> +     /* EC=ESL=1 case */
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +     if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
> +             local_paca->requested_psscr = psscr;
> +             /* order setting requested_psscr vs testing dont_stop */
> +             smp_mb();
> +             if (atomic_read(&local_paca->dont_stop)) {
> +                     local_paca->requested_psscr = 0;
> +                     return 0;
> +             }
> +     }
> +#endif
> +
> +     if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
> +              /*
> +               * POWER9 DD2 can incorrectly set PMAO when waking up
> +               * after a state-loss idle. Saving and restoring MMCR0
> +               * over idle is a workaround.
> +               */
> +             mmcr0           = mfspr(SPRN_MMCR0);
> +     }
> +     if ((psscr & PSSCR_RL_MASK) >= pnv_first_spr_loss_level) {
> +             sprs.lpcr       = mfspr(SPRN_LPCR);
> +             sprs.hfscr      = mfspr(SPRN_HFSCR);
> +             sprs.fscr       = mfspr(SPRN_FSCR);
> +             sprs.pid        = mfspr(SPRN_PID);
> +             sprs.purr       = mfspr(SPRN_PURR);
> +             sprs.spurr      = mfspr(SPRN_SPURR);
> +             sprs.dscr       = mfspr(SPRN_DSCR);
> +             sprs.wort       = mfspr(SPRN_WORT);
> +
> +             sprs.mmcra      = mfspr(SPRN_MMCRA);
> +             sprs.mmcr0      = mfspr(SPRN_MMCR0);
> +             sprs.mmcr1      = mfspr(SPRN_MMCR1);
> +             sprs.mmcr2      = mfspr(SPRN_MMCR2);
> +
> +             sprs.ptcr       = mfspr(SPRN_PTCR);
> +             sprs.rpr        = mfspr(SPRN_RPR);
> +             sprs.tscr       = mfspr(SPRN_TSCR);
> +             sprs.ldbar      = mfspr(SPRN_LDBAR);
> +             sprs.amor       = mfspr(SPRN_AMOR);
> +
> +             sprs_saved = true;
> +
> +             atomic_start_thread_idle();
> +     }
> +
> +     srr1 = isa300_idle_stop_mayloss(psscr);         /* go idle */
> +
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +     local_paca->requested_psscr = 0;
> +#endif
> +
> +     psscr = mfspr(SPRN_PSSCR);
> +
> +     WARN_ON_ONCE(!srr1);
> +     WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
> +
> +     /* Workarounds for SMT thread switch problems */
> +     if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
> +             unsigned long mmcra;
> +
> +             /*
> +              * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
> +              * might have been corrupted and needs flushing. We also need
> +              * to reload MMCR0 (see mmcr0 comment above).
> +              */
> +             if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
> +                     asm volatile(PPC_INVALIDATE_ERAT);
> +                     mtspr(SPRN_MMCR0, mmcr0);
> +             }
> +
> +             /*
> +              * DD2.2 and earlier need to set then clear bit 60 in MMCRA
> +              * to ensure the PMU starts running.
> +              */
> +             mmcra = mfspr(SPRN_MMCRA);
> +             mmcra |= PPC_BIT(60);
> +             mtspr(SPRN_MMCRA, mmcra);
> +             mmcra &= ~PPC_BIT(60);
> +             mtspr(SPRN_MMCRA, mmcra);
> +     }
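
For anyone unfamiliar with PPC_BIT(): it uses IBM (MSB-first) bit numbering,
so PPC_BIT(60) in a 64-bit register is host bit 3, i.e. 0x8. A tiny demo of
the set-then-clear pulse used above:

#include <stdint.h>
#include <stdio.h>

/* IBM bit numbering: bit 0 is the MSB of the 64-bit register */
#define PPC_BIT(i)	(1ULL << (63 - (i)))

int main(void)
{
	uint64_t mmcra = 0;

	mmcra |= PPC_BIT(60);	/* pulse the bit on... */
	printf("set:   0x%016llx\n", (unsigned long long)mmcra);
	mmcra &= ~PPC_BIT(60);	/* ...then off again */
	printf("clear: 0x%016llx\n", (unsigned long long)mmcra);
	return 0;
}
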
> +
> +     if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
> +             hmi_exception_realmode(NULL);
> +
> +     /*
> +      * On POWER9, SRR1 bits do not match exactly as expected.
> +      * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
> +      * just always test PSSCR for SPR/TB state loss.
> +      */
> +     pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
> +     if (likely(pls < pnv_first_spr_loss_level)) {
> +             if (sprs_saved)
> +                     atomic_stop_thread_idle();
> +             goto out;
> +     }
> +
> +     /* HV state loss */
> +     BUG_ON(!sprs_saved);
> +
> +     atomic_lock_thread_idle();
> +
> +     if ((*state & core_thread_mask) != 0)
> +             goto core_woken;
> +
> +     /* Per-core SPRs */
> +     mtspr(SPRN_PTCR,        sprs.ptcr);
> +     mtspr(SPRN_RPR,         sprs.rpr);
> +     mtspr(SPRN_TSCR,        sprs.tscr);
> +     mtspr(SPRN_LDBAR,       sprs.ldbar);
> +     mtspr(SPRN_AMOR,        sprs.amor);
> +
> +     if (pls >= pnv_first_tb_loss_level) {
> +             /* TB loss */
> +             if (opal_resync_timebase() != OPAL_SUCCESS)
> +                     BUG();
> +     }
> +
> +     /*
> +      * isync after restoring shared SPRs and before unlocking. Unlock
> +      * only contains hwsync which does not necessarily do the right
> +      * thing for SPRs.
> +      */
> +     isync();
> +
> +core_woken:
> +     atomic_unlock_and_stop_thread_idle();
> +
> +     /* Per-thread SPRs */
> +     mtspr(SPRN_LPCR,        sprs.lpcr);
> +     mtspr(SPRN_HFSCR,       sprs.hfscr);
> +     mtspr(SPRN_FSCR,        sprs.fscr);
> +     mtspr(SPRN_PID,         sprs.pid);
> +     mtspr(SPRN_PURR,        sprs.purr);
> +     mtspr(SPRN_SPURR,       sprs.spurr);
> +     mtspr(SPRN_DSCR,        sprs.dscr);
> +     mtspr(SPRN_WORT,        sprs.wort);
> +
> +     mtspr(SPRN_MMCRA,       sprs.mmcra);
> +     mtspr(SPRN_MMCR0,       sprs.mmcr0);
> +     mtspr(SPRN_MMCR1,       sprs.mmcr1);
> +     mtspr(SPRN_MMCR2,       sprs.mmcr2);
> +
> +     mtspr(SPRN_SPRG3,       local_paca->sprg_vdso);
> +
> +     if (!radix_enabled())
> +             __slb_restore_bolted_realmode();
> +
> +out:
> +     if (mmu_on)
> +             mtmsr(MSR_KERNEL);
> +
> +     return srr1;
> +}
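
The wakeup classification at the tail of power9_idle_stop() is worth spelling
out: PSSCR.PLS, not SRR1, decides what was lost. A standalone rendering below,
with the PLS mask/shift following the ISA v3.0 layout; the two loss-level
thresholds are made-up example values (the real ones come from the device-tree
scan further down):

#include <stdint.h>
#include <stdio.h>

#define PSSCR_PLS	0xf000000000000000ULL
#define PSSCR_PLS_SHIFT	60

/* example thresholds only */
static uint64_t first_spr_loss_level = 4;
static uint64_t first_tb_loss_level = 8;

static void classify_wakeup(uint64_t psscr)
{
	uint64_t pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;

	if (pls < first_spr_loss_level)
		printf("PLS=%llu: nothing lost, fast exit\n",
		       (unsigned long long)pls);
	else if (pls < first_tb_loss_level)
		printf("PLS=%llu: SPRs lost, restore them\n",
		       (unsigned long long)pls);
	else
		printf("PLS=%llu: SPRs and timebase lost, resync TB too\n",
		       (unsigned long long)pls);
}

int main(void)
{
	classify_wakeup(2ULL << PSSCR_PLS_SHIFT);
	classify_wakeup(5ULL << PSSCR_PLS_SHIFT);
	classify_wakeup(11ULL << PSSCR_PLS_SHIFT);
	return 0;
}
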
> +
> +#ifdef CONFIG_HOTPLUG_CPU
> +static unsigned long power9_offline_stop(unsigned long psscr)
> +{
> +     unsigned long srr1;
> +
> +#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +     __ppc64_runlatch_off();
> +     srr1 = power9_idle_stop(psscr, true);
> +     __ppc64_runlatch_on();
> +#else
> +     /*
> +      * Tell KVM we're entering idle.
> +      * This does not have to be done in real mode because the P9 MMU
> +      * is independent per-thread. Some steppings share radix/hash mode
> +      * between threads, but in that case KVM has a barrier sync in real
> +      * mode before and after switching between radix and hash.
> +      *
> +      * kvm_start_guest must still be called in real mode though, hence
> +      * the false argument.
> +      */
> +     local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
> +
> +     __ppc64_runlatch_off();
> +     srr1 = power9_idle_stop(psscr, false);
> +     __ppc64_runlatch_on();
> +
> +     mtmsr(MSR_KERNEL);
> +#endif
> +
> +     return srr1;
> +}
> +#endif
> +
>  static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
>                                     unsigned long stop_psscr_mask)
>  {
> @@ -360,7 +818,7 @@ static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
>       psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
> 
>       __ppc64_runlatch_off();
> -     srr1 = power9_idle_stop(psscr);
> +     srr1 = power9_idle_stop(psscr, true);
>       __ppc64_runlatch_on();
> 
>       fini_irq_for_idle_irqsoff();
> @@ -409,7 +867,7 @@ void pnv_power9_force_smt4_catch(void)
>                       atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
>       }
>       /* order setting dont_stop vs testing requested_psscr */
> -     mb();
> +     smp_mb();
>       for (thr = 0; thr < threads_per_core; ++thr) {
>               if (!paca_ptrs[cpu0+thr]->requested_psscr)
>                       ++awake_threads;
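
The requested_psscr / dont_stop pairing (here and in the EC=ESL=1 entry path
above) is a store-buffer style handshake: each side stores its flag, runs a
full barrier, then reads the other side's flag, so at least one of them is
guaranteed to observe the other. A userspace model with C11 fences -- the
names are mine:

#include <stdatomic.h>
#include <stdbool.h>

static _Atomic unsigned long requested_psscr;
static _Atomic int dont_stop;

/* idle side: publish intent to stop, then check for a veto */
static bool try_enter_stop(unsigned long psscr)
{
	atomic_store(&requested_psscr, psscr);
	atomic_thread_fence(memory_order_seq_cst);	/* smp_mb() */
	if (atomic_load(&dont_stop)) {
		atomic_store(&requested_psscr, 0);
		return false;	/* caught: stay awake */
	}
	return true;
}

/* catcher side: veto stopping, then see who already committed */
static bool other_thread_committed(void)
{
	atomic_fetch_add(&dont_stop, 1);
	atomic_thread_fence(memory_order_seq_cst);	/* smp_mb() */
	return atomic_load(&requested_psscr) != 0;
}

int main(void)
{
	/* single-threaded demo: the veto lands first, so stop backs off */
	(void)other_thread_committed();
	return try_enter_stop(0x330) ? 1 : 0;
}
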
> @@ -481,7 +939,6 @@ void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
>  unsigned long pnv_cpu_offline(unsigned int cpu)
>  {
>       unsigned long srr1;
> -     u32 idle_states = pnv_get_supported_cpuidle_states();
> 
>       __ppc64_runlatch_off();
> 
> @@ -492,15 +949,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
>               psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
>                                               pnv_deepest_stop_psscr_val;
>               srr1 = power9_offline_stop(psscr);
> -
> -     } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
> -                (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
> -             srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
> -     } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
> -                (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
> -             srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
> -     } else if (idle_states & OPAL_PM_NAP_ENABLED) {
> -             srr1 = power7_idle_insn(PNV_THREAD_NAP);
> +     } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
> +             srr1 = power7_offline();
>       } else {
>               /* This is the fallback method. We emulate snooze */
>               while (!generic_check_cpu_restart(cpu)) {
> @@ -596,33 +1046,44 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
>   * @dt_idle_states: Number of idle state entries
>   * Returns 0 on success
>   */
> -static int __init pnv_power9_idle_init(void)
> +static void __init pnv_power9_idle_init(void)
>  {
>       u64 max_residency_ns = 0;
>       int i;
> 
>       /*
> -      * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
> -      * and the pnv_default_stop_{val,mask}.
> -      *
> -      * pnv_first_deep_stop_state should be set to the first stop
> -      * level to cause hypervisor state loss.
> -      *
>        * pnv_deepest_stop_{val,mask} should be set to values corresponding to
>        * the deepest stop state.
>        *
>        * pnv_default_stop_{val,mask} should be set to values corresponding to
> -      * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
> +      * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state.
>        */
> -     pnv_first_deep_stop_state = MAX_STOP_STATE;
> +     pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
> +     pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
>       for (i = 0; i < nr_pnv_idle_states; i++) {
>               int err;
>               struct pnv_idle_states_t *state = &pnv_idle_states[i];
>               u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
> 
> +             if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
> +                  (pnv_first_tb_loss_level > psscr_rl))
> +                     pnv_first_tb_loss_level = psscr_rl;
> +
>               if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
> -                 pnv_first_deep_stop_state > psscr_rl)
> -                     pnv_first_deep_stop_state = psscr_rl;
> +                  (pnv_first_spr_loss_level > psscr_rl))
> +                     pnv_first_spr_loss_level = psscr_rl;
> +
> +             /*
> +              * The idle code does not deal with TB loss occurring
> +              * in a shallower state than SPR loss, so force it to
> +              * behave like SPRs are lost if TB is lost. POWER9 would
> +              * never encounter this, but a POWER8 core would if it
> +              * implemented the stop instruction. So this is for forward
> +              * compatibility.
> +              */
> +             if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
> +                  (pnv_first_spr_loss_level > psscr_rl))
> +                     pnv_first_spr_loss_level = psscr_rl;
> 
>               err = validate_psscr_val_mask(&state->psscr_val,
>                                             &state->psscr_mask,
> @@ -647,6 +1108,7 @@ static int __init pnv_power9_idle_init(void)
>                       pnv_default_stop_val = state->psscr_val;
>                       pnv_default_stop_mask = state->psscr_mask;
>                       default_stop_found = true;
> +                     WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT);
>               }
>       }
> 
> @@ -666,10 +1128,40 @@ static int __init pnv_power9_idle_init(void)
>                       pnv_deepest_stop_psscr_mask);
>       }
> 
> -     pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n",
> -             pnv_first_deep_stop_state);
> +     pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n",
> +             pnv_first_spr_loss_level);
> 
> -     return 0;
> +     pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n",
> +             pnv_first_tb_loss_level);
> +}
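
The loss-level scan in pnv_power9_idle_init() reads more clearly as a
standalone loop. Userspace rendering below; the state table and the OPAL_PM_*
flag values are invented for illustration (only the flag names come from the
patch):

#include <stdint.h>
#include <stdio.h>

/* illustrative values, not the real OPAL encodings */
#define OPAL_PM_TIMEBASE_STOP		0x1
#define OPAL_PM_LOSE_FULL_CONTEXT	0x2
#define MAX_STOP_STATE			0xF

struct idle_state { uint64_t psscr_rl; uint32_t flags; };

int main(void)
{
	struct idle_state states[] = {
		{  0, 0 },				/* shallow: no loss */
		{  4, OPAL_PM_LOSE_FULL_CONTEXT },
		{ 11, OPAL_PM_LOSE_FULL_CONTEXT | OPAL_PM_TIMEBASE_STOP },
	};
	uint64_t first_tb_loss = MAX_STOP_STATE + 1;
	uint64_t first_spr_loss = MAX_STOP_STATE + 1;

	for (unsigned int i = 0; i < sizeof(states) / sizeof(states[0]); i++) {
		struct idle_state *s = &states[i];

		if ((s->flags & OPAL_PM_TIMEBASE_STOP) &&
		    first_tb_loss > s->psscr_rl)
			first_tb_loss = s->psscr_rl;
		if ((s->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
		    first_spr_loss > s->psscr_rl)
			first_spr_loss = s->psscr_rl;
		/* TB loss implies SPR-loss handling (forward compat) */
		if ((s->flags & OPAL_PM_TIMEBASE_STOP) &&
		    first_spr_loss > s->psscr_rl)
			first_spr_loss = s->psscr_rl;
	}
	printf("first SPR loss = 0x%llx, first TB loss = 0x%llx\n",
	       (unsigned long long)first_spr_loss,
	       (unsigned long long)first_tb_loss);
	return 0;
}
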
> +
> +static void __init pnv_disable_deep_states(void)
> +{
> +     /*
> +      * The stop-api is unable to restore hypervisor
> +      * resources on wakeup from platform idle states which
> +      * lose full context. So disable such states.
> +      */
> +     supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
> +     pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
> +     pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
> +
> +     if (cpu_has_feature(CPU_FTR_ARCH_300) &&
> +         (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
> +             /*
> +              * Use the default stop state for CPU-Hotplug
> +              * if available.
> +              */
> +             if (default_stop_found) {
> +                     pnv_deepest_stop_psscr_val = pnv_default_stop_val;
> +                     pnv_deepest_stop_psscr_mask = pnv_default_stop_mask;
> +                     pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
> +                             pnv_deepest_stop_psscr_val);
> +             } else { /* Fallback to snooze loop for CPU-Hotplug */
> +                     deepest_stop_found = false;
> +                     pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
> +             }
> +     }
>  }
> 
>  /*
> @@ -684,10 +1176,8 @@ static void __init pnv_probe_idle_states(void)
>               return;
>       }
> 
> -     if (cpu_has_feature(CPU_FTR_ARCH_300)) {
> -             if (pnv_power9_idle_init())
> -                     return;
> -     }
> +     if (cpu_has_feature(CPU_FTR_ARCH_300))
> +             pnv_power9_idle_init();
> 
>       for (i = 0; i < nr_pnv_idle_states; i++)
>               supported_cpuidle_states |= pnv_idle_states[i].flags;
> @@ -807,11 +1297,33 @@ static int pnv_parse_cpuidle_dt(void)
> 
>  static int __init pnv_init_idle_states(void)
>  {
> +     int cpu;
>       int rc = 0;
> -     supported_cpuidle_states = 0;
> +
> +     /* Set up PACA fields */
> +     for_each_present_cpu(cpu) {
> +             struct paca_struct *p = paca_ptrs[cpu];
> +
> +             p->idle_state = 0;
> +             if (cpu == cpu_first_thread_sibling(cpu))
> +                     p->idle_state = (1 << threads_per_core) - 1;
> +
> +             if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
> +                     /* P7/P8 nap */
> +                     p->thread_idle_state = PNV_THREAD_RUNNING;
> +             } else {
> +                     /* P9 stop */
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +                     p->requested_psscr = 0;
> +                     atomic_set(&p->dont_stop, 0);
> +#endif
> +             }
> +     }
> 
>       /* In case we error out nr_pnv_idle_states will be zero */
>       nr_pnv_idle_states = 0;
> +     supported_cpuidle_states = 0;
> +
>       if (cpuidle_disable != IDLE_NO_OVERRIDE)
>               goto out;
>       rc = pnv_parse_cpuidle_dt();
> @@ -819,27 +1331,40 @@ static int __init pnv_init_idle_states(void)
>               return rc;
>       pnv_probe_idle_states();
> 
> -     if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
> -             patch_instruction(
> -                     (unsigned int *)pnv_fastsleep_workaround_at_entry,
> -                     PPC_INST_NOP);
> -             patch_instruction(
> -                     (unsigned int *)pnv_fastsleep_workaround_at_exit,
> -                     PPC_INST_NOP);
> -     } else {
> -             /*
> -              * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
> -              * workaround is needed to use fastsleep. Provide sysfs
> -              * control to choose how this workaround has to be applied.
> -              */
> -             device_create_file(cpu_subsys.dev_root,
> +     if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
> +             if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
> +                     power7_fastsleep_workaround_entry = false;
> +                     power7_fastsleep_workaround_exit = false;
> +             } else {
> +                     /*
> +                      * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
> +                      * workaround is needed to use fastsleep. Provide sysfs
> +                      * control to choose how this workaround has to be
> +                      * applied.
> +                      */
> +                     device_create_file(cpu_subsys.dev_root,
>                               &dev_attr_fastsleep_workaround_applyonce);
> -     }
> +             }
> +
> +             update_subcore_sibling_mask();
> 
> -     pnv_alloc_idle_core_states();
> +             if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) {
> +                     ppc_md.power_save = power7_idle;
> +                     power7_offline_type = PNV_THREAD_NAP;
> +             }
> 
> -     if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
> -             ppc_md.power_save = power7_idle;
> +             if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) &&
> +                        (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT))
> +                     power7_offline_type = PNV_THREAD_WINKLE;
> +             else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) ||
> +                        (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1))
> +                     power7_offline_type = PNV_THREAD_SLEEP;
> +     }
> +
> +     if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
> +             if (pnv_save_sprs_for_deep_states())
> +                     pnv_disable_deep_states();
> +     }
> 
>  out:
>       return 0;
> diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
> index 45563004feda..1d7a9fd30dd1 100644
> --- a/arch/powerpc/platforms/powernv/subcore.c
> +++ b/arch/powerpc/platforms/powernv/subcore.c
> @@ -183,7 +183,7 @@ static void unsplit_core(void)
>       cpu = smp_processor_id();
>       if (cpu_thread_in_core(cpu) != 0) {
>               while (mfspr(SPRN_HID0) & mask)
> -                     power7_idle_insn(PNV_THREAD_NAP);
> +                     power7_idle_type(PNV_THREAD_NAP);
> 
>               per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
>               return;
> diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
> index a0f44f992360..77197110e900 100644
> --- a/arch/powerpc/xmon/xmon.c
> +++ b/arch/powerpc/xmon/xmon.c
> @@ -2431,7 +2431,9 @@ static void dump_one_paca(int cpu)
>       DUMP(p, irq_happened, "%#-*x");
>       DUMP(p, io_sync, "%#-*x");
>       DUMP(p, irq_work_pending, "%#-*x");
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
>       DUMP(p, nap_state_lost, "%#-*x");
> +#endif
>       DUMP(p, sprg_vdso, "%#-*llx");
> 
>  #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
> @@ -2439,19 +2441,16 @@ static void dump_one_paca(int cpu)
>  #endif
> 
>  #ifdef CONFIG_PPC_POWERNV
> -     DUMP(p, core_idle_state_ptr, "%-*px");
> -     DUMP(p, thread_idle_state, "%#-*x");
> -     DUMP(p, thread_mask, "%#-*x");
> -     DUMP(p, subcore_sibling_mask, "%#-*x");
> -     DUMP(p, requested_psscr, "%#-*llx");
> -     DUMP(p, stop_sprs.pid, "%#-*llx");
> -     DUMP(p, stop_sprs.ldbar, "%#-*llx");
> -     DUMP(p, stop_sprs.fscr, "%#-*llx");
> -     DUMP(p, stop_sprs.hfscr, "%#-*llx");
> -     DUMP(p, stop_sprs.mmcr1, "%#-*llx");
> -     DUMP(p, stop_sprs.mmcr2, "%#-*llx");
> -     DUMP(p, stop_sprs.mmcra, "%#-*llx");
> -     DUMP(p, dont_stop.counter, "%#-*x");
> +     DUMP(p, idle_state, "%#-*lx");
> +     if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
> +             DUMP(p, thread_idle_state, "%#-*x");
> +             DUMP(p, subcore_sibling_mask, "%#-*x");
> +     } else {
> +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
> +             DUMP(p, requested_psscr, "%#-*llx");
> +             DUMP(p, dont_stop.counter, "%#-*x");
> +#endif
> +     }
>  #endif
> 
>       DUMP(p, accounting.utime, "%#-*lx");
> -- 
> 2.20.1
> 
> 
