On 5/15/19 12:05 PM, Greg Kurz wrote:
> On POWER9, if the hypervisor supports XIVE exploitation mode, the guest OS
> will unconditionally requests for the XIVE interrupt mode even if XIVE was
> deactivated with the kernel command line xive=off. Later on, when the spapr
> XIVE init code handles xive=off, it disables XIVE and tries to fall back on
> the legacy mode XICS.
> 
> This discrepency causes a kernel panic because the hypervisor is configured
> to provide the XIVE interrupt mode to the guest :
> 
> [    0.008837] kernel BUG at arch/powerpc/sysdev/xics/xics-common.c:135!
> [    0.008877] Oops: Exception in kernel mode, sig: 5 [#1]
> [    0.008908] LE SMP NR_CPUS=1024 NUMA pSeries
> [    0.008939] Modules linked in:
> [    0.008964] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G        W         
> 5.0.13-200.fc29.ppc64le #1
> [    0.009018] NIP:  c000000001029ab8 LR: c000000001029aac CTR: 
> c0000000018e0000
> [    0.009065] REGS: c0000007f96d7900 TRAP: 0700   Tainted: G        W        
>   (5.0.13-200.fc29.ppc64le)
> [    0.009119] MSR:  8000000002029033 <SF,VEC,EE,ME,IR,DR,RI,LE>  CR: 
> 28000222  XER: 20040000
> [    0.009168] CFAR: c0000000001b1e28 IRQMASK: 0
> [    0.009168] GPR00: c000000001029aac c0000007f96d7b90 c0000000015e8600 
> 0000000000000000
> [    0.009168] GPR04: 0000000000000001 0000000000000000 0000000000000061 
> 646f6d61696e0d0a
> [    0.009168] GPR08: 00000007fd8f0000 0000000000000001 c0000000014c44c0 
> c0000007f96d76cf
> [    0.009168] GPR12: 0000000000000000 c0000000018e0000 0000000000000001 
> 0000000000000000
> [    0.009168] GPR16: 0000000000000000 0000000000000001 c0000007f96d7c08 
> c0000000016903d0
> [    0.009168] GPR20: c0000007fffe04e8 ffffffffffffffea c000000001620164 
> c00000000161fe58
> [    0.009168] GPR24: c000000000ea6c88 c0000000011151a8 00000000006000c0 
> c0000007f96d7c34
> [    0.009168] GPR28: 0000000000000000 c0000000014b286c c000000001115180 
> c00000000161dc70
> [    0.009558] NIP [c000000001029ab8] xics_smp_probe+0x38/0x98
> [    0.009590] LR [c000000001029aac] xics_smp_probe+0x2c/0x98
> [    0.009622] Call Trace:
> [    0.009639] [c0000007f96d7b90] [c000000001029aac] xics_smp_probe+0x2c/0x98 
> (unreliable)
> [    0.009687] [c0000007f96d7bb0] [c000000001033404] 
> pSeries_smp_probe+0x40/0xa0
> [    0.009734] [c0000007f96d7bd0] [c0000000010212a4] 
> smp_prepare_cpus+0x62c/0x6ec
> [    0.009782] [c0000007f96d7cf0] [c0000000010141b8] 
> kernel_init_freeable+0x148/0x448
> [    0.009829] [c0000007f96d7db0] [c000000000010ba4] kernel_init+0x2c/0x148
> [    0.009870] [c0000007f96d7e20] [c00000000000bdd4] 
> ret_from_kernel_thread+0x5c/0x68
> [    0.009916] Instruction dump:
> [    0.009940] 7c0802a6 60000000 7c0802a6 38800002 f8010010 f821ffe1 3c62001c 
> e863b9a0
> [    0.009988] 4b1882d1 60000000 7c690034 5529d97e <0b090000> 3d22001c 
> e929b998 3ce2ff8f
> 
> Look for xive=off during prom_init and don't ask for XIVE in this case. One
> exception though: if the host only supports XIVE, we still want to boot so
> we ignore xive=off.
> 
> Similarly, have the spapr XIVE init code to looking at the interrupt mode
> negociated during CAS, and ignore xive=off if the hypervisor only supports
> XIVE.
> 
> Fixes: eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt 
> controller")
> Cc: sta...@vger.kernel.org # v4.20
> Reported-by: Pavithra R. Prakash <pavra...@in.ibm.com>
> Signed-off-by: Greg Kurz <gr...@kaod.org>


Reviewed-by: Cédric Le Goater <c...@kaod.org>

Thanks,

C.

> ---
> eac1e731b59e is a v4.16 commit actually but this patch only applies
> cleanly to v4.20 and newer. If needed I can send a backport for
> older versions.
> ---
>  arch/powerpc/kernel/prom_init.c  |   16 +++++++++++-
>  arch/powerpc/sysdev/xive/spapr.c |   52 
> +++++++++++++++++++++++++++++++++++++-
>  2 files changed, 66 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
> index 523bb99d7676..c8f7eb845927 100644
> --- a/arch/powerpc/kernel/prom_init.c
> +++ b/arch/powerpc/kernel/prom_init.c
> @@ -172,6 +172,7 @@ static unsigned long __prombss prom_tce_alloc_end;
>  
>  #ifdef CONFIG_PPC_PSERIES
>  static bool __prombss prom_radix_disable;
> +static bool __prombss prom_xive_disable;
>  #endif
>  
>  struct platform_support {
> @@ -808,6 +809,12 @@ static void __init early_cmdline_parse(void)
>       }
>       if (prom_radix_disable)
>               prom_debug("Radix disabled from cmdline\n");
> +
> +     opt = prom_strstr(prom_cmd_line, "xive=off");
> +     if (opt) {
> +             prom_xive_disable = true;
> +             prom_debug("XIVE disabled from cmdline\n");
> +     }
>  #endif /* CONFIG_PPC_PSERIES */
>  }
>  
> @@ -1216,10 +1223,17 @@ static void __init prom_parse_xive_model(u8 val,
>       switch (val) {
>       case OV5_FEAT(OV5_XIVE_EITHER): /* Either Available */
>               prom_debug("XIVE - either mode supported\n");
> -             support->xive = true;
> +             support->xive = !prom_xive_disable;
>               break;
>       case OV5_FEAT(OV5_XIVE_EXPLOIT): /* Only Exploitation mode */
>               prom_debug("XIVE - exploitation mode supported\n");
> +             if (prom_xive_disable) {
> +                     /*
> +                      * If we __have__ to do XIVE, we're better off ignoring
> +                      * the command line rather than not booting.
> +                      */
> +                     prom_printf("WARNING: Ignoring cmdline option 
> xive=off\n");
> +             }
>               support->xive = true;
>               break;
>       case OV5_FEAT(OV5_XIVE_LEGACY): /* Only Legacy mode */
> diff --git a/arch/powerpc/sysdev/xive/spapr.c 
> b/arch/powerpc/sysdev/xive/spapr.c
> index 575db3b06a6b..2e2d1b8f810f 100644
> --- a/arch/powerpc/sysdev/xive/spapr.c
> +++ b/arch/powerpc/sysdev/xive/spapr.c
> @@ -20,6 +20,7 @@
>  #include <linux/cpumask.h>
>  #include <linux/mm.h>
>  #include <linux/delay.h>
> +#include <linux/libfdt.h>
>  
>  #include <asm/prom.h>
>  #include <asm/io.h>
> @@ -663,6 +664,55 @@ static bool xive_get_max_prio(u8 *max_prio)
>       return true;
>  }
>  
> +static const u8 *get_vec5_feature(unsigned int index)
> +{
> +     unsigned long root, chosen;
> +     int size;
> +     const u8 *vec5;
> +
> +     root = of_get_flat_dt_root();
> +     chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
> +     if (chosen == -FDT_ERR_NOTFOUND)
> +             return NULL;
> +
> +     vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
> +     if (!vec5)
> +             return NULL;
> +
> +     if (size <= index)
> +             return NULL;
> +
> +     return vec5 + index;
> +}
> +
> +static bool xive_spapr_disabled(void)
> +{
> +     const u8 *vec5_xive;
> +
> +     vec5_xive = get_vec5_feature(OV5_INDX(OV5_XIVE_SUPPORT));
> +     if (vec5_xive) {
> +             u8 val;
> +
> +             val = *vec5_xive & OV5_FEAT(OV5_XIVE_SUPPORT);
> +             switch (val) {
> +             case OV5_FEAT(OV5_XIVE_EITHER):
> +             case OV5_FEAT(OV5_XIVE_LEGACY):
> +                     break;
> +             case OV5_FEAT(OV5_XIVE_EXPLOIT):
> +                     /* Hypervisor only supports XIVE */
> +                     if (xive_cmdline_disabled)
> +                             pr_warn("WARNING: Ignoring cmdline option 
> xive=off\n");
> +                     return false;
> +             default:
> +                     pr_warn("%s: Unknown xive support option: 0x%x\n",
> +                             __func__, val);
> +                     break;
> +             }
> +     }
> +
> +     return xive_cmdline_disabled;
> +}
> +
>  bool __init xive_spapr_init(void)
>  {
>       struct device_node *np;
> @@ -675,7 +725,7 @@ bool __init xive_spapr_init(void)
>       const __be32 *reg;
>       int i;
>  
> -     if (xive_cmdline_disabled)
> +     if (xive_spapr_disabled())
>               return false;
>  
>       pr_devel("%s()\n", __func__);
> 

Reply via email to