ppc: Implement MPIPL in PowerNV

Harsh Prateek Bora Mon, 10 Mar 2025 22:43:04 -0700



On 2/17/25 12:49, Aditya Gupta wrote:

Linux expect a "ibm,opal/dump" node to know whether MPIPL (aka fadump)
is supported on the hardware.

Export the "ibm,opal/dump" node in QEMU's device tree for Linux to know
that PowerNV supports MPIPL.

With the commit, kernel boots thinking fadump is supported, and reserves
memory regions for fadump if "fadump=on" is passed in kernel cmdline:

     Linux/PowerPC load: init=/bin/sh debug fadump=on
     Finalizing device tree... flat tree at 0x20ebaca0
     [    1.005765851,5] DUMP: Payload sent metadata tag : 0x800002a8
     [    1.005980914,5] DUMP: Boot mem size : 0x40000000
     [    0.000000] opal fadump: Kernel metadata addr: 800002a8
     [    0.000000] fadump: Reserved 1024MB of memory at 0x00000040000000 
(System RAM: 20480MB)
     [    0.000000] fadump: Initialized 0x40000000 bytes cma area at 1024MB 
from 0x400102a8 bytes of memory reserved for firmware-assisted dump

Also, OPAL and Linux expect the "mpipl-boot" device tree node on a MPIPL
boot. Hence add "mpipl-boot" property in device tree on an MPIPL boot.

Hence after crash, Linux knows when it's a MPIPL/fadump boot:

     [    0.000000] opal fadump: Firmware-assisted dump is active.
     [    0.000000] fadump: Firmware-assisted dump is active.
     [    0.000000] fadump: Reserving 23552MB of memory at 0x00000040000000 for 
preserving crash data

Do note that fadump boot in PowerNV seems to require more memory,
trying with 1GB causes this error by kernel:

     [    0.000000] fadump: Failed to find memory chunk for reservation!

And even with anything from 2GB - 19GB, the kernel fails to boot due to
some memory issues.

Trying with >20GB memory is recommended for now

Signed-off-by: Aditya Gupta <adit...@linux.ibm.com>
---
  hw/ppc/pnv.c             | 49 ++++++++++++++++++++++++++++++++++++++++
  hw/ppc/pnv_sbe.c         | 18 +++++++++++----
  include/hw/ppc/pnv_sbe.h |  4 ++++
  3 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
index 11fd477b71be..39ed3f873e9a 100644
--- a/hw/ppc/pnv.c
+++ b/hw/ppc/pnv.c
@@ -51,6 +51,7 @@
  #include "hw/ppc/pnv_chip.h"
  #include "hw/ppc/pnv_xscom.h"
  #include "hw/ppc/pnv_pnor.h"
+#include "hw/ppc/pnv_sbe.h"

#include "hw/isa/isa.h"

  #include "hw/char/serial-isa.h"
@@ -697,6 +698,26 @@ static void *pnv_dt_create(MachineState *machine)
          pmc->dt_power_mgt(pnv, fdt);
      }

+ /* Add "dump" node so kernel knows MPIPL (aka fadump) is supported */

+    off = fdt_add_subnode(fdt, 0, "ibm,opal");
+    if (off == -FDT_ERR_EXISTS) {
+        off = fdt_path_offset(fdt, "/ibm,opal");
+    }
+
+    _FDT(off);
+    off = fdt_add_subnode(fdt, off, "dump");
+    _FDT(off);
+    _FDT((fdt_setprop_string(fdt, off, "compatible", "ibm,opal-dump")));
+
+    /* Add kernel and initrd as fw-load-area */
+    uint64_t fw_load_area[4] = {
+        cpu_to_be64(KERNEL_LOAD_ADDR), cpu_to_be64(KERNEL_MAX_SIZE),
+        cpu_to_be64(INITRD_LOAD_ADDR), cpu_to_be64(INITRD_MAX_SIZE)
+    };
+
+    _FDT((fdt_setprop(fdt, off, "fw-load-area",
+                    fw_load_area, sizeof(fw_load_area))));
+


Above could be wrapped in pnv_dt_mpipl(fdt) and called here?

      return fdt;
  }
@@ -714,6 +735,7 @@ static void pnv_reset(MachineState *machine, ResetType type)
      PnvMachineState *pnv = PNV_MACHINE(machine);
      IPMIBmc *bmc;
      void *fdt;
+    int node_offset;
qemu_devices_reset(type);@@ -744,6 +766,33 @@ static void pnv_reset(MachineState *machine, ResetType type)
          _FDT((fdt_pack(fdt)));
      }
+ /*
+     * If it's a MPIPL boot, add the "mpipl-boot" property, and reset the
+     * boolean for MPIPL boot for next boot
+     */
+    if (pnv_sbe_is_mpipl_boot()) {
+        void *fdt_copy = g_malloc0(FDT_MAX_SIZE);


Where is this getting free'ed ?

+
+        /* Create a writable copy of the fdt */
+        _FDT((fdt_open_into(fdt, fdt_copy, FDT_MAX_SIZE)));
+
+        node_offset = fdt_path_offset(fdt_copy, "/ibm,opal/dump");
+        _FDT((fdt_appendprop_u64(fdt_copy, node_offset, "mpipl-boot", 1)));
+
+        /* Update the fdt, and free the original fdt */
+        if (fdt != machine->fdt) {
+            /*
+             * Only free the fdt if it's not machine->fdt, to prevent
+             * double free, since we already free machine->fdt later
+             */
+            g_free(fdt);
+        }
+        fdt = fdt_copy;
+
+        /* This boot is an MPIPL, reset the boolean for next boot */
+        pnv_sbe_reset_is_next_boot_mpipl();
+    }
+


Could above be wrapped in pnv_reset_mpipl(fdt) and called if
pnv_sbe_is_mpipl_boot is true?

      qemu_fdt_dumpdtb(fdt, fdt_totalsize(fdt));
      cpu_physical_memory_write(PNV_FDT_ADDR, fdt, fdt_totalsize(fdt));

diff --git a/hw/ppc/pnv_sbe.c b/hw/ppc/pnv_sbe.c

index 3b50667226b5..671dc81c9501 100644
--- a/hw/ppc/pnv_sbe.c
+++ b/hw/ppc/pnv_sbe.c
@@ -216,6 +216,18 @@ struct proc_dump_area {
      __be32  act_size;      /* Actual data size */
  } __packed;

+static bool is_next_boot_mpipl;

+
+bool pnv_sbe_is_mpipl_boot(void)
+{
+    return is_next_boot_mpipl;
+}
+
+void pnv_sbe_reset_is_next_boot_mpipl(void)
+{
+    is_next_boot_mpipl = false;
+}
+
  static void pnv_sbe_set_host_doorbell(PnvSBE *sbe, uint64_t val)
  {
      val &= SBE_HOST_RESPONSE_MASK; /* Is this right? What does HW do? */
@@ -334,10 +346,8 @@ static void pnv_sbe_power9_xscom_ctrl_write(void *opaque, 
hwaddr addr,
              /* Save processor state */
              pnv_mpipl_save_proc_regs();

- /*

-             * TODO: Pass `mpipl` node in device tree to signify next
-             * boot is an MPIPL boot
-             */
+            /* Mark next boot as Memory-preserving boot */
+            is_next_boot_mpipl = true;

/* Then do a guest reset */

              /*
diff --git a/include/hw/ppc/pnv_sbe.h b/include/hw/ppc/pnv_sbe.h
index f6cbcf990ed9..94bbdc7b6414 100644
--- a/include/hw/ppc/pnv_sbe.h
+++ b/include/hw/ppc/pnv_sbe.h
@@ -56,4 +56,8 @@ struct PnvSBEClass {
  /* Helper to access stashed SKIBOOT_BASE */
  bool pnv_sbe_mpipl_skiboot_base(void);

+/* Helpers to know if next boot is MPIPL boot */

+bool pnv_sbe_is_mpipl_boot(void);
+void pnv_sbe_reset_is_next_boot_mpipl(void);

Usually we have a set along with reset and helpers for modifying structmembers. Above helpers for a gloabl var seems a bit un-necessary. Iguess we want to keep such global vars inside relevant struct.


Thanks
Harsh

+
  #endif /* PPC_PNV_SBE_H */

Re: [PATCH 7/7] hw/ppc: Implement MPIPL in PowerNV

Reply via email to