On 2/17/25 12:49, Aditya Gupta wrote:
When MPIPL is used, OPAL/Linux registers memory regions to be preserved
on a Memory-Preserving boot ('crashkernel boot').
The regions are added to two tables: MDST and MDDT (source and
destination tables)
The MDST contains the start address of the region, and size of region
The MDDT contains the destination address where the region should be
copied (and size of region which will be same as in MDST entry)
Then after a crash, when hostboot (pnv_sbe.c in case of QEMU)
preserves the memory region, it adds the details of preserved regions to
MDRT (results table)
Copy memory regions mentioned in MDST to addresses mentioned in MDDT.
And accordingly update the copied region details in MDRT table.
Note: If we did not preserve the regions, and MDRT is empty then OPAL
simply logs "OPAL dump is not available", while kernel will assume that
firmware would have preserved the regions, and export /proc/vmcore, but
the vmcore won't have most basic kernel structures hence crash will be
unable to analyse the vmcore
Signed-off-by: Aditya Gupta <adit...@linux.ibm.com>
---
hw/ppc/pnv_sbe.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 57 insertions(+)
diff --git a/hw/ppc/pnv_sbe.c b/hw/ppc/pnv_sbe.c
index 361a3854307d..ee905df4e0a6 100644
--- a/hw/ppc/pnv_sbe.c
+++ b/hw/ppc/pnv_sbe.c
@@ -227,6 +227,60 @@ static uint64_t pnv_sbe_power9_xscom_ctrl_read(void
*opaque, hwaddr addr,
return val;
}
+static void pnv_mpipl_preserve_mem(void)
+{
+ /* Get access to metadata */
+ struct mpipl_metadata *metadata = malloc(DUMP_METADATA_AREA_SIZE);
+ struct mdst_table *mdst = malloc(MDST_TABLE_SIZE);
+ struct mddt_table *mddt = malloc(MDDT_TABLE_SIZE);
+ struct mdrt_table *mdrt = malloc(MDRT_TABLE_SIZE);
Where are these getting free()ed? Mem leak ?
+ __be64 source_addr, dest_addr, bytes_to_copy;
+ uint8_t *copy_buffer;
+
+ cpu_physical_memory_read(DUMP_METADATA_AREA_BASE, metadata,
DUMP_METADATA_AREA_SIZE);
+ cpu_physical_memory_read(MDST_TABLE_BASE, mdst, MDST_TABLE_SIZE);
+ cpu_physical_memory_read(MDDT_TABLE_BASE, mddt, MDDT_TABLE_SIZE);
+
+ /* HRMOR_BIT copied from skiboot */
+ #define HRMOR_BIT (1ul << 63)
Could be moved to pnv_sbe.h file.
+
+ for (int i = 0;; ++i) {
+ /* NOTE: Assuming uninitialised will be all zeroes */
+ if ((mdst[i].addr == 0) && (mdst[i].size == 0)) {
+ break;
+ }
What if there is no uninitialized entry till the end of array?
Out-of-bound access since we do not have a loop exit condition?
+
+ if (mdst[i].size != mddt[i].size) {
+ qemu_log_mask(LOG_TRACE,
+ "Warning: Invalid entry, size mismatch in MDST & MDDT\n");
+ continue;
+ }
+
+ if (mdst[i].data_region != mddt[i].data_region) {
+ qemu_log_mask(LOG_TRACE,
+ "Warning: Invalid entry, region mismatch in MDST &
MDDT\n");
+ continue;
+ }
+
+ mdrt[i].src_addr = mdst[i].addr;
+ mdrt[i].dest_addr = mddt[i].addr;
+ mdrt[i].size = mdst[i].size;
+ mdrt[i].data_region = mdst[i].data_region;
+
+ source_addr = cpu_to_be64(mdst[i].addr) & ~HRMOR_BIT;
+ dest_addr = cpu_to_be64(mddt[i].addr) & ~HRMOR_BIT;
+ bytes_to_copy = cpu_to_be32(mddt[i].size);
+
+ /* XXX: Am i assuming we are in big endian mode ? */
If the patches are assuming to work only with BE, it should gracefully
handle the LE case.
Thanks
Harsh
+ copy_buffer = malloc(bytes_to_copy);
+ cpu_physical_memory_read(source_addr, copy_buffer, bytes_to_copy);
+ cpu_physical_memory_write(dest_addr, copy_buffer, bytes_to_copy);
+ free(copy_buffer);
+ }
+
+ cpu_physical_memory_write(MDRT_TABLE_BASE, mdrt, MDRT_TABLE_SIZE);
+}
+
static void pnv_sbe_power9_xscom_ctrl_write(void *opaque, hwaddr addr,
uint64_t val, unsigned size)
{
@@ -250,6 +304,9 @@ static void pnv_sbe_power9_xscom_ctrl_write(void *opaque,
hwaddr addr,
*/
pause_all_vcpus();
+ /* Preserve the memory locations registered for MPIPL */
+ pnv_mpipl_preserve_mem();
+
/*
* TODO: Pass `mpipl` node in device tree to signify next
* boot is an MPIPL boot