Process fw_remove events in QemuCpuhpCollectApicIds() and collect corresponding APIC IDs for CPUs that are being hot-unplugged.
In addition, we now ignore CPUs which only have remove set. These CPUs haven't been processed by OSPM yet. This is based on the QEMU hot-unplug protocol documented here: https://lore.kernel.org/qemu-devel/20201204170939.1815522-3-imamm...@redhat.com/ Also define QEMU_CPUHP_STAT_EJECTED while we are at it. Cc: Laszlo Ersek <ler...@redhat.com> Cc: Jordan Justen <jordan.l.jus...@intel.com> Cc: Ard Biesheuvel <ard.biesheu...@arm.com> Cc: Igor Mammedov <imamm...@redhat.com> Cc: Boris Ostrovsky <boris.ostrov...@oracle.com> Cc: Aaron Young <aaron.yo...@oracle.com> Ref: https://bugzilla.tianocore.org/show_bug.cgi?id=3132 Signed-off-by: Ankur Arora <ankur.a.ar...@oracle.com> --- Notes: I'm treating events (insert=1, fw_remove=1) below as invalid (return EFI_PROTOCOL_ERROR, which ends up as an assert), but I'm not sure that is correct: if ((CpuStatus & QEMU_CPUHP_STAT_INSERT) != 0) { // // The "insert" event guarantees the "enabled" status; plus it excludes - // the "remove" event. + // the "fw_remove" event. // if ((CpuStatus & QEMU_CPUHP_STAT_ENABLED) == 0 || - (CpuStatus & QEMU_CPUHP_STAT_REMOVE) != 0) { + (CpuStatus & QEMU_CPUHP_STAT_FW_REMOVE) != 0) { DEBUG ((DEBUG_ERROR, "%a: CurrentSelector=%u CpuStatus=0x%x: " "inconsistent CPU status\n", __FUNCTION__, CurrentSelector, CpuStatus)); QEMU's handling in cpu_hotplug_rd() can return both of these: cpu_hotplug_rd() { ... case ACPI_CPU_FLAGS_OFFSET_RW: /* pack and return is_* fields */ val |= cdev->cpu ? 1 : 0; val |= cdev->is_inserting ? 2 : 0; val |= cdev->is_removing ? 4 : 0; val |= cdev->fw_remove ? 16 : 0; ... } and I don't see any code that treats is_inserting and is_removing as exclusive. One specific case where this looks like it might be a problem is if the user unplugs a CPU and then immediately plugs it back in. As part of the unplug handling, the ACPI AML would, in the scan loop, asynchronously trigger the notify, which would do the OS unplug, set "fw_remove" and then call the SMI_CMD. 
The subsequent plug could then come and set the "insert" bit. Assuming what I'm describing could happen, I'm not sure what's the right handling: QEMU could treat these bits as exclusive and then OVMF could justifiably treat it as a protocol error? OvmfPkg/Include/IndustryStandard/QemuCpuHotplug.h | 2 ++ OvmfPkg/CpuHotplugSmm/QemuCpuhp.c | 29 +++++++++++++++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/OvmfPkg/Include/IndustryStandard/QemuCpuHotplug.h b/OvmfPkg/Include/IndustryStandard/QemuCpuHotplug.h index a34a6d3fae61..692e3072598c 100644 --- a/OvmfPkg/Include/IndustryStandard/QemuCpuHotplug.h +++ b/OvmfPkg/Include/IndustryStandard/QemuCpuHotplug.h @@ -34,6 +34,8 @@ #define QEMU_CPUHP_STAT_ENABLED BIT0 #define QEMU_CPUHP_STAT_INSERT BIT1 #define QEMU_CPUHP_STAT_REMOVE BIT2 +#define QEMU_CPUHP_STAT_EJECTED BIT3 +#define QEMU_CPUHP_STAT_FW_REMOVE BIT4 #define QEMU_CPUHP_RW_CMD_DATA 0x8 diff --git a/OvmfPkg/CpuHotplugSmm/QemuCpuhp.c b/OvmfPkg/CpuHotplugSmm/QemuCpuhp.c index 8d4a6693c8d6..f871e50c377b 100644 --- a/OvmfPkg/CpuHotplugSmm/QemuCpuhp.c +++ b/OvmfPkg/CpuHotplugSmm/QemuCpuhp.c @@ -245,10 +245,10 @@ QemuCpuhpCollectApicIds ( if ((CpuStatus & QEMU_CPUHP_STAT_INSERT) != 0) { // // The "insert" event guarantees the "enabled" status; plus it excludes - // the "remove" event. + // the "fw_remove" event. 
// if ((CpuStatus & QEMU_CPUHP_STAT_ENABLED) == 0 || - (CpuStatus & QEMU_CPUHP_STAT_REMOVE) != 0) { + (CpuStatus & QEMU_CPUHP_STAT_FW_REMOVE) != 0) { DEBUG ((DEBUG_ERROR, "%a: CurrentSelector=%u CpuStatus=0x%x: " "inconsistent CPU status\n", __FUNCTION__, CurrentSelector, CpuStatus)); @@ -260,12 +260,31 @@ QemuCpuhpCollectApicIds ( ExtendIds = PluggedApicIds; ExtendCount = PluggedCount; - } else if ((CpuStatus & QEMU_CPUHP_STAT_REMOVE) != 0) { - DEBUG ((DEBUG_VERBOSE, "%a: CurrentSelector=%u: remove\n", __FUNCTION__, - CurrentSelector)); + } else if ((CpuStatus & QEMU_CPUHP_STAT_FW_REMOVE) != 0) { + // + // "fw_remove" event guarantees "enabled". + // + if ((CpuStatus & QEMU_CPUHP_STAT_ENABLED) == 0) { + DEBUG ((DEBUG_ERROR, "%a: CurrentSelector=%u CpuStatus=0x%x: " + "inconsistent CPU status\n", __FUNCTION__, CurrentSelector, + CpuStatus)); + return EFI_PROTOCOL_ERROR; + } + + DEBUG ((DEBUG_VERBOSE, "%a: CurrentSelector=%u: fw_remove\n", + __FUNCTION__, CurrentSelector)); ExtendIds = ToUnplugApicIds; ExtendCount = ToUnplugCount; + } else if ((CpuStatus & QEMU_CPUHP_STAT_REMOVE) != 0) { + // + // Let the OSPM deal with the "remove" event. + // + DEBUG ((DEBUG_INFO, "%a: CurrentSelector=%u: remove (ignored)\n", + __FUNCTION__, CurrentSelector)); + + CurrentSelector++; + continue; } else { DEBUG ((DEBUG_VERBOSE, "%a: CurrentSelector=%u: no event\n", __FUNCTION__, CurrentSelector)); -- 2.9.3 -=-=-=-=-=-=-=-=-=-=-=- Groups.io Links: You receive all messages sent to this group. View/Reply Online (#70876): https://edk2.groups.io/g/devel/message/70876 Mute This Topic: https://groups.io/mt/80199952/21656 Group Owner: devel+ow...@edk2.groups.io Unsubscribe: https://edk2.groups.io/g/devel/unsub [arch...@mail-archive.com] -=-=-=-=-=-=-=-=-=-=-=-