On Thu, Nov 23, 2023 at 09:38:13AM +0000, Huang, Kai wrote:
> 
> > diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
> > index 171d86fe71ef..602b5d3982ff 100644
> > --- a/arch/x86/kernel/acpi/boot.c
> > +++ b/arch/x86/kernel/acpi/boot.c
> > @@ -22,6 +22,7 @@
> >  #include <linux/efi-bgrt.h>
> >  #include <linux/serial_core.h>
> >  #include <linux/pgtable.h>
> > +#include <linux/sched/hotplug.h>
> >  
> >  #include <asm/e820/api.h>
> >  #include <asm/irqdomain.h>
> > @@ -33,6 +34,7 @@
> >  #include <asm/smp.h>
> >  #include <asm/i8259.h>
> >  #include <asm/setup.h>
> > +#include <asm/init.h>
> 
> The above two are leftovers I believe?
> 
> [...]
> 

Right.


> > +
> > +static atomic_t waiting_for_crash_ipi;
> > +
> > +void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa);
> > +
> > +static void acpi_mp_play_dead(void)
> > +{
> > +   play_dead_common();
> > +   asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr,
> > +                         acpi_mp_pgd);
> > +}
> > +
> > +static void acpi_mp_cpu_die(unsigned int cpu)
> > +{
> > +   u32 apicid = per_cpu(x86_cpu_to_apicid, cpu);
> > +   unsigned long timeout;
> > +
> > +   /*
> > +    * Use TEST mailbox command to prove that BIOS got control over
> > +    * the CPU before declaring it dead.
> > +    *
> > +    * BIOS has to clear 'command' field of the mailbox.
> > +    */
> > +   acpi_mp_wake_mailbox->apic_id = apicid;
> > +   smp_store_release(&acpi_mp_wake_mailbox->command,
> > +                     ACPI_MP_WAKE_COMMAND_TEST);
> > +
> > +   /* Don't wait longer than a second. */
> > +   timeout = USEC_PER_SEC;
> > +   while (READ_ONCE(acpi_mp_wake_mailbox->command) && timeout--)
> > +           udelay(1);
> > +}
> > +
> > +static void acpi_mp_stop_other_cpus(int wait)
> > +{
> > +   smp_shutdown_nonboot_cpus(smp_processor_id());
> > +}
> > +
> > +static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
> > +{
> > +   local_irq_disable();
> > +
> > +   crash_save_cpu(regs, raw_smp_processor_id());
> > +
> > +   cpu_emergency_stop_pt();
> > +
> > +   disable_local_APIC();
> > +
> > +   /*
> > +    * Prepare the CPU for reboot _after_ invoking the callback so that the
> > +    * callback can safely use virtualization instructions, e.g. VMCLEAR.
> > +    */
> > +   cpu_emergency_disable_virtualization();
> > +
> > +   atomic_dec(&waiting_for_crash_ipi);
> > +
> > +   asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr,
> > +                         acpi_mp_pgd);
> > +
> > +   return NMI_HANDLED;
> > +}
> > +
> > +static void acpi_mp_crash_stop_other_cpus(void)
> > +{
> > +   unsigned long timeout;
> > +
> > +   /* The kernel is broken so disable interrupts */
> > +   local_irq_disable();
> > +
> > +
> > +   atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
> > +
> > +   /* Would it be better to replace the trap vector here? */
> > +   if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
> > +                            NMI_FLAG_FIRST, "crash"))
> > +           return;         /* Return what? */
> > +
> > +   apic_send_IPI_allbutself(NMI_VECTOR);
> > +
> > +   /* Don't wait longer than a second. */
> > +   timeout = USEC_PER_SEC;
> > +   while (atomic_read(&waiting_for_crash_ipi) && timeout--)
> > +           udelay(1);
> > +}
> > +
> > 
> 
> [...]
> 
> > +   smp_ops.stop_other_cpus = acpi_mp_stop_other_cpus;
> > +   smp_ops.crash_stop_other_cpus = acpi_mp_crash_stop_other_cpus;
> > +
> > 
> 
> The above acpi_mp_crash_stop_other_cpus() and crash_nmi_callback() etc are 
> kinda
> duplicated code with the normal crash kexec() in reboot.c.
> 
> I am not expert here but spent some time looking into the code, and it appears
> the main reason preventing us from reusing that code should be TDX guest 
> doesn't
> play nicely with mwait/halt stuff when putting the cpu offline.
> 
> I am thinking if we skip/replace them with the asm_acpi_mp_play_dead(), we
> should be able to just reuse the existing smp_ops.stop_other_cpus() and
> smp_ops.crash_stop_other_cpus()?

Okay, fair enough. See fixup below.

> > +
> >  int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
> >                           const unsigned long end)
> >  {
> >     struct acpi_madt_multiproc_wakeup *mp_wake;
> >  
> >     mp_wake = (struct acpi_madt_multiproc_wakeup *)header;
> > -   if (BAD_MADT_ENTRY(mp_wake, end))
> > +   if (!mp_wake)
> > +           return -EINVAL;
> 
> I think you can keep the BAD_MADT_ENTRY() check as a standard check, and ...

No. BAD_MADT_ENTRY() will fail if the struct version is V0 because the
size will be smaller than sizeof(struct acpi_madt_multiproc_wakeup).


diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 4fab2ed454f3..3c8efba86d5c 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -38,6 +38,7 @@ struct smp_ops {
        int (*cpu_disable)(void);
        void (*cpu_die)(unsigned int cpu);
        void (*play_dead)(void);
+       void (*crash_play_dead)(void);
 
        void (*send_call_func_ipi)(const struct cpumask *mask);
        void (*send_call_func_single_ipi)(int cpu);
diff --git a/arch/x86/kernel/acpi/madt_wakeup.c 
b/arch/x86/kernel/acpi/madt_wakeup.c
index 782fe8fd533c..a801f773f9f1 100644
--- a/arch/x86/kernel/acpi/madt_wakeup.c
+++ b/arch/x86/kernel/acpi/madt_wakeup.c
@@ -25,6 +25,12 @@ static u64 acpi_mp_reset_vector_paddr __ro_after_init;
 
 void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa);
 
+static void crash_acpi_mp_play_dead(void)
+{
+       asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr,
+                             acpi_mp_pgd);
+}
+
 static void acpi_mp_play_dead(void)
 {
        play_dead_common();
@@ -58,57 +64,6 @@ static void acpi_mp_stop_other_cpus(int wait)
        smp_shutdown_nonboot_cpus(smp_processor_id());
 }
 
-#ifdef CONFIG_KEXEC_CORE
-static atomic_t waiting_for_crash_ipi;
-
-static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
-{
-       local_irq_disable();
-
-       crash_save_cpu(regs, raw_smp_processor_id());
-
-       cpu_emergency_stop_pt();
-
-       disable_local_APIC();
-
-       /*
-        * Prepare the CPU for reboot _after_ invoking the callback so that the
-        * callback can safely use virtualization instructions, e.g. VMCLEAR.
-        */
-       cpu_emergency_disable_virtualization();
-
-       atomic_dec(&waiting_for_crash_ipi);
-
-       asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr,
-                             acpi_mp_pgd);
-
-       return NMI_HANDLED;
-}
-
-static void acpi_mp_crash_stop_other_cpus(void)
-{
-       unsigned long timeout;
-
-       /* The kernel is broken so disable interrupts */
-       local_irq_disable();
-
-
-       atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
-
-       /* Would it be better to replace the trap vector here? */
-       if (register_nmi_handler(NMI_LOCAL, crash_nmi_callback,
-                                NMI_FLAG_FIRST, "crash"))
-               return;         /* Return what? */
-
-       apic_send_IPI_allbutself(NMI_VECTOR);
-
-       /* Don't wait longer than a second. */
-       timeout = USEC_PER_SEC;
-       while (atomic_read(&waiting_for_crash_ipi) && timeout--)
-               udelay(1);
-}
-#endif
-
 /* The argument is required to match type of x86_mapping_info::alloc_pgt_page 
*/
 static void __init *alloc_pgt_page(void *dummy)
 {
@@ -277,11 +232,9 @@ static int __init acpi_mp_setup_reset(u64 reset_vector)
        }
 
        smp_ops.play_dead = acpi_mp_play_dead;
+       smp_ops.crash_play_dead = crash_acpi_mp_play_dead;
        smp_ops.cpu_die = acpi_mp_cpu_die;
        smp_ops.stop_other_cpus = acpi_mp_stop_other_cpus;
-#ifdef CONFIG_KEXEC_CORE
-       smp_ops.crash_stop_other_cpus = acpi_mp_crash_stop_other_cpus;
-#endif
 
        acpi_mp_reset_vector_paddr = reset_vector;
        acpi_mp_pgd = __pa(pgd);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index c81afffaa954..99e6ab552da0 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -878,10 +878,14 @@ static int crash_nmi_callback(unsigned int val, struct 
pt_regs *regs)
        cpu_emergency_disable_virtualization();
 
        atomic_dec(&waiting_for_crash_ipi);
-       /* Assume hlt works */
-       halt();
-       for (;;)
-               cpu_relax();
+
+       if (smp_ops.crash_play_dead) {
+               smp_ops.crash_play_dead();
+       } else {
+               halt();
+               for (;;)
+                       cpu_relax();
+       }
 
        return NMI_HANDLED;
 }
-- 
  Kiryl Shutsemau / Kirill A. Shutemov

_______________________________________________
kexec mailing list
[email protected]
http://lists.infradead.org/mailman/listinfo/kexec

Reply via email to