 * Use MOV CR instead of LMSW.  LMSW has no decode assist at all on AMD CPUs,
   forcing Xen to fully emulate the instruction.
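 * Make ap_start() static and mark it __attribute__((used)), replacing the
   non-static declaration whose comment said it existed only to avoid an
   unused-function compiler warning.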
 * Have ap_start() perform a self-INIT for APs, rather than having boot_cpu()
   do it.  This is marginally more parallel, and reduces the amount of remote
   vCPU management that Xen has to do on behalf of the guest.

Signed-off-by: Andrew Cooper <andrew.coop...@citrix.com>
---
CC: Jan Beulich <jbeul...@suse.com>
CC: Roger Pau Monné <roger....@citrix.com>
CC: Wei Liu <w...@xen.org>
---
 tools/firmware/hvmloader/smp.c | 46 ++++++++++++++++++++++++++++--------------
 1 file changed, 31 insertions(+), 15 deletions(-)

diff --git a/tools/firmware/hvmloader/smp.c b/tools/firmware/hvmloader/smp.c
index 082b17f13818..80154950ac32 100644
--- a/tools/firmware/hvmloader/smp.c
+++ b/tools/firmware/hvmloader/smp.c
@@ -35,9 +35,9 @@ asm (
     "    mov   %cs,%ax               \n"
     "    mov   %ax,%ds               \n"
     "    lgdt  gdt_desr-ap_boot_start\n"
-    "    xor   %ax, %ax              \n"
-    "    inc   %ax                   \n"
-    "    lmsw  %ax                   \n"
+    "    mov   %cr0, %eax            \n"
+    "    or    $1, %al               \n"
+    "    mov   %eax, %cr0            \n"
     "    ljmpl $0x08,$1f             \n"
     "gdt_desr:                       \n"
     "    .word gdt_end - gdt - 1     \n"
@@ -50,8 +50,6 @@ asm (
     "    movl  $stack_top,%esp       \n"
     "    movl  %esp,%ebp             \n"
     "    call  ap_start              \n"
-    "1:  hlt                         \n"
-    "    jmp  1b                     \n"
     "                                \n"
     "    .align 8                    \n"
     "gdt:                            \n"
@@ -68,14 +66,37 @@ asm (
     "    .text                       \n"
     );
 
-void ap_start(void); /* non-static avoids unused-function compiler warning */
-/*static*/ void ap_start(void)
+static void __attribute__((used)) ap_start(void)
 {
-    printf(" - CPU%d ... ", ap_cpuid);
+    unsigned int cpu = ap_cpuid;
+
+    printf(" - CPU%d ... ", cpu);
     cacheattr_init();
     printf("done.\n");
-    wmb();
-    ap_callin = 1;
+
+    /*
+     * Call in to the BSP.  For APs, take ourselves offline.
+     *
+     * We must not use the stack after calling in to the BSP.
+     */
+    asm volatile (
+        "    movb $1, ap_callin          \n"
+
+        "    test %[icr2], %[icr2]       \n"
+        "    jz   .Lbsp                  \n"
+
+        "    movl %[icr2], %[ICR2]       \n"
+        "    movl %[init], %[ICR1]       \n"
+        "1:  hlt                         \n"
+        "    jmp  1b                     \n"
+
+        ".Lbsp:                          \n"
+        :
+        : [icr2] "r" (SET_APIC_DEST_FIELD(LAPIC_ID(cpu))),
+          [init] "i" (APIC_DM_INIT),
+          [ICR1] "m" (*(uint32_t *)(LAPIC_BASE_ADDRESS + APIC_ICR)),
+          [ICR2] "m" (*(uint32_t *)(LAPIC_BASE_ADDRESS + APIC_ICR2))
+        : "memory" );
 }
 
 static void lapic_wait_ready(void)
@@ -111,11 +132,6 @@ static void boot_cpu(unsigned int cpu)
      */
     while ( !ap_callin )
         cpu_relax();
-
-    /* Take the secondary processor offline. */
-    lapic_write(APIC_ICR2, icr2);
-    lapic_write(APIC_ICR, APIC_DM_INIT);
-    lapic_wait_ready();    
 }
 
 void smp_initialise(void)
-- 
2.11.0


Reply via email to