Write an invalid latency value the first time the guest attempts to idle
via the P_LVL2 port.

This way the TSC is considered unreliable, and we do away with the cost
of the APIC timer broadcasts on enter/exit that are necessary for C1+.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.tip/qemu/hw/acpi.c
===================================================================
--- kvm-userspace.tip.orig/qemu/hw/acpi.c
+++ kvm-userspace.tip/qemu/hw/acpi.c
@@ -120,6 +120,29 @@ static void pm_tmr_timer(void *opaque)
     pm_update_sci(s);
 }
 
+/*
+ * Fake C2 emulation, so the OS will consider the TSC unreliable
+ * and fall back to C1 after the latency is updated to a high value
+ * in acpi-dsdt.dsl.
+ */
+static void qemu_system_cpu_power_notify(void);
+static uint32_t pm_ioport_readb(void *opaque, uint32_t addr)
+{
+    /* Keep only the low 6 bits: the offset within the 64-port PM range. */
+    addr &= 0x3f;
+    if (addr == 0x14) { /* P_LVL2 */
+        qemu_system_cpu_power_notify();
+    }
+#ifdef DEBUG
+    printf("pm_ioport_readb addr=%x\n", addr);
+#endif
+    return 0;
+}
+
+static void pm_ioport_writeb(void *opaque, uint32_t addr, uint32_t val)
+{   /* Empty handler: byte-wide writes to the PM I/O range are dropped. */
+}
+
 static void pm_ioport_writew(void *opaque, uint32_t addr, uint32_t val)
 {
     PIIX4PMState *s = opaque;
@@ -419,6 +442,8 @@ static void pm_io_space_update(PIIX4PMSt
 #if defined(DEBUG)
         printf("PM: mapping to 0x%x\n", pm_io_base);
 #endif
+        register_ioport_write(pm_io_base, 64, 1, pm_ioport_writeb, s);
+        register_ioport_read(pm_io_base, 64, 1, pm_ioport_readb, s);
         register_ioport_write(pm_io_base, 64, 2, pm_ioport_writew, s);
         register_ioport_read(pm_io_base, 64, 2, pm_ioport_readw, s);
         register_ioport_write(pm_io_base, 64, 4, pm_ioport_writel, s);
@@ -537,6 +562,7 @@ void qemu_system_powerdown(void)
 }
 #endif
 #define GPE_BASE 0xafe0
+#define POWER_GPE_BASE 0xb040
 #define PROC_BASE 0xaf00
 #define PCI_BASE 0xae00
 #define PCI_EJ_BASE 0xae08
@@ -553,7 +579,12 @@ struct pci_status {
     uint32_t down;
 };
 
+struct power_gpe_regs {
+    uint8_t disable; /* value returned to the guest by cpu_power_read() */
+};
+
 static struct gpe_regs gpe;
+static struct power_gpe_regs power_gpe;
 static struct pci_status pci0_status;
 
 static uint32_t gpe_readb(void *opaque, uint32_t addr)
@@ -622,6 +653,23 @@ static void gpe_writeb(void *opaque, uin
 #endif
 }
 
+/* I/O read handler for POWER_GPE_BASE: returns the 'disable' flag. */
+static uint32_t cpu_power_read(void *opaque, uint32_t addr)
+{
+    struct power_gpe_regs *p = opaque;
+#if defined(DEBUG)
+    printf("cpu power read %x == %x\n", addr, p->disable);
+#endif
+    return p->disable;
+}
+
+static void cpu_power_write(void *opaque, uint32_t addr, uint32_t val)
+{   /* Written values are discarded; they are only traced under DEBUG. */
+#if defined(DEBUG)
+    printf("cpu power write %x <== %x\n", addr, val);
+#endif
+}
+
 static uint32_t pcihotplug_read(void *opaque, uint32_t addr)
 {
     uint32_t val = 0;
@@ -695,6 +743,9 @@ void qemu_system_hot_add_init(const char
     register_ioport_write(PCI_EJ_BASE, 4, 4, pciej_write, NULL);
     register_ioport_read(PCI_EJ_BASE, 4, 4,  pciej_read, NULL);
 
+    register_ioport_write(POWER_GPE_BASE, 4, 2, cpu_power_write, &power_gpe);
+    register_ioport_read(POWER_GPE_BASE, 4, 2, cpu_power_read, &power_gpe);
+
     model = cpu_model;
 }
 
@@ -737,6 +788,15 @@ void qemu_system_cpu_hot_add(int cpu, in
         disable_processor(&gpe, cpu);
     qemu_set_irq(pm_state->irq, 0);
 }
+
+static void qemu_system_cpu_power_notify(void)
+{
+    power_gpe.disable = 1; /* flag the guest reads back via cpu_power_read() */
+    /* Pulse pm_state->irq: raise the line, then lower it again. */
+    qemu_set_irq(pm_state->irq, 1);
+    qemu_set_irq(pm_state->irq, 0);
+}
+
 #endif
 
 static void enable_device(struct pci_status *p, struct gpe_regs *g, int slot)

-- 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to