Write an invalid latency value the first time the guest attempts to idle
via the P_LVL2 port.
This way the TSC is considered unreliable, and we avoid the cost of the
APIC timer broadcasts on idle enter/exit that are necessary for C1+.
Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>
Index: kvm-userspace.tip/qemu/hw/acpi.c
===================================================================
--- kvm-userspace.tip.orig/qemu/hw/acpi.c
+++ kvm-userspace.tip/qemu/hw/acpi.c
@@ -120,6 +120,29 @@ static void pm_tmr_timer(void *opaque)
pm_update_sci(s);
}
+/*
+ * Fake C2 emulation: a byte read at offset 0x14 (P_LVL2) invokes
+ * qemu_system_cpu_power_notify(), so the OS will consider the TSC unreliable
+ * and fall back to C1 once acpi-dsdt.dsl updates the latency to a high value.
+ */
+static void qemu_system_cpu_power_notify(void); /* defined later in this file */
+static uint32_t pm_ioport_readb(void *opaque, uint32_t addr)
+{
+ addr &= 0x3f; /* keep only the offset inside the 64-byte PM I/O range */
+ switch (addr) {
+ case 0x14: /* P_LVL2 */
+ qemu_system_cpu_power_notify();
+ }
+#ifdef DEBUG
+ printf("pm_ioport_readb addr=%x\n", addr);
+#endif
+ return 0; /* every byte read yields 0, whatever the offset */
+}
+
+static void pm_ioport_writeb(void *opaque, uint32_t addr, uint32_t val)
+{ /* intentionally empty: byte writes to the PM I/O range are ignored */
+}
+
static void pm_ioport_writew(void *opaque, uint32_t addr, uint32_t val)
{
PIIX4PMState *s = opaque;
@@ -419,6 +442,8 @@ static void pm_io_space_update(PIIX4PMSt
#if defined(DEBUG)
printf("PM: mapping to 0x%x\n", pm_io_base);
#endif
+ register_ioport_write(pm_io_base, 64, 1, pm_ioport_writeb, s);
+ register_ioport_read(pm_io_base, 64, 1, pm_ioport_readb, s);
register_ioport_write(pm_io_base, 64, 2, pm_ioport_writew, s);
register_ioport_read(pm_io_base, 64, 2, pm_ioport_readw, s);
register_ioport_write(pm_io_base, 64, 4, pm_ioport_writel, s);
@@ -537,6 +562,7 @@ void qemu_system_powerdown(void)
}
#endif
#define GPE_BASE 0xafe0
+#define POWER_GPE_BASE 0xb040
#define PROC_BASE 0xaf00
#define PCI_BASE 0xae00
#define PCI_EJ_BASE 0xae08
@@ -553,7 +579,12 @@ struct pci_status {
uint32_t down;
};
+struct power_gpe_regs { /* state behind the POWER_GPE_BASE I/O ports */
+ uint8_t disable; /* set to 1 by qemu_system_cpu_power_notify(); returned by cpu_power_read() */
+};
+
static struct gpe_regs gpe;
+static struct power_gpe_regs power_gpe;
static struct pci_status pci0_status;
static uint32_t gpe_readb(void *opaque, uint32_t addr)
@@ -622,6 +653,23 @@ static void gpe_writeb(void *opaque, uin
#endif
}
+static uint32_t cpu_power_read(void *opaque, uint32_t addr) /* read handler for the POWER_GPE_BASE ports */
+{
+ struct power_gpe_regs *p = opaque; /* the pointer supplied when the handler was registered */
+
+#if defined(DEBUG)
+ printf("cpu power read %x == %x\n", addr, p->disable); /* %x, not %lx: neither argument is a long */
+#endif
+ return p->disable; /* addr is not decoded; any port in the range returns the flag */
+}
+
+static void cpu_power_write(void *opaque, uint32_t addr, uint32_t val) /* write handler: value is discarded */
+{
+#if defined(DEBUG)
+ printf("cpu power write %x <== %x\n", addr, val); /* %x, not %lx: both arguments are uint32_t */
+#endif
+}
+
static uint32_t pcihotplug_read(void *opaque, uint32_t addr)
{
uint32_t val = 0;
@@ -695,6 +743,9 @@ void qemu_system_hot_add_init(const char
register_ioport_write(PCI_EJ_BASE, 4, 4, pciej_write, NULL);
register_ioport_read(PCI_EJ_BASE, 4, 4, pciej_read, NULL);
+ register_ioport_write(POWER_GPE_BASE, 4, 2, cpu_power_write, &power_gpe);
+ register_ioport_read(POWER_GPE_BASE, 4, 2, cpu_power_read, &power_gpe);
+
model = cpu_model;
}
@@ -737,6 +788,15 @@ void qemu_system_cpu_hot_add(int cpu, in
disable_processor(&gpe, cpu);
qemu_set_irq(pm_state->irq, 0);
}
+
+static void qemu_system_cpu_power_notify(void) /* called from pm_ioport_readb() on a P_LVL2 byte read */
+{
+ power_gpe.disable = 1; /* flag the guest later reads back through cpu_power_read() */
+
+ qemu_set_irq(pm_state->irq, 1); /* pulse pm_state->irq: raise it ... */
+ qemu_set_irq(pm_state->irq, 0); /* ... and lower it again straight away */
+}
+
#endif
static void enable_device(struct pci_status *p, struct gpe_regs *g, int slot)
--
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html