QEMU support for direct pmtimer reads. Hopefully it's safe, since it's a
read-only register?

With self-disable C2 plus this patch, I'm seeing less CPU usage when idle
with CONFIG_CPU_IDLE enabled. It is quite noticeable on SMP guests. Windows
XP is comparable to the standard (emulated) setup: I have never seen it
consume less than 10% either way, and it is usually at 20-30%.

On migration the destination host can either lack ACPI or have the timer
in a different IO port, so emulation is necessary.

Or, with luck, the pmtimer is at the same address. Since the 24-bit counter
overflow period is only ~= 4.6 seconds, it's probably worthwhile to wait
for synchronization before restarting the guest. Not implemented though.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm-userspace.realtip/bios/rombios32.c
===================================================================
--- kvm-userspace.realtip.orig/bios/rombios32.c
+++ kvm-userspace.realtip/bios/rombios32.c
@@ -391,7 +391,7 @@ uint8_t bios_uuid[16];
 unsigned long ebda_cur_addr;
 #endif
 int acpi_enabled;
-uint32_t pm_io_base, smb_io_base;
+uint32_t pm_io_base, pmtmr_base, smb_io_base;
 int pm_sci_int;
 unsigned long bios_table_cur_addr;
 unsigned long bios_table_end_addr;
@@ -819,6 +819,12 @@ static void pci_bios_init_device(PCIDevi
         pci_config_writeb(d, PCI_INTERRUPT_LINE, 9);
 
         pm_io_base = PM_IO_BASE;
+        pmtmr_base = cmos_readb(0x60);
+        pmtmr_base |= cmos_readb(0x61) << 8;
+        pmtmr_base |= cmos_readb(0x62) << 16;
+        pmtmr_base |= cmos_readb(0x63) << 24;
+        if (!pmtmr_base)
+            pmtmr_base = pm_io_base + 0x08;
         pci_config_writel(d, 0x40, pm_io_base | 1);
         pci_config_writeb(d, 0x80, 0x01); /* enable PM io space */
         smb_io_base = SMB_IO_BASE;
@@ -1376,7 +1382,7 @@ void acpi_bios_init(void)
     fadt->acpi_disable = 0xf0;
     fadt->pm1a_evt_blk = cpu_to_le32(pm_io_base);
     fadt->pm1a_cnt_blk = cpu_to_le32(pm_io_base + 0x04);
-    fadt->pm_tmr_blk = cpu_to_le32(pm_io_base + 0x08);
+    fadt->pm_tmr_blk = cpu_to_le32(pmtmr_base);
     fadt->pm1_evt_len = 4;
     fadt->pm1_cnt_len = 2;
     fadt->pm_tmr_len = 4;
Index: kvm-userspace.realtip/qemu/hw/acpi.c
===================================================================
--- kvm-userspace.realtip.orig/qemu/hw/acpi.c
+++ kvm-userspace.realtip/qemu/hw/acpi.c
@@ -40,6 +40,10 @@ typedef struct PIIX4PMState {
     uint16_t pmsts;
     uint16_t pmen;
     uint16_t pmcntrl;
+    uint32_t pmtimer_base;
+    uint8_t direct_access;
+    int32_t pmtimer_offset;
+    uint32_t pmtimer_io_offset;
     uint8_t apmc;
     uint8_t apms;
     QEMUTimer *tmr_timer;
@@ -81,7 +85,12 @@ PIIX4PMState *pm_state;
 static uint32_t get_pmtmr(PIIX4PMState *s)
 {
     uint32_t d;
-    d = muldiv64(qemu_get_clock(vm_clock), PM_FREQ, ticks_per_sec);
+    if (!s->direct_access) {
+        d = muldiv64(qemu_get_clock(vm_clock), PM_FREQ, ticks_per_sec);
+        d += s->pmtimer_offset;
+    } else
+        qemu_kvm_get_pmtimer(&d);
+
     return d & 0xffffff;
 }
 
@@ -235,14 +244,10 @@ static uint32_t pm_ioport_readl(void *op
     uint32_t val;
 
     addr &= 0x3f;
-    switch(addr) {
-    case 0x08:
+    if (addr == s->pmtimer_io_offset)
         val = get_pmtmr(s);
-        break;
-    default:
+    else
         val = 0;
-        break;
-    }
 #ifdef DEBUG
     printf("PM readl port=0x%04x val=0x%08x\n", addr, val);
 #endif
@@ -433,9 +438,9 @@ static uint32_t smb_ioport_readb(void *o
     return val;
 }
 
-static void pm_io_space_update(PIIX4PMState *s)
+static void pm_io_space_update(PIIX4PMState *s, int migration)
 {
-    uint32_t pm_io_base;
+    uint32_t pm_io_base, pmtmr_len;
 
     if (s->dev.config[0x80] & 1) {
         pm_io_base = le32_to_cpu(*(uint32_t *)(s->dev.config + 0x40));
@@ -443,14 +448,29 @@ static void pm_io_space_update(PIIX4PMSt
 
         /* XXX: need to improve memory and ioport allocation */
 #if defined(DEBUG)
-        printf("PM: mapping to 0x%x\n", pm_io_base);
+        printf("PM: mapping to 0x%x mig=%d\n", pm_io_base, migration);
 #endif
         register_ioport_write(pm_io_base, 64, 1, pm_ioport_writeb, s);
         register_ioport_read(pm_io_base, 64, 1, pm_ioport_readb, s);
         register_ioport_write(pm_io_base, 64, 2, pm_ioport_writew, s);
         register_ioport_read(pm_io_base, 64, 2, pm_ioport_readw, s);
-        register_ioport_write(pm_io_base, 64, 4, pm_ioport_writel, s);
-        register_ioport_read(pm_io_base, 64, 4, pm_ioport_readl, s);
+
+        if (migration) {
+            s->pmtimer_io_offset = 0x08;
+            pmtmr_len = 64;
+        } else if (host_pmtimer_base) {
+            s->pmtimer_base = host_pmtimer_base;
+            s->pmtimer_io_offset = 0x0;
+            pmtmr_len = 4;
+            s->direct_access = 1;
+        } else {
+            s->pmtimer_base = pm_io_base;
+            s->pmtimer_io_offset = 0x08;
+            pmtmr_len = 64;
+        }
+
+        register_ioport_write(s->pmtimer_base, pmtmr_len, 4, pm_ioport_writel, s);
+        register_ioport_read(s->pmtimer_base, pmtmr_len, 4, pm_ioport_readl, s);
     }
 }
 
@@ -459,12 +479,13 @@ static void pm_write_config(PCIDevice *d
 {
     pci_default_write_config(d, address, val, len);
     if (address == 0x80)
-        pm_io_space_update((PIIX4PMState *)d);
+        pm_io_space_update((PIIX4PMState *)d, 0);
 }
 
 static void pm_save(QEMUFile* f,void *opaque)
 {
     PIIX4PMState *s = opaque;
+    uint32_t pmtmr_val;
 
     pci_device_save(&s->dev, f);
 
@@ -475,6 +496,14 @@ static void pm_save(QEMUFile* f,void *op
     qemu_put_8s(f, &s->apms);
     qemu_put_timer(f, s->tmr_timer);
     qemu_put_be64(f, s->tmr_overflow_time);
+    qemu_put_be32(f, s->pmtimer_base);
+    if (s->direct_access) {
+        if (qemu_kvm_get_pmtimer(&pmtmr_val) < 0)
+            pmtmr_val = 1 << 30;
+    } else
+            pmtmr_val = get_pmtmr(s);
+
+    qemu_put_be32(f, pmtmr_val);
 }
 
 static int pm_load(QEMUFile* f,void* opaque,int version_id)
@@ -482,7 +511,7 @@ static int pm_load(QEMUFile* f,void* opa
     PIIX4PMState *s = opaque;
     int ret;
 
-    if (version_id > 1)
+    if (version_id > 2)
         return -EINVAL;
 
     ret = pci_device_load(&s->dev, f);
@@ -496,10 +525,31 @@ static int pm_load(QEMUFile* f,void* opa
     qemu_get_8s(f, &s->apms);
     qemu_get_timer(f, s->tmr_timer);
     s->tmr_overflow_time=qemu_get_be64(f);
+    if (version_id >= 2) {
+        uint32_t pmtmr_val;
 
-    pm_io_space_update(s);
+        s->pmtimer_base = qemu_get_be32(f);
+        pmtmr_val = qemu_get_be32(f);
+        if (pmtmr_val & (1 << 30))
+           return -EINVAL;
+#ifdef KVM_CAP_OPEN_IOPORT
+        /*
+         * Could wait for synchronicity instead of closing
+         * direct access.
+         */
+        if (host_pmtimer_base) {
+            ret = kvm_close_direct_pmtimer();
+            if (ret)
+                return ret;
+            host_pmtimer_base = 0;
+        }
+#endif
+        s->pmtimer_offset = pmtmr_val - get_pmtmr(s);
+    }
 
-    return 0;
+    pm_io_space_update(s, 1);
+
+   return 0;
 }
 
 i2c_bus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,
@@ -548,7 +598,7 @@ i2c_bus *piix4_pm_init(PCIBus *bus, int 
 
     s->tmr_timer = qemu_new_timer(vm_clock, pm_tmr_timer, s);
 
-    register_savevm("piix4_pm", 0, 1, pm_save, pm_load, s);
+    register_savevm("piix4_pm", 0, 2, pm_save, pm_load, s);
 
     s->smbus = i2c_init_bus();
     s->irq = sci_irq;
Index: kvm-userspace.realtip/qemu/hw/pc.c
===================================================================
--- kvm-userspace.realtip.orig/qemu/hw/pc.c
+++ kvm-userspace.realtip/qemu/hw/pc.c
@@ -253,6 +253,11 @@ static void cmos_init(ram_addr_t ram_siz
     }
     rtc_set_memory(s, 0x5f, smp_cpus - 1);
 
+    rtc_set_memory(s, 0x60, host_pmtimer_base);
+    rtc_set_memory(s, 0x61, host_pmtimer_base >> 8);
+    rtc_set_memory(s, 0x62, host_pmtimer_base >> 16);
+    rtc_set_memory(s, 0x63, host_pmtimer_base >> 24);
+
     if (ram_size > (16 * 1024 * 1024))
         val = (ram_size / 65536) - ((16 * 1024 * 1024) / 65536);
     else
Index: kvm-userspace.realtip/qemu/qemu-kvm-x86.c
===================================================================
--- kvm-userspace.realtip.orig/qemu/qemu-kvm-x86.c
+++ kvm-userspace.realtip/qemu/qemu-kvm-x86.c
@@ -11,12 +11,17 @@
 
 #include <string.h>
 #include "hw/hw.h"
+#include "sysemu.h"
 
 #include "qemu-kvm.h"
 #include <libkvm.h>
 #include <pthread.h>
 #include <sys/utsname.h>
 #include <linux/kvm_para.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
 
 #define MSR_IA32_TSC           0x10
 
@@ -545,6 +550,123 @@ static int get_para_features(kvm_context
        return features;
 }
 
+#ifdef KVM_CAP_OPEN_IOPORT
+/* Find the host "ACPI PM_TMR" port in /proc/ioports and pass it (plus port
+ * 0x80) to kvm_set_open_ioports(); host_pmtimer_base is reset to 0 on failure. */
+int kvm_arch_open_pmtimer(void)
+{
+    int fd, ret = 0;
+    char buf[16384];
+    char *line, *saveptr;
+    uint32_t pmtmr;
+    struct kvm_ioport_list *ioport_list;
+
+    if (no_direct_pmtimer)
+        return ret;
+
+    fd = open("/proc/ioports", O_RDONLY);
+    if (fd == -1) {
+        perror("open /proc/ioports");
+        exit(0); /* NOTE(review): exits with a success status on error */
+    }
+    /* Leave room for the terminator: read() does not NUL-terminate. */
+    ret = read(fd, buf, sizeof(buf) - 1);
+    if (ret == -1) {
+        perror("read /proc/ioports");
+        exit(0);
+    }
+    close(fd); /* the descriptor used to leak */
+    buf[ret] = '\0';
+
+    for (line = strtok_r(buf, "\n", &saveptr); line;
+         line = strtok_r(NULL, "\n", &saveptr)) {
+        if (!strstr(line, "ACPI PM_TMR"))
+            continue;
+        /* Presumably "  <start>-<end> : ACPI PM_TMR": strtoul skips the
+         * leading blanks itself and stops at the '-'. */
+        host_pmtimer_base = strtoul(line, NULL, 16);
+        /* Fail now instead of during migration */
+        if (qemu_kvm_get_pmtimer(&pmtmr) < 0)
+            host_pmtimer_base = 0;
+        break;
+    }
+
+    if (!host_pmtimer_base)
+        return 0;
+
+    ioport_list = qemu_malloc(sizeof(struct kvm_ioport_list) +
+                         sizeof(struct kvm_ioport) * 2);
+    if (!ioport_list)
+        goto out_no_pmtimer;
+    ioport_list->nranges = 2;
+    ioport_list->ioports[0].addr = 0x80;
+    ioport_list->ioports[0].len = 1;
+    ioport_list->ioports[1].addr = host_pmtimer_base;
+    ioport_list->ioports[1].len = 4;
+
+    ret = kvm_set_open_ioports(kvm_context, ioport_list);
+    if (ret) {
+        perror("kvm_set_open_ioports");
+        goto out_no_pmtimer_free;
+    }
+
+    qemu_free(ioport_list);
+    return 0;
+
+out_no_pmtimer_free:
+    qemu_free(ioport_list);
+out_no_pmtimer:
+    host_pmtimer_base = 0;
+    return 0;
+}
+
+/* Re-submit the open-ioport list with port 0x80 only (no PM timer range).
+ * Returns kvm_set_open_ioports()'s result, or -EINVAL if allocation fails. */
+int kvm_close_direct_pmtimer(void)
+{
+    struct kvm_ioport_list *ports;
+    int rc = -EINVAL;
+
+    ports = qemu_malloc(sizeof(struct kvm_ioport_list) +
+                        sizeof(struct kvm_ioport));
+    if (ports) {
+        ports->nranges = 1;
+        ports->ioports[0].len = 1;
+        ports->ioports[0].addr = 0x80;
+        rc = kvm_set_open_ioports(kvm_context, ports);
+        qemu_free(ports);
+    }
+    return rc;
+}
+#else /* !KVM_CAP_OPEN_IOPORT */
+int kvm_arch_open_pmtimer(void)
+{
+    return 0; /* nothing to probe; host_pmtimer_base is not touched here */
+}
+#endif /* KVM_CAP_OPEN_IOPORT */
+/* Called from kvm_qemu_create_context(); currently only probes the PM timer. */
+int kvm_arch_qemu_init(void)
+{
+    kvm_arch_open_pmtimer(); /* result unused: it returns 0 on every path */
+    return 0;
+}
+
+/* Read the host PM timer from /dev/pmtimer into *value, masked to 24 bits. */
+int qemu_kvm_get_pmtimer(uint32_t *value)
+{
+    int fd, ret;
+
+    fd = open("/dev/pmtimer", O_RDONLY);
+    if (fd == -1)
+        return -1;
+    ret = read(fd, value, sizeof(*value)); /* not sizeof(value): pointer size */
+    close(fd);
+    if (ret != (int)sizeof(*value))
+        return -1; /* error or short read: *value holds nothing usable */
+    *value &= 0xffffff;
+    return ret;
+}
+
 int kvm_arch_qemu_init_env(CPUState *cenv)
 {
     struct kvm_cpuid_entry cpuid_ent[100];
Index: kvm-userspace.realtip/qemu/qemu-kvm.c
===================================================================
--- kvm-userspace.realtip.orig/qemu/qemu-kvm.c
+++ kvm-userspace.realtip/qemu/qemu-kvm.c
@@ -677,6 +677,7 @@ int kvm_qemu_create_context(void)
     r = kvm_arch_qemu_create_context();
     if(r <0)
        kvm_qemu_destroy();
+    kvm_arch_qemu_init();
     return 0;
 }
 
Index: kvm-userspace.realtip/qemu/qemu-kvm.h
===================================================================
--- kvm-userspace.realtip.orig/qemu/qemu-kvm.h
+++ kvm-userspace.realtip/qemu/qemu-kvm.h
@@ -49,6 +49,7 @@ void kvm_cpu_destroy_phys_mem(target_phy
                              unsigned long size);
 
 int kvm_arch_qemu_create_context(void);
+int kvm_arch_qemu_init(void);
 
 void kvm_arch_save_regs(CPUState *env);
 void kvm_arch_load_regs(CPUState *env);
@@ -60,6 +61,8 @@ int kvm_arch_has_work(CPUState *env);
 int kvm_arch_try_push_interrupts(void *opaque);
 void kvm_arch_update_regs_for_sipi(CPUState *env);
 void kvm_arch_cpu_reset(CPUState *env);
+int qemu_kvm_get_pmtimer(uint32_t *value);
+int kvm_close_direct_pmtimer(void);
 
 CPUState *qemu_kvm_cpu_env(int index);
 
Index: kvm-userspace.realtip/qemu/sysemu.h
===================================================================
--- kvm-userspace.realtip.orig/qemu/sysemu.h
+++ kvm-userspace.realtip/qemu/sysemu.h
@@ -94,6 +94,7 @@ extern int win2k_install_hack;
 extern int alt_grab;
 extern int usb_enabled;
 extern int smp_cpus;
+extern unsigned int host_pmtimer_base;
 extern int cursor_hide;
 extern int graphic_rotate;
 extern int no_quit;
@@ -101,6 +102,7 @@ extern int semihosting_enabled;
 extern int autostart;
 extern int old_param;
 extern int hpagesize;
+extern int no_direct_pmtimer;
 extern const char *bootp_filename;
 
 
Index: kvm-userspace.realtip/qemu/vl.c
===================================================================
--- kvm-userspace.realtip.orig/qemu/vl.c
+++ kvm-userspace.realtip/qemu/vl.c
@@ -209,6 +209,7 @@ int win2k_install_hack = 0;
 int usb_enabled = 0;
 static VLANState *first_vlan;
 int smp_cpus = 1;
+unsigned int host_pmtimer_base;
 const char *vnc_display;
 #if defined(TARGET_SPARC)
 #define MAX_CPUS 16
@@ -235,6 +236,7 @@ int time_drift_fix = 0;
 unsigned int kvm_shadow_memory = 0;
 const char *mem_path = NULL;
 int hpagesize = 0;
+int no_direct_pmtimer = 0;
 const char *cpu_vendor_string;
 #ifdef TARGET_ARM
 int old_param = 0;
@@ -7931,6 +7933,7 @@ enum {
     QEMU_OPTION_tdf,
     QEMU_OPTION_kvm_shadow_memory,
     QEMU_OPTION_mempath,
+    QEMU_OPTION_no_direct_pmtimer,
 };
 
 typedef struct QEMUOption {
@@ -8058,6 +8061,7 @@ const QEMUOption qemu_options[] = {
     { "clock", HAS_ARG, QEMU_OPTION_clock },
     { "startdate", HAS_ARG, QEMU_OPTION_startdate },
     { "mem-path", HAS_ARG, QEMU_OPTION_mempath },
+    { "no-direct-pmtimer", 0, QEMU_OPTION_no_direct_pmtimer },
     { NULL },
 };
 
@@ -8962,6 +8966,9 @@ int main(int argc, char **argv)
             case QEMU_OPTION_mempath:
                mem_path = optarg;
                break;
+            case QEMU_OPTION_no_direct_pmtimer:
+                no_direct_pmtimer = 1;
+                break;
             case QEMU_OPTION_name:
                 qemu_name = optarg;
                 break;

-- 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to