QEMU support for direct pmtimer reads. Hopefully it's safe, since it's a
read-only register?
With self-disable C2 + this I'm seeing less CPU usage when idle with
CONFIG_CPU_IDLE enabled. Quite noticeable on SMP guests. Windows XP is
comparable to standard (never seen it consume less than 10% either way,
usually 20-30%).
On migration the destination host can either lack ACPI or have the timer
in a different IO port, so emulation is necessary.
Or, with luck, the pmtimer is at the same address. Since the 24-bit counter
overflow period is only ~= 4.6 seconds, it's probably worthwhile to wait
for synchronization before restarting the guest. Not implemented though.
Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>
Index: kvm-userspace.realtip/bios/rombios32.c
===================================================================
--- kvm-userspace.realtip.orig/bios/rombios32.c
+++ kvm-userspace.realtip/bios/rombios32.c
@@ -391,7 +391,7 @@ uint8_t bios_uuid[16];
unsigned long ebda_cur_addr;
#endif
int acpi_enabled;
-uint32_t pm_io_base, smb_io_base;
+uint32_t pm_io_base, pmtmr_base, smb_io_base;
int pm_sci_int;
unsigned long bios_table_cur_addr;
unsigned long bios_table_end_addr;
@@ -819,6 +819,12 @@ static void pci_bios_init_device(PCIDevi
pci_config_writeb(d, PCI_INTERRUPT_LINE, 9);
pm_io_base = PM_IO_BASE;
+ pmtmr_base = cmos_readb(0x60);
+ pmtmr_base |= cmos_readb(0x61) << 8;
+ pmtmr_base |= cmos_readb(0x62) << 16;
+ pmtmr_base |= cmos_readb(0x63) << 24;
+ if (!pmtmr_base)
+ pmtmr_base = pm_io_base + 0x08;
pci_config_writel(d, 0x40, pm_io_base | 1);
pci_config_writeb(d, 0x80, 0x01); /* enable PM io space */
smb_io_base = SMB_IO_BASE;
@@ -1376,7 +1382,7 @@ void acpi_bios_init(void)
fadt->acpi_disable = 0xf0;
fadt->pm1a_evt_blk = cpu_to_le32(pm_io_base);
fadt->pm1a_cnt_blk = cpu_to_le32(pm_io_base + 0x04);
- fadt->pm_tmr_blk = cpu_to_le32(pm_io_base + 0x08);
+ fadt->pm_tmr_blk = cpu_to_le32(pmtmr_base);
fadt->pm1_evt_len = 4;
fadt->pm1_cnt_len = 2;
fadt->pm_tmr_len = 4;
Index: kvm-userspace.realtip/qemu/hw/acpi.c
===================================================================
--- kvm-userspace.realtip.orig/qemu/hw/acpi.c
+++ kvm-userspace.realtip/qemu/hw/acpi.c
@@ -40,6 +40,10 @@ typedef struct PIIX4PMState {
uint16_t pmsts;
uint16_t pmen;
uint16_t pmcntrl;
+ uint32_t pmtimer_base;
+ uint8_t direct_access;
+ int32_t pmtimer_offset;
+ uint32_t pmtimer_io_offset;
uint8_t apmc;
uint8_t apms;
QEMUTimer *tmr_timer;
@@ -81,7 +85,12 @@ PIIX4PMState *pm_state;
static uint32_t get_pmtmr(PIIX4PMState *s)
{
uint32_t d;
- d = muldiv64(qemu_get_clock(vm_clock), PM_FREQ, ticks_per_sec);
+ if (!s->direct_access) {
+ d = muldiv64(qemu_get_clock(vm_clock), PM_FREQ, ticks_per_sec);
+ d += s->pmtimer_offset;
+ } else
+ qemu_kvm_get_pmtimer(&d);
+
return d & 0xffffff;
}
@@ -235,14 +244,10 @@ static uint32_t pm_ioport_readl(void *op
uint32_t val;
addr &= 0x3f;
- switch(addr) {
- case 0x08:
+ if (addr == s->pmtimer_io_offset)
val = get_pmtmr(s);
- break;
- default:
+ else
val = 0;
- break;
- }
#ifdef DEBUG
printf("PM readl port=0x%04x val=0x%08x\n", addr, val);
#endif
@@ -433,9 +438,9 @@ static uint32_t smb_ioport_readb(void *o
return val;
}
-static void pm_io_space_update(PIIX4PMState *s)
+static void pm_io_space_update(PIIX4PMState *s, int migration)
{
- uint32_t pm_io_base;
+ uint32_t pm_io_base, pmtmr_len;
if (s->dev.config[0x80] & 1) {
pm_io_base = le32_to_cpu(*(uint32_t *)(s->dev.config + 0x40));
@@ -443,14 +448,29 @@ static void pm_io_space_update(PIIX4PMSt
/* XXX: need to improve memory and ioport allocation */
#if defined(DEBUG)
- printf("PM: mapping to 0x%x\n", pm_io_base);
+ printf("PM: mapping to 0x%x mig=%d\n", pm_io_base, migration);
#endif
register_ioport_write(pm_io_base, 64, 1, pm_ioport_writeb, s);
register_ioport_read(pm_io_base, 64, 1, pm_ioport_readb, s);
register_ioport_write(pm_io_base, 64, 2, pm_ioport_writew, s);
register_ioport_read(pm_io_base, 64, 2, pm_ioport_readw, s);
- register_ioport_write(pm_io_base, 64, 4, pm_ioport_writel, s);
- register_ioport_read(pm_io_base, 64, 4, pm_ioport_readl, s);
+
+ if (migration) {
+ s->pmtimer_io_offset = 0x08;
+ pmtmr_len = 64;
+ } else if (host_pmtimer_base) {
+ s->pmtimer_base = host_pmtimer_base;
+ s->pmtimer_io_offset = 0x0;
+ pmtmr_len = 4;
+ s->direct_access = 1;
+ } else {
+ s->pmtimer_base = pm_io_base;
+ s->pmtimer_io_offset = 0x08;
+ pmtmr_len = 64;
+ }
+
+ register_ioport_write(s->pmtimer_base, pmtmr_len, 4, pm_ioport_writel,
s);
+ register_ioport_read(s->pmtimer_base, pmtmr_len, 4, pm_ioport_readl,
s);
}
}
@@ -459,12 +479,13 @@ static void pm_write_config(PCIDevice *d
{
pci_default_write_config(d, address, val, len);
if (address == 0x80)
- pm_io_space_update((PIIX4PMState *)d);
+ pm_io_space_update((PIIX4PMState *)d, 0);
}
static void pm_save(QEMUFile* f,void *opaque)
{
PIIX4PMState *s = opaque;
+ uint32_t pmtmr_val;
pci_device_save(&s->dev, f);
@@ -475,6 +496,14 @@ static void pm_save(QEMUFile* f,void *op
qemu_put_8s(f, &s->apms);
qemu_put_timer(f, s->tmr_timer);
qemu_put_be64(f, s->tmr_overflow_time);
+ qemu_put_be32(f, s->pmtimer_base);
+ if (s->direct_access) {
+ if (qemu_kvm_get_pmtimer(&pmtmr_val) < 0)
+ pmtmr_val = 1 << 30;
+ } else
+ pmtmr_val = get_pmtmr(s);
+
+ qemu_put_be32(f, pmtmr_val);
}
static int pm_load(QEMUFile* f,void* opaque,int version_id)
@@ -482,7 +511,7 @@ static int pm_load(QEMUFile* f,void* opa
PIIX4PMState *s = opaque;
int ret;
- if (version_id > 1)
+ if (version_id > 2)
return -EINVAL;
ret = pci_device_load(&s->dev, f);
@@ -496,10 +525,31 @@ static int pm_load(QEMUFile* f,void* opa
qemu_get_8s(f, &s->apms);
qemu_get_timer(f, s->tmr_timer);
s->tmr_overflow_time=qemu_get_be64(f);
+ if (version_id >= 2) {
+ uint32_t pmtmr_val;
- pm_io_space_update(s);
+ s->pmtimer_base = qemu_get_be32(f);
+ pmtmr_val = qemu_get_be32(f);
+ if (pmtmr_val & (1 << 30))
+ return -EINVAL;
+#ifdef KVM_CAP_OPEN_IOPORT
+ /*
+ * Could wait for synchronicity instead of closing
+ * direct access.
+ */
+ if (host_pmtimer_base) {
+ ret = kvm_close_direct_pmtimer();
+ if (ret)
+ return ret;
+ host_pmtimer_base = 0;
+ }
+#endif
+ s->pmtimer_offset = pmtmr_val - get_pmtmr(s);
+ }
- return 0;
+ pm_io_space_update(s, 1);
+
+ return 0;
}
i2c_bus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,
@@ -548,7 +598,7 @@ i2c_bus *piix4_pm_init(PCIBus *bus, int
s->tmr_timer = qemu_new_timer(vm_clock, pm_tmr_timer, s);
- register_savevm("piix4_pm", 0, 1, pm_save, pm_load, s);
+ register_savevm("piix4_pm", 0, 2, pm_save, pm_load, s);
s->smbus = i2c_init_bus();
s->irq = sci_irq;
Index: kvm-userspace.realtip/qemu/hw/pc.c
===================================================================
--- kvm-userspace.realtip.orig/qemu/hw/pc.c
+++ kvm-userspace.realtip/qemu/hw/pc.c
@@ -253,6 +253,11 @@ static void cmos_init(ram_addr_t ram_siz
}
rtc_set_memory(s, 0x5f, smp_cpus - 1);
+ rtc_set_memory(s, 0x60, host_pmtimer_base);
+ rtc_set_memory(s, 0x61, host_pmtimer_base >> 8);
+ rtc_set_memory(s, 0x62, host_pmtimer_base >> 16);
+ rtc_set_memory(s, 0x63, host_pmtimer_base >> 24);
+
if (ram_size > (16 * 1024 * 1024))
val = (ram_size / 65536) - ((16 * 1024 * 1024) / 65536);
else
Index: kvm-userspace.realtip/qemu/qemu-kvm-x86.c
===================================================================
--- kvm-userspace.realtip.orig/qemu/qemu-kvm-x86.c
+++ kvm-userspace.realtip/qemu/qemu-kvm-x86.c
@@ -11,12 +11,17 @@
#include <string.h>
#include "hw/hw.h"
+#include "sysemu.h"
#include "qemu-kvm.h"
#include <libkvm.h>
#include <pthread.h>
#include <sys/utsname.h>
#include <linux/kvm_para.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
#define MSR_IA32_TSC 0x10
@@ -545,6 +550,123 @@ static int get_para_features(kvm_context
return features;
}
+#ifdef KVM_CAP_OPEN_IOPORT
+/*
+ * Look up the host "ACPI PM_TMR" port in /proc/ioports and pass it, together
+ * with port 0x80, to kvm_set_open_ioports().  On any failure
+ * host_pmtimer_base is left at (or reset to) 0 and nothing is opened.
+ * Always returns 0.
+ */
+int kvm_arch_open_pmtimer(void)
+{
+    int fd, ret;
+    char buf[16384];
+    char *line, *saveptr;
+    uint32_t pmtmr;
+    struct kvm_ioport_list *ioport_list;
+
+    if (no_direct_pmtimer)
+        return 0;
+
+    fd = open("/proc/ioports", O_RDONLY);
+    if (fd == -1) {
+        perror("open /proc/ioports");
+        return 0;
+    }
+
+    /* Leave room for the terminator that strtok_r()/strstr() rely on. */
+    ret = read(fd, buf, sizeof(buf) - 1);
+    if (ret == -1)
+        perror("read /proc/ioports");
+    close(fd);
+    if (ret <= 0)
+        return 0;
+    buf[ret] = '\0';
+
+    for (line = strtok_r(buf, "\n", &saveptr); line;
+         line = strtok_r(NULL, "\n", &saveptr)) {
+        if (!strstr(line, "ACPI PM_TMR"))
+            continue;
+        /* strtoul() skips leading blanks and stops at the range's '-'. */
+        host_pmtimer_base = strtoul(line, NULL, 16);
+        /* Fail now instead of during migration */
+        if (qemu_kvm_get_pmtimer(&pmtmr) < 0)
+            host_pmtimer_base = 0;
+        break;
+    }
+
+    if (!host_pmtimer_base)
+        return 0;
+
+    ioport_list = qemu_malloc(sizeof(struct kvm_ioport_list) +
+                              sizeof(struct kvm_ioport) * 2);
+    if (!ioport_list) {
+        host_pmtimer_base = 0;
+        return 0;
+    }
+    ioport_list->nranges = 2;
+    ioport_list->ioports[0].addr = 0x80;
+    ioport_list->ioports[0].len = 1;
+    ioport_list->ioports[1].addr = host_pmtimer_base;
+    ioport_list->ioports[1].len = 4;
+
+    ret = kvm_set_open_ioports(kvm_context, ioport_list);
+    if (ret) {
+        perror("kvm_set_open_ioports");
+        host_pmtimer_base = 0;
+    }
+
+    qemu_free(ioport_list);
+    return 0;
+}
+
+/* Re-install the open-ioport list with only port 0x80 in it.  Returns the
+ * result of kvm_set_open_ioports(), or -EINVAL when the list cannot be
+ * allocated. */
+int kvm_close_direct_pmtimer(void)
+{
+    struct kvm_ioport_list *ports;
+    int rc = -EINVAL;
+
+    ports = qemu_malloc(sizeof(*ports) + sizeof(struct kvm_ioport));
+    if (ports != NULL) {
+        ports->nranges = 1;
+        ports->ioports[0].addr = 0x80;
+        ports->ioports[0].len = 1;
+        rc = kvm_set_open_ioports(kvm_context, ports);
+        qemu_free(ports);
+    }
+    return rc;
+}
+#else
+int kvm_arch_open_pmtimer(void)
+{
+ return 0; /* no-op stub used when KVM_CAP_OPEN_IOPORT is not defined */
+}
+#endif
+
+int kvm_arch_qemu_init(void)
+{
+ kvm_arch_open_pmtimer(); /* return value is not checked */
+ return 0;
+}
+
+/* Read the host PM timer from /dev/pmtimer into *value, masked to 24 bits.
+ * Returns sizeof(*value) on success, -1 on open failure or a short read. */
+int qemu_kvm_get_pmtimer(uint32_t *value)
+{
+    int ret;
+    int fd = open("/dev/pmtimer", O_RDONLY);
+    if (fd == -1)
+        return -1;
+    ret = read(fd, value, sizeof(*value)); /* object size, not pointer size */
+    close(fd);
+    if (ret != (int)sizeof(*value))
+        return -1;
+    *value &= 0xffffff;
+    return ret;
+}
+
int kvm_arch_qemu_init_env(CPUState *cenv)
{
struct kvm_cpuid_entry cpuid_ent[100];
Index: kvm-userspace.realtip/qemu/qemu-kvm.c
===================================================================
--- kvm-userspace.realtip.orig/qemu/qemu-kvm.c
+++ kvm-userspace.realtip/qemu/qemu-kvm.c
@@ -677,6 +677,7 @@ int kvm_qemu_create_context(void)
r = kvm_arch_qemu_create_context();
if(r <0)
kvm_qemu_destroy();
+ kvm_arch_qemu_init();
return 0;
}
Index: kvm-userspace.realtip/qemu/qemu-kvm.h
===================================================================
--- kvm-userspace.realtip.orig/qemu/qemu-kvm.h
+++ kvm-userspace.realtip/qemu/qemu-kvm.h
@@ -49,6 +49,7 @@ void kvm_cpu_destroy_phys_mem(target_phy
unsigned long size);
int kvm_arch_qemu_create_context(void);
+int kvm_arch_qemu_init(void);
void kvm_arch_save_regs(CPUState *env);
void kvm_arch_load_regs(CPUState *env);
@@ -60,6 +61,8 @@ int kvm_arch_has_work(CPUState *env);
int kvm_arch_try_push_interrupts(void *opaque);
void kvm_arch_update_regs_for_sipi(CPUState *env);
void kvm_arch_cpu_reset(CPUState *env);
+int qemu_kvm_get_pmtimer(uint32_t *value);
+int kvm_close_direct_pmtimer(void);
CPUState *qemu_kvm_cpu_env(int index);
Index: kvm-userspace.realtip/qemu/sysemu.h
===================================================================
--- kvm-userspace.realtip.orig/qemu/sysemu.h
+++ kvm-userspace.realtip/qemu/sysemu.h
@@ -94,6 +94,7 @@ extern int win2k_install_hack;
extern int alt_grab;
extern int usb_enabled;
extern int smp_cpus;
+extern unsigned int host_pmtimer_base;
extern int cursor_hide;
extern int graphic_rotate;
extern int no_quit;
@@ -101,6 +102,7 @@ extern int semihosting_enabled;
extern int autostart;
extern int old_param;
extern int hpagesize;
+extern int no_direct_pmtimer;
extern const char *bootp_filename;
Index: kvm-userspace.realtip/qemu/vl.c
===================================================================
--- kvm-userspace.realtip.orig/qemu/vl.c
+++ kvm-userspace.realtip/qemu/vl.c
@@ -209,6 +209,7 @@ int win2k_install_hack = 0;
int usb_enabled = 0;
static VLANState *first_vlan;
int smp_cpus = 1;
+unsigned int host_pmtimer_base;
const char *vnc_display;
#if defined(TARGET_SPARC)
#define MAX_CPUS 16
@@ -235,6 +236,7 @@ int time_drift_fix = 0;
unsigned int kvm_shadow_memory = 0;
const char *mem_path = NULL;
int hpagesize = 0;
+int no_direct_pmtimer = 0;
const char *cpu_vendor_string;
#ifdef TARGET_ARM
int old_param = 0;
@@ -7931,6 +7933,7 @@ enum {
QEMU_OPTION_tdf,
QEMU_OPTION_kvm_shadow_memory,
QEMU_OPTION_mempath,
+ QEMU_OPTION_no_direct_pmtimer,
};
typedef struct QEMUOption {
@@ -8058,6 +8061,7 @@ const QEMUOption qemu_options[] = {
{ "clock", HAS_ARG, QEMU_OPTION_clock },
{ "startdate", HAS_ARG, QEMU_OPTION_startdate },
{ "mem-path", HAS_ARG, QEMU_OPTION_mempath },
+ { "no-direct-pmtimer", 0, QEMU_OPTION_no_direct_pmtimer },
{ NULL },
};
@@ -8962,6 +8966,9 @@ int main(int argc, char **argv)
case QEMU_OPTION_mempath:
mem_path = optarg;
break;
+ case QEMU_OPTION_no_direct_pmtimer:
+ no_direct_pmtimer = 1;
+ break;
case QEMU_OPTION_name:
qemu_name = optarg;
break;
--
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html