On Fri, Sep 17, 2010 at 11:15 AM, <anthony.per...@citrix.com> wrote: > From: Anthony PERARD <anthony.per...@citrix.com> > > Xen currently uses a different BIOS (hvmloader + rombios) therefore the > Qemu acpi_piix4 implementation wouldn't work correctly with Xen. > We plan on fixing this properly but at the moment we are just adding a > new Xen specific acpi_piix4 implementation. > This patch is optional; without it the VM boots but it cannot shutdown > properly or go to S3. > > Signed-off-by: Anthony PERARD <anthony.per...@citrix.com> > Signed-off-by: Stefano Stabellini <stefano.stabell...@eu.citrix.com> > --- > Makefile.target | 1 + > hw/xen_acpi_piix4.c | 405 > +++++++++++++++++++++++++++++++++++++++++++++++++++ > hw/xen_common.h | 3 + > hw/xen_machine_fv.c | 6 +- > 4 files changed, 410 insertions(+), 5 deletions(-) > create mode 100644 hw/xen_acpi_piix4.c > > diff --git a/Makefile.target b/Makefile.target > index ea14393..db7f96b 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -189,6 +189,7 @@ obj-$(CONFIG_NO_XEN) += xen-stub.o > # xen full virtualized machine > obj-$(CONFIG_XEN) += xen_machine_fv.o > obj-$(CONFIG_XEN) += xen_platform.o > +obj-$(CONFIG_XEN) += xen_acpi_piix4.o > > # USB layer > obj-$(CONFIG_USB_OHCI) += usb-ohci.o > diff --git a/hw/xen_acpi_piix4.c b/hw/xen_acpi_piix4.c > new file mode 100644 > index 0000000..f4792f2 > --- /dev/null > +++ b/hw/xen_acpi_piix4.c > @@ -0,0 +1,405 @@ > + /* > + * PIIX4 ACPI controller emulation > + * > + * Winston liwen Wang, winston.l.w...@intel.com > + * Copyright (c) 2006 , Intel Corporation. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > deal > + * in the Software without restriction, including without limitation the > rights > + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell > + * copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > FROM, > + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN > + * THE SOFTWARE. > + */ > + > +#include "hw.h" > +#include "pc.h" > +#include "pci.h" > +#include "sysemu.h" > +#include "acpi.h" > + > +#include "xen_backend.h" > +#include "xen_common.h" > +#include "qemu-log.h" > + > +#include <xen/hvm/ioreq.h> > +#include <xen/hvm/params.h> > + > +#define PIIX4ACPI_LOG_ERROR 0 > +#define PIIX4ACPI_LOG_INFO 1 > +#define PIIX4ACPI_LOG_DEBUG 2 > +#define PIIX4ACPI_LOGLEVEL PIIX4ACPI_LOG_INFO > +#define PIIX4ACPI_LOG(level, fmt, ...) do { if (level <= PIIX4ACPI_LOGLEVEL) > qemu_log(fmt, ## __VA_ARGS__); } while (0) > + > +/* Sleep state type codes as defined by the \_Sx objects in the DSDT. */ > +/* These must be kept in sync with the DSDT (hvmloader/acpi/dsdt.asl) */ > +#define SLP_TYP_S4 (6 << 10) > +#define SLP_TYP_S3 (5 << 10) > +#define SLP_TYP_S5 (7 << 10) > + > +#define ACPI_DBG_IO_ADDR 0xb044 > +#define ACPI_PHP_IO_ADDR 0x10c0 > + > +#define PHP_EVT_ADD 0x0 > +#define PHP_EVT_REMOVE 0x3 > + > +/* The bit in GPE0_STS/EN to notify the pci hotplug event */ > +#define ACPI_PHP_GPE_BIT 3 > + > +#define DEVFN_TO_PHP_SLOT_REG(devfn) (devfn >> 1) > +#define PHP_SLOT_REG_TO_DEVFN(reg, hilo) ((reg << 1) | hilo) > + > +/* ioport to monitor cpu add/remove status */ > +#define PROC_BASE 0xaf00 > + > +typedef struct PCIAcpiState {
PCIACPIState > + PCIDevice dev; > + uint16_t pm1_control; /* pm1a_ECNT_BLK */ > + qemu_irq irq; > + qemu_irq cmos_s3; > +} PCIAcpiState; > + > +typedef struct GPEState { > + /* GPE0 block */ > + uint8_t gpe0_sts[ACPI_GPE0_BLK_LEN / 2]; > + uint8_t gpe0_en[ACPI_GPE0_BLK_LEN / 2]; > + > + /* CPU bitmap */ > + uint8_t cpus_sts[32]; > + > + /* SCI IRQ level */ > + uint8_t sci_asserted; > + > +} GPEState; > + > +static GPEState gpe_state; > + > +static qemu_irq sci_irq; > + > +typedef struct AcpiDeviceState AcpiDeviceState; > +AcpiDeviceState *acpi_device_table; The above globals and static variable should be eliminated, see ac4040955b1669f0aac5937f623d6587d5210679. > + > +static const VMStateDescription vmstate_acpi = { > + .name = "PIIX4 ACPI", > + .version_id = 1, > + .fields = (VMStateField []) { > + VMSTATE_PCI_DEVICE(dev, PCIAcpiState), > + VMSTATE_UINT16(pm1_control, PCIAcpiState), > + VMSTATE_END_OF_LIST() > + } > +}; > + > +static void acpiPm1Control_writeb(void *opaque, uint32_t addr, uint32_t val) acpi_pm1_control_writeb(). > +{ > + PCIAcpiState *s = opaque; > + s->pm1_control = (s->pm1_control & 0xff00) | (val & 0xff); > +} > + > +static uint32_t acpiPm1Control_readb(void *opaque, uint32_t addr) Likewise. > +{ > + PCIAcpiState *s = opaque; > + /* Mask out the write-only bits */ > + return (uint8_t)(s->pm1_control & > ~(ACPI_BITMASK_GLOBAL_LOCK_RELEASE|ACPI_BITMASK_SLEEP_ENABLE)); Please add spaces around '|' and break the line. Adding ACPI_BITMASK_WRITEONLY may help. > +} > + > +static void acpi_shutdown(PCIAcpiState *s, uint32_t val) > +{ > + if (!(val & ACPI_BITMASK_SLEEP_ENABLE)) > + return; braces > + > + switch (val & ACPI_BITMASK_SLEEP_TYPE) { > + case SLP_TYP_S3: > + qemu_system_reset(); > + qemu_irq_raise(s->cmos_s3); > + xc_set_hvm_param(xen_xc, xen_domid, HVM_PARAM_ACPI_S_STATE, 3); > + break; > + case SLP_TYP_S4: > + case SLP_TYP_S5: > + qemu_system_shutdown_request(); > + break; > + default: > + break; > + } > +} > + > +static void acpiPm1ControlP1_writeb(void *opaque, uint32_t addr, uint32_t > val) > +{ > + PCIAcpiState *s = opaque; > + > + val <<= 8; > + s->pm1_control = ((s->pm1_control & 0xff) | val) & > ~ACPI_BITMASK_SLEEP_ENABLE; > + > + acpi_shutdown(s, val); > +} > + > +static uint32_t acpiPm1ControlP1_readb(void *opaque, uint32_t addr) > +{ > + PCIAcpiState *s = opaque; > + /* Mask out the write-only bits */ > + return (uint8_t)((s->pm1_control & > ~(ACPI_BITMASK_GLOBAL_LOCK_RELEASE|ACPI_BITMASK_SLEEP_ENABLE)) >> 8); > +} > + > +static void acpiPm1Control_writew(void *opaque, uint32_t addr, uint32_t val) > +{ > + PCIAcpiState *s = opaque; > + > + s->pm1_control = val & ~ACPI_BITMASK_SLEEP_ENABLE; > + > + acpi_shutdown(s, val); > +} > + > +static uint32_t acpiPm1Control_readw(void *opaque, uint32_t addr) > +{ > + PCIAcpiState *s = opaque; > + /* Mask out the write-only bits */ > + return (s->pm1_control & > ~(ACPI_BITMASK_GLOBAL_LOCK_RELEASE|ACPI_BITMASK_SLEEP_ENABLE)); > +} > + > +static void acpi_map(PCIDevice *pci_dev, int region_num, > + uint32_t addr, uint32_t size, int type) > +{ > + PCIAcpiState *d = (PCIAcpiState *)pci_dev; > + > + /* Byte access */ > + register_ioport_write(addr + 4, 1, 1, acpiPm1Control_writeb, d); > + register_ioport_read(addr + 4, 1, 1, acpiPm1Control_readb, d); > + register_ioport_write(addr + 4 + 1, 1, 1, acpiPm1ControlP1_writeb, d); > + register_ioport_read(addr + 4 +1, 1, 1, acpiPm1ControlP1_readb, d); > + > + /* Word access */ > + register_ioport_write(addr + 4, 2, 2, acpiPm1Control_writew, d); > + register_ioport_read(addr + 4, 2, 2, acpiPm1Control_readw, d); > +} > + > +static inline int test_bit(uint8_t *map, int bit) > +{ > + return ( map[bit / 8] & (1 << (bit % 8)) ); > +} > + > +static inline void set_bit(uint8_t *map, int bit) > +{ > + map[bit / 8] |= (1 << (bit % 8)); > +} > + > +static inline void clear_bit(uint8_t *map, int bit) > +{ > + map[bit / 8] &= ~(1 << (bit % 8)); > +} > + > +static void acpi_dbg_writel(void *opaque, uint32_t addr, uint32_t val) > +{ > + PIIX4ACPI_LOG(PIIX4ACPI_LOG_DEBUG, "ACPI: DBG: 0x%08x\n", val); > + PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "ACPI:debug: write addr=0x%x, > val=0x%x.\n", addr, val); > +} > + > +/* GPEx_STS occupy 1st half of the block, while GPEx_EN 2nd half */ > +static uint32_t gpe_sts_read(void *opaque, uint32_t addr) > +{ > + GPEState *s = opaque; > + > + return s->gpe0_sts[addr - ACPI_GPE0_BLK_ADDRESS]; > +} > + > +/* write 1 to clear specific GPE bits */ > +static void gpe_sts_write(void *opaque, uint32_t addr, uint32_t val) > +{ > + GPEState *s = opaque; > + int hotplugged = 0; > + > + PIIX4ACPI_LOG(PIIX4ACPI_LOG_DEBUG, "gpe_sts_write: addr=0x%x, > val=0x%x.\n", addr, val); > + > + hotplugged = test_bit(&s->gpe0_sts[0], ACPI_PHP_GPE_BIT); > + s->gpe0_sts[addr - ACPI_GPE0_BLK_ADDRESS] &= ~val; > + if ( s->sci_asserted && > + hotplugged && > + !test_bit(&s->gpe0_sts[0], ACPI_PHP_GPE_BIT)) { > + PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "Clear the GPE0_STS bit for ACPI > hotplug & deassert the IRQ.\n"); > + qemu_irq_lower(sci_irq); > + } > + > +} > + > +static uint32_t gpe_en_read(void *opaque, uint32_t addr) > +{ > + GPEState *s = opaque; > + > + return s->gpe0_en[addr - (ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / > 2)]; > +} > + > +/* write 0 to clear en bit */ > +static void gpe_en_write(void *opaque, uint32_t addr, uint32_t val) > +{ > + GPEState *s = opaque; > + int reg_count; > + > + PIIX4ACPI_LOG(PIIX4ACPI_LOG_DEBUG, "gpe_en_write: addr=0x%x, > val=0x%x.\n", addr, val); > + reg_count = addr - (ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2); > + s->gpe0_en[reg_count] = val; > + /* If disable GPE bit right after generating SCI on it, > + * need deassert the intr to avoid redundant intrs > + */ > + if ( s->sci_asserted && > + reg_count == (ACPI_PHP_GPE_BIT / 8) && > + !(val & (1 << (ACPI_PHP_GPE_BIT % 8))) ) { > + PIIX4ACPI_LOG(PIIX4ACPI_LOG_INFO, "deassert due to disable GPE > bit.\n"); > + s->sci_asserted = 0; > + qemu_irq_lower(sci_irq); > + } > + > +} > + > +static const VMStateDescription vmstate_gpe = { > + .name = "gpe", > + .version_id = 2, > + .minimum_version_id = 2, > + .minimum_version_id_old = 2, > + .fields = (VMStateField []) { > + VMSTATE_BUFFER(gpe0_sts, GPEState), > + VMSTATE_BUFFER(gpe0_en, GPEState), > + VMSTATE_UINT8(sci_asserted, GPEState), > + VMSTATE_END_OF_LIST() > + } > +}; > + > +static uint32_t gpe_cpus_readb(void *opaque, uint32_t addr) > +{ > + uint32_t val = 0; > + GPEState *g = opaque; > + > + switch (addr) { > + case PROC_BASE ... PROC_BASE+31: > + val = g->cpus_sts[addr - PROC_BASE]; break; > + default: > + break; > + } > + > + return val; > +} > + > +static void gpe_cpus_writeb(void *opaque, uint32_t addr, uint32_t val) > +{ > + /* GPEState *g = opaque; */ > + > + switch (addr) { > + case PROC_BASE ... PROC_BASE + 31: > + /* don't allow to change cpus_sts from inside a guest */ > + break; > + default: > + break; > + } > +} > + > +static void gpe_acpi_init(void) > +{ > + GPEState *s = &gpe_state; > + memset(s, 0, sizeof(GPEState)); > + > + s->cpus_sts[0] = 1; > + > + register_ioport_read(PROC_BASE, 32, 1, gpe_cpus_readb, s); > + register_ioport_write(PROC_BASE, 32, 1, gpe_cpus_writeb, s); > + > + register_ioport_read(ACPI_GPE0_BLK_ADDRESS, > + ACPI_GPE0_BLK_LEN / 2, > + 1, > + gpe_sts_read, > + s); > + register_ioport_read(ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2, > + ACPI_GPE0_BLK_LEN / 2, > + 1, > + gpe_en_read, > + s); > + > + register_ioport_write(ACPI_GPE0_BLK_ADDRESS, > + ACPI_GPE0_BLK_LEN / 2, > + 1, > + gpe_sts_write, > + s); > + register_ioport_write(ACPI_GPE0_BLK_ADDRESS + ACPI_GPE0_BLK_LEN / 2, > + ACPI_GPE0_BLK_LEN / 2, > + 1, > + gpe_en_write, > + s); > + > + vmstate_register(NULL, 0, &vmstate_gpe, s); > +} > + > +static int piix4_pm_xen_initfn(PCIDevice *dev) > +{ > + PCIAcpiState *s = DO_UPCAST(PCIAcpiState, dev, dev); > + uint8_t *pci_conf; > + > + pci_conf = s->dev.config; > + pci_config_set_vendor_id(pci_conf, PCI_VENDOR_ID_INTEL); > + pci_config_set_device_id(pci_conf, PCI_DEVICE_ID_INTEL_82371AB_3); > + pci_conf[0x08] = 0x01; /* B0 stepping */ > + pci_conf[0x09] = 0x00; /* base class */ > + pci_config_set_class(pci_conf, PCI_CLASS_BRIDGE_OTHER); > + pci_conf[PCI_HEADER_TYPE] = PCI_HEADER_TYPE_NORMAL; /* header_type */ > + pci_conf[0x3d] = 0x01; /* Hardwired to PIRQA is used */ > + > + /* PMBA POWER MANAGEMENT BASE ADDRESS, hardcoded to 0x1f40 > + * to make shutdown work for IPF, due to IPF Guest Firmware > + * will enumerate pci devices. > + * > + * TODO: if Guest Firmware or Guest OS will change this PMBA, > + * More logic will be added. > + */ > + pci_conf[0x40] = 0x41; /* Special device-specific BAR at 0x40 */ > + pci_conf[0x41] = 0x1f; > + pci_conf[0x42] = 0x00; > + pci_conf[0x43] = 0x00; > + > + s->pm1_control = ACPI_BITMASK_SCI_ENABLE; Please extract this line to a reset function.