----- Original Message ----- > From: "Michael S. Tsirkin" <m...@redhat.com> > To: "Igor Mammedov" <imamm...@redhat.com> > Cc: "Anthony Liguori" <aligu...@us.ibm.com>, "Eduardo Habkost" > <ehabk...@redhat.com>, qemu-devel@nongnu.org, "Isaku > Yamahata" <yamah...@valinux.co.jp>, "Alex Williamson" > <alex.william...@redhat.com>, "Gerd Hoffmann" > <kra...@redhat.com>, "Laszlo Ersek" <ler...@redhat.com>, "Andreas Färber" > <afaer...@suse.de>, "David Gibson" > <da...@gibson.dropbear.id.au>, "Paolo Bonzini" <pbonz...@redhat.com> > Sent: Thursday, July 25, 2013 5:23:21 PM > Subject: Re: [Qemu-devel] [PATCH] pc: limit 64 bit hole to 2G by default > > On Thu, Jul 25, 2013 at 11:16:06AM -0400, Igor Mammedov wrote: > > > > > > ----- Original Message ----- > > > From: "Igor Mammedov" <imamm...@redhat.com> > > > To: "Michael S. Tsirkin" <m...@redhat.com> > > > Cc: "Anthony Liguori" <aligu...@us.ibm.com>, "Eduardo Habkost" > > > <ehabk...@redhat.com>, qemu-devel@nongnu.org, "Isaku > > > Yamahata" <yamah...@valinux.co.jp>, "Alex Williamson" > > > <alex.william...@redhat.com>, "Gerd Hoffmann" > > > <kra...@redhat.com>, "Paolo Bonzini" <pbonz...@redhat.com>, "Laszlo > > > Ersek" <ler...@redhat.com>, "Andreas Färber" > > > <afaer...@suse.de>, "David Gibson" <da...@gibson.dropbear.id.au> > > > Sent: Thursday, July 25, 2013 3:40:05 PM > > > Subject: Re: [Qemu-devel] [PATCH] pc: limit 64 bit hole to 2G by default > > > > > > On Wed, 24 Jul 2013 09:01:04 +0300 > > > "Michael S. Tsirkin" <m...@redhat.com> wrote: > > > > > > > It turns out that some 32 bit windows guests crash > > > > if 64 bit PCI hole size is >2G. > > > > Limit it to 2G for piix and q35 by default, > > > > add properties to let management override the hole size. > > > > > > > > Examples: > > > > -global i440FX-pcihost.pci_hole64_size=137438953472 > > > > > > > > -global q35-pcihost.pci_hole64_size=137438953472 > > > > > > > > Reported-by: Igor Mammedov <imamm...@redhat.com>, > > > > Signed-off-by: Michael S. 
Tsirkin <m...@redhat.com> > > > > --- > > > > hw/i386/pc.c | 35 ++++++++++++++++++++--------------- > > > > hw/i386/pc_piix.c | 14 +------------- > > > > hw/pci-host/piix.c | 42 > > > > ++++++++++++++++++++++++++++++++++-------- > > > > hw/pci-host/q35.c | 29 +++++++++++++++++------------ > > > > include/hw/i386/pc.h | 7 +++++-- > > > > include/hw/pci-host/q35.h | 1 + > > > > 6 files changed, 78 insertions(+), 50 deletions(-) > > > > > > > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > > > > index a7c578f..9cc0fda 100644 > > > > --- a/hw/i386/pc.c > > > > +++ b/hw/i386/pc.c > > > > @@ -1072,27 +1072,32 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t > > > > below_4g_mem_size, > > > > memset(&guest_info->found_cpus, 0, sizeof guest_info->found_cpus); > > > > qemu_for_each_cpu(pc_set_cpu_guest_info, guest_info); > > > > > > > > - guest_info->pci_info.w32.end = IO_APIC_DEFAULT_ADDRESS; > > > > - if (sizeof(hwaddr) == 4) { > > > > - guest_info->pci_info.w64.begin = 0; > > > > - guest_info->pci_info.w64.end = 0; > > > > - } else { > > > > + guest_info_state->machine_done.notify = > > > > pc_guest_info_machine_done; > > > > + > > > > qemu_add_machine_init_done_notifier(&guest_info_state->machine_done); > > > > + return guest_info; > > > > +} > > > > + > > > > +void pc_init_pci_info(PcPciInfo *pci_info, > > > > + uint64_t pci_hole64_start, > > > > + uint64_t pci_hole64_size) > > > > +{ > > > > + pci_info->w32.end = IO_APIC_DEFAULT_ADDRESS; > > > weird ident > > > > > > > + > > > > + if (pci_hole64_size & ((0x1 << 30) - 1)) { > > > > + error_report("Invalid value for pci_hole64_size: " > > > > + "must be a multiple of 1G. Rounding up."); > > > > + } > > > > + pci_hole64_size = ROUND_UP(pci_hole64_size, 0x1ULL << 30); > > > > + > > > if pci_hole64_size is a property it would be better to put check, > > > in property setter (custom one) and error out instead of doing fixup, > > > lets user fix his wrong cmd line. 
> > > > > > > /* > > > > * BIOS does not set MTRR entries for the 64 bit window, so no > > > > need to > > > > * align address to power of two. Align address at 1G, this > > > > makes > > > > sure > > > > * it can be exactly covered with a PAT entry even when using > > > > huge > > > > * pages. > > > > */ > > > > - guest_info->pci_info.w64.begin = > > > > - ROUND_UP((0x1ULL << 32) + above_4g_mem_size, 0x1ULL << > > > > 30); > > > > - guest_info->pci_info.w64.end = guest_info->pci_info.w64.begin > > > > + > > > > - (0x1ULL << 31); > > > > - assert(guest_info->pci_info.w64.begin <= > > > > guest_info->pci_info.w64.end); > > > > - } > > > > - > > > > - guest_info_state->machine_done.notify = > > > > pc_guest_info_machine_done; > > > > - > > > > qemu_add_machine_init_done_notifier(&guest_info_state->machine_done); > > > > - return guest_info; > > > > + pci_info->w64.begin = ROUND_UP(pci_hole64_start, 0x1ULL << > > > > 30); > > > > + pci_info->w64.end = pci_info->w64.begin + pci_hole64_size; > > > > + assert(pci_info->w64.begin <= pci_info->w64.end); > > > > } > > > > > > > > void pc_acpi_init(const char *default_dsdt) > > > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c > > > > index 76df42b..da61fa3 100644 > > > > --- a/hw/i386/pc_piix.c > > > > +++ b/hw/i386/pc_piix.c > > > > @@ -137,15 +137,6 @@ static void pc_init1(MemoryRegion *system_memory, > > > > > > > > guest_info->has_pci_info = has_pci_info; > > > > > > > > - /* Set PCI window size the way seabios has always done it. 
*/ > > > > - /* Power of 2 so bios can cover it with a single MTRR */ > > > > - if (ram_size <= 0x80000000) > > > > - guest_info->pci_info.w32.begin = 0x80000000; > > > > - else if (ram_size <= 0xc0000000) > > > > - guest_info->pci_info.w32.begin = 0xc0000000; > > > > - else > > > > - guest_info->pci_info.w32.begin = 0xe0000000; > > > > - > > > > /* allocate ram and load rom/bios */ > > > > if (!xen_enabled()) { > > > > fw_cfg = pc_memory_init(system_memory, > > > > @@ -169,10 +160,7 @@ static void pc_init1(MemoryRegion *system_memory, > > > > below_4g_mem_size, > > > > 0x100000000ULL - below_4g_mem_size, > > > > 0x100000000ULL + above_4g_mem_size, > > > > - (sizeof(hwaddr) == 4 > > > > - ? 0 > > > > - : ((uint64_t)1 << 62)), > > > > - pci_memory, ram_memory); > > > > + pci_memory, ram_memory, guest_info); > > > > } else { > > > > pci_bus = NULL; > > > > i440fx_state = NULL; > > > > diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c > > > > index 7fb2fb1..963b3d8 100644 > > > > --- a/hw/pci-host/piix.c > > > > +++ b/hw/pci-host/piix.c > > > > @@ -40,6 +41,7 @@ > > > > > > > > typedef struct I440FXState { > > > > PCIHostState parent_obj; > > > > + uint64_t pci_hole64_size; > > > > } I440FXState; > > > > > > > > #define PIIX_NUM_PIC_IRQS 16 /* i8259 * 2 */ > > > > @@ -234,9 +236,9 @@ static PCIBus *i440fx_common_init(const char > > > > *device_name, > > > > hwaddr pci_hole_start, > > > > hwaddr pci_hole_size, > > > > hwaddr pci_hole64_start, > > > ^^^ could but to be more consistent if moved to a place where 64 PCI hole > > > is > > > initialized, replace it with above_4g_memory_size, and let > > > i440fx_common_init() > > > set it near the place where 64 PCI hole end is set. 
> > > > > > > - hwaddr pci_hole64_size, > > > > MemoryRegion *pci_address_space, > > > > - MemoryRegion *ram_memory) > > > > + MemoryRegion *ram_memory, > > > > + PcGuestInfo *guest_info) > > > > { > > > > DeviceState *dev; > > > > PCIBus *b; > > > > @@ -245,15 +247,31 @@ static PCIBus *i440fx_common_init(const char > > > > *device_name, > > > > PIIX3State *piix3; > > > > PCII440FXState *f; > > > > unsigned i; > > > > + I440FXState *i440fx; > > > > > > > > dev = qdev_create(NULL, "i440FX-pcihost"); > > > > s = PCI_HOST_BRIDGE(dev); > > > > + i440fx = OBJECT_CHECK(I440FXState, dev, "i440FX-pcihost"); > > > > b = pci_bus_new(dev, NULL, pci_address_space, > > > > address_space_io, 0, TYPE_PCI_BUS); > > > > s->bus = b; > > > > object_property_add_child(qdev_get_machine(), "i440fx", > > > > OBJECT(dev), > > > > NULL); > > > > qdev_init_nofail(dev); > > > > > > > > + if (guest_info) { > > > > + /* Set PCI window size the way seabios has always done it. */ > > > > + /* Power of 2 so bios can cover it with a single MTRR */ > > > > + if (ram_size <= 0x80000000) > > > > + guest_info->pci_info.w32.begin = 0x80000000; > > > > + else if (ram_size <= 0xc0000000) > > > > + guest_info->pci_info.w32.begin = 0xc0000000; > > > > + else > > > > + guest_info->pci_info.w32.begin = 0xe0000000; > > > > + > > > > + pc_init_pci_info(&guest_info->pci_info, > > > > + pci_hole64_start, i440fx->pci_hole64_size); > > > > + } > > > split-brain init of the same data structure makes it ugly; it would be more > > > readable > > > inlined. > > > > > > Wouldn't it be better/cleaner to put PcPciInfo inside of > > > I440FXState/MCHPCIState > > > and make QOM based API to access it as in latest ACPI tables series? > > > > > we could even not use PcPciInfo at all if memory_region_find() would > > return > > address for non-terminating regions (i.e. aliases and containers). > > then we could get all necessary info from f->pci_hole_64bit and > > f->pci_hole.
> > > > Paolo, > > would the following patch be acceptable: > > > > diff --git a/memory.c b/memory.c > > index 757e9a5..0f1fb10 100644 > > --- a/memory.c > > +++ b/memory.c > > @@ -1551,6 +1551,7 @@ MemoryRegionSection memory_region_find(MemoryRegion > > *mr, > > addr += root->addr; > > } > > > > + ret.offset_within_region = addr; > > as = memory_region_to_address_space(root); > > range = addrrange_make(int128_make64(addr), int128_make64(size)); > > fr = address_space_lookup(as, range); > > > > Then to get PCI hole info all we would need is: > > > > get_pci_hole_info(f, uint64_t *start, uint64_t *end) { > > MemoryRegionSection ms = memory_region_find(f->pci_hole, 0, 1); > > sz = memory_region_size(mr); > > *start = ms.offset_within_region; > > *end = *start + sz; > > } > > We'll need to get the regions somehow, and all this will > really break for example if we decide to cover the 64 bit > holes in 2 regions instead of one, for some reason. It would break in the PcPciInfo case as well: the structure would need a second w64x2 range for the second region.
> > Frankly I don't see any advantages. Only single authoritative source of this info => No data duplication? > > > > > > > > > d = pci_create_simple(b, 0, device_name); > > > > *pi440fx_state = I440FX_PCI_DEVICE(d); > > > > f = *pi440fx_state; > > > > @@ -265,8 +283,8 @@ static PCIBus *i440fx_common_init(const char > > > > *device_name, > > > > memory_region_add_subregion(f->system_memory, pci_hole_start, > > > > &f->pci_hole); > > > > memory_region_init_alias(&f->pci_hole_64bit, OBJECT(d), > > > > "pci-hole64", > > > > f->pci_address_space, > > > > - pci_hole64_start, pci_hole64_size); > > > > - if (pci_hole64_size) { > > > > + pci_hole64_start, > > > > i440fx->pci_hole64_size); > > > > + if (i440fx->pci_hole64_size) { > > > > memory_region_add_subregion(f->system_memory, > > > > pci_hole64_start, > > > > &f->pci_hole_64bit); > > > > } > > > > @@ -322,8 +340,8 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, > > > > int > > > > *piix3_devfn, > > > > hwaddr pci_hole_start, > > > > hwaddr pci_hole_size, > > > > hwaddr pci_hole64_start, > > > > - hwaddr pci_hole64_size, > > > > - MemoryRegion *pci_memory, MemoryRegion > > > > *ram_memory) > > > > + MemoryRegion *pci_memory, MemoryRegion > > > > *ram_memory, > > > > + PcGuestInfo *guest_info) > > > > > > > > { > > > > PCIBus *b; > > > > @@ -332,8 +350,9 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, > > > > int > > > > *piix3_devfn, > > > > piix3_devfn, isa_bus, pic, > > > > address_space_mem, address_space_io, > > > > ram_size, > > > > pci_hole_start, pci_hole_size, > > > > - pci_hole64_start, pci_hole64_size, > > > > - pci_memory, ram_memory); > > > > + pci_hole64_start, > > > > + pci_memory, ram_memory, > > > > + guest_info); > > > > return b; > > > > } > > > > > > > > @@ -645,6 +664,12 @@ static const char > > > > *i440fx_pcihost_root_bus_path(PCIHostState *host_bridge, > > > > return "0000"; > > > > } > > > > > > > > +static Property i440fx_props[] = { > > > > + 
DEFINE_PROP_UINT64("pci_hole64_size", I440FXState, > > > > + pci_hole64_size, 0x1ULL << 31), > > > > + DEFINE_PROP_END_OF_LIST(), > > > > +}; > > > > + > > > > static void i440fx_pcihost_class_init(ObjectClass *klass, void *data) > > > > { > > > > DeviceClass *dc = DEVICE_CLASS(klass); > > > > @@ -655,6 +680,7 @@ static void i440fx_pcihost_class_init(ObjectClass > > > > *klass, void *data) > > > > k->init = i440fx_pcihost_initfn; > > > > dc->fw_name = "pci"; > > > > dc->no_user = 1; > > > > + dc->props = i440fx_props; > > > > } > > > > > > > > static const TypeInfo i440fx_pcihost_info = { > > > > diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c > > > > index c761a43..4dd7ca4 100644 > > > > --- a/hw/pci-host/q35.c > > > > +++ b/hw/pci-host/q35.c > > > > @@ -73,6 +74,8 @@ static const char > > > > *q35_host_root_bus_path(PCIHostState > > > > *host_bridge, > > > > static Property mch_props[] = { > > > > DEFINE_PROP_UINT64("MCFG", Q35PCIHost, host.base_addr, > > > > MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT), > > > > + DEFINE_PROP_UINT64("pci_hole64_size", Q35PCIHost, > > > > + mch.pci_hole64_size, 0x1ULL << 31), > > > > DEFINE_PROP_END_OF_LIST(), > > > > }; > > > > > > > > @@ -250,16 +253,20 @@ static void mch_reset(DeviceState *qdev) > > > > static int mch_init(PCIDevice *d) > > > > { > > > > int i; > > > > - hwaddr pci_hole64_size; > > > > MCHPCIState *mch = MCH_PCI_DEVICE(d); > > > > > > > > - /* Leave enough space for the biggest MCFG BAR */ > > > > - /* TODO: this matches current bios behaviour, but > > > > - * it's not a power of two, which means an MTRR > > > > - * can't cover it exactly. 
> > > > - */ > > > > - mch->guest_info->pci_info.w32.begin = > > > > MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT > > > > + > > > > - MCH_HOST_BRIDGE_PCIEXBAR_MAX; > > > > + if (mch->guest_info) { > > > > + /* Leave enough space for the biggest MCFG BAR */ > > > > + /* TODO: this matches current bios behaviour, but > > > > + * it's not a power of two, which means an MTRR > > > > + * can't cover it exactly. > > > > + */ > > > > + mch->guest_info->pci_info.w32.begin = > > > > MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT + > > > > + MCH_HOST_BRIDGE_PCIEXBAR_MAX; > > > > + pc_init_pci_info(&mch->guest_info->pci_info, > > > > + 0x100000000ULL + mch->above_4g_mem_size, > > > > + mch->pci_hole64_size); > > > > + } > > > > > > > > /* setup pci memory regions */ > > > > memory_region_init_alias(&mch->pci_hole, OBJECT(mch), "pci-hole", > > > > @@ -268,13 +275,11 @@ static int mch_init(PCIDevice *d) > > > > 0x100000000ULL - mch->below_4g_mem_size); > > > > memory_region_add_subregion(mch->system_memory, > > > > mch->below_4g_mem_size, > > > > &mch->pci_hole); > > > > - pci_hole64_size = (sizeof(hwaddr) == 4 ? 
0 : > > > > - ((uint64_t)1 << 62)); > > > > memory_region_init_alias(&mch->pci_hole_64bit, OBJECT(mch), > > > > "pci-hole64", > > > > mch->pci_address_space, > > > > 0x100000000ULL + mch->above_4g_mem_size, > > > > - pci_hole64_size); > > > > - if (pci_hole64_size) { > > > > + mch->pci_hole64_size); > > > > + if (mch->pci_hole64_size) { > > > > memory_region_add_subregion(mch->system_memory, > > > > 0x100000000ULL + > > > > mch->above_4g_mem_size, > > > > &mch->pci_hole_64bit); > > > > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > > > > index 0e6f519..72b4456 100644 > > > > --- a/include/hw/i386/pc.h > > > > +++ b/include/hw/i386/pc.h > > > > @@ -132,6 +132,9 @@ void pc_acpi_init(const char *default_dsdt); > > > > > > > > PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, > > > > ram_addr_t above_4g_mem_size); > > > > +void pc_init_pci_info(PcPciInfo *pci_info, > > > > + uint64_t pci_hole64_start, > > > > + uint64_t pci_hole64_size); > > > > > > > > FWCfgState *pc_memory_init(MemoryRegion *system_memory, > > > > const char *kernel_filename, > > > > @@ -183,9 +186,9 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, > > > > int > > > > *piix_devfn, > > > > hwaddr pci_hole_start, > > > > hwaddr pci_hole_size, > > > > hwaddr pci_hole64_start, > > > > - hwaddr pci_hole64_size, > > > > MemoryRegion *pci_memory, > > > > - MemoryRegion *ram_memory); > > > > + MemoryRegion *ram_memory, > > > > + PcGuestInfo *guest_info); > > > > > > > > PCIBus *find_i440fx(void); > > > > /* piix4.c */ > > > > diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h > > > > index 3d59ae1..869ecb2 100644 > > > > --- a/include/hw/pci-host/q35.h > > > > +++ b/include/hw/pci-host/q35.h > > > > @@ -52,6 +52,7 @@ typedef struct MCHPCIState { > > > > MemoryRegion smram_region; > > > > MemoryRegion pci_hole; > > > > MemoryRegion pci_hole_64bit; > > > > + uint64_t pci_hole64_size; > > > > uint8_t smm_enabled; > > > > ram_addr_t below_4g_mem_size; > > > > 
ram_addr_t above_4g_mem_size; > > > > > > > > > >