On Thu, Jul 11, 2013 at 07:13:39AM +0200, Igor Mammedov wrote: > On Wed, 10 Jul 2013 13:10:03 +0300 > "Michael S. Tsirkin" <m...@redhat.com> wrote: > > > On Wed, Jun 26, 2013 at 05:13:33PM +0800, Hu Tao wrote: > > > The numa_fw_cfg paravirt interface is extended to include SRAT > > > information for > > > all hotplug-able dimms. There are 3 words for each hotplug-able memory > > > slot, > > > denoting start address, size and node proximity. The new info is appended > > > after > > > existing numa info, so that the fw_cfg layout does not break. This > > > information > > > is used by Seabios to build hotplug memory device objects at runtime. > > > nb_numa_nodes is set to 1 by default (not 0), so that we always pass srat > > > info > > > to SeaBIOS. > > > > > > v3->v4: numa_fw_cfg needs to be initialized after memory controller sets > > > up dimm > > > ranges. Make changes for pc_piix and pc_q35 to set numa_fw_cfg after > > > i440fx > > > initialization. > > > > > > v2->v3: setting nb_numa_nodes to 1 is not needed > > > > > > v1->v2: > > > Dimm SRAT info (#dimms) is appended at end of existing numa fw_cfg in > > > order not > > > to break existing layout > > > Documentation of the new fwcfg layout is included in docs/specs/fwcfg.txt > > > > > > Signed-off-by: Vasilis Liaskovitis <vasilis.liaskovi...@profitbricks.com> > > > Signed-off-by: Hu Tao <hu...@cn.fujitsu.com> > > > > Please do not add any more fwcfg interfaces - generating > > ACPI in qemu removes the need for it. > > > > So please rebase on top of that work and generate the appropriate ACPI > > tables directly. > > > > You can find the latest code generating ACPI from qemu here: > > git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git acpi > > will it work with upstream Seabios or custom tree is required for it as well?
Yes. git://git.kernel.org/pub/scm/virt/kvm/mst/seabios.git acpi > > > > This code is work in progress, but once you base on > > top of that, I can put it on that branch and keep updating if > > interfaces change. > > > > > --- > > > docs/specs/fwcfg.txt | 28 ++++++++++++++++++++++++++++ > > > hw/i386/pc.c | 30 ++++++++++++++++++++++++------ > > > hw/i386/pc_piix.c | 1 + > > > hw/i386/pc_q35.c | 7 +++++-- > > > include/hw/i386/pc.h | 1 + > > > include/sysemu/sysemu.h | 1 + > > > 6 files changed, 60 insertions(+), 8 deletions(-) > > > create mode 100644 docs/specs/fwcfg.txt > > > > > > diff --git a/docs/specs/fwcfg.txt b/docs/specs/fwcfg.txt > > > new file mode 100644 > > > index 0000000..e6fcd8f > > > --- /dev/null > > > +++ b/docs/specs/fwcfg.txt > > > @@ -0,0 +1,28 @@ > > > +QEMU<->BIOS Paravirt Documentation > > > +-------------------------------------- > > > + > > > +This document describes paravirt data structures passed from QEMU to > > > BIOS. > > > + > > > +fw_cfg SRAT paravirt info > > > +-------------------- > > > +The SRAT info passed from QEMU to BIOS has the following layout: > > > + > > > +----------------------------------------------------------------------------------------------- > > > +#nodes | cpu0_pxm | cpu1_pxm | ... | cpulast_pxm | node0_mem | node1_mem > > > | ... | nodelast_mem > > > + > > > +----------------------------------------------------------------------------------------------- > > > +#dimms | dimm0_start | dimm0_sz | dimm0_pxm | ... | dimmlast_start | > > > dimmlast_sz | dimmlast_pxm > > > + > > > +Entry 0 contains the number of numa nodes (nb_numa_nodes). > > > + > > > +Entries 1..max_cpus: The next max_cpus entries describe node proximity > > > for each > > > +one of the vCPUs in the system. > > > + > > > +Entries max_cpus+1..max_cpus+nb_numa_nodes+1: The next nb_numa_nodes > > > entries > > > +describe the memory size for each one of the NUMA nodes in the system. 
> > > + > > > +Entry max_cpus+nb_numa_nodes+1 contains the number of memory dimms > > > (nb_hp_dimms) > > > + > > > +The last 3 * nb_hp_dimms entries are organized in triplets: Each triplet > > > contains > > > +the physical address offset, size (in bytes), and node proximity for the > > > +respective dimm. > > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > > > index 65838a6..b51d3b5 100644 > > > --- a/hw/i386/pc.c > > > +++ b/hw/i386/pc.c > > > @@ -55,6 +55,7 @@ > > > #include "hw/acpi/acpi.h" > > > #include "hw/cpu/icc_bus.h" > > > #include "hw/boards.h" > > > +#include "hw/mem-hotplug/dimm.h" > > > > > > /* debug PC/ISA interrupts */ > > > //#define DEBUG_IRQ > > > @@ -606,8 +607,6 @@ static FWCfgState *bochs_bios_init(void) > > > FWCfgState *fw_cfg; > > > uint8_t *smbios_table; > > > size_t smbios_len; > > > - uint64_t *numa_fw_cfg; > > > - int i, j; > > > unsigned int apic_id_limit = pc_apic_id_limit(max_cpus); > > > > > > fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0); > > > @@ -640,11 +639,25 @@ static FWCfgState *bochs_bios_init(void) > > > &e820_table, sizeof(e820_table)); > > > > > > fw_cfg_add_bytes(fw_cfg, FW_CFG_HPET, &hpet_cfg, sizeof(hpet_cfg)); > > > + > > > + return fw_cfg; > > > +} > > > + > > > +void bochs_meminfo_bios_init(void *fw_cfg) > > > +{ > > > + uint64_t *numa_fw_cfg; > > > + uint64_t *hp_dimms_fw_cfg; > > > + int i, j; > > > + unsigned int apic_id_limit = pc_apic_id_limit(max_cpus); > > > + > > > /* allocate memory for the NUMA channel: one (64bit) word for the > > > number > > > * of nodes, one word for each VCPU->node and one word for each node > > > to > > > * hold the amount of memory. > > > + * Finally one word for the number of hotplug memory slots and three > > > words > > > + * for each hotplug memory slot (start address, size and node > > > proximity). 
> > > */ > > > - numa_fw_cfg = g_new0(uint64_t, 1 + apic_id_limit + nb_numa_nodes); > > > + numa_fw_cfg = g_new0(uint64_t, > > > + 2 + apic_id_limit + nb_numa_nodes + 3 * > > > nb_hp_dimms); > > > numa_fw_cfg[0] = cpu_to_le64(nb_numa_nodes); > > > for (i = 0; i < max_cpus; i++) { > > > unsigned int apic_id = x86_cpu_apic_id_from_index(i); > > > @@ -659,11 +672,16 @@ static FWCfgState *bochs_bios_init(void) > > > for (i = 0; i < nb_numa_nodes; i++) { > > > numa_fw_cfg[apic_id_limit + 1 + i] = cpu_to_le64(node_mem[i]); > > > } > > > + > > > + numa_fw_cfg[1 + apic_id_limit + nb_numa_nodes] = > > > cpu_to_le64(nb_hp_dimms); > > > + > > > + hp_dimms_fw_cfg = numa_fw_cfg + 2 + apic_id_limit + nb_numa_nodes; > > > + if (nb_hp_dimms) { > > > + dimm_setup_fwcfg_layout(hp_dimms_fw_cfg); > > > + } > > > fw_cfg_add_bytes(fw_cfg, FW_CFG_NUMA, numa_fw_cfg, > > > - (1 + apic_id_limit + nb_numa_nodes) * > > > + (2 + apic_id_limit + nb_numa_nodes + 3 * > > > nb_hp_dimms) * > > > sizeof(*numa_fw_cfg)); > > > - > > > - return fw_cfg; > > > } > > > > > > static long get_file_size(FILE *f) > > > diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c > > > index fb056df..6e18343 100644 > > > --- a/hw/i386/pc_piix.c > > > +++ b/hw/i386/pc_piix.c > > > @@ -138,6 +138,7 @@ static void pc_init1(MemoryRegion *system_memory, > > > if (!xen_enabled()) { > > > fw_cfg = pc_memory_init(kernel_filename, kernel_cmdline, > > > initrd_filename, > > > below_4g_mem_size, above_4g_mem_size); > > > + bochs_meminfo_bios_init(fw_cfg); > > > } > > > > > > if (kvm_irqchip_in_kernel()) { > > > diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c > > > index 5fe14bb..2c14977 100644 > > > --- a/hw/i386/pc_q35.c > > > +++ b/hw/i386/pc_q35.c > > > @@ -74,6 +74,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args) > > > ICH9LPCState *ich9_lpc; > > > PCIDevice *ahci; > > > DeviceState *icc_bridge; > > > + void *fw_cfg = NULL; > > > > > > icc_bridge = qdev_create(NULL, TYPE_ICC_BRIDGE); > > > 
object_property_add_child(qdev_get_machine(), "icc-bridge", > > > @@ -97,8 +98,9 @@ static void pc_q35_init(QEMUMachineInitArgs *args) > > > > > > /* allocate ram and load rom/bios */ > > > if (!xen_enabled()) { > > > - pc_memory_init(kernel_filename, kernel_cmdline, > > > - initrd_filename, below_4g_mem_size, > > > above_4g_mem_size); > > > + fw_cfg = pc_memory_init(kernel_filename, kernel_cmdline, > > > + initrd_filename, below_4g_mem_size, > > > + above_4g_mem_size); > > > } > > > > > > /* irq lines */ > > > @@ -116,6 +118,7 @@ static void pc_q35_init(QEMUMachineInitArgs *args) > > > q35_host->mch.address_space_io = get_system_io(); > > > /* pci */ > > > qdev_init_nofail(DEVICE(q35_host)); > > > + bochs_meminfo_bios_init(fw_cfg); > > > host_bus = q35_host->host.pci.bus; > > > /* create ISA bus */ > > > lpc = pci_create_simple_multifunction(host_bus, > > > PCI_DEVFN(ICH9_LPC_DEV, > > > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > > > index 959b92b..4a29e6e 100644 > > > --- a/include/hw/i386/pc.h > > > +++ b/include/hw/i386/pc.h > > > @@ -231,6 +231,7 @@ int pvpanic_init(ISABus *bus); > > > #define E820_UNUSABLE 5 > > > > > > int e820_add_entry(uint64_t, uint64_t, uint32_t); > > > +void bochs_meminfo_bios_init(void *fw_cfg); > > > > > > #define PC_COMPAT_1_5 \ > > > {\ > > > diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h > > > index 2fb71af..2644faa 100644 > > > --- a/include/sysemu/sysemu.h > > > +++ b/include/sysemu/sysemu.h > > > @@ -132,6 +132,7 @@ extern QEMUClock *rtc_clock; > > > extern int nb_numa_nodes; > > > extern uint64_t node_mem[MAX_NODES]; > > > extern unsigned long *node_cpumask[MAX_NODES]; > > > +extern int nb_hp_dimms; > > > > > > #define MAX_OPTION_ROMS 16 > > > typedef struct QEMUOptionRom { > > > -- > > > 1.8.3.1 > > > > >