On Fri, Oct 04, 2013 at 06:18:42PM +0200, Igor Mammedov wrote: > On Thu, 3 Oct 2013 18:05:35 +0300 > "Michael S. Tsirkin" <m...@redhat.com> wrote: > > > This defines a structure that will be used to fill in acpi tables > > where relevant properties are not yet available using QOM. > > > > Reviewed-by: Laszlo Ersek <ler...@redhat.com> > > Reviewed-by: Gerd Hoffmann <kra...@redhat.com> > > Tested-by: Gerd Hoffmann <kra...@redhat.com> > > Signed-off-by: Michael S. Tsirkin <m...@redhat.com> > > --- > > include/hw/i386/pc.h | 9 +++++++++ > > hw/i386/pc.c | 31 +++++++++++++++++++++++++++++++ > > 2 files changed, 40 insertions(+) > > > > diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h > > index 9b2ddc4..085a621 100644 > > --- a/include/hw/i386/pc.h > > +++ b/include/hw/i386/pc.h > > @@ -9,6 +9,9 @@ > > #include "hw/i386/ioapic.h" > > > > #include "qemu/range.h" > > +#include "qemu/bitmap.h" > > +#include "sysemu/sysemu.h" > > +#include "hw/pci/pci.h" > > > > /* PC-style peripherals (also used by other machines). 
*/ > > > > @@ -20,6 +23,12 @@ typedef struct PcPciInfo { > > struct PcGuestInfo { > > bool has_pci_info; > > bool isapc_ram_fw; > > + hwaddr ram_size; > > + unsigned apic_id_limit; > > + bool apic_xrupt_override; > > + uint64_t numa_nodes; > > + uint64_t *node_mem; > > + uint64_t *node_cpu; > > FWCfgState *fw_cfg; > > }; > > > > diff --git a/hw/i386/pc.c b/hw/i386/pc.c > > index 0c313fe..dbae9da 100644 > > --- a/hw/i386/pc.c > > +++ b/hw/i386/pc.c > > @@ -1028,6 +1028,23 @@ static void pc_fw_cfg_guest_info(PcGuestInfo > > *guest_info) > > fw_cfg_add_file(guest_info->fw_cfg, "etc/pci-info", info, sizeof > > *info); > > } > > > > +static void pc_set_cpu_guest_info(CPUState *cpu, PcGuestInfo *guest_info) > > +{ > > + CPUClass *klass = CPU_GET_CLASS(cpu); > > + uint64_t apic_id = klass->get_arch_id(cpu); > > + int j; > > + > > + assert(apic_id < guest_info->apic_id_limit); > > + > > + for (j = 0; j < guest_info->numa_nodes; j++) { > > + assert(cpu->cpu_index < max_cpus); > > + if (test_bit(cpu->cpu_index, node_cpumask[j])) { > > + guest_info->node_cpu[apic_id] = cpu_to_le64(j); > > + break; > > + } > > + } > > +} > > + > > typedef struct PcGuestInfoState { > > PcGuestInfo info; > > Notifier machine_done; > > @@ -1047,6 +1064,20 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t > > below_4g_mem_size, > > { > > PcGuestInfoState *guest_info_state = g_malloc0(sizeof > > *guest_info_state); > > PcGuestInfo *guest_info = &guest_info_state->info; > > + CPUState *cpu; > > + > > + guest_info->ram_size = below_4g_mem_size + above_4g_mem_size; > > + guest_info->apic_id_limit = pc_apic_id_limit(max_cpus); > > + guest_info->apic_xrupt_override = kvm_allows_irq0_override(); > > + guest_info->numa_nodes = nb_numa_nodes; > > + guest_info->node_mem = g_memdup(node_mem, guest_info->numa_nodes * > > + sizeof *guest_info->node_mem); > > + guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit * > > + sizeof *guest_info->node_cpu); > > + > > + CPU_FOREACH(cpu) { > > + 
pc_set_cpu_guest_info(cpu, guest_info); > > + } > > pc_guest_info_init() is called only once; now let's suppose we hotplug CPUs > and then reboot the guest. Hot-added CPUs won't be accounted for in guest_info.node_cpu > since it's initialized only once and is never updated. As a result, the guest will > get a stale SRAT table. > > Using a callback in acpi_setup/update would make it possible to get an updated > guest_info.
Actually, we can fix this more simply, just by filling in all NUMA info ahead of time. Something like the following should fix this, right? diff --git a/hw/i386/pc.c b/hw/i386/pc.c index bbf11ed..a7fcbf9 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1029,23 +1029,6 @@ static void pc_fw_cfg_guest_info(PcGuestInfo *guest_info) fw_cfg_add_file(guest_info->fw_cfg, "etc/pci-info", info, sizeof *info); } -static void pc_set_cpu_guest_info(CPUState *cpu, PcGuestInfo *guest_info) -{ - CPUClass *klass = CPU_GET_CLASS(cpu); - uint64_t apic_id = klass->get_arch_id(cpu); - int j; - - assert(apic_id < guest_info->apic_id_limit); - - for (j = 0; j < guest_info->numa_nodes; j++) { - assert(cpu->cpu_index < max_cpus); - if (test_bit(cpu->cpu_index, node_cpumask[j])) { - guest_info->node_cpu[apic_id] = cpu_to_le64(j); - break; - } - } -} - typedef struct PcGuestInfoState { PcGuestInfo info; Notifier machine_done; @@ -1066,7 +1049,7 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, { PcGuestInfoState *guest_info_state = g_malloc0(sizeof *guest_info_state); PcGuestInfo *guest_info = &guest_info_state->info; - CPUState *cpu; + int i, j; guest_info->ram_size = below_4g_mem_size + above_4g_mem_size; guest_info->apic_id_limit = pc_apic_id_limit(max_cpus); @@ -1077,8 +1060,15 @@ PcGuestInfo *pc_guest_info_init(ram_addr_t below_4g_mem_size, guest_info->node_cpu = g_malloc0(guest_info->apic_id_limit * sizeof *guest_info->node_cpu); - CPU_FOREACH(cpu) { - pc_set_cpu_guest_info(cpu, guest_info); + for (i = 0; i < max_cpus; i++) { + unsigned int apic_id = x86_cpu_apic_id_from_index(i); + assert(apic_id < guest_info->apic_id_limit); + for (j = 0; j < nb_numa_nodes; j++) { + if (test_bit(i, node_cpumask[j])) { + guest_info->node_cpu[apic_id] = j; + break; + } + } } guest_info_state->machine_done.notify = pc_guest_info_machine_done;