Hi, Thanks for the patch, and sorry for taking so long to look at it.
On Wed, Nov 25, 2020 at 07:54:17PM +0800, Zhenyu Ye wrote: > From 0b4318c9dbf6fa152ec14eab29837ea06e2d78e5 Mon Sep 17 00:00:00 2001 > From: eillon <yezhen...@huawei.com> > Date: Wed, 25 Nov 2020 19:17:03 +0800 > Subject: [PATCH] x86/cpu: initialize the CPU concurrently > > Currently we initialize cpu one by one in qemu_init_vcpu(), every cpu > should have to wait util cpu->created = true. When > cpus_accel->create_vcpu_thread > costs too long time or the number of CPUs is too large, this will prolong > the boot time. > How long was boot time before and after the patch? > This patch initializes the CPU concurrently. > > Signed-off-by: eillon <yezhen...@huawei.com> > --- > hw/i386/x86.c | 7 +++++-- > include/hw/core/cpu.h | 3 +++ > include/hw/i386/x86.h | 3 ++- > softmmu/cpus.c | 9 +++++++-- > 4 files changed, 17 insertions(+), 5 deletions(-) > > diff --git a/hw/i386/x86.c b/hw/i386/x86.c > index 5944fc44ed..a98f68b220 100644 > --- a/hw/i386/x86.c > +++ b/hw/i386/x86.c > @@ -101,9 +101,11 @@ uint32_t x86_cpu_apic_id_from_index(X86MachineState > *x86ms, > } > > > -void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, Error **errp) > +void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, > + bool last_cpu, Error **errp) > { > Object *cpu = object_new(MACHINE(x86ms)->cpu_type); > + ((CPUState *)cpu)->last_cpu = last_cpu; Please use the CPU() macro instead of direct casts. Preferably with a separate variable. e.g.: Object *obj = object_new(MACHINE(x86ms)->cpu_type); CPUState *cpu = CPU(obj); > > if (!object_property_set_uint(cpu, "apic-id", apic_id, errp)) { > goto out; > @@ -135,7 +137,8 @@ void x86_cpus_init(X86MachineState *x86ms, int > default_cpu_version) > ms->smp.max_cpus - 1) > + 1; > possible_cpus = mc->possible_cpu_arch_ids(ms); > for (i = 0; i < ms->smp.cpus; i++) { > - x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, &error_fatal); > + x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, > + i == ms->smp.cpus - 1, &error_fatal); > } > } > > diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h > index 3d92c967ff..b7e05e2d58 100644 > --- a/include/hw/core/cpu.h > +++ b/include/hw/core/cpu.h > @@ -467,6 +467,9 @@ struct CPUState { > > /* track IOMMUs whose translations we've cached in the TCG TLB */ > GArray *iommu_notifiers; > + > + /* The last cpu to init. */ > + bool last_cpu; > }; > > typedef QTAILQ_HEAD(CPUTailQ, CPUState) CPUTailQ; > diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h > index 739fac5087..0f7a6e5d16 100644 > --- a/include/hw/i386/x86.h > +++ b/include/hw/i386/x86.h > @@ -84,7 +84,8 @@ void init_topo_info(X86CPUTopoInfo *topo_info, const > X86MachineState *x86ms); > uint32_t x86_cpu_apic_id_from_index(X86MachineState *pcms, > unsigned int cpu_index); > > -void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, Error **errp); > +void x86_cpu_new(X86MachineState *pcms, int64_t apic_id, > + bool last_cpu, Error **errp); > void x86_cpus_init(X86MachineState *pcms, int default_cpu_version); > CpuInstanceProperties x86_cpu_index_to_props(MachineState *ms, > unsigned cpu_index); > diff --git a/softmmu/cpus.c b/softmmu/cpus.c > index e46ac68ad0..5a8eae28ab 100644 > --- a/softmmu/cpus.c > +++ b/softmmu/cpus.c > @@ -621,8 +621,13 @@ void qemu_init_vcpu(CPUState *cpu) > g_assert(cpus_accel != NULL && cpus_accel->create_vcpu_thread != NULL); > cpus_accel->create_vcpu_thread(cpu); > > - while (!cpu->created) { > - qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); > + if (cpu->last_cpu) { What about all the other architectures that don't set last_cpu=true? They will never wait for the CPU to be created. > + CPUState *cs = first_cpu; > + CPU_FOREACH(cs) { > + while (!cs->created) { > + qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); > + } > + } I suggest doing this "wait for all CPUs" step outside qemu_init_vcpu(). What about not making the last CPU special, and just providing a optional mechanism to wait for all VCPU threads after the CPU objects were created? e.g.: struct CPUState { ... /* * If true, qemu_init_vcpu() will not wait for the VCPU thread to be created * before returning. */ bool async_init; }; void qemu_wait_vcpu_thread_init(CPUState *cpu) { while (!cpu->created) { qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); } } void qemu_wait_all_vcpu_threads_init(void) { CPU_FOREACH(cpu) { qemu_wait_vcpu_thread_init(cpu); } } void qemu_init_vcpu(CPUState *cpu) { ... if (!cpu->async_init) { qemu_wait_vcpu_thread_init(cpu); } } void x86_cpu_new(X86MachineState *x86ms, int64_t apic_id, bool async, Error **errp) { ... cpu->async_init = async; qdev_realize(DEVICE(cpu), NULL, errp); ... } void x86_cpus_init(...) { ... for (i = 0; i < ms->smp.cpus; i++) { x86_cpu_new(x86ms, possible_cpus->cpus[i].arch_id, true, &error_fatal); } qemu_wait_all_vcpu_threads_init(); } > } > } > > -- > 2.22.0.windows.1 > -- Eduardo