It's not possible to specify more than one thread per core for a pseries guest when running QEMU/TCG. Today, users can have setups like:
... -accel tcg,thread=multi -smp 8,threads=1,cores=8 ... or ... -accel tcg,thread=multi -smp 8,sockets=2,cores=4,threads=1 ... However, the following is not possible: ... -accel tcg,thread=multi -smp 16,threads=4,cores=2,sockets=2 ... qemu-system-ppc64: TCG cannot support more than 1 thread/core on a pseries machine The reason is how SMT has been implemented since POWER8. This patch implements a very basic simulation of the msgsndp instruction, using the external interrupt instead of doorbells. The result is a better user experience, allowing users to play with SMT modes. However, it is not related to MTTCG threads in any way. Results: ... -accel tcg,thread=multi -smp 16,threads=4,cores=2,sockets=2 ... root@ubuntu:~# ppc64_cpu --smt SMT=4 root@ubuntu:~# ppc64_cpu --info Core 0: 0* 1* 2* 3* Core 1: 4* 5* 6* 7* Core 2: 8* 9* 10* 11* Core 3: 12* 13* 14* 15* root@ubuntu:~# ppc64_cpu --smt=2 root@ubuntu:~# ppc64_cpu --info Core 0: 0* 1* 2 3 Core 1: 4* 5* 6 7 Core 2: 8* 9* 10 11 Core 3: 12* 13* 14 15 root@ubuntu:~# ppc64_cpu --smt=off root@ubuntu:~# ppc64_cpu --info Core 0: 0* 1 2 3 Core 1: 4* 5 6 7 Core 2: 8* 9 10 11 Core 3: 12* 13 14 15 root@ubuntu:~# ppc64_cpu --smt SMT is off root@ubuntu:~# lscpu Architecture: ppc64le Byte Order: Little Endian CPU(s): 16 On-line CPU(s) list: 0,4,8,12 Off-line CPU(s) list: 1-3,5-7,9-11,13-15 Thread(s) per core: 1 Core(s) per socket: 2 Socket(s): 2 NUMA node(s): 1 Model: 2.0 (pvr 004e 1200) Model name: POWER9 (architected), altivec supported Hypervisor vendor: KVM Virtualization type: para L1d cache: 32K L1i cache: 32K NUMA node0 CPU(s): 0,4,8,12 root@ubuntu:~# ppc64_cpu --smt=4 root@ubuntu:~# lscpu Architecture: ppc64le Byte Order: Little Endian CPU(s): 16 On-line CPU(s) list: 0-15 Thread(s) per core: 4 Core(s) per socket: 2 Socket(s): 2 NUMA node(s): 1 Model: 2.0 (pvr 004e 1200) Model name: POWER9 (architected), altivec supported Hypervisor vendor: KVM Virtualization type: para L1d cache: 32K L1i cache: 32K NUMA node0 CPU(s): 0-15 Note: 
it's also possible to simulate SMT in TCG single threaded mode. Signed-off-by: Jose Ricardo Ziviani <jos...@linux.ibm.com> --- hw/ppc/spapr.c | 5 ----- target/ppc/excp_helper.c | 24 ++++++++++++++++++++++++ target/ppc/helper.h | 1 + target/ppc/translate.c | 11 +++++++++++ 4 files changed, 36 insertions(+), 5 deletions(-) diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 8783b43396..3a864dfc7d 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -2572,11 +2572,6 @@ static void spapr_set_vsmt_mode(SpaprMachineState *spapr, Error **errp) int ret; unsigned int smp_threads = ms->smp.threads; - if (!kvm_enabled() && (smp_threads > 1)) { - error_setg(&local_err, "TCG cannot support more than 1 thread/core " - "on a pseries machine"); - goto out; - } if (!is_power_of_2(smp_threads)) { error_setg(&local_err, "Cannot support %d threads/core on a pseries " "machine because it must be a power of 2", smp_threads); diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 50b004d00d..ac5d196641 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -1231,6 +1231,30 @@ static int book3s_dbell2irq(target_ulong rb) return msg == DBELL_TYPE_DBELL_SERVER ? 
PPC_INTERRUPT_HDOORBELL : -1; } +void helper_msgsndp(target_ulong rb) +{ + CPUState *cs; + int irq = rb & DBELL_TYPE_MASK; + int thread_id = rb & 0x3f; + + if (irq != DBELL_TYPE_DBELL_SERVER) { + return; + } + + qemu_mutex_lock_iothread(); + CPU_FOREACH(cs) { + PowerPCCPU *cpu = POWERPC_CPU(cs); + + if (cpu->vcpu_id == thread_id) { + continue; + } + + cpu->env.pending_interrupts |= 1 << PPC_INTERRUPT_EXT; + cpu_interrupt(cs, CPU_INTERRUPT_HARD); + } + qemu_mutex_unlock_iothread(); +} + void helper_book3s_msgclr(CPUPPCState *env, target_ulong rb) { int irq = book3s_dbell2irq(rb); diff --git a/target/ppc/helper.h b/target/ppc/helper.h index 380c9b1e2a..eadd08324b 100644 --- a/target/ppc/helper.h +++ b/target/ppc/helper.h @@ -630,6 +630,7 @@ DEF_HELPER_FLAGS_3(store_sr, TCG_CALL_NO_RWG, void, env, tl, tl) DEF_HELPER_FLAGS_1(602_mfrom, TCG_CALL_NO_RWG_SE, tl, tl) DEF_HELPER_1(msgsnd, void, tl) +DEF_HELPER_1(msgsndp, void, tl) DEF_HELPER_2(msgclr, void, env, tl) DEF_HELPER_1(book3s_msgsnd, void, tl) DEF_HELPER_2(book3s_msgclr, void, env, tl) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index 4a5de28036..083731292b 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -6657,6 +6657,15 @@ static void gen_msgsnd(DisasContext *ctx) #endif /* defined(CONFIG_USER_ONLY) */ } +static void gen_msgsndp(DisasContext *ctx) +{ +#if defined(CONFIG_USER_ONLY) + GEN_PRIV; +#else + gen_helper_msgsndp(cpu_gpr[rB(ctx->opcode)]); +#endif /* defined(CONFIG_USER_ONLY) */ +} + static void gen_msgsync(DisasContext *ctx) { #if defined(CONFIG_USER_ONLY) @@ -7176,6 +7185,8 @@ GEN_HANDLER2_E(tlbilx_booke206, "tlbilx", 0x1F, 0x12, 0x00, 0x03800001, PPC_NONE, PPC2_BOOKE206), GEN_HANDLER2_E(msgsnd, "msgsnd", 0x1F, 0x0E, 0x06, 0x03ff0001, PPC_NONE, PPC2_PRCNTL), +GEN_HANDLER2_E(msgsndp, "msgsndp", 0x1F, 0x0E, 0x04, 0x03ff0001, + PPC_NONE, PPC_POWER), GEN_HANDLER2_E(msgclr, "msgclr", 0x1F, 0x0E, 0x07, 0x03ff0001, PPC_NONE, PPC2_PRCNTL), GEN_HANDLER2_E(msgsync, "msgsync", 
0x1F, 0x16, 0x1B, 0x00000000, -- 2.21.0