CPUs that are not the boot CPU need to run in spinning code to check if they should run off to execute and if so where to jump to. This usually happens by leaving secondary CPUs looping and checking if some variable in memory changed.
In an environment like Qemu however we can be more clever. We can just export the spin table the primary CPU modifies as MMIO region that would event based wake up the respective secondary CPUs. That saves us quite some cycles while the secondary CPUs are not up yet. So this patch adds a PV device that simply exports the spinning table into the guest and thus allows the primary CPU to wake up secondary ones. Signed-off-by: Alexander Graf <ag...@suse.de> --- v1 -> v2: - change into MMIO scheme - map the secondary NIP instead of 0 1:1 - only map 64MB for TLB, same as u-boot - prepare code for 64-bit spinnings --- Makefile.target | 2 +- hw/ppce500_mpc8544ds.c | 33 ++++++++- hw/ppce500_spin.c | 182 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 212 insertions(+), 5 deletions(-) create mode 100644 hw/ppce500_spin.c diff --git a/Makefile.target b/Makefile.target index 19f6101..925abd3 100644 --- a/Makefile.target +++ b/Makefile.target @@ -251,7 +251,7 @@ endif obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o obj-ppc-y += ppc440.o ppc440_bamboo.o # PowerPC E500 boards -obj-ppc-y += ppce500_mpc8544ds.o mpc8544_guts.o +obj-ppc-y += ppce500_mpc8544ds.o mpc8544_guts.o ppce500_spin.o # PowerPC 440 Xilinx ML507 reference board. obj-ppc-y += virtex_ml507.o obj-ppc-$(CONFIG_KVM) += kvm_ppc.o diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c index c74119a..96a5362 100644 --- a/hw/ppce500_mpc8544ds.c +++ b/hw/ppce500_mpc8544ds.c @@ -49,6 +49,7 @@ #define MPC8544_PCI_IO 0xE1000000 #define MPC8544_PCI_IOLEN 0x10000 #define MPC8544_UTIL_BASE (MPC8544_CCSRBAR_BASE + 0xe0000) +#define MPC8544_SPIN_BASE 0xEF000000 struct boot_info { @@ -164,6 +165,18 @@ static void mmubooke_create_initial_mapping(CPUState *env, tlb->mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX; } +static void mpc8544ds_cpu_reset_sec(void *opaque) +{ + CPUState *env = opaque; + + cpu_reset(env); + + /* Secondary CPU starts in halted state for now. Needs to change when + implementing non-kernel boot. */ + env->halted = 1; + env->exception_index = EXCP_HLT; +} + static void mpc8544ds_cpu_reset(void *opaque) { CPUState *env = opaque; @@ -172,6 +185,7 @@ static void mpc8544ds_cpu_reset(void *opaque) cpu_reset(env); /* Set initial guest state. */ + env->halted = 0; env->gpr[1] = (16<<20) - 8; env->gpr[3] = bi->dt_base; env->nip = bi->entry; @@ -199,7 +213,6 @@ static void mpc8544ds_init(ram_addr_t ram_size, unsigned int pci_irq_nrs[4] = {1, 2, 3, 4}; qemu_irq **irqs, *mpic; DeviceState *dev; - struct boot_info *boot_info; CPUState *firstenv = NULL; /* Setup CPUs */ @@ -234,9 +247,16 @@ static void mpc8544ds_init(ram_addr_t ram_size, env->spr[SPR_40x_TCR] = 1 << 26; /* Register reset handler */ - boot_info = qemu_mallocz(sizeof(struct boot_info)); - qemu_register_reset(mpc8544ds_cpu_reset, env); - env->load_info = boot_info; + if (!i) { + /* Primary CPU */ + struct boot_info *boot_info; + boot_info = qemu_mallocz(sizeof(struct boot_info)); + qemu_register_reset(mpc8544ds_cpu_reset, env); + env->load_info = boot_info; + } else { + /* Secondary CPUs */ + qemu_register_reset(mpc8544ds_cpu_reset_sec, env); + } } env = firstenv; @@ -289,6 +309,9 @@ static void mpc8544ds_init(ram_addr_t ram_size, } } + /* Register spinning region */ + sysbus_create_simple("e500-spin", MPC8544_SPIN_BASE, NULL); + /* Load kernel. */ if (kernel_filename) { kernel_size = load_uimage(kernel_filename, &entry, &loadaddr, NULL); @@ -321,6 +344,8 @@ static void mpc8544ds_init(ram_addr_t ram_size, /* If we're loading a kernel directly, we must load the device tree too. */ if (kernel_filename) { + struct boot_info *boot_info; + #ifndef CONFIG_FDT cpu_abort(env, "Compiled without FDT support - can't load kernel\n"); #endif diff --git a/hw/ppce500_spin.c b/hw/ppce500_spin.c new file mode 100644 index 0000000..299aab6 --- /dev/null +++ b/hw/ppce500_spin.c @@ -0,0 +1,182 @@ +#include "hw.h" +#include "sysemu.h" +#include "sysbus.h" +#include "rwhandler.h" +#include "kvm.h" + +#define MAX_CPUS 32 + +typedef struct spin_info { + uint64_t addr; + uint64_t r3; + uint32_t resv; + uint32_t pir; + uint64_t r6; +} __attribute__ ((packed)) SpinInfo; + +typedef struct spin_state { + SysBusDevice busdev; + ReadWriteHandler rw; + SpinInfo spin[MAX_CPUS]; +} SpinState; + +typedef struct spin_kick { + CPUState *env; + SpinInfo *spin; +} SpinKick; + +static void spin_reset(void *opaque) +{ + SpinState *s = opaque; + int i; + + for (i = 0; i < MAX_CPUS; i++) { + SpinInfo *info = &s->spin[i]; + + info->pir = i; + info->r3 = i; + info->addr = 1; + info->r6 = 0; + } +} + +/* Create -kernel TLB entries for BookE, linearly spanning 256MB. */ +static inline target_phys_addr_t booke206_page_size_to_tlb(uint64_t size) +{ + return (ffs(size >> 10) - 1) >> 1; +} + +static void mmubooke_create_initial_mapping(CPUState *env, + target_ulong va, + target_phys_addr_t pa, + target_phys_addr_t len) +{ + ppcmas_tlb_t *tlb = booke206_get_tlbm(env, 1, 0, 0); + target_phys_addr_t size; + + size = (booke206_page_size_to_tlb(len) << MAS1_TSIZE_SHIFT); + tlb->mas1 = MAS1_VALID | size; + tlb->mas2 = va & TARGET_PAGE_MASK; + tlb->mas7_3 = pa & TARGET_PAGE_MASK; + tlb->mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX; +} + +static void spin_kick(void *data) +{ + SpinKick *kick = data; + CPUState *env = kick->env; + SpinInfo *curspin = kick->spin; + target_phys_addr_t map_size = 64 * 1024 * 1024; + + cpu_synchronize_state(env); + curspin->pir = env->spr[SPR_PIR]; + env->nip = curspin->addr; + env->gpr[3] = curspin->r3; + env->gpr[4] = 0; + env->gpr[5] = 0; + env->gpr[6] = curspin->r6; + env->gpr[7] = map_size; + env->gpr[8] = 0; + env->gpr[9] = 0; + + mmubooke_create_initial_mapping(env, env->nip, env->nip, map_size); + + env->halted = 0; + env->exception_index = -1; + qemu_cpu_kick(env); +} + +static void spin_write(ReadWriteHandler *h, pcibus_t addr, uint32_t value, + int len) +{ + SpinState *s = container_of(h, SpinState, rw); + int env_idx = addr / sizeof(SpinInfo); + CPUState *env; + SpinInfo *curspin = &s->spin[env_idx]; + uint8_t *curspin_p = (uint8_t*)curspin; + + for (env = first_cpu; env != NULL; env = env->next_cpu) { + if (env->cpu_index == env_idx) { + break; + } + } + + if (!env) { + /* Unknown CPU */ + return; + } + + if (!env->cpu_index) { + /* primary CPU doesn't spin */ + return; + } + + curspin_p = &curspin_p[addr % sizeof(SpinInfo)]; + switch (len) { + case 1: + stb_p(curspin_p, value); + break; + case 2: + stw_p(curspin_p, value); + break; + case 4: + stl_p(curspin_p, value); + break; + } + + if (!(curspin->addr & 1)) { + /* run CPU */ + SpinKick kick = { + .env = env, + .spin = curspin, + }; + + run_on_cpu(env, spin_kick, &kick); + } +} + +static uint32_t spin_read(ReadWriteHandler *h, pcibus_t addr, int len) +{ + SpinState *s = container_of(h, SpinState, rw); + uint8_t *spin_p = &((uint8_t*)s->spin)[addr]; + + switch (len) { + case 1: + return ldub_p(spin_p); + case 2: + return lduw_p(spin_p); + case 4: + return ldl_p(spin_p); + default: + assert(0); + } +} + +static int ppce500_spin_initfn(SysBusDevice *dev) +{ + SpinState *s; + int iomem; + + s = FROM_SYSBUS(SpinState, sysbus_from_qdev(dev)); + + s->rw.read = spin_read; + s->rw.write = spin_write; + iomem = cpu_register_io_memory_simple(&s->rw, DEVICE_BIG_ENDIAN); + sysbus_init_mmio(dev, sizeof(SpinInfo) * MAX_CPUS, iomem); + + qemu_register_reset(spin_reset, s); + + return 0; +} + +static SysBusDeviceInfo ppce500_spin_info = { + .init = ppce500_spin_initfn, + .qdev.name = "e500-spin", + .qdev.size = sizeof(SpinState), +}; + +static void ppce500_spin_register(void) +{ + sysbus_register_withprop(&ppce500_spin_info); +} +device_init(ppce500_spin_register); -- 1.6.0.2