On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf <ag...@suse.de> wrote: > CPUs that are not the boot CPU need to run spinning code to check whether they > should start executing and, if so, where to jump to. This usually happens > by leaving secondary CPUs looping and checking whether some variable in memory > has changed. > > In an environment like QEMU, however, we can be more clever. We can just export > the spin table that the primary CPU modifies as an MMIO region that wakes up > the respective secondary CPUs in an event-based manner. That saves us quite some cycles while > the secondary CPUs are not up yet. > > So this patch adds a PV device that simply exports the spin table into the > guest and thus allows the primary CPU to wake up secondary ones.
On Sparc32, there is no need for a PV device. The CPU is woken up from halted state with an IPI. Maybe you could use this approach? > Signed-off-by: Alexander Graf <ag...@suse.de> > > --- > > v1 -> v2: > > - change into MMIO scheme > - map the secondary NIP instead of 0 1:1 > - only map 64MB for TLB, same as u-boot > - prepare code for 64-bit spinnings > > v2 -> v3: > > - remove r6 > - set MAS2_M > - map EA 0 > - use second TLB1 entry > > v3 -> v4: > > - change to memoryops > > v4 -> v5: > > - fix endianness bugs > --- > Makefile.target | 2 +- > hw/ppce500_mpc8544ds.c | 33 ++++++++- > hw/ppce500_spin.c | 186 > ++++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 216 insertions(+), 5 deletions(-) > create mode 100644 hw/ppce500_spin.c > > diff --git a/Makefile.target b/Makefile.target > index 2ed9099..3f689ce 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -247,7 +247,7 @@ endif > obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o > obj-ppc-y += ppc440.o ppc440_bamboo.o > # PowerPC E500 boards > -obj-ppc-y += ppce500_mpc8544ds.o mpc8544_guts.o > +obj-ppc-y += ppce500_mpc8544ds.o mpc8544_guts.o ppce500_spin.o > # PowerPC 440 Xilinx ML507 reference board. > obj-ppc-y += virtex_ml507.o > obj-ppc-$(CONFIG_KVM) += kvm_ppc.o > diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c > index 9379624..3b8b449 100644 > --- a/hw/ppce500_mpc8544ds.c > +++ b/hw/ppce500_mpc8544ds.c > @@ -49,6 +49,7 @@ > #define MPC8544_PCI_IO 0xE1000000 > #define MPC8544_PCI_IOLEN 0x10000 > #define MPC8544_UTIL_BASE (MPC8544_CCSRBAR_BASE + 0xe0000) > +#define MPC8544_SPIN_BASE 0xEF000000 > > struct boot_info > { > @@ -164,6 +165,18 @@ static void mmubooke_create_initial_mapping(CPUState > *env, > tlb->mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX; > } > > +static void mpc8544ds_cpu_reset_sec(void *opaque) > +{ > + CPUState *env = opaque; > + > + cpu_reset(env); > + > + /* Secondary CPU starts in halted state for now. 
Needs to change when > + implementing non-kernel boot. */ > + env->halted = 1; > + env->exception_index = EXCP_HLT; > +} > + > static void mpc8544ds_cpu_reset(void *opaque) > { > CPUState *env = opaque; > @@ -172,6 +185,7 @@ static void mpc8544ds_cpu_reset(void *opaque) > cpu_reset(env); > > /* Set initial guest state. */ > + env->halted = 0; > env->gpr[1] = (16<<20) - 8; > env->gpr[3] = bi->dt_base; > env->nip = bi->entry; > @@ -199,7 +213,6 @@ static void mpc8544ds_init(ram_addr_t ram_size, > unsigned int pci_irq_nrs[4] = {1, 2, 3, 4}; > qemu_irq **irqs, *mpic; > DeviceState *dev; > - struct boot_info *boot_info; > CPUState *firstenv = NULL; > > /* Setup CPUs */ > @@ -234,9 +247,16 @@ static void mpc8544ds_init(ram_addr_t ram_size, > env->spr[SPR_40x_TCR] = 1 << 26; > > /* Register reset handler */ > - boot_info = g_malloc0(sizeof(struct boot_info)); > - qemu_register_reset(mpc8544ds_cpu_reset, env); > - env->load_info = boot_info; > + if (!i) { > + /* Primary CPU */ > + struct boot_info *boot_info; > + boot_info = g_malloc0(sizeof(struct boot_info)); > + qemu_register_reset(mpc8544ds_cpu_reset, env); > + env->load_info = boot_info; > + } else { > + /* Secondary CPUs */ > + qemu_register_reset(mpc8544ds_cpu_reset_sec, env); > + } > } > > env = firstenv; > @@ -289,6 +309,9 @@ static void mpc8544ds_init(ram_addr_t ram_size, > } > } > > + /* Register spinning region */ > + sysbus_create_simple("e500-spin", MPC8544_SPIN_BASE, NULL); > + > /* Load kernel. */ > if (kernel_filename) { > kernel_size = load_uimage(kernel_filename, &entry, &loadaddr, NULL); > @@ -321,6 +344,8 @@ static void mpc8544ds_init(ram_addr_t ram_size, > > /* If we're loading a kernel directly, we must load the device tree too. 
> */ > if (kernel_filename) { > + struct boot_info *boot_info; > + > #ifndef CONFIG_FDT > cpu_abort(env, "Compiled without FDT support - can't load kernel\n"); > #endif > diff --git a/hw/ppce500_spin.c b/hw/ppce500_spin.c > new file mode 100644 > index 0000000..38451ac > --- /dev/null > +++ b/hw/ppce500_spin.c > @@ -0,0 +1,186 @@ > +#include "hw.h" > +#include "sysemu.h" > +#include "sysbus.h" > +#include "kvm.h" > + > +#define MAX_CPUS 32 > + > +typedef struct spin_info { > + uint64_t addr; > + uint64_t r3; > + uint32_t resv; > + uint32_t pir; > + uint64_t reserved; > +} __attribute__ ((packed)) SpinInfo; This attribute isn't needed, the fields are aligned and also the structure is internal to QEMU so misalignment wouldn't matter. In the future, please use QEMU_PACKED. > + > +typedef struct spin_state { > + SysBusDevice busdev; > + MemoryRegion iomem; > + SpinInfo spin[MAX_CPUS]; > +} SpinState; > + > +typedef struct spin_kick { > + CPUState *env; > + SpinInfo *spin; > +} SpinKick; > + > +static void spin_reset(void *opaque) > +{ > + SpinState *s = opaque; > + int i; > + > + for (i = 0; i < MAX_CPUS; i++) { > + SpinInfo *info = &s->spin[i]; > + > + info->pir = i; > + info->r3 = i; > + info->addr = 1; > + } > +} > + > +/* Create -kernel TLB entries for BookE, linearly spanning 256MB. 
*/ > +static inline target_phys_addr_t booke206_page_size_to_tlb(uint64_t size) > +{ > + return (ffs(size >> 10) - 1) >> 1; > +} > + > +static void mmubooke_create_initial_mapping(CPUState *env, > + target_ulong va, > + target_phys_addr_t pa, > + target_phys_addr_t len) > +{ > + ppcmas_tlb_t *tlb = booke206_get_tlbm(env, 1, 0, 1); > + target_phys_addr_t size; > + > + size = (booke206_page_size_to_tlb(len) << MAS1_TSIZE_SHIFT); > + tlb->mas1 = MAS1_VALID | size; > + tlb->mas2 = (va & TARGET_PAGE_MASK) | MAS2_M; > + tlb->mas7_3 = pa & TARGET_PAGE_MASK; > + tlb->mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX; > +} > + > +static void spin_kick(void *data) > +{ > + SpinKick *kick = data; > + CPUState *env = kick->env; > + SpinInfo *curspin = kick->spin; > + target_phys_addr_t map_size = 64 * 1024 * 1024; > + target_phys_addr_t map_start; > + > + cpu_synchronize_state(env); > + stl_p(&curspin->pir, env->spr[SPR_PIR]); > + env->nip = ldq_p(&curspin->addr) & (map_size - 1); ldq_be_p() for non-PV emulation. 
> + env->gpr[3] = ldq_p(&curspin->r3); > + env->gpr[4] = 0; > + env->gpr[5] = 0; > + env->gpr[6] = 0; > + env->gpr[7] = map_size; > + env->gpr[8] = 0; > + env->gpr[9] = 0; > + > + map_start = ldq_p(&curspin->addr) & ~(map_size - 1); > + mmubooke_create_initial_mapping(env, 0, map_start, map_size); > + > + env->halted = 0; > + env->exception_index = -1; > + qemu_cpu_kick(env); > +} > + > +static void spin_write(void *opaque, target_phys_addr_t addr, uint64_t value, > + unsigned len) > +{ > + SpinState *s = opaque; > + int env_idx = addr / sizeof(SpinInfo); > + CPUState *env; > + SpinInfo *curspin = &s->spin[env_idx]; > + uint8_t *curspin_p = (uint8_t*)curspin; > + > + for (env = first_cpu; env != NULL; env = env->next_cpu) { > + if (env->cpu_index == env_idx) { > + break; > + } > + } > + > + if (!env) { > + /* Unknown CPU */ > + return; > + } > + > + if (!env->cpu_index) { > + /* primary CPU doesn't spin */ > + return; > + } > + > + curspin_p = &curspin_p[addr % sizeof(SpinInfo)]; > + switch (len) { > + case 1: > + stb_p(curspin_p, value); > + break; > + case 2: > + stw_p(curspin_p, value); > + break; > + case 4: > + stl_p(curspin_p, value); > + break; > + } > + > + if (!(ldq_p(&curspin->addr) & 1)) { > + /* run CPU */ > + SpinKick kick = { > + .env = env, > + .spin = curspin, > + }; > + > + run_on_cpu(env, spin_kick, &kick); > + } > +} > + > +static uint64_t spin_read(void *opaque, target_phys_addr_t addr, unsigned > len) > +{ > + SpinState *s = opaque; > + uint8_t *spin_p = &((uint8_t*)s->spin)[addr]; > + > + switch (len) { > + case 1: > + return ldub_p(spin_p); > + case 2: > + return lduw_p(spin_p); > + case 4: > + return ldl_p(spin_p); > + default: > + assert(0); abort() > + } > +} > + > +const MemoryRegionOps spin_rw_ops = { > + .read = spin_read, > + .write = spin_write, > + .endianness = DEVICE_BIG_ENDIAN, > +}; > + > +static int ppce500_spin_initfn(SysBusDevice *dev) > +{ > + SpinState *s; > + > + s = FROM_SYSBUS(SpinState, sysbus_from_qdev(dev)); > + > + 
memory_region_init_io(&s->iomem, &spin_rw_ops, s, "e500 spin pv device", > + sizeof(SpinInfo) * MAX_CPUS); > + sysbus_init_mmio_region(dev, &s->iomem); > + > + qemu_register_reset(spin_reset, s); > + > + return 0; > +} > + > +static SysBusDeviceInfo ppce500_spin_info = { > + .init = ppce500_spin_initfn, > + .qdev.name = "e500-spin", > + .qdev.size = sizeof(SpinState), > +}; > + > +static void ppce500_spin_register(void) > +{ > + sysbus_register_withprop(&ppce500_spin_info); > +} > +device_init(ppce500_spin_register); > -- > 1.6.0.2 > >