Hello, On 07/26/2016 08:23 AM, David Gibson wrote: > On Mon, Jul 25, 2016 at 04:24:43PM +0200, Cédric Le Goater wrote: >> From: Benjamin Herrenschmidt <b...@kernel.crashing.org> >> >> No devices yet, not even an interrupt controller, just to get >> started. >> >> (Folded in Stewart Smith patch to add command lien support) > ^^^^ typo
and it looks like french to me. The changelog needs an update with a little paragraph on what we are not emulating. The lowlevel firmware Hostboot should be mentionned. >> >> Signed-off-by: Benjamin Herrenschmidt <b...@kernel.crashing.org> >> [clg: updated for qemu-2.7 >> replaced fprintf by error_report >> used a common definition of _FDT macro >> removed VMStateDescription as migration is not yet supported >> added IBM Copyright statements >> ] >> Signed-off-by: Cédric Le Goater <c...@kaod.org> >> --- >> default-configs/ppc64-softmmu.mak | 1 + >> hw/ppc/Makefile.objs | 2 + >> hw/ppc/pnv.c | 593 >> ++++++++++++++++++++++++++++++++++++++ >> include/hw/ppc/pnv.h | 35 +++ >> 4 files changed, 631 insertions(+) >> create mode 100644 hw/ppc/pnv.c >> create mode 100644 include/hw/ppc/pnv.h >> >> diff --git a/default-configs/ppc64-softmmu.mak >> b/default-configs/ppc64-softmmu.mak >> index c4be59f638ed..516a6e25aba3 100644 >> --- a/default-configs/ppc64-softmmu.mak >> +++ b/default-configs/ppc64-softmmu.mak >> @@ -40,6 +40,7 @@ CONFIG_I8259=y >> CONFIG_XILINX=y >> CONFIG_XILINX_ETHLITE=y >> CONFIG_PSERIES=y >> +CONFIG_POWERNV=y >> CONFIG_PREP=y >> CONFIG_MAC=y >> CONFIG_E500=y >> diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs >> index 91a3420f473a..cbde482dd1b4 100644 >> --- a/hw/ppc/Makefile.objs >> +++ b/hw/ppc/Makefile.objs >> @@ -5,6 +5,8 @@ obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o >> obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o >> obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o spapr_rng.o >> obj-$(CONFIG_PSERIES) += spapr_cpu_core.o >> +# IBM PowerNV >> +obj-$(CONFIG_POWERNV) += pnv.o >> ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy) >> obj-y += spapr_pci_vfio.o >> endif >> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c >> new file mode 100644 >> index 000000000000..5096a073e829 >> --- /dev/null >> +++ b/hw/ppc/pnv.c >> @@ -0,0 +1,593 @@ >> +/* >> + * QEMU PowerPC PowerNV model >> + * >> + * Copyright (c) 2004-2007 Fabrice Bellard >> + * Copyright (c) 2007 Jocelyn Mayer >> + * Copyright (c) 2010 David Gibson, IBM Corporation. >> + * Copyright (c) 2014-2016 BenH, IBM Corporation. >> + * >> + * Permission is hereby granted, free of charge, to any person obtaining a >> copy >> + * of this software and associated documentation files (the "Software"), to >> deal >> + * in the Software without restriction, including without limitation the >> rights >> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell >> + * copies of the Software, and to permit persons to whom the Software is >> + * furnished to do so, subject to the following conditions: >> + * >> + * The above copyright notice and this permission notice shall be included >> in >> + * all copies or substantial portions of the Software. >> + * >> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS >> OR >> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, >> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL >> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR >> OTHER >> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING >> FROM, >> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN >> + * THE SOFTWARE. >> + * >> + */ >> +#include "qemu/osdep.h" >> +#include "sysemu/sysemu.h" >> +#include "hw/hw.h" >> +#include "hw/fw-path-provider.h" >> +#include "elf.h" >> +#include "net/net.h" >> +#include "sysemu/block-backend.h" >> +#include "sysemu/cpus.h" >> +#include "sysemu/kvm.h" >> +#include "sysemu/numa.h" >> +#include "kvm_ppc.h" >> +#include "mmu-hash64.h" >> +#include "qom/cpu.h" >> + >> +#include "hw/boards.h" >> +#include "hw/ppc/fdt.h" >> +#include "hw/ppc/ppc.h" >> +#include "hw/ppc/pnv.h" >> +#include "hw/loader.h" >> + >> +#include "exec/address-spaces.h" >> +#include "qemu/config-file.h" >> +#include "qemu/error-report.h" >> +#include "trace.h" >> +#include "hw/nmi.h" >> + >> +#include "hw/compat.h" >> + >> +#include <libfdt.h> >> + >> +#define FDT_ADDR 0x01000000 >> +#define FDT_MAX_SIZE 0x00100000 >> +#define FW_MAX_SIZE 0x00400000 >> +#define FW_FILE_NAME "skiboot.lid" >> +#define KERNEL_FILE_NAME "skiroot.lid" >> +#define KERNEL_LOAD_ADDR 0x20000000 > > The KERNEL_* names don't really make sense here. yes. >> +#define TIMEBASE_FREQ 512000000ULL >> + >> +#define MAX_CPUS 255 >> + >> +#define PHANDLE_XICP 0x00001111 > > Since you're not implementing an interrupt controller yet, I don't > think you need this define. indeed. This is too early. >> + >> +typedef struct sPowerNVMachineState sPowerNVMachineState; >> + >> +#define TYPE_POWERNV_MACHINE "powernv-machine" >> +#define POWERNV_MACHINE(obj) \ >> + OBJECT_CHECK(sPowerNVMachineState, (obj), TYPE_POWERNV_MACHINE) >> + >> +/** >> + * sPowerNVMachineState: >> + */ >> +struct sPowerNVMachineState { >> + /*< private >*/ >> + MachineState parent_obj; >> + PnvSystem sys; > > Having all the system specific information in this substructure is a > bit awkward. I think it would be preferable to just open code the > fields you need directly into sPowerNVMachineState. I agree. >> +}; >> + >> +static size_t create_page_sizes_prop(CPUPPCState *env, uint32_t *prop, >> + size_t maxsize) >> +{ >> + size_t maxcells = maxsize / sizeof(uint32_t); >> + int i, j, count; >> + uint32_t *p = prop; >> + >> + for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) { >> + struct ppc_one_seg_page_size *sps = &env->sps.sps[i]; >> + >> + if (!sps->page_shift) { >> + break; >> + } >> + for (count = 0; count < PPC_PAGE_SIZES_MAX_SZ; count++) { >> + if (sps->enc[count].page_shift == 0) { >> + break; >> + } >> + } >> + if ((p - prop) >= (maxcells - 3 - count * 2)) { >> + break; >> + } >> + *(p++) = cpu_to_be32(sps->page_shift); >> + *(p++) = cpu_to_be32(sps->slb_enc); >> + *(p++) = cpu_to_be32(count); >> + for (j = 0; j < count; j++) { >> + *(p++) = cpu_to_be32(sps->enc[j].page_shift); >> + *(p++) = cpu_to_be32(sps->enc[j].pte_enc); >> + } >> + } >> + >> + return (p - prop) * sizeof(uint32_t); >> +} > > Hm, I do wonder if we should make a place for helper functions that > are common between spapr and powernv. done. powernv_populate_memory() and powernv_populate_memory_node() are also very similar in spapr. I wonder if we can merge them ? >> +static void powernv_populate_memory_node(void *fdt, int nodeid, hwaddr >> start, >> + hwaddr size) >> +{ >> + /* Probablly bogus, need to match with what's going on in CPU nodes */ >> + uint32_t chip_id[] = { >> + cpu_to_be32(0x0), cpu_to_be32(nodeid) >> + }; >> + char *mem_name; >> + uint64_t mem_reg_property[2]; >> + >> + mem_reg_property[0] = cpu_to_be64(start); >> + mem_reg_property[1] = cpu_to_be64(size); >> + >> + mem_name = g_strdup_printf("memory@"TARGET_FMT_lx, start); >> + _FDT((fdt_begin_node(fdt, mem_name))); >> + g_free(mem_name); >> + _FDT((fdt_property_string(fdt, "device_type", "memory"))); >> + _FDT((fdt_property(fdt, "reg", mem_reg_property, >> + sizeof(mem_reg_property)))); >> + _FDT((fdt_property(fdt, "ibm,chip-id", chip_id, sizeof(chip_id)))); >> + _FDT((fdt_end_node(fdt))); >> +} >> + >> +static int powernv_populate_memory(void *fdt) >> +{ >> + hwaddr mem_start, node_size; >> + int i, nb_nodes = nb_numa_nodes; >> + NodeInfo *nodes = numa_info; >> + NodeInfo ramnode; >> + >> + /* No NUMA nodes, assume there is just one node with whole RAM */ >> + if (!nb_numa_nodes) { >> + nb_nodes = 1; >> + ramnode.node_mem = ram_size; >> + nodes = &ramnode; >> + } >> + >> + for (i = 0, mem_start = 0; i < nb_nodes; ++i) { >> + if (!nodes[i].node_mem) { >> + continue; >> + } >> + if (mem_start >= ram_size) { >> + node_size = 0; >> + } else { >> + node_size = nodes[i].node_mem; >> + if (node_size > ram_size - mem_start) { >> + node_size = ram_size - mem_start; >> + } >> + } >> + for ( ; node_size; ) { >> + hwaddr sizetmp = pow2floor(node_size); >> + >> + /* mem_start != 0 here */ >> + if (ctzl(mem_start) < ctzl(sizetmp)) { >> + sizetmp = 1ULL << ctzl(mem_start); >> + } >> + >> + powernv_populate_memory_node(fdt, i, mem_start, sizetmp); >> + node_size -= sizetmp; >> + mem_start += sizetmp; >> + } >> + } >> + >> + return 0; >> +} >> + >> +static void powernv_create_cpu_node(void *fdt, CPUState *cs, int >> smt_threads) >> +{ >> + PowerPCCPU *cpu = POWERPC_CPU(cs); >> + CPUPPCState *env = &cpu->env; >> + DeviceClass *dc = DEVICE_GET_CLASS(cs); >> + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); >> + uint32_t servers_prop[smt_threads]; >> + uint32_t gservers_prop[smt_threads * 2]; >> + int i, index = ppc_get_vcpu_dt_id(cpu); >> + uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), >> + 0xffffffff, 0xffffffff}; >> + uint32_t tbfreq = kvm_enabled() ? kvmppc_get_tbfreq() : TIMEBASE_FREQ; >> + uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; >> + uint32_t page_sizes_prop[64]; >> + size_t page_sizes_prop_size; >> + char *nodename; >> + >> + if ((index % smt_threads) != 0) { >> + return; >> + } > > This hack exists in spapr for historical reasons. I'd prefer if on > powernv you treat cpu cores as first class objects from the beginning, > and just call this once per core. like this is done with sPAPRCPUCore I suppose. I will get inspiration from the spapr cores and see if I can find some common ground with the powernv cores. we should. >> + nodename = g_strdup_printf("%s@%x", dc->fw_name, index); >> + >> + _FDT((fdt_begin_node(fdt, nodename))); >> + >> + g_free(nodename); >> + >> + _FDT((fdt_property_cell(fdt, "reg", index))); >> + _FDT((fdt_property_string(fdt, "device_type", "cpu"))); >> + >> + _FDT((fdt_property_cell(fdt, "cpu-version", env->spr[SPR_PVR]))); >> + _FDT((fdt_property_cell(fdt, "d-cache-block-size", >> + env->dcache_line_size))); >> + _FDT((fdt_property_cell(fdt, "d-cache-line-size", >> + env->dcache_line_size))); >> + _FDT((fdt_property_cell(fdt, "i-cache-block-size", >> + env->icache_line_size))); >> + _FDT((fdt_property_cell(fdt, "i-cache-line-size", >> + env->icache_line_size))); >> + >> + if (pcc->l1_dcache_size) { >> + _FDT((fdt_property_cell(fdt, "d-cache-size", pcc->l1_dcache_size))); >> + } else { >> + error_report("Warning: Unknown L1 dcache size for cpu"); >> + } >> + if (pcc->l1_icache_size) { >> + _FDT((fdt_property_cell(fdt, "i-cache-size", pcc->l1_icache_size))); >> + } else { >> + error_report("Warning: Unknown L1 icache size for cpu"); >> + } >> + >> + _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq))); >> + _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq))); >> + _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr))); >> + _FDT((fdt_property_string(fdt, "status", "okay"))); >> + _FDT((fdt_property(fdt, "64-bit", NULL, 0))); >> + >> + if (env->spr_cb[SPR_PURR].oea_read) { >> + _FDT((fdt_property(fdt, "ibm,purr", NULL, 0))); >> + } >> + >> + if (env->mmu_model & POWERPC_MMU_1TSEG) { >> + _FDT((fdt_property(fdt, "ibm,processor-segment-sizes", >> + segs, sizeof(segs)))); >> + } >> + >> + /* Advertise VMX/VSX (vector extensions) if available >> + * 0 / no property == no vector extensions >> + * 1 == VMX / Altivec available >> + * 2 == VSX available */ >> + if (env->insns_flags & PPC_ALTIVEC) { >> + uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1; >> + >> + _FDT((fdt_property_cell(fdt, "ibm,vmx", vmx))); >> + } >> + >> + /* Advertise DFP (Decimal Floating Point) if available >> + * 0 / no property == no DFP >> + * 1 == DFP available */ >> + if (env->insns_flags2 & PPC2_DFP) { >> + _FDT((fdt_property_cell(fdt, "ibm,dfp", 1))); >> + } >> + >> + page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop, >> + sizeof(page_sizes_prop)); >> + if (page_sizes_prop_size) { >> + _FDT((fdt_property(fdt, "ibm,segment-page-sizes", >> + page_sizes_prop, page_sizes_prop_size))); >> + } >> + >> + /* XXX Just a hack for now */ >> + _FDT((fdt_property_cell(fdt, "ibm,chip-id", 0))); >> + >> + if (cpu->cpu_version) { >> + _FDT((fdt_property_cell(fdt, "cpu-version", cpu->cpu_version))); >> + } >> + >> + /* Build interrupt servers and gservers properties */ >> + for (i = 0; i < smt_threads; i++) { >> + servers_prop[i] = cpu_to_be32(index + i); >> + /* Hack, direct the group queues back to cpu 0 */ >> + gservers_prop[i * 2] = cpu_to_be32(index + i); >> + gservers_prop[i * 2 + 1] = 0; >> + } >> + _FDT((fdt_property(fdt, "ibm,ppc-interrupt-server#s", >> + servers_prop, sizeof(servers_prop)))); >> + _FDT((fdt_property(fdt, "ibm,ppc-interrupt-gserver#s", >> + gservers_prop, sizeof(gservers_prop)))); >> + >> + _FDT((fdt_end_node(fdt))); >> +} >> + >> +static void *powernv_create_fdt(PnvSystem *sys, const char *kernel_cmdline, >> + uint32_t initrd_base, uint32_t initrd_size) >> +{ >> + void *fdt; >> + CPUState *cs; >> + int smt = kvmppc_smt_threads(); >> + uint32_t start_prop = cpu_to_be32(initrd_base); >> + uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size); >> + char *buf; >> + const char plat_compat[] = "qemu,powernv\0ibm,powernv"; >> + >> + fdt = g_malloc0(FDT_MAX_SIZE); >> + _FDT((fdt_create(fdt, FDT_MAX_SIZE))); >> + _FDT((fdt_finish_reservemap(fdt))); >> + >> + /* Root node */ >> + _FDT((fdt_begin_node(fdt, ""))); >> + _FDT((fdt_property_string(fdt, "model", "IBM PowerNV (emulated by >> qemu)"))); >> + _FDT((fdt_property(fdt, "compatible", plat_compat, >> sizeof(plat_compat)))); >> + >> + /* >> + * Add info to guest to indentify which host is it being run on >> + * and what is the uuid of the guest >> + */ >> + if (kvmppc_get_host_model(&buf)) { >> + _FDT((fdt_property_string(fdt, "host-model", buf))); >> + g_free(buf); >> + } >> + if (kvmppc_get_host_serial(&buf)) { >> + _FDT((fdt_property_string(fdt, "host-serial", buf))); >> + g_free(buf); >> + } > > Those seem dubious for a non-paravirt platform. OK. I will remove all the KVM related parts. We can add them later on when we work on kvm-pr support. >> + >> + buf = g_strdup_printf(UUID_FMT, qemu_uuid[0], qemu_uuid[1], >> + qemu_uuid[2], qemu_uuid[3], qemu_uuid[4], >> + qemu_uuid[5], qemu_uuid[6], qemu_uuid[7], >> + qemu_uuid[8], qemu_uuid[9], qemu_uuid[10], >> + qemu_uuid[11], qemu_uuid[12], qemu_uuid[13], >> + qemu_uuid[14], qemu_uuid[15]); >> + >> + _FDT((fdt_property_string(fdt, "vm,uuid", buf))); >> + g_free(buf); >> + >> + _FDT((fdt_begin_node(fdt, "chosen"))); >> + if (kernel_cmdline) { >> + _FDT((fdt_property_string(fdt, "bootargs", kernel_cmdline))); >> + } >> + _FDT((fdt_property(fdt, "linux,initrd-start", >> + &start_prop, sizeof(start_prop)))); >> + _FDT((fdt_property(fdt, "linux,initrd-end", >> + &end_prop, sizeof(end_prop)))); >> + _FDT((fdt_end_node(fdt))); >> + >> + _FDT((fdt_property_cell(fdt, "#address-cells", 0x2))); >> + _FDT((fdt_property_cell(fdt, "#size-cells", 0x2))); >> + >> + /* cpus */ >> + _FDT((fdt_begin_node(fdt, "cpus"))); >> + _FDT((fdt_property_cell(fdt, "#address-cells", 0x1))); >> + _FDT((fdt_property_cell(fdt, "#size-cells", 0x0))); >> + >> + CPU_FOREACH(cs) { >> + powernv_create_cpu_node(fdt, cs, smt); >> + } >> + >> + _FDT((fdt_end_node(fdt))); >> + >> + /* Memory */ >> + _FDT((powernv_populate_memory(fdt))); >> + >> + /* /hypervisor node */ > > This really doesn't seem like it makes sense on a non-paravirt platform. > >> + if (kvm_enabled()) { >> + uint8_t hypercall[16]; >> + >> + /* indicate KVM hypercall interface */ >> + _FDT((fdt_begin_node(fdt, "hypervisor"))); >> + _FDT((fdt_property_string(fdt, "compatible", "linux,kvm"))); >> + if (kvmppc_has_cap_fixup_hcalls()) { >> + /* >> + * Older KVM versions with older guest kernels were broken with >> the >> + * magic page, don't allow the guest to map it. >> + */ >> + kvmppc_get_hypercall(first_cpu->env_ptr, hypercall, >> + sizeof(hypercall)); >> + _FDT((fdt_property(fdt, "hcall-instructions", hypercall, >> + sizeof(hypercall)))); >> + } >> + _FDT((fdt_end_node(fdt))); >> + } >> + >> + _FDT((fdt_end_node(fdt))); /* close root node */ >> + _FDT((fdt_finish(fdt))); >> + >> + return fdt; >> +} >> + >> +static void powernv_cpu_reset(void *opaque) >> +{ >> + PowerPCCPU *cpu = opaque; >> + CPUState *cs = CPU(cpu); >> + CPUPPCState *env = &cpu->env; >> + >> + cpu_reset(cs); >> + >> + env->spr[SPR_PIR] = ppc_get_vcpu_dt_id(cpu); >> + env->spr[SPR_HIOR] = 0; >> + env->gpr[3] = FDT_ADDR; >> + env->nip = 0x10; >> + env->msr |= MSR_HVB; >> +} >> + >> +static void pnv_create_chip(PnvSystem *sys, unsigned int chip_no) >> +{ >> + PnvChip *chip = &sys->chips[chip_no]; > > Hm, my inclination would be to make the pnv chips a full QOM type, > unless there's a reason not to. Yes. let's go that way and I will try to port the rest of the patchset before sending to catch any issues. >> + >> + if (chip_no >= PNV_MAX_CHIPS) { >> + return; >> + } >> + >> + /* XXX Improve chip numbering to better match HW */ >> + chip->chip_id = chip_no; >> +} >> + >> +static void ppc_powernv_init(MachineState *machine) >> +{ >> + ram_addr_t ram_size = machine->ram_size; >> + const char *cpu_model = machine->cpu_model; >> + const char *kernel_filename = machine->kernel_filename; >> + const char *initrd_filename = machine->initrd_filename; >> + uint32_t initrd_base = 0; >> + long initrd_size = 0; >> + PowerPCCPU *cpu; >> + CPUPPCState *env; >> + MemoryRegion *sysmem = get_system_memory(); >> + MemoryRegion *ram = g_new(MemoryRegion, 1); >> + sPowerNVMachineState *pnv_machine = POWERNV_MACHINE(machine); >> + PnvSystem *sys = &pnv_machine->sys; >> + long fw_size; >> + char *filename; >> + void *fdt; >> + int i; >> + >> + /* init CPUs */ >> + if (cpu_model == NULL) { >> + cpu_model = kvm_enabled() ? "host" : "POWER8"; >> + } >> + >> + for (i = 0; i < smp_cpus; i++) { >> + cpu = cpu_ppc_init(cpu_model); >> + if (cpu == NULL) { >> + error_report("Unable to find PowerPC CPU definition"); >> + exit(1); >> + } >> + env = &cpu->env; >> + >> + /* Set time-base frequency to 512 MHz */ >> + cpu_ppc_tb_init(env, TIMEBASE_FREQ); >> + >> + /* MSR[IP] doesn't exist nowadays */ >> + env->msr_mask &= ~(1 << 6); >> + >> + qemu_register_reset(powernv_cpu_reset, cpu); > > As noted above, I think we want to start powernv off with "new style" > cpu instantiation. So for powernv, I think you want the machine to > create chip objects, which will create core objects which will create > the vcpu/thread objects. ok. >> + } >> + >> + /* allocate RAM */ >> + memory_region_allocate_system_memory(ram, NULL, "ppc_powernv.ram", >> + ram_size); >> + memory_region_add_subregion(sysmem, 0, ram); >> + >> + /* XXX We should decide how many chips to create based on #cores and >> + * Venice vs. Murano vs. Naples chip type etc..., for now, just create >> + * one chip. Also creation of the CPUs should be done per-chip >> + */ >> + sys->num_chips = 1; >> + >> + /* Create only one PHB for now until I figure out what's wrong >> + * when I create more (resource assignment failures in Linux) >> + */ >> + pnv_create_chip(sys, 0); >> + >> + if (bios_name == NULL) { >> + bios_name = FW_FILE_NAME; >> + } >> + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name); >> + fw_size = load_image_targphys(filename, 0, FW_MAX_SIZE); >> + if (fw_size < 0) { >> + hw_error("qemu: could not load OPAL '%s'\n", filename); >> + exit(1); >> + } >> + g_free(filename); >> + >> + >> + if (kernel_filename == NULL) { >> + kernel_filename = KERNEL_FILE_NAME; >> + } >> + filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, kernel_filename); >> + fw_size = load_image_targphys(filename, 0x20000000, 0x2000000); >> + if (fw_size < 0) { >> + hw_error("qemu: could not load kernel'%s'\n", filename); >> + exit(1); >> + } >> + g_free(filename); >> + >> + /* load initrd */ >> + if (initrd_filename) { >> + /* Try to locate the initrd in the gap between the kernel >> + * and the firmware. Add a bit of space just in case >> + */ >> + initrd_base = 0x40000000; >> + initrd_size = load_image_targphys(initrd_filename, initrd_base, >> + 0x10000000); /* 128MB max */ >> + if (initrd_size < 0) { >> + error_report("qemu: could not load initial ram disk >> '%s'", >> + initrd_filename); >> + exit(1); >> + } >> + } else { >> + initrd_base = 0; >> + initrd_size = 0; >> + } >> + fdt = powernv_create_fdt(sys, machine->kernel_cmdline, >> + initrd_base, initrd_size); > > So the fact that spapr fdt creation is split between machine init and > reset time causes us a fair bit of pain. what do you mean ? we should not be following the same spapr pattern, that is to create a fdt skeleton in init and finalize the fdt creation in reset. I have a patch just doing that to populate the tree with some more devices for IPMI. So I guess I need to rework that part. So we should be using a realize ops to handle the last bits of the fdt creation ? >> + cpu_physical_memory_write(FDT_ADDR, fdt, fdt_totalsize(fdt)); >> +} >> + >> +static int powernv_kvm_type(const char *vm_type) >> +{ >> + /* Always force PR KVM */ >> + return 2; > > This doesn't really seem right to me. I think you should be giving an > error if the user tries to specify HV, rather than silently changing > to PR. OK. I will drop the whole KVM feature for the moment. >> +} >> + >> +static void ppc_cpu_do_nmi_on_cpu(void *arg) >> +{ >> + CPUState *cs = arg; >> + >> + cpu_synchronize_state(cs); >> + ppc_cpu_do_system_reset(cs); >> +} >> + >> +static void powernv_nmi(NMIState *n, int cpu_index, Error **errp) >> +{ >> + CPUState *cs; >> + >> + CPU_FOREACH(cs) { >> + async_run_on_cpu(cs, ppc_cpu_do_nmi_on_cpu, cs); >> + } >> +} > > It may be simpler to just leave the nmi stuff out for now, until you > have a real implementation. OK. > >> + >> +static void powernv_machine_class_init(ObjectClass *oc, void *data) >> +{ >> + MachineClass *mc = MACHINE_CLASS(oc); >> + NMIClass *nc = NMI_CLASS(oc); >> + >> + mc->init = ppc_powernv_init; >> + mc->block_default_type = IF_SCSI; >> + mc->max_cpus = MAX_CPUS; >> + mc->no_parallel = 1; >> + mc->default_boot_order = NULL; >> + mc->kvm_type = powernv_kvm_type; >> + >> + nc->nmi_monitor_handler = powernv_nmi; >> +} >> + >> +static const TypeInfo powernv_machine_info = { >> + .name = TYPE_POWERNV_MACHINE, >> + .parent = TYPE_MACHINE, >> + .abstract = true, >> + .instance_size = sizeof(sPowerNVMachineState), >> + .class_init = powernv_machine_class_init, >> + .interfaces = (InterfaceInfo[]) { >> + { TYPE_NMI }, >> + { } >> + }, >> +}; >> + >> +static void powernv_machine_2_5_class_init(ObjectClass *oc, void *data) >> +{ >> + MachineClass *mc = MACHINE_CLASS(oc); >> + >> + mc->name = "powernv-2.5"; > > Looks like the version numbthis needs an update for current qemu. I guess we should be using v1 as this is the first version of PowerNV, unless we want to tie that to Opal (v3). would that make sense ? Thanks, C. >> + mc->desc = "PowerNV v2.5"; >> + mc->alias = "powernv"; >> +} >> + >> +static const TypeInfo powernv_machine_2_5_info = { >> + .name = MACHINE_TYPE_NAME("powernv-2.5"), >> + .parent = TYPE_POWERNV_MACHINE, >> + .class_init = powernv_machine_2_5_class_init, >> +}; >> + >> +static void powernv_machine_register_types(void) >> +{ >> + type_register_static(&powernv_machine_info); >> + type_register_static(&powernv_machine_2_5_info); >> +} >> + >> +type_init(powernv_machine_register_types) >> diff --git a/include/hw/ppc/pnv.h b/include/hw/ppc/pnv.h >> new file mode 100644 >> index 000000000000..383a336e7cd3 >> --- /dev/null >> +++ b/include/hw/ppc/pnv.h >> @@ -0,0 +1,35 @@ >> +/* >> + * QEMU PowerNV various definitions >> + * >> + * Copyright (c) 2014-2016 BenH, IBM Corporation. >> + * >> + * This library is free software; you can redistribute it and/or >> + * modify it under the terms of the GNU Lesser General Public >> + * License as published by the Free Software Foundation; either >> + * version 2 of the License, or (at your option) any later version. >> + * >> + * This library is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU >> + * Lesser General Public License for more details. >> + * >> + * You should have received a copy of the GNU Lesser General Public >> + * License along with this library; if not, see >> <http://www.gnu.org/licenses/>. >> + */ >> +#ifndef _PPC_PNV_H >> +#define _PPC_PNV_H >> + >> +#include "hw/hw.h" >> + >> +/* Should we turn that into a QOjb of some sort ? */ >> +typedef struct PnvChip { >> + uint32_t chip_id; >> +} PnvChip; >> + >> +typedef struct PnvSystem { >> + uint32_t num_chips; >> +#define PNV_MAX_CHIPS 1 >> + PnvChip chips[PNV_MAX_CHIPS]; >> +} PnvSystem; >> + >> +#endif /* _PPC_PNV_H */ >