On 09/03/2014 11:06 PM, Bharata B Rao wrote: > Add support for cpu-add monitor command. Use the exising EPOW event > infrastructure to send CPU hotplug notification to the guest. > > Signed-off-by: Bharata B Rao <bhar...@linux.vnet.ibm.com> > --- > hw/ppc/spapr.c | 240 > ++++++++++++++++++++++++++++++++++++++++++++++++- > hw/ppc/spapr_events.c | 35 ++++++-- > include/hw/ppc/spapr.h | 1 + > 3 files changed, 269 insertions(+), 7 deletions(-) > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > index d128834..9a3d1ca 100644 > --- a/hw/ppc/spapr.c > +++ b/hw/ppc/spapr.c > @@ -618,6 +618,8 @@ static void add_str(GString *s, const gchar *s1) > g_string_append_len(s, s1, strlen(s1) + 1); > } > > +uint32_t cpus_per_socket; > + > static void *spapr_create_fdt_skel(hwaddr initrd_base, > hwaddr initrd_size, > hwaddr kernel_size, > @@ -638,9 +640,10 @@ static void *spapr_create_fdt_skel(hwaddr initrd_base, > unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80}; > QemuOpts *opts = qemu_opts_find(qemu_find_opts("smp-opts"), NULL); > unsigned sockets = opts ? qemu_opt_get_number(opts, "sockets", 0) : 0; > - uint32_t cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; > char *buf; > > + cpus_per_socket = sockets ? (smp_cpus / sockets) : 1; > + > add_str(hypertas, "hcall-pft"); > add_str(hypertas, "hcall-term"); > add_str(hypertas, "hcall-dabr"); > @@ -1603,6 +1606,208 @@ static SaveVMHandlers savevm_htab_handlers = { > .load_state = htab_load, > }; > > +Notifier cpu_added_notifier; > + > +/* TODO: Duplicates code from spapr_create_fdt_skel(), Fix this */ > +static int spapr_populate_cpu_dt(CPUState *cs, void *fdt, int offset) > +{ > + PowerPCCPU *cpu = POWERPC_CPU(cs); > + CPUPPCState *env = &cpu->env; > + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); > + int index = ppc_get_vcpu_dt_id(cpu); > + uint32_t segs[] = {cpu_to_be32(28), cpu_to_be32(40), > + 0xffffffff, 0xffffffff}; > + uint32_t tbfreq = kvm_enabled() ? 
kvmppc_get_tbfreq() : TIMEBASE_FREQ; > + uint32_t cpufreq = kvm_enabled() ? kvmppc_get_clockfreq() : 1000000000; > + uint32_t page_sizes_prop[64]; > + size_t page_sizes_prop_size; > + sPAPRDrcEntry *drc_entry; > + int smpt = ppc_get_compat_smt_threads(cpu); > + uint32_t servers_prop[smpt]; > + uint32_t gservers_prop[smpt * 2]; > + int i; > + uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)}; > + > + _FDT((fdt_setprop_cell(fdt, offset, "reg", index))); > + _FDT((fdt_setprop_string(fdt, offset, "device_type", "cpu"))); > + > + _FDT((fdt_setprop_cell(fdt, offset, "cpu-version", env->spr[SPR_PVR]))); > + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-block-size", > + env->dcache_line_size))); > + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-line-size", > + env->dcache_line_size))); > + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-block-size", > + env->icache_line_size))); > + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-line-size", > + env->icache_line_size))); > + > + if (pcc->l1_dcache_size) { > + _FDT((fdt_setprop_cell(fdt, offset, "d-cache-size", > + pcc->l1_dcache_size))); > + } else { > + fprintf(stderr, "Warning: Unknown L1 dcache size for cpu\n"); > + } > + if (pcc->l1_icache_size) { > + _FDT((fdt_setprop_cell(fdt, offset, "i-cache-size", > + pcc->l1_icache_size))); > + } else { > + fprintf(stderr, "Warning: Unknown L1 icache size for cpu\n"); > + } > + > + _FDT((fdt_setprop_cell(fdt, offset, "timebase-frequency", tbfreq))); > + _FDT((fdt_setprop_cell(fdt, offset, "clock-frequency", cpufreq))); > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,slb-size", env->slb_nr))); > + _FDT((fdt_setprop_string(fdt, offset, "status", "okay"))); > + _FDT((fdt_setprop(fdt, offset, "64-bit", NULL, 0))); > + > + if (env->spr_cb[SPR_PURR].oea_read) { > + _FDT((fdt_setprop(fdt, offset, "ibm,purr", NULL, 0))); > + } > + > + if (env->mmu_model & POWERPC_MMU_1TSEG) { > + _FDT((fdt_setprop(fdt, offset, "ibm,processor-segment-sizes", > + segs, sizeof(segs)))); > + } > + > + /* 
Advertise VMX/VSX (vector extensions) if available > + * 0 / no property == no vector extensions > + * 1 == VMX / Altivec available > + * 2 == VSX available */ > + if (env->insns_flags & PPC_ALTIVEC) { > + uint32_t vmx = (env->insns_flags2 & PPC2_VSX) ? 2 : 1; > + > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,vmx", vmx))); > + } > + > + /* Advertise DFP (Decimal Floating Point) if available > + * 0 / no property == no DFP > + * 1 == DFP available */ > + if (env->insns_flags2 & PPC2_DFP) { > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,dfp", 1))); > + } > + > + page_sizes_prop_size = create_page_sizes_prop(env, page_sizes_prop, > + sizeof(page_sizes_prop)); > + if (page_sizes_prop_size) { > + _FDT((fdt_setprop(fdt, offset, "ibm,segment-page-sizes", > + page_sizes_prop, page_sizes_prop_size))); > + } > + > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,chip-id", > + cs->cpu_index / cpus_per_socket))); > + > + drc_entry = spapr_cpu_to_drc_entry(cpu->cpu_dt_id + > SPAPR_DRC_CPU_ID_BASE); > + g_assert(drc_entry); > + _FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", > + drc_entry->drc_index))); > + > + /* Build interrupt servers and gservers properties */ > + for (i = 0; i < smpt; i++) { > + servers_prop[i] = cpu_to_be32(index + i); > + /* Hack, direct the group queues back to cpu 0 */ > + gservers_prop[i*2] = cpu_to_be32(index + i); > + gservers_prop[i*2 + 1] = 0; > + } > + _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-server#s", > + servers_prop, sizeof(servers_prop))); > + _FDT(fdt_setprop(fdt, offset, "ibm,ppc-interrupt-gserver#s", > + gservers_prop, sizeof(gservers_prop))); > + _FDT(fdt_setprop(fdt, offset, "ibm,pft-size", > + pft_size_prop, sizeof(pft_size_prop))); > + return 0; > +} > + > +static void spapr_cpu_hotplug_add(CPUState *cs) > +{ > + int i, j; > + sPAPRDrcEntry *drc_entry; > + sPAPRConfigureConnectorState *ccs; > + int offset, ret; > + void *fdt_orig, *fdt; > + > + /* > + * TODO: Unlike PCI hotplug, we aren't marking the state as PRESENT > + * here 
since CPU hotplug code in the guest kernel expects the > + * state to be UNUSABLE. > + */
PCI devices are treated as physical entities, whereas CPUs are treated as logical entities, so their state transitions differ slightly. As your comment says, the state needs to start out as UNUSABLE. The guest calls get-sensor-state to check the dr-entity-sensor and confirm that it is in the UNUSABLE state (i.e., not yet owned by the OS). Once this is confirmed, the guest calls set-indicator to change the allocation state to USABLE. If that call succeeds, the dr-entity-sensor state for the CPU should now be PRESENT, indicating that it is now owned by the OS. The guest then calls set-indicator to change the isolation state to unisolate, followed by the configure-connector call. -Tyrel > +#if 0 > + uint32_t encoded = ENCODE_DRC_STATE(INDICATOR_ENTITY_SENSE_PRESENT, > + INDICATOR_ENTITY_SENSE_MASK, > + INDICATOR_ENTITY_SENSE_SHIFT); > +#endif > + PowerPCCPU *cpu = POWERPC_CPU(cs); > + DeviceClass *dc = DEVICE_GET_CLASS(cs); > + PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cs); > + char *nodename; > + int index = ppc_get_vcpu_dt_id(cpu); > + > + drc_entry = spapr_cpu_to_drc_entry(cpu->cpu_dt_id + > SPAPR_DRC_CPU_ID_BASE); > + g_assert(drc_entry); > + > +#if 0 > + drc_entry->state &= ~(uint32_t)INDICATOR_ENTITY_SENSE_MASK; > + drc_entry->state |= encoded; /* DR entity present */ > +#endif > + > + /* add OF node for CPU and required OF DT properties */ > + fdt_orig = g_malloc0(FDT_MAX_SIZE); > + offset = fdt_create(fdt_orig, FDT_MAX_SIZE); > + fdt_begin_node(fdt_orig, ""); > + fdt_end_node(fdt_orig); > + fdt_finish(fdt_orig); > + > + fdt = g_malloc0(FDT_MAX_SIZE); > + fdt_open_into(fdt_orig, fdt, FDT_MAX_SIZE); > + > + if (dc->fw_name == NULL) { > + ObjectClass *oc = OBJECT_CLASS(pcc); > + const char *typename; > + > + typename = object_class_get_name(oc); > + if (kvm_enabled() && > + strcmp(typename, "host-" TYPE_POWERPC_CPU) == 0) { > + typename = object_class_get_name(object_class_get_parent(oc)); > + } > + nodename = g_strndup(typename, > 
+ strlen(typename) - strlen("-" > TYPE_POWERPC_CPU)); > + for (i = j = 0; j < strlen(nodename); i++, j++) { > + if (nodename[j] == '_') { > + j++; > + } > + if (j > i) { > + nodename[i] = nodename[j]; > + } > + } > + if (j > i) { > + nodename[i] = '\0'; > + } > + dc->fw_name = g_strdup_printf("PowerPC,%s", nodename); > + g_free(nodename); > + } > + nodename = g_strdup_printf("%s@%x", dc->fw_name, index); > + > + offset = fdt_add_subnode(fdt, offset, nodename); > + ret = spapr_populate_cpu_dt(cs, fdt, offset); > + g_assert(!ret); > + g_free(fdt_orig); > + g_free(nodename); > + > + /* hold on to node, configure_connector will pass it to the guest later > */ > + ccs = &drc_entry->cc_state; > + ccs->fdt = fdt; > + ccs->offset_start = offset; > + ccs->state = CC_STATE_PENDING; > +} > + > +static void spapr_cpu_added_req(Notifier *n, void *opaque) > +{ > + CPUState *cs = CPU(opaque); > + > + spapr_cpu_hotplug_add(cs); > + spapr_cpu_hotplug_add_event(cs); > + return; > +} > + > static const char *current_cpu_model; > > static PowerPCCPU *ppc_new_cpu(const char *cpu_model) > @@ -1736,6 +1941,10 @@ static void ppc_spapr_init(MachineState *machine) > ppc_new_cpu(current_cpu_model); > } > > + /* Register a handler for CPU hotplug */ > + cpu_added_notifier.notify = spapr_cpu_added_req; > + qemu_register_cpu_added_notifier(&cpu_added_notifier); > + > /* allocate RAM */ > spapr->ram_limit = ram_size; > spapr->maxram_limit = machine->maxram_size; > @@ -1980,6 +2189,34 @@ static void spapr_machine_initfn(Object *obj) > spapr_get_kvm_type, spapr_set_kvm_type, NULL); > } > > +static void ppc_hot_add_cpu(const int64_t id, Error **errp) > +{ > + CPUState *cs; > + PowerPCCPU *cpu; > + > + if (id < 0) { > + error_setg(errp, "Invalid CPU id: %" PRIi64, id); > + return; > + } > + > + CPU_FOREACH(cs) { > + if (cs->cpu_index == id) { > + error_setg(errp, "Unable to add CPU: %" PRIi64 > + ", it already exists", id); > + return; > + } > + } > + > + if (id >= max_cpus) { > + error_setg(errp, 
"Unable to add CPU: %" PRIi64 > + ", max allowed: %d", id, max_cpus - 1); > + return; > + } > + > + cpu = ppc_new_cpu(current_cpu_model); > + spapr_cpu_reset(cpu); > +} > + > static void spapr_machine_class_init(ObjectClass *oc, void *data) > { > MachineClass *mc = MACHINE_CLASS(oc); > @@ -1995,6 +2232,7 @@ static void spapr_machine_class_init(ObjectClass *oc, > void *data) > mc->no_parallel = 1; > mc->default_boot_order = NULL; > mc->kvm_type = spapr_kvm_type; > + mc->hot_add_cpu = ppc_hot_add_cpu; > > fwc->get_dev_path = spapr_get_fw_dev_path; > } > diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c > index bb80080..f772255 100644 > --- a/hw/ppc/spapr_events.c > +++ b/hw/ppc/spapr_events.c > @@ -324,8 +324,10 @@ static void spapr_powerdown_req(Notifier *n, void > *opaque) > } > > static void spapr_hotplug_req_event(uint8_t hp_type, uint8_t hp_action, > - sPAPRPHBState *phb, int slot) > + void *dev, int slot) > { > + sPAPRPHBState *phb; > + PowerPCCPU *cpu; > struct rtas_error_log *hdr; > struct rtas_event_log_v6 *v6hdr; > struct rtas_event_log_v6_maina *maina; > @@ -372,21 +374,42 @@ static void spapr_hotplug_req_event(uint8_t hp_type, > uint8_t hp_action, > > hp->hotplug_type = hp_type; > > - drc_entry = spapr_phb_to_drc_entry(phb->buid); > - if (!drc_entry) { > - drc_entry = spapr_add_phb_to_drc_table(phb->buid, 2 /* Unusable */); > - } > - > switch (hp_type) { > case RTAS_LOG_V6_HP_TYPE_PCI: > + phb = (sPAPRPHBState *)dev; > + drc_entry = spapr_phb_to_drc_entry(phb->buid); > + if (!drc_entry) { > + drc_entry = spapr_add_phb_to_drc_table(phb->buid, 2 /* Unusable > */); > + } > + > hp->drc.index = drc_entry->child_entries[slot].drc_index; > hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX; > break; > + > + case RTAS_LOG_V6_HP_TYPE_CPU: > + cpu = (PowerPCCPU *)dev; > + drc_entry = spapr_cpu_to_drc_entry(cpu->cpu_dt_id + > + SPAPR_DRC_CPU_ID_BASE); > + if (!drc_entry) { > + drc_entry = spapr_add_cpu_to_drc_table(cpu->cpu_dt_id + > + 
SPAPR_DRC_CPU_ID_BASE, 2); > + } > + > + hp->drc.index = drc_entry->drc_index; > + hp->hotplug_identifier = RTAS_LOG_V6_HP_ID_DRC_INDEX; > + break; > } > > qemu_irq_pulse(xics_get_qirq(spapr->icp, spapr->check_exception_irq)); > } > > +void spapr_cpu_hotplug_add_event(CPUState *cs) > +{ > + PowerPCCPU *cpu = POWERPC_CPU(cs); > + return spapr_hotplug_req_event(RTAS_LOG_V6_HP_TYPE_CPU, > + RTAS_LOG_V6_HP_ACTION_ADD, cpu, 0); > +} > + > void spapr_pci_hotplug_add_event(DeviceState *qdev, int slot) > { > sPAPRPHBState *phb = SPAPR_PCI_HOST_BRIDGE(qdev); > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h > index d70e5ec..e5fa696 100644 > --- a/include/hw/ppc/spapr.h > +++ b/include/hw/ppc/spapr.h > @@ -567,5 +567,6 @@ sPAPRDrcEntry *spapr_cpu_to_drc_entry(uint64_t cpuid); > sPAPRDrcEntry *spapr_find_drc_entry(int drc_index); > void spapr_pci_hotplug_add_event(DeviceState *qdev, int slot); > void spapr_pci_hotplug_remove_event(DeviceState *qdev, int slot); > +void spapr_cpu_hotplug_add_event(CPUState *cs); > > #endif /* !defined (__HW_SPAPR_H__) */ >