On 13/05/14 20:02, Matthew Rosato wrote: > On 05/12/2014 03:35 AM, Christian Borntraeger wrote: >> On 07/05/14 20:05, Matthew Rosato wrote: >>> Add memory information to read SCP info and add handlers for >>> Read Storage Element Information, Attach Storage Element, >>> Assign Storage and Unassign Storage. >>> >>> Signed-off-by: Matthew Rosato <mjros...@linux.vnet.ibm.com> >>> --- >>> hw/s390x/sclp.c | 245 >>> ++++++++++++++++++++++++++++++++++++++++++++++++++-- >>> target-s390x/cpu.h | 15 ++++ >>> target-s390x/kvm.c | 5 ++ >>> 3 files changed, 258 insertions(+), 7 deletions(-) >>> >>> diff --git a/hw/s390x/sclp.c b/hw/s390x/sclp.c >>> index 338dbdf..b9425ca 100644 >>> --- a/hw/s390x/sclp.c >>> +++ b/hw/s390x/sclp.c >>> @@ -16,7 +16,8 @@ >>> #include "sysemu/kvm.h" >>> #include "exec/memory.h" >>> #include "sysemu/sysemu.h" >>> - >>> +#include "exec/address-spaces.h" >>> +#include "qemu/config-file.h" >>> #include "hw/s390x/sclp.h" >>> #include "hw/s390x/event-facility.h" >>> >>> @@ -33,10 +34,18 @@ static inline SCLPEventFacility >>> *get_event_facility(void) >>> static void read_SCP_info(SCCB *sccb) >>> { >>> ReadInfo *read_info = (ReadInfo *) sccb; >>> + sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev(); >> >> >> This only works for the ccw machine. The legacy machine does not yet have >> this device and qemu complains about the system bus not being hotplug >> capable. >> Cant you just initialize the device in the ccw machine and check here only >> for the existence? > > Agh, forgot about the legacy machine. Sure, I can split out creation > from the get_sclp_memory_hotplug_dev() code, and then rework the code here. > > But then I got to thinking -- should I not be setting > SCLP_FC_ASSIGN_ATTACH_READ_STOR below when we are running in the legacy > machine? Because without the sclp_memory_hotplug_dev, we really aren't > supporting these new functions (assign_storage, unassign_storage, > assign_storage_element, read_storage_element*_info). 
> Then, I can probably add some assert(mhd) for the remaining > get_sclp_memory_hotplug_dev() calls, since we shouldn't be getting > there for the legacy machine. > > What do you think? Alternatively, I'd have to add code to fudge returns > for each of these new functions.
Given our test coverage for the legacy machine, I agree to not add memory hotplug to this machine. Please make sure to enhance read_SCP_info to be able to handle the non-existence of the hotplug device. > >> >>> CPUState *cpu; >>> - int shift = 0; >>> int cpu_count = 0; >>> int i = 0; >>> + int rnsize, rnmax; >>> + QemuOpts *opts = qemu_opts_find(qemu_find_opts("memory"), NULL); >>> + int slots = qemu_opt_get_number(opts, "slots", 0); >>> + int max_avail_slots = s390_get_memslot_count(kvm_state); >>> + >>> + if (slots > max_avail_slots) { >>> + slots = max_avail_slots; >>> + } >>> >>> CPU_FOREACH(cpu) { >>> cpu_count++; >>> @@ -52,16 +61,222 @@ static void read_SCP_info(SCCB *sccb) >>> read_info->entries[i].type = 0; >>> } >>> >>> - read_info->facilities = cpu_to_be64(SCLP_HAS_CPU_INFO); >>> + /* >>> + * The storage increment size is a multiple of 1M and is a power of 2. >>> + * The number of storage increments must be MAX_STORAGE_INCREMENTS or >>> fewer. >>> + */ >>> + while ((ram_size >> mhd->increment_size) > MAX_STORAGE_INCREMENTS) { >>> + mhd->increment_size++; >>> + } >>> + while ((mhd->standby_mem_size >> mhd->increment_size) > >>> + MAX_STORAGE_INCREMENTS) { >>> + mhd->increment_size++; >>> + } >>> + >>> + mhd->standby_subregion_size = MEM_SECTION_SIZE; >>> + /* Deduct the memory slot already used for core */ >>> + if (slots > 0) { >>> + while ((mhd->standby_subregion_size * (slots - 1) >>> + < mhd->standby_mem_size)) { >>> + mhd->standby_subregion_size = mhd->standby_subregion_size << 1; >>> + } >>> + } >>> + /* >>> + * Initialize mapping of guest standby memory sections indicating which >>> + * are and are not online. Assume all standby memory begins offline. 
>>> + */ >>> + if (mhd->standby_state_map == 0) { >>> + if (mhd->standby_mem_size % mhd->standby_subregion_size) { >>> + mhd->standby_state_map = g_malloc0((mhd->standby_mem_size / >>> + mhd->standby_subregion_size + >>> 1) * >>> + (mhd->standby_subregion_size / >>> + MEM_SECTION_SIZE)); >>> + } else { >>> + mhd->standby_state_map = g_malloc0(mhd->standby_mem_size / >>> + MEM_SECTION_SIZE); >>> + } >>> + } >>> + >>> + mhd->padded_ram_size = ram_size + mhd->pad_size; >>> + mhd->rzm = 1 << mhd->increment_size; >>> + rnsize = 1 << (mhd->increment_size - 20); >>> + if (rnsize <= 128) { >>> + read_info->rnsize = rnsize; >>> + } else { >>> + read_info->rnsize = 0; >>> + read_info->rnsize2 = cpu_to_be32(rnsize); >>> + } >>> >>> - while ((ram_size >> (20 + shift)) > 65535) { >>> - shift++; >>> + rnmax = ((ram_size + mhd->standby_mem_size + mhd->pad_size) >>> + >> mhd->increment_size); >>> + if (rnmax < 0x10000) { >>> + read_info->rnmax = cpu_to_be16(rnmax); >>> + } else { >>> + read_info->rnmax = cpu_to_be16(0); >>> + read_info->rnmax2 = cpu_to_be64(rnmax); >>> } >>> - read_info->rnmax = cpu_to_be16(ram_size >> (20 + shift)); >>> - read_info->rnsize = 1 << shift; >>> + >>> + read_info->facilities = cpu_to_be64(SCLP_HAS_CPU_INFO | >>> + SCLP_FC_ASSIGN_ATTACH_READ_STOR); >>> + >>> sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION); >>> } >>> >>> +static void read_storage_element0_info(SCCB *sccb) >>> +{ >>> + int i, assigned; >>> + int subincrement_id = SCLP_STARTING_SUBINCREMENT_ID; >>> + ReadStorageElementInfo *storage_info = (ReadStorageElementInfo *) sccb; >>> + sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev(); >>> + >>> + if ((ram_size >> mhd->increment_size) >= 0x10000) { >>> + sccb->h.response_code = >>> cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); >>> + return; >>> + } >>> + >>> + /* Return information regarding core memory */ >>> + storage_info->max_id = cpu_to_be16(mhd->standby_mem_size ? 
1 : 0); >>> + assigned = ram_size >> mhd->increment_size; >>> + storage_info->assigned = cpu_to_be16(assigned); >>> + >>> + for (i = 0; i < assigned; i++) { >>> + storage_info->entries[i] = cpu_to_be32(subincrement_id); >>> + subincrement_id += SCLP_INCREMENT_UNIT; >>> + } >>> + sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_READ_COMPLETION); >>> +} >>> + >>> +static void read_storage_element1_info(SCCB *sccb) >>> +{ >>> + ReadStorageElementInfo *storage_info = (ReadStorageElementInfo *) sccb; >>> + sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev(); >> >> Same here. >> >>> + >>> + if ((mhd->standby_mem_size >> mhd->increment_size) >= 0x10000) { >>> + sccb->h.response_code = >>> cpu_to_be16(SCLP_RC_SCCB_BOUNDARY_VIOLATION); >>> + return; >>> + } >>> + >>> + /* Return information regarding standby memory */ >>> + storage_info->max_id = cpu_to_be16(mhd->standby_mem_size ? 1 : 0); >>> + storage_info->assigned = cpu_to_be16(mhd->standby_mem_size >> >>> + mhd->increment_size); >>> + storage_info->standby = cpu_to_be16(mhd->standby_mem_size >> >>> + mhd->increment_size); >>> + sccb->h.response_code = cpu_to_be16(SCLP_RC_STANDBY_READ_COMPLETION); >>> +} >>> + >>> +static void attach_storage_element(SCCB *sccb, uint16_t element) >>> +{ >>> + int i, assigned, subincrement_id; >>> + AttachStorageElement *attach_info = (AttachStorageElement *) sccb; >>> + sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev(); >>> + >>> + if (element != 1) { >>> + sccb->h.response_code = cpu_to_be16(SCLP_RC_INVALID_SCLP_COMMAND); >>> + return; >>> + } >>> + >>> + assigned = mhd->standby_mem_size >> mhd->increment_size; >>> + attach_info->assigned = cpu_to_be16(assigned); >>> + subincrement_id = ((ram_size >> mhd->increment_size) << 16) >>> + + SCLP_STARTING_SUBINCREMENT_ID; >>> + for (i = 0; i < assigned; i++) { >>> + attach_info->entries[i] = cpu_to_be32(subincrement_id); >>> + subincrement_id += SCLP_INCREMENT_UNIT; >>> + } >>> + sccb->h.response_code = 
cpu_to_be16(SCLP_RC_NORMAL_COMPLETION); >>> +} >>> + >>> +static void assign_storage(SCCB *sccb) >>> +{ >>> + MemoryRegion *mr = NULL; >>> + int this_subregion_size; >>> + AssignStorage *assign_info = (AssignStorage *) sccb; >>> + sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev(); >>> + ram_addr_t assign_addr = (assign_info->rn - 1) * mhd->rzm; >>> + MemoryRegion *sysmem = get_system_memory(); >>> + >>> + if ((assign_addr % MEM_SECTION_SIZE == 0) && >>> + (assign_addr >= mhd->padded_ram_size)) { >>> + /* Re-use existing memory region if found */ >>> + mr = memory_region_find(sysmem, assign_addr, 1).mr; >>> + if (!mr) { >>> + >>> + MemoryRegion *standby_ram = g_new(MemoryRegion, 1); >>> + >>> + /* offset to align to standby_subregion_size for allocation */ >>> + ram_addr_t offset = assign_addr - >>> + (assign_addr - mhd->padded_ram_size) >>> + % mhd->standby_subregion_size; >>> + >>> + /* strlen("standby.ram") + 4 (Max of KVM_MEMORY_SLOTS) + NULL >>> */ >>> + char id[16]; >>> + snprintf(id, 16, "standby.ram%d", >>> + (int)((offset - mhd->padded_ram_size) / >>> + mhd->standby_subregion_size) + 1); >>> + >>> + /* Allocate a subregion of the calculated >>> standby_subregion_size */ >>> + if (offset + mhd->standby_subregion_size > >>> + mhd->padded_ram_size + mhd->standby_mem_size) { >>> + this_subregion_size = mhd->padded_ram_size + >>> + mhd->standby_mem_size - offset; >>> + } else { >>> + this_subregion_size = mhd->standby_subregion_size; >>> + } >>> + >>> + memory_region_init_ram(standby_ram, NULL, id, >>> this_subregion_size); >>> + vmstate_register_ram_global(standby_ram); >>> + memory_region_add_subregion(sysmem, offset, standby_ram); >>> + } >>> + /* The specified subregion is no longer in standby */ >>> + mhd->standby_state_map[(assign_addr - mhd->padded_ram_size) >>> + / MEM_SECTION_SIZE] = 1; >>> + } >>> + sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION); >>> +} >>> + >>> +static void unassign_storage(SCCB *sccb) >>> +{ >>> + 
MemoryRegion *mr = NULL; >>> + AssignStorage *assign_info = (AssignStorage *) sccb; >>> + sclpMemoryHotplugDev *mhd = get_sclp_memory_hotplug_dev(); >>> + ram_addr_t unassign_addr = (assign_info->rn - 1) * mhd->rzm; >>> + MemoryRegion *sysmem = get_system_memory(); >>> + >>> + /* if the addr is a multiple of 256 MB */ >>> + if ((unassign_addr % MEM_SECTION_SIZE == 0) && >>> + (unassign_addr >= mhd->padded_ram_size)) { >>> + mhd->standby_state_map[(unassign_addr - >>> + mhd->padded_ram_size) / MEM_SECTION_SIZE] = 0; >>> + >>> + /* find the specified memory region and destroy it */ >>> + mr = memory_region_find(sysmem, unassign_addr, 1).mr; >>> + if (mr) { >>> + int i; >>> + int is_removable = 1; >>> + ram_addr_t map_offset = (unassign_addr - mhd->padded_ram_size - >>> + (unassign_addr - mhd->padded_ram_size) >>> + % mhd->standby_subregion_size); >>> + /* Mark all affected subregions as 'standby' once again */ >>> + for (i = 0; >>> + i < (mhd->standby_subregion_size / MEM_SECTION_SIZE); >>> + i++) { >>> + >>> + if (mhd->standby_state_map[i + map_offset / >>> MEM_SECTION_SIZE]) { >>> + is_removable = 0; >>> + break; >>> + } >>> + } >>> + if (is_removable) { >>> + memory_region_del_subregion(sysmem, mr); >>> + memory_region_destroy(mr); >>> + g_free(mr); >>> + } >>> + } >>> + } >>> + sccb->h.response_code = cpu_to_be16(SCLP_RC_NORMAL_COMPLETION); >>> +} >>> + >>> /* Provide information about the CPU */ >>> static void sclp_read_cpu_info(SCCB *sccb) >>> { >>> @@ -103,6 +318,22 @@ static void sclp_execute(SCCB *sccb, uint32_t code) >>> case SCLP_CMDW_READ_CPU_INFO: >>> sclp_read_cpu_info(sccb); >>> break; >>> + case SCLP_READ_STORAGE_ELEMENT_INFO: >>> + if (code & 0xff00) { >>> + read_storage_element1_info(sccb); >>> + } else { >>> + read_storage_element0_info(sccb); >>> + } >>> + break; >>> + case SCLP_ATTACH_STORAGE_ELEMENT: >>> + attach_storage_element(sccb, (code & 0xff00) >> 8); >>> + break; >>> + case SCLP_ASSIGN_STORAGE: >>> + assign_storage(sccb); >>> + break; 
>>> + case SCLP_UNASSIGN_STORAGE: >>> + unassign_storage(sccb); >>> + break; >>> default: >>> efc->command_handler(ef, sccb, code); >>> break; >>> diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h >>> index 193eac3..ec9879a 100644 >>> --- a/target-s390x/cpu.h >>> +++ b/target-s390x/cpu.h >>> @@ -1049,6 +1049,7 @@ static inline void cpu_inject_crw_mchk(S390CPU *cpu) >>> >>> /* from s390-virtio-ccw */ >>> #define MEM_SECTION_SIZE 0x10000000UL >>> +#define MAX_AVAIL_SLOTS 32 >>> >>> /* fpu_helper.c */ >>> uint32_t set_cc_nz_f32(float32 v); >>> @@ -1072,6 +1073,7 @@ void kvm_s390_enable_css_support(S390CPU *cpu); >>> int kvm_s390_assign_subch_ioeventfd(EventNotifier *notifier, uint32_t sch, >>> int vq, bool assign); >>> int kvm_s390_cpu_restart(S390CPU *cpu); >>> +int kvm_s390_get_memslot_count(KVMState *s); >>> #else >>> static inline void kvm_s390_io_interrupt(S390CPU *cpu, >>> uint16_t subchannel_id, >>> @@ -1096,6 +1098,10 @@ static inline int kvm_s390_cpu_restart(S390CPU *cpu) >>> { >>> return -ENOSYS; >>> } >>> +static inline int kvm_s390_get_memslot_count(KVMState *s) >>> +{ >>> + return MAX_AVAIL_SLOTS; >>> +} >>> #endif >>> >>> static inline int s390_cpu_restart(S390CPU *cpu) >>> @@ -1106,6 +1112,15 @@ static inline int s390_cpu_restart(S390CPU *cpu) >>> return -ENOSYS; >>> } >>> >>> +static inline int s390_get_memslot_count(KVMState *s) >>> +{ >>> + if (kvm_enabled()) { >>> + return kvm_s390_get_memslot_count(s); >>> + } else { >>> + return MAX_AVAIL_SLOTS; >>> + } >>> +} >>> + >>> static inline void s390_io_interrupt(S390CPU *cpu, >>> uint16_t subchannel_id, >>> uint16_t subchannel_nr, >>> diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c >>> index b7b0edc..ea6123c 100644 >>> --- a/target-s390x/kvm.c >>> +++ b/target-s390x/kvm.c >>> @@ -959,3 +959,8 @@ int kvm_s390_assign_subch_ioeventfd(EventNotifier >>> *notifier, uint32_t sch, >>> } >>> return kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick); >>> } >>> + >>> +int kvm_s390_get_memslot_count(KVMState *s) 
>>> +{ >>> + return kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS); >>> +} >>> >> >> >> >> >