On Fri, Mar 10, 2017 at 03:37:16PM +0530, Bharata B Rao wrote:
> On Fri, Mar 10, 2017 at 6:43 AM, David Gibson <da...@gibson.dropbear.id.au>
> wrote:
>
> > This patch implements hypercalls allowing a PAPR guest to resize its own
> > hash page table. This will eventually allow for more flexible memory
> > hotplug.
> >
> > The implementation is partially asynchronous, handled in a special thread
> > running the hpt_prepare_thread() function. The state of a pending resize
> > is stored in SPAPR_MACHINE->pending_hpt.
> >
> > The H_RESIZE_HPT_PREPARE hypercall will kick off creation of a new HPT, or,
> > if one is already in progress, monitor it for completion. If there is an
> > existing HPT resize in progress that doesn't match the size specified in
> > the call, it will cancel it, replacing it with a new one matching the
> > given size.
> >
> > The H_RESIZE_HPT_COMMIT completes transition to a resized HPT, and can only
> > be called successfully once H_RESIZE_HPT_PREPARE has successfully
> > completed initialization of a new HPT. The guest must ensure that there
> > are no concurrent accesses to the existing HPT while this is called (this
> > effectively means stop_machine() for Linux guests).
> >
> > For now H_RESIZE_HPT_COMMIT goes through the whole old HPT, rehashing each
> > HPTE into the new HPT. This can have quite high latency, but it seems to
> > be of the order of typical migration downtime latencies for HPTs of size
> > up to ~2GiB (which would be used in a 256GiB guest).
> >
> > In future we probably want to move more of the rehashing to the "prepare"
> > phase, by having H_ENTER and other hcalls update both current and
> > pending HPTs. That's a project for another day, but should be possible
> > without any changes to the guest interface.
> > > > Signed-off-by: David Gibson <da...@gibson.dropbear.id.au> > > --- > > hw/ppc/spapr.c | 4 +- > > hw/ppc/spapr_hcall.c | 338 ++++++++++++++++++++++++++++++ > > +++++++++++++++++- > > include/hw/ppc/spapr.h | 6 + > > target/ppc/mmu-hash64.h | 4 + > > 4 files changed, 346 insertions(+), 6 deletions(-) > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c > > index 06b436d..bf6ba64 100644 > > --- a/hw/ppc/spapr.c > > +++ b/hw/ppc/spapr.c > > @@ -94,8 +94,6 @@ > > > > #define PHANDLE_XICP 0x00001111 > > > > -#define HTAB_SIZE(spapr) (1ULL << ((spapr)->htab_shift)) > > - > > static int try_create_xics(sPAPRMachineState *spapr, const char > > *type_ics, > > const char *type_icp, int nr_servers, > > int nr_irqs, Error **errp) > > @@ -1169,7 +1167,7 @@ static void spapr_store_hpte(PPCVirtualHypervisor > > *vhyp, hwaddr ptex, > > } > > } > > > > -static int spapr_hpt_shift_for_ramsize(uint64_t ramsize) > > +int spapr_hpt_shift_for_ramsize(uint64_t ramsize) > > { > > int shift; > > > > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c > > index 9f88960..4c0b0fb 100644 > > --- a/hw/ppc/spapr_hcall.c > > +++ b/hw/ppc/spapr_hcall.c > > @@ -3,6 +3,7 @@ > > #include "sysemu/hw_accel.h" > > #include "sysemu/sysemu.h" > > #include "qemu/log.h" > > +#include "qemu/error-report.h" > > #include "cpu.h" > > #include "exec/exec-all.h" > > #include "helper_regs.h" > > @@ -352,20 +353,316 @@ static target_ulong h_read(PowerPCCPU *cpu, > > sPAPRMachineState *spapr, > > return H_SUCCESS; > > } > > > > +struct sPAPRPendingHPT { > > + /* These fields are read-only after initialization */ > > + int shift; > > + QemuThread thread; > > + > > + /* These fields are protected by the BQL */ > > + bool complete; > > + > > + /* These fields are private to the preparation thread if > > + * !complete, otherwise protected by the BQL */ > > + int ret; > > + void *hpt; > > +}; > > + > > +static void free_pending_hpt(sPAPRPendingHPT *pending) > > +{ > > + if (pending->hpt) { > > + 
qemu_vfree(pending->hpt); > > + } > > + > > + g_free(pending); > > +} > > + > > +static void *hpt_prepare_thread(void *opaque) > > +{ > > + sPAPRPendingHPT *pending = opaque; > > + size_t size = 1ULL << pending->shift; > > + > > + pending->hpt = qemu_memalign(size, size); > > + if (pending->hpt) { > > + memset(pending->hpt, 0, size); > > + pending->ret = H_SUCCESS; > > + } else { > > + pending->ret = H_NO_MEM; > > + } > > + > > + qemu_mutex_lock_iothread(); > > + > > + if (SPAPR_MACHINE(qdev_get_machine())->pending_hpt == pending) { > > + /* Ready to go */ > > + pending->complete = true; > > + } else { > > + /* We've been cancelled, clean ourselves up */ > > + free_pending_hpt(pending); > > + } > > + > > + qemu_mutex_unlock_iothread(); > > + return NULL; > > +} > > + > > +/* Must be called with BQL held */ > > +static void cancel_hpt_prepare(sPAPRMachineState *spapr) > > +{ > > + sPAPRPendingHPT *pending = spapr->pending_hpt; > > + > > + /* Let the thread know it's cancelled */ > > + spapr->pending_hpt = NULL; > > + > > + if (!pending) { > > + /* Nothing to do */ > > + return; > > + } > > + > > + if (!pending->complete) { > > + /* thread will clean itself up */ > > + return; > > + } > > + > > + free_pending_hpt(pending); > > +} > > + > > +static int build_dimm_list(Object *obj, void *opaque) > > +{ > > + GSList **list = opaque; > > + > > + if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { > > + DeviceState *dev = DEVICE(obj); > > + if (dev->realized) { /* only realized DIMMs matter */ > > + *list = g_slist_prepend(*list, dev); > > + } > > + } > > + > > + object_child_foreach(obj, build_dimm_list, opaque); > > + return 0; > > +} > > + > > +static ram_addr_t get_current_ram_size(void) > > +{ > > + GSList *list = NULL, *item; > > + ram_addr_t size = ram_size; > > + > > + build_dimm_list(qdev_get_machine(), &list); > > + for (item = list; item; item = g_slist_next(item)) { > > + Object *obj = OBJECT(item->data); > > + if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) { 
> > +            size += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
> > +                                            &error_abort);
> > +        }
> > +    }
> >
>
> You could use the existing API pc_existing_dimms_capacity() for the
> above.

Good idea, thanks.

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you. NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature