On 10/02/17 11:41, David Gibson wrote: > On Thu, Feb 09, 2017 at 12:48:19PM +0100, Paolo Bonzini wrote: >> >> >> On 09/02/2017 06:43, Alexey Kardashevskiy wrote: >>> On 03/01/17 10:34, David Gibson wrote: >>>> On Thu, Dec 22, 2016 at 04:22:11PM +1100, Alexey Kardashevskiy wrote: >>>>> getrampagesize() returns the largest supported page size and is mainly >>>>> used to know if huge pages are enabled. >>>>> >>>>> However, it is implemented in target-ppc/kvm.c and is not available >>>>> in TCG or other architectures. >>>>> >>>>> This renames and moves gethugepagesize() to mmap-alloc.c where >>>>> an fd-based analog of it is already implemented. This renames and moves >>>>> getrampagesize() to exec.c as it seems to be the common place for >>>>> helpers like this. >>>>> >>>>> The first user of it is going to be a spapr-pci-host-bridge which >>>>> needs to know the largest RAM page size so the guest could try >>>>> using bigger IOMMU pages to save memory. >>>>> >>>>> Signed-off-by: Alexey Kardashevskiy <a...@ozlabs.ru> >>>> >>>> Reviewed-by: David Gibson <da...@gibson.dropbear.id.au> >>>> >>>> Seems sensible to me, but I'm not comfortable merging this via my tree >>>> since it touches such core code. Probably should go via Paolo. >>> >>> Paolo, ping? >> >> It's just code movement, go ahead. > > Ok, I've merged this in my tree.
I cannot find it in any public tree, where did it go? > > >> >> Paolo >> >>> >>> >>>> >>>>> --- >>>>> include/exec/ram_addr.h | 1 + >>>>> include/qemu/mmap-alloc.h | 2 + >>>>> exec.c | 82 ++++++++++++++++++++++++++++++++++++ >>>>> target-ppc/kvm.c | 105 >>>>> ++-------------------------------------------- >>>>> util/mmap-alloc.c | 25 +++++++++++ >>>>> 5 files changed, 113 insertions(+), 102 deletions(-) >>>>> >>>>> diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h >>>>> index 54d7108a9e..3935cbcfcd 100644 >>>>> --- a/include/exec/ram_addr.h >>>>> +++ b/include/exec/ram_addr.h >>>>> @@ -91,6 +91,7 @@ typedef struct RAMList { >>>>> } RAMList; >>>>> extern RAMList ram_list; >>>>> >>>>> +long qemu_getrampagesize(void); >>>>> ram_addr_t last_ram_offset(void); >>>>> void qemu_mutex_lock_ramlist(void); >>>>> void qemu_mutex_unlock_ramlist(void); >>>>> diff --git a/include/qemu/mmap-alloc.h b/include/qemu/mmap-alloc.h >>>>> index 933c024ac5..50385e3f81 100644 >>>>> --- a/include/qemu/mmap-alloc.h >>>>> +++ b/include/qemu/mmap-alloc.h >>>>> @@ -5,6 +5,8 @@ >>>>> >>>>> size_t qemu_fd_getpagesize(int fd); >>>>> >>>>> +size_t qemu_mempath_getpagesize(const char *mem_path); >>>>> + >>>>> void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared); >>>>> >>>>> void qemu_ram_munmap(void *ptr, size_t size); >>>>> diff --git a/exec.c b/exec.c >>>>> index 08c558eecf..d73b477a70 100644 >>>>> --- a/exec.c >>>>> +++ b/exec.c >>>>> @@ -32,6 +32,7 @@ >>>>> #endif >>>>> #include "sysemu/kvm.h" >>>>> #include "sysemu/sysemu.h" >>>>> +#include "sysemu/numa.h" >>>>> #include "qemu/timer.h" >>>>> #include "qemu/config-file.h" >>>>> #include "qemu/error-report.h" >>>>> @@ -1218,6 +1219,87 @@ void qemu_mutex_unlock_ramlist(void) >>>>> } >>>>> >>>>> #ifdef __linux__ >>>>> +/* >>>>> + * FIXME TOCTTOU: this iterates over memory backends' mem-path, which >>>>> + * may or may not name the same files / on the same filesystem now as >>>>> + * when we actually open and map them. 
Iterate over the file >>>>> + * descriptors instead, and use qemu_fd_getpagesize(). >>>>> + */ >>>>> +static int find_max_supported_pagesize(Object *obj, void *opaque) >>>>> +{ >>>>> + char *mem_path; >>>>> + long *hpsize_min = opaque; >>>>> + >>>>> + if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { >>>>> + mem_path = object_property_get_str(obj, "mem-path", NULL); >>>>> + if (mem_path) { >>>>> + long hpsize = qemu_mempath_getpagesize(mem_path); >>>>> + if (hpsize < *hpsize_min) { >>>>> + *hpsize_min = hpsize; >>>>> + } >>>>> + } else { >>>>> + *hpsize_min = getpagesize(); >>>>> + } >>>>> + } >>>>> + >>>>> + return 0; >>>>> +} >>>>> + >>>>> +long qemu_getrampagesize(void) >>>>> +{ >>>>> + long hpsize = LONG_MAX; >>>>> + long mainrampagesize; >>>>> + Object *memdev_root; >>>>> + >>>>> + if (mem_path) { >>>>> + mainrampagesize = qemu_mempath_getpagesize(mem_path); >>>>> + } else { >>>>> + mainrampagesize = getpagesize(); >>>>> + } >>>>> + >>>>> + /* it's possible we have memory-backend objects with >>>>> + * hugepage-backed RAM. these may get mapped into system >>>>> + * address space via -numa parameters or memory hotplug >>>>> + * hooks. we want to take these into account, but we >>>>> + * also want to make sure these supported hugepage >>>>> + * sizes are applicable across the entire range of memory >>>>> + * we may boot from, so we take the min across all >>>>> + * backends, and assume normal pages in cases where a >>>>> + * backend isn't backed by hugepages. 
>>>>> + */ >>>>> + memdev_root = object_resolve_path("/objects", NULL); >>>>> + if (memdev_root) { >>>>> + object_child_foreach(memdev_root, find_max_supported_pagesize, >>>>> &hpsize); >>>>> + } >>>>> + if (hpsize == LONG_MAX) { >>>>> + /* No additional memory regions found ==> Report main RAM page >>>>> size */ >>>>> + return mainrampagesize; >>>>> + } >>>>> + >>>>> + /* If NUMA is disabled or the NUMA nodes are not backed with a >>>>> + * memory-backend, then there is at least one node using "normal" >>>>> RAM, >>>>> + * so if its page size is smaller we have got to report that size >>>>> instead. >>>>> + */ >>>>> + if (hpsize > mainrampagesize && >>>>> + (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) { >>>>> + static bool warned; >>>>> + if (!warned) { >>>>> + error_report("Huge page support disabled (n/a for main >>>>> memory)."); >>>>> + warned = true; >>>>> + } >>>>> + return mainrampagesize; >>>>> + } >>>>> + >>>>> + return hpsize; >>>>> +} >>>>> +#else >>>>> +long qemu_getrampagesize(void) >>>>> +{ >>>>> + return getpagesize(); >>>>> +} >>>>> +#endif >>>>> + >>>>> +#ifdef __linux__ >>>>> static int64_t get_file_size(int fd) >>>>> { >>>>> int64_t size = lseek(fd, 0, SEEK_END); >>>>> diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c >>>>> index 6e91a4d8bb..e0abffa8ad 100644 >>>>> --- a/target-ppc/kvm.c >>>>> +++ b/target-ppc/kvm.c >>>>> @@ -42,6 +42,7 @@ >>>>> #include "trace.h" >>>>> #include "exec/gdbstub.h" >>>>> #include "exec/memattrs.h" >>>>> +#include "exec/ram_addr.h" >>>>> #include "sysemu/hostmem.h" >>>>> #include "qemu/cutils.h" >>>>> #if defined(TARGET_PPC64) >>>>> @@ -325,106 +326,6 @@ static void kvm_get_smmu_info(PowerPCCPU *cpu, >>>>> struct kvm_ppc_smmu_info *info) >>>>> kvm_get_fallback_smmu_info(cpu, info); >>>>> } >>>>> >>>>> -static long gethugepagesize(const char *mem_path) >>>>> -{ >>>>> - struct statfs fs; >>>>> - int ret; >>>>> - >>>>> - do { >>>>> - ret = statfs(mem_path, &fs); >>>>> - } while (ret != 0 && errno == EINTR); 
>>>>> - >>>>> - if (ret != 0) { >>>>> - fprintf(stderr, "Couldn't statfs() memory path: %s\n", >>>>> - strerror(errno)); >>>>> - exit(1); >>>>> - } >>>>> - >>>>> -#define HUGETLBFS_MAGIC 0x958458f6 >>>>> - >>>>> - if (fs.f_type != HUGETLBFS_MAGIC) { >>>>> - /* Explicit mempath, but it's ordinary pages */ >>>>> - return getpagesize(); >>>>> - } >>>>> - >>>>> - /* It's hugepage, return the huge page size */ >>>>> - return fs.f_bsize; >>>>> -} >>>>> - >>>>> -/* >>>>> - * FIXME TOCTTOU: this iterates over memory backends' mem-path, which >>>>> - * may or may not name the same files / on the same filesystem now as >>>>> - * when we actually open and map them. Iterate over the file >>>>> - * descriptors instead, and use qemu_fd_getpagesize(). >>>>> - */ >>>>> -static int find_max_supported_pagesize(Object *obj, void *opaque) >>>>> -{ >>>>> - char *mem_path; >>>>> - long *hpsize_min = opaque; >>>>> - >>>>> - if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { >>>>> - mem_path = object_property_get_str(obj, "mem-path", NULL); >>>>> - if (mem_path) { >>>>> - long hpsize = gethugepagesize(mem_path); >>>>> - if (hpsize < *hpsize_min) { >>>>> - *hpsize_min = hpsize; >>>>> - } >>>>> - } else { >>>>> - *hpsize_min = getpagesize(); >>>>> - } >>>>> - } >>>>> - >>>>> - return 0; >>>>> -} >>>>> - >>>>> -static long getrampagesize(void) >>>>> -{ >>>>> - long hpsize = LONG_MAX; >>>>> - long mainrampagesize; >>>>> - Object *memdev_root; >>>>> - >>>>> - if (mem_path) { >>>>> - mainrampagesize = gethugepagesize(mem_path); >>>>> - } else { >>>>> - mainrampagesize = getpagesize(); >>>>> - } >>>>> - >>>>> - /* it's possible we have memory-backend objects with >>>>> - * hugepage-backed RAM. these may get mapped into system >>>>> - * address space via -numa parameters or memory hotplug >>>>> - * hooks. 
we want to take these into account, but we >>>>> - * also want to make sure these supported hugepage >>>>> - * sizes are applicable across the entire range of memory >>>>> - * we may boot from, so we take the min across all >>>>> - * backends, and assume normal pages in cases where a >>>>> - * backend isn't backed by hugepages. >>>>> - */ >>>>> - memdev_root = object_resolve_path("/objects", NULL); >>>>> - if (memdev_root) { >>>>> - object_child_foreach(memdev_root, find_max_supported_pagesize, >>>>> &hpsize); >>>>> - } >>>>> - if (hpsize == LONG_MAX) { >>>>> - /* No additional memory regions found ==> Report main RAM page >>>>> size */ >>>>> - return mainrampagesize; >>>>> - } >>>>> - >>>>> - /* If NUMA is disabled or the NUMA nodes are not backed with a >>>>> - * memory-backend, then there is at least one node using "normal" >>>>> RAM, >>>>> - * so if its page size is smaller we have got to report that size >>>>> instead. >>>>> - */ >>>>> - if (hpsize > mainrampagesize && >>>>> - (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) { >>>>> - static bool warned; >>>>> - if (!warned) { >>>>> - error_report("Huge page support disabled (n/a for main >>>>> memory)."); >>>>> - warned = true; >>>>> - } >>>>> - return mainrampagesize; >>>>> - } >>>>> - >>>>> - return hpsize; >>>>> -} >>>>> - >>>>> static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t >>>>> shift) >>>>> { >>>>> if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) { >>>>> @@ -454,7 +355,7 @@ static void kvm_fixup_page_sizes(PowerPCCPU *cpu) >>>>> has_smmu_info = true; >>>>> } >>>>> >>>>> - rampagesize = getrampagesize(); >>>>> + rampagesize = qemu_getrampagesize(); >>>>> >>>>> /* Convert to QEMU form */ >>>>> memset(&env->sps, 0, sizeof(env->sps)); >>>>> @@ -2177,7 +2078,7 @@ uint64_t kvmppc_rma_size(uint64_t current_size, >>>>> unsigned int hash_shift) >>>>> /* Find the largest hardware supported page size that's less than >>>>> * or equal to the (logical) backing page size of guest RAM */ 
>>>>> kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info); >>>>> - rampagesize = getrampagesize(); >>>>> + rampagesize = qemu_getrampagesize(); >>>>> best_page_shift = 0; >>>>> >>>>> for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) { >>>>> diff --git a/util/mmap-alloc.c b/util/mmap-alloc.c >>>>> index 5a85aa3c89..564c79109c 100644 >>>>> --- a/util/mmap-alloc.c >>>>> +++ b/util/mmap-alloc.c >>>>> @@ -39,6 +39,31 @@ size_t qemu_fd_getpagesize(int fd) >>>>> return getpagesize(); >>>>> } >>>>> >>>>> +size_t qemu_mempath_getpagesize(const char *mem_path) >>>>> +{ >>>>> +#ifdef CONFIG_LINUX >>>>> + struct statfs fs; >>>>> + int ret; >>>>> + >>>>> + do { >>>>> + ret = statfs(mem_path, &fs); >>>>> + } while (ret != 0 && errno == EINTR); >>>>> + >>>>> + if (ret != 0) { >>>>> + fprintf(stderr, "Couldn't statfs() memory path: %s\n", >>>>> + strerror(errno)); >>>>> + exit(1); >>>>> + } >>>>> + >>>>> + if (fs.f_type == HUGETLBFS_MAGIC) { >>>>> + /* It's hugepage, return the huge page size */ >>>>> + return fs.f_bsize; >>>>> + } >>>>> +#endif >>>>> + >>>>> + return getpagesize(); >>>>> +} >>>>> + >>>>> void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) >>>>> { >>>>> /* >>>> >>> >>> >> > > > > -- Alexey
signature.asc
Description: OpenPGP digital signature