On 22 September 2015 at 13:25, Richard Henderson <r...@twiddle.net> wrote:
> This will catch any overflow of the buffer.
>
> Add a native win32 alternative for alloc_code_gen_buffer;
> remove the malloc alternative.
>
> Signed-off-by: Richard Henderson <r...@twiddle.net>
> ---
>  translate-all.c | 210 ++++++++++++++++++++++++++++++++------------------------
>  1 file changed, 119 insertions(+), 91 deletions(-)
>
> diff --git a/translate-all.c b/translate-all.c
> index 4c994bb..0049927 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -311,31 +311,6 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr)
>      return false;
>  }
>
> -#ifdef _WIN32
> -static __attribute__((unused)) void map_exec(void *addr, long size)
> -{
> -    DWORD old_protect;
> -    VirtualProtect(addr, size,
> -                   PAGE_EXECUTE_READWRITE, &old_protect);
> -}
> -#else
> -static __attribute__((unused)) void map_exec(void *addr, long size)
> -{
> -    unsigned long start, end, page_size;
> -
> -    page_size = getpagesize();
> -    start = (unsigned long)addr;
> -    start &= ~(page_size - 1);
> -
> -    end = (unsigned long)addr + size;
> -    end += page_size - 1;
> -    end &= ~(page_size - 1);
> -
> -    mprotect((void *)start, end - start,
> -             PROT_READ | PROT_WRITE | PROT_EXEC);
> -}
> -#endif
> -
>  void page_size_init(void)
>  {
>      /* NOTE: we can always suppose that qemu_host_page_size >=
> @@ -472,14 +447,6 @@ static inline PageDesc *page_find(tb_page_addr_t index)
>  #define USE_STATIC_CODE_GEN_BUFFER
>  #endif
>
> -/* ??? Should configure for this, not list operating systems here. */
> -#if (defined(__linux__) \
> -    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
> -    || defined(__DragonFly__) || defined(__OpenBSD__) \
> -    || defined(__NetBSD__))
> -# define USE_MMAP
> -#endif
> -
>  /* Minimum size of the code gen buffer. This number is randomly chosen,
>     but not so small that we can't have a fair number of TB's live. */
>  #define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024)
> @@ -567,22 +534,102 @@ static inline void *split_cross_256mb(void *buf1, size_t size1)
>  static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
>      __attribute__((aligned(CODE_GEN_ALIGN)));
>
> +# ifdef _WIN32

Why the space before ifdef here?

> +static inline void do_protect(void *addr, long size, int prot)
> +{
> +    DWORD old_protect;
> +    VirtualProtect(addr, size, PAGE_EXECUTE_READWRITE, &old_protect);

The 'prot' argument isn't used -- did you mean to pass it in as
VirtualProtect argument 3?
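Presumably the intent was for do_protect() to forward its argument, so that
map_none() below really applies PAGE_NOACCESS. A minimal sketch of what I
guess was meant (untested, not part of the patch):

    static inline void do_protect(void *addr, long size, int prot)
    {
        DWORD old_protect;
        /* Forward the caller's protection rather than hard-coding RWX. */
        VirtualProtect(addr, size, prot, &old_protect);
    }

As written, map_none() on the guard page would leave it read/write/execute,
so the guard page wouldn't actually catch overflows on Windows.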
> +}
> +
> +static inline void map_exec(void *addr, long size)
> +{
> +    do_protect(addr, size, PAGE_EXECUTE_READWRITE);
> +}
> +
> +static inline void map_none(void *addr, long size)
> +{
> +    do_protect(addr, size, PAGE_NOACCESS);
> +}
> +# else
> +static inline void do_protect(void *addr, long size, int prot)
> +{
> +    uintptr_t start, end;
> +
> +    start = (uintptr_t)addr;
> +    start &= qemu_real_host_page_mask;
> +
> +    end = (uintptr_t)addr + size;
> +    end = ROUND_UP(end, qemu_real_host_page_size);
> +
> +    mprotect((void *)start, end - start, prot);
> +}
> +
> +static inline void map_exec(void *addr, long size)
> +{
> +    do_protect(addr, size, PROT_READ | PROT_WRITE | PROT_EXEC);
> +}
> +
> +static inline void map_none(void *addr, long size)
> +{
> +    do_protect(addr, size, PROT_NONE);
> +}
> +# endif /* WIN32 */
> +
>  static inline void *alloc_code_gen_buffer(void)
>  {
>      void *buf = static_code_gen_buffer;
> +    size_t full_size, size;
> +
> +    /* The size of the buffer, rounded down to end on a page boundary. */
> +    full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer))
> +                 & qemu_real_host_page_mask) - (uintptr_t)buf;
> +
> +    /* Reserve a guard page. */
> +    size = full_size - qemu_real_host_page_size;
> +
> +    /* Honor a command-line option limiting the size of the buffer. */
> +    if (size > tcg_ctx.code_gen_buffer_size) {
> +        size = (((uintptr_t)buf + tcg_ctx.code_gen_buffer_size)
> +                & qemu_real_host_page_mask) - (uintptr_t)buf;
> +    }
> +    tcg_ctx.code_gen_buffer_size = size;
> +
>  #ifdef __mips__
> -    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
> -        buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size);
> +    if (cross_256mb(buf, size)) {
> +        buf = split_cross_256mb(buf, size);
> +        size = tcg_ctx.code_gen_buffer_size;
>      }
>  #endif
> -    map_exec(buf, tcg_ctx.code_gen_buffer_size);
> +
> +    map_exec(buf, size);
> +    map_none(buf + size, qemu_real_host_page_size);
> +    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);

I think we're now doing the MADV_HUGEPAGE over "buffer size minus a page"
rather than "buffer size". Does that mean we've gone from doing the madvise
on a whole number of hugepages to doing it on something that's not a whole
number of hugepages, and if so does the kernel decide not to use hugepages
here? (aka, should we make the buffer size we allocate size + a guard page,
rather than taking the guard page out of the size?)
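If a partially-covered advice range does turn out to defeat huge pages, one
option would be to only advise a whole number of huge pages and leave the
tail to normal pages. Illustration only, not something the patch does, and
HUGE_PAGE_SIZE is a made-up stand-in for the transparent-hugepage size
(2MB on x86-64):

    /* Hypothetical: round the advised length down to a whole number of
       huge pages; the few pages at the tail just stay small pages. */
    size_t adv_len = size & ~((size_t)HUGE_PAGE_SIZE - 1);
    if (adv_len != 0) {
        qemu_madvise(buf, adv_len, QEMU_MADV_HUGEPAGE);
    }

That would keep the guard page carved out of the static array while only
giving up huge-page backing for the last partial chunk.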
> +
>      return buf;
>  }
> -#elif defined(USE_MMAP)
> +#elif defined(_WIN32)
> +static inline void *alloc_code_gen_buffer(void)
> +{
> +    size_t size = tcg_ctx.code_gen_buffer_size;
> +    void *buf1, *buf2;
> +
> +    /* Perform the allocation in two steps, so that the guard page
> +       is reserved but uncommitted. */
> +    buf1 = VirtualAlloc(NULL, size + qemu_real_host_page_size,
> +                        MEM_RESERVE, PAGE_NOACCESS);
> +    if (buf1 != NULL) {
> +        buf2 = VirtualAlloc(buf1, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
> +        assert(buf1 == buf2);
> +    }
> +
> +    return buf1;
> +}
> +#else
>  static inline void *alloc_code_gen_buffer(void)
>  {
>      int flags = MAP_PRIVATE | MAP_ANONYMOUS;
>      uintptr_t start = 0;
> +    size_t size = tcg_ctx.code_gen_buffer_size;
>      void *buf;
>
>      /* Constrain the position of the buffer based on the host cpu.
> @@ -598,86 +645,70 @@ static inline void *alloc_code_gen_buffer(void)
>         Leave the choice of exact location with the kernel. */
>      flags |= MAP_32BIT;
>      /* Cannot expect to map more than 800MB in low memory. */
> -    if (tcg_ctx.code_gen_buffer_size > 800u * 1024 * 1024) {
> -        tcg_ctx.code_gen_buffer_size = 800u * 1024 * 1024;
> +    if (size > 800u * 1024 * 1024) {
> +        tcg_ctx.code_gen_buffer_size = size = 800u * 1024 * 1024;
>      }
>  # elif defined(__sparc__)
>      start = 0x40000000ul;
>  # elif defined(__s390x__)
>      start = 0x90000000ul;
>  # elif defined(__mips__)
> -    /* ??? We ought to more explicitly manage layout for softmmu too. */
> -#  ifdef CONFIG_USER_ONLY
> -    start = 0x68000000ul;
> -#  elif _MIPS_SIM == _ABI64
> +#  if _MIPS_SIM == _ABI64
>      start = 0x128000000ul;
>  #  else
>      start = 0x08000000ul;
>  #  endif
>  # endif
>
> -    buf = mmap((void *)start, tcg_ctx.code_gen_buffer_size,
> -               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
> +    buf = mmap((void *)start, size + qemu_real_host_page_size,
> +               PROT_NONE, flags, -1, 0);
>      if (buf == MAP_FAILED) {
>          return NULL;
>      }
>
>  #ifdef __mips__
> -    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
> +    if (cross_256mb(buf, size)) {
>          /* Try again, with the original still mapped, to avoid re-acquiring
>             that 256mb crossing. This time don't specify an address. */
> -        size_t size2, size1 = tcg_ctx.code_gen_buffer_size;
> -        void *buf2 = mmap(NULL, size1, PROT_WRITE | PROT_READ | PROT_EXEC,
> -                          flags, -1, 0);
> -        if (buf2 != MAP_FAILED) {
> -            if (!cross_256mb(buf2, size1)) {
> +        size_t size2;
> +        void *buf2 = mmap(NULL, size + qemu_real_host_page_size,
> +                          PROT_NONE, flags, -1, 0);
> +        switch (buf2 != MAP_FAILED) {
> +        case 1:
> +            if (!cross_256mb(buf2, size)) {
>                  /* Success! Use the new buffer. */
> -                munmap(buf, size1);
> -                return buf2;
> +                munmap(buf, size);
> +                break;
>              }
>              /* Failure. Work with what we had. */
> -            munmap(buf2, size1);
> +            munmap(buf2, size);
> +            /* fallthru */
> +        default:
> +            /* Split the original buffer. Free the smaller half. */
> +            buf2 = split_cross_256mb(buf, size);
> +            size2 = tcg_ctx.code_gen_buffer_size;
> +            if (buf == buf2) {
> +                munmap(buf + size2 + qemu_real_host_page_size, size - size2);
> +            } else {
> +                munmap(buf, size - size2);
> +            }
> +            size = size2;
> +            break;
>          }
> -
> -        /* Split the original buffer. Free the smaller half. */
> -        buf2 = split_cross_256mb(buf, size1);
> -        size2 = tcg_ctx.code_gen_buffer_size;
> -        munmap(buf + (buf == buf2 ? size2 : 0), size1 - size2);
> -        return buf2;
> +        buf = buf2;
>      }
>  #endif
>
> -    return buf;
> -}
> -#else
> -static inline void *alloc_code_gen_buffer(void)
> -{
> -    void *buf = g_try_malloc(tcg_ctx.code_gen_buffer_size);
> +    /* Make the final buffer accessable. The guard page at the end
> +       will remain inaccessable with PROT_NONE. */

"accessible"; "inaccessible".

> +    mprotect(buf, size, PROT_WRITE | PROT_READ | PROT_EXEC);
>
> -    if (buf == NULL) {
> -        return NULL;
> -    }
> +    /* Request large pages for the buffer. */
> +    qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
>
> -#ifdef __mips__
> -    if (cross_256mb(buf, tcg_ctx.code_gen_buffer_size)) {
> -        void *buf2 = g_malloc(tcg_ctx.code_gen_buffer_size);
> -        if (buf2 != NULL && !cross_256mb(buf2, size1)) {
> -            /* Success! Use the new buffer. */
> -            free(buf);
> -            buf = buf2;
> -        } else {
> -            /* Failure. Work with what we had. Since this is malloc
> -               and not mmap, we can't free the other half. */
> -            free(buf2);
> -            buf = split_cross_256mb(buf, tcg_ctx.code_gen_buffer_size);
> -        }
> -    }
> -#endif
> -
> -    map_exec(buf, tcg_ctx.code_gen_buffer_size);
>      return buf;
>  }
> -#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
> +#endif /* USE_STATIC_CODE_GEN_BUFFER, WIN32, POSIX */
>
>  static inline void code_gen_alloc(size_t tb_size)
>  {
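Stripped of the mips and Win32 special cases, the guard-page scheme this
ends up with on POSIX hosts boils down to roughly the following sketch
(illustration only; alloc_with_guard() is not a function in the patch).
The point is that the trailing page is never made accessible, so code
generation that overruns the buffer faults immediately instead of
scribbling over whatever happens to follow it:

    #include <stddef.h>
    #include <sys/mman.h>

    /* Reserve size + one page with no access, then open up only the
       first 'size' bytes; the last page stays PROT_NONE as the guard. */
    static void *alloc_with_guard(size_t size, size_t page_size)
    {
        void *buf = mmap(NULL, size + page_size, PROT_NONE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (buf == MAP_FAILED) {
            return NULL;
        }
        if (mprotect(buf, size, PROT_READ | PROT_WRITE | PROT_EXEC) != 0) {
            munmap(buf, size + page_size);
            return NULL;
        }
        return buf;
    }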
> @@ -688,9 +719,6 @@ static inline void code_gen_alloc(size_t tb_size)
>          exit(1);
>      }
>
> -    qemu_madvise(tcg_ctx.code_gen_buffer, tcg_ctx.code_gen_buffer_size,
> -                 QEMU_MADV_HUGEPAGE);
> -
>      /* Estimate a good size for the number of TBs we can support. We
>         still haven't deducted the prologue from the buffer size here,
>         but that's minimal and won't affect the estimate much. */
> @@ -708,8 +736,8 @@ static inline void code_gen_alloc(size_t tb_size)
>  void tcg_exec_init(unsigned long tb_size)
>  {
>      cpu_gen_init();
> -    code_gen_alloc(tb_size);
>      page_init();
> +    code_gen_alloc(tb_size);
>  #if defined(CONFIG_SOFTMMU)
>      /* There's no guest base to take into account, so go ahead and
>         initialize the prologue now. */
> --
> 2.4.3
>

thanks
-- PMM