On Mon, Nov 17, 2014 at 10:08:53PM +0200, Michael S. Tsirkin wrote: > Add API to manage on-device RAM. > This looks just like regular RAM from migration POV, > but has three special properties internally: > > - it is never exposed to guest > - block is sized on migration, making it easier to extend > without breaking migration compatibility or wasting > virtual memory > - callers must specify an upper bound on size > > Device is notified on resize, so it can adjust if necessary. > > qemu_ram_alloc_device allocates this memory, qemu_ram_resize resizes it. > > Signed-off-by: Michael S. Tsirkin <m...@redhat.com>
Minor clarification: the need to supply a max size helps simplify the code, but it's also a security feature: the next patch uses it to validate the incoming stream, preventing DoS attacks that would make QEMU allocate huge amounts of RAM. > --- > include/exec/cpu-all.h | 8 +++- > include/exec/ram_addr.h | 7 +++ > exec.c | 113 > +++++++++++++++++++++++++++++++++++++++++++----- > 3 files changed, 115 insertions(+), 13 deletions(-) > > diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h > index 62f5581..26eb9b2 100644 > --- a/include/exec/cpu-all.h > +++ b/include/exec/cpu-all.h > @@ -299,11 +299,15 @@ CPUArchState *cpu_copy(CPUArchState *env); > > /* memory API */ > > -typedef struct RAMBlock { > +typedef struct RAMBlock RAMBlock; > + > +struct RAMBlock { > struct MemoryRegion *mr; > uint8_t *host; > ram_addr_t offset; > ram_addr_t length; > + ram_addr_t max_length; > + void (*resized)(const char*, uint64_t length, void *host); > uint32_t flags; > char idstr[256]; > /* Reads can take either the iothread or the ramlist lock. 
> @@ -311,7 +315,7 @@ typedef struct RAMBlock { > */ > QTAILQ_ENTRY(RAMBlock) next; > int fd; > -} RAMBlock; > +}; > > static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset) > { > diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h > index d7e5238..72ab12b 100644 > --- a/include/exec/ram_addr.h > +++ b/include/exec/ram_addr.h > @@ -28,12 +28,19 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, > MemoryRegion *mr, > ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, > MemoryRegion *mr, Error **errp); > ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp); > +ram_addr_t qemu_ram_alloc_device(ram_addr_t size, ram_addr_t max_size, > + void (*resized)(const char*, > + uint64_t length, > + void *host), > + MemoryRegion *mr, Error **errp); > int qemu_get_ram_fd(ram_addr_t addr); > void *qemu_get_ram_block_host_ptr(ram_addr_t addr); > void *qemu_get_ram_ptr(ram_addr_t addr); > void qemu_ram_free(ram_addr_t addr); > void qemu_ram_free_from_ptr(ram_addr_t addr); > > +int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp); > + > static inline bool cpu_physical_memory_get_dirty(ram_addr_t start, > ram_addr_t length, > unsigned client) > diff --git a/exec.c b/exec.c > index 9648669..a177816 100644 > --- a/exec.c > +++ b/exec.c > @@ -75,6 +75,11 @@ static MemoryRegion io_mem_unassigned; > /* RAM is mmap-ed with MAP_SHARED */ > #define RAM_SHARED (1 << 1) > > +/* On-device RAM allocated with g_malloc: supports realloc, > + * not accessible to vcpu on kvm. 
> + */ > +#define RAM_DEVICE (1 << 2) > + > #endif > > struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); > @@ -1186,7 +1191,7 @@ static ram_addr_t find_ram_offset(ram_addr_t size) > QTAILQ_FOREACH(block, &ram_list.blocks, next) { > ram_addr_t end, next = RAM_ADDR_MAX; > > - end = block->offset + block->length; > + end = block->offset + block->max_length; > > QTAILQ_FOREACH(next_block, &ram_list.blocks, next) { > if (next_block->offset >= end) { > @@ -1214,7 +1219,7 @@ ram_addr_t last_ram_offset(void) > ram_addr_t last = 0; > > QTAILQ_FOREACH(block, &ram_list.blocks, next) > - last = MAX(last, block->offset + block->length); > + last = MAX(last, block->offset + block->max_length); > > return last; > } > @@ -1296,6 +1301,50 @@ static int memory_try_enable_merging(void *addr, > size_t len) > return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE); > } > > +int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp) > +{ > + RAMBlock *block = find_ram_block(base); > + > + assert(block); > + > + if (block->length == newsize) { > + return 0; > + } > + > + if (!(block->flags & RAM_DEVICE)) { > + error_setg_errno(errp, EINVAL, > + "Length mismatch: %s: 0x" RAM_ADDR_FMT > + " in != 0x" RAM_ADDR_FMT, block->idstr, > + newsize, block->length); > + return -EINVAL; > + } > + > + if (block->max_length < newsize) { > + error_setg_errno(errp, EINVAL, > + "Length too large: %s: 0x" RAM_ADDR_FMT > + " > 0x" RAM_ADDR_FMT, block->idstr, > + newsize, block->max_length); > + return -EINVAL; > + } > + > + block->host = g_realloc(block->host, newsize); > + if (!block->host) { > + error_setg_errno(errp, errno, > + "cannot allocate guest memory '%s'", > + memory_region_name(block->mr)); > + return -ENOMEM; > + } > + > + cpu_physical_memory_clear_dirty_range_nocode(block->offset, > block->length); > + block->length = newsize; > + memset(block->host, 0, block->length); > + cpu_physical_memory_set_dirty_range_nocode(block->offset, block->length); > + if (block->resized) { > + 
block->resized(block->idstr, newsize, block->host); > + } > + return 0; > +} > + > static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp) > { > RAMBlock *block; > @@ -1308,7 +1357,16 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, > Error **errp) > new_block->offset = find_ram_offset(new_block->length); > > if (!new_block->host) { > - if (xen_enabled()) { > + if (new_block->flags & RAM_DEVICE) { > + new_block->host = g_malloc0(new_block->length); > + if (!new_block->host) { > + error_setg_errno(errp, errno, > + "cannot allocate guest memory '%s'", > + memory_region_name(new_block->mr)); > + qemu_mutex_unlock_ramlist(); > + return -1; > + } > + } else if (xen_enabled()) { > xen_ram_alloc(new_block->offset, new_block->length, > new_block->mr); > } else { > new_block->host = phys_mem_alloc(new_block->length, > @@ -1352,12 +1410,14 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, > Error **errp) > } > cpu_physical_memory_set_dirty_range(new_block->offset, > new_block->length); > > - qemu_ram_setup_dump(new_block->host, new_block->length); > - qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE); > - qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK); > + if (!(new_block->flags & RAM_DEVICE)) { > + qemu_ram_setup_dump(new_block->host, new_block->length); > + qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE); > + qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK); > > - if (kvm_enabled()) { > - kvm_setup_guest_memory(new_block->host, new_block->length); > + if (kvm_enabled()) { > + kvm_setup_guest_memory(new_block->host, new_block->length); > + } > } > > return new_block->offset; > @@ -1392,6 +1452,7 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, > MemoryRegion *mr, > new_block = g_malloc0(sizeof(*new_block)); > new_block->mr = mr; > new_block->length = size; > + new_block->max_length = size; > new_block->flags = share ? 
RAM_SHARED : 0; > new_block->host = file_ram_alloc(new_block, size, > mem_path, errp); > @@ -1410,7 +1471,12 @@ ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, > MemoryRegion *mr, > } > #endif > > -ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, > +static > +ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size, > + void (*resized)(const char*, > + uint64_t length, > + void *host), > + void *host, bool device, > MemoryRegion *mr, Error **errp) > { > RAMBlock *new_block; > @@ -1418,14 +1484,21 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, > void *host, > Error *local_err = NULL; > > size = TARGET_PAGE_ALIGN(size); > + max_size = TARGET_PAGE_ALIGN(max_size); > new_block = g_malloc0(sizeof(*new_block)); > new_block->mr = mr; > + new_block->resized = resized; > new_block->length = size; > + new_block->max_length = max_size; > + assert(max_size >= size); > new_block->fd = -1; > new_block->host = host; > if (host) { > new_block->flags |= RAM_PREALLOC; > } > + if (device) { > + new_block->flags |= RAM_DEVICE; > + } > addr = ram_block_add(new_block, &local_err); > if (local_err) { > g_free(new_block); > @@ -1435,9 +1508,24 @@ ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, > void *host, > return addr; > } > > +ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host, > + MemoryRegion *mr, Error **errp) > +{ > + return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp); > +} > + > ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp) > { > - return qemu_ram_alloc_from_ptr(size, NULL, mr, errp); > + return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp); > +} > + > +ram_addr_t qemu_ram_alloc_device(ram_addr_t size, ram_addr_t maxsz, > + void (*resized)(const char*, > + uint64_t length, > + void *host), > + MemoryRegion *mr, Error **errp) > +{ > + return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, > errp); > } > > void 
qemu_ram_free_from_ptr(ram_addr_t addr) > @@ -1471,6 +1559,8 @@ void qemu_ram_free(ram_addr_t addr) > ram_list.version++; > if (block->flags & RAM_PREALLOC) { > ; > + } else if (block->flags & RAM_DEVICE) { > + g_free(block->host); > } else if (xen_enabled()) { > xen_invalidate_map_cache_entry(block->host); > #ifndef _WIN32 > @@ -1501,7 +1591,8 @@ void qemu_ram_remap(ram_addr_t addr, ram_addr_t length) > offset = addr - block->offset; > if (offset < block->length) { > vaddr = ramblock_ptr(block, offset); > - if (block->flags & RAM_PREALLOC) { > + if (block->flags & RAM_PREALLOC || > + block->flags & RAM_DEVICE) { > ; > } else if (xen_enabled()) { > abort(); > -- > MST >