This adds an adjustable limit for the total memory usage of the overlap prevention structures.
Signed-off-by: Max Reitz <mre...@redhat.com> --- block/qcow2-overlap.c | 183 +++++++++++++++++++++++++++++++++++++++++++++++--- block/qcow2.c | 2 +- block/qcow2.h | 2 +- 3 files changed, 177 insertions(+), 10 deletions(-) diff --git a/block/qcow2-overlap.c b/block/qcow2-overlap.c index c0a3d6d..a2ba5c7 100644 --- a/block/qcow2-overlap.c +++ b/block/qcow2-overlap.c @@ -23,9 +23,11 @@ */ #include "block/block_int.h" +#include "sysemu/block-backend.h" #include "qemu-common.h" #include "qemu/range.h" #include "qcow2.h" +#include "qapi-event.h" /* Number of clusters which are covered by each metadata window; * note that this may not exceed 2^16 as long as @@ -68,12 +70,50 @@ struct Qcow2MetadataList { unsigned current_age; + size_t mem_usage, max_mem_usage; + /* Index into the windows array */ int *cached_windows; size_t nb_cached_windows; }; /** + * Logs newly allocated memory in the @mdl->mem_usage field. Returns true iff + * @mdl->mem_usage + @size * @nmemb <= @mdl->max_mem_usage, that is, if + * allocating that memory is still within limits. In that case, @mdl->mem_usage + * is increased by @size * @nmemb; otherwise, it is left unchanged. + */ +static bool increase_mdl_mem_usage(Qcow2MetadataList *mdl, size_t size, + size_t nmemb) +{ + if (nmemb && SIZE_MAX / nmemb < size) { + /* Overflow */ + return false; + } + + size *= nmemb; + if (mdl->mem_usage + size < mdl->mem_usage || + mdl->mem_usage + size > mdl->max_mem_usage) + { + return false; + } + + mdl->mem_usage += size; + return true; +} + +static void signal_memory_excess(BlockDriverState *bs, uint64_t start_cluster, + uint64_t nb_clusters) +{ + BDRVQcowState *s = bs->opaque; + const char *ref = bs->blk ? blk_name(bs->blk) : bs->node_name; + + qapi_event_send_qcow2_overlap_check_memory_limit_reached(ref, true, + start_cluster * s->cluster_size, true, nb_clusters * s->cluster_size, + &error_abort); +} + +/** * Destroys the cached window bitmap. If it has been modified, the fragment list * will be rebuilt accordingly. */ @@ -81,11 +121,16 @@ static void destroy_window_bitmap(Qcow2MetadataList *mdl, Qcow2MetadataWindow *window) { Qcow2MetadataFragment *new_fragments; + bool increase_ret; if (!window->bitmap) { return; } + /* Treat the bitmap as freed already; while not really correct, this gives + * us space for the fragment list */ + mdl->mem_usage -= WINDOW_SIZE; + if (window->bitmap_modified) { int bitmap_i, fragment_i = 0; QCow2MetadataOverlap current_types = 0; @@ -105,6 +150,9 @@ static void destroy_window_bitmap(Qcow2MetadataList *mdl, } else { if (current_types && current_nb_clusters) { if (fragment_i >= window->fragments_array_size) { + mdl->mem_usage -= sizeof(Qcow2MetadataFragment) + * window->fragments_array_size; + window->fragments_array_size = 3 * window->fragments_array_size / 2 + 1; if (sizeof(Qcow2MetadataFragment) @@ -115,6 +163,16 @@ static void destroy_window_bitmap(Qcow2MetadataList *mdl, goto fail; } + increase_ret = increase_mdl_mem_usage(mdl, + sizeof(Qcow2MetadataFragment), + window->fragments_array_size); + /* This can never be false: Because mem_usage was + * reduced by WINDOW_SIZE at the beginning of this + * function and the size of the fragment list is certain + * to be less than WINDOW_SIZE, there must be enough + * memory available. */ + assert(increase_ret); + new_fragments = g_try_renew(Qcow2MetadataFragment, window->fragments, @@ -148,6 +206,7 @@ static void destroy_window_bitmap(Qcow2MetadataList *mdl, new_fragments = g_try_renew(Qcow2MetadataFragment, window->fragments, window->nb_fragments); if (new_fragments) { + mdl->mem_usage -= window->fragments_array_size - window->nb_fragments; window->fragments = new_fragments; window->fragments_array_size = window->nb_fragments; } @@ -163,12 +222,50 @@ fail: window->fragments = NULL; window->nb_fragments = 0; window->fragments_array_size = 0; + + /* window->bitmap is not destroyed, so we need to account for it (because + * we "borrowed" the space before) */ + increase_ret = increase_mdl_mem_usage(mdl, sizeof(uint8_t), WINDOW_SIZE); + /* If we land here, no additional memory was allocated (only + * window->fragments may grow in this function, but at this point it has + * been freed), so we must be able to reclaim the space */ + assert(increase_ret); +} + +/** + * Destroys the oldest bitmap. + * Returns the index in mdl->cached_windows[] of the window whose bitmap was + * destroyed, or -1 if the cache is empty. + */ +static int destroy_oldest_bitmap(Qcow2MetadataList *mdl) +{ + int cache_i, oldest_cache_i = -1; + unsigned oldest_cache_age = 0; + + for (cache_i = 0; cache_i < mdl->nb_cached_windows; cache_i++) { + if (mdl->cached_windows[cache_i] >= 0) { + unsigned age = mdl->current_age + - mdl->windows[mdl->cached_windows[cache_i]].age; + if (age > oldest_cache_age) { + oldest_cache_age = age; + oldest_cache_i = cache_i; + } + } + } + + if (oldest_cache_i < 0) { + return -1; + } + + destroy_window_bitmap(mdl, + &mdl->windows[mdl->cached_windows[oldest_cache_i]]); + return oldest_cache_i; } /** * Creates a bitmap from the fragment list. */ -static void build_window_bitmap(Qcow2MetadataList *mdl, +static bool build_window_bitmap(Qcow2MetadataList *mdl, Qcow2MetadataWindow *window) { int cache_i, oldest_cache_i = -1, i; @@ -202,9 +299,21 @@ static void build_window_bitmap(Qcow2MetadataList *mdl, /* Maybe there already is a bitmap because it was more space-efficient than * the range list representation */ if (window->bitmap) { - return; + return true; } + while (!increase_mdl_mem_usage(mdl, sizeof(uint8_t), WINDOW_SIZE)) { + /* It is better to try to decrease memory usage until this bitmap can + * fit than just to give up. Otherwise, we may end up at a point where + * the area covered by the cache remains unchanged forever. */ + mdl->cached_windows[cache_i] = -1; + cache_i = destroy_oldest_bitmap(mdl); + if (cache_i < 0) { + window->bitmap = NULL; + return false; + } + mdl->cached_windows[cache_i] = window - mdl->windows; + } window->bitmap = g_new0(uint8_t, WINDOW_SIZE); for (i = 0; i < window->nb_fragments; i++) { @@ -215,6 +324,8 @@ static void build_window_bitmap(Qcow2MetadataList *mdl, } window->bitmap_modified = false; + + return true; } /** @@ -251,10 +362,24 @@ void qcow2_metadata_list_enter(BlockDriverState *bs, uint64_t offset, * the array to exactly the required size */ Qcow2MetadataWindow *new_windows; + if (!increase_mdl_mem_usage(s->metadata_list, + sizeof(Qcow2MetadataWindow), + window_i + 1 + - s->metadata_list->nb_windows)) + { + /* This will happen for any cluster from here until end_cluster, + * so we can just abort immediately */ + signal_memory_excess(bs, window_i * WINDOW_SIZE, + end_cluster - current_cluster); + return; + } + new_windows = g_try_renew(Qcow2MetadataWindow, s->metadata_list->windows, window_i + 1); if (!new_windows) { + signal_memory_excess(bs, window_i * WINDOW_SIZE, + end_cluster - current_cluster); return; } @@ -268,7 +393,10 @@ void qcow2_metadata_list_enter(BlockDriverState *bs, uint64_t offset, window = &s->metadata_list->windows[window_i]; if (!window->bitmap) { - build_window_bitmap(s->metadata_list, window); + if (!build_window_bitmap(s->metadata_list, window)) { + signal_memory_excess(bs, window_i * WINDOW_SIZE, WINDOW_SIZE); + goto next_window; + } } for (bitmap_i = bitmap_i_start; bitmap_i < bitmap_i_end; bitmap_i++) { @@ -278,6 +406,7 @@ void qcow2_metadata_list_enter(BlockDriverState *bs, uint64_t offset, window->age = s->metadata_list->current_age++; window->bitmap_modified = true; +next_window: /* Go to the next window */ current_cluster += WINDOW_SIZE - bitmap_i_start; } @@ -320,7 +449,17 @@ void qcow2_metadata_list_remove(BlockDriverState *bs, uint64_t offset, window = &s->metadata_list->windows[window_i]; if (!window->bitmap) { - build_window_bitmap(s->metadata_list, window); + if (!build_window_bitmap(s->metadata_list, window)) { + signal_memory_excess(bs, window_i * WINDOW_SIZE, WINDOW_SIZE); + /* We *must* drop the given metadata types from the list, no + * matter what; therefore, the best thing we can do is empty + * the fragment list */ + g_free(window->fragments); + window->fragments = NULL; + window->nb_fragments = 0; + window->fragments_array_size = 0; + goto next_window; + } } for (bitmap_i = bitmap_i_start; bitmap_i < bitmap_i_end; bitmap_i++) { @@ -330,12 +469,14 @@ void qcow2_metadata_list_remove(BlockDriverState *bs, uint64_t offset, window->age = s->metadata_list->current_age++; window->bitmap_modified = true; +next_window: /* Go to the next window */ current_cluster += WINDOW_SIZE - bitmap_i_start; } } -static int single_check_metadata_overlap(Qcow2MetadataList *mdl, int ign, +static int single_check_metadata_overlap(BlockDriverState *bs, + Qcow2MetadataList *mdl, int ign, uint64_t cluster) { uint64_t window_i = cluster / WINDOW_SIZE; @@ -348,7 +489,10 @@ static int single_check_metadata_overlap(Qcow2MetadataList *mdl, int ign, window = &mdl->windows[window_i]; if (!window->bitmap) { - build_window_bitmap(mdl, window); + if (!build_window_bitmap(mdl, window)) { + signal_memory_excess(bs, window_i * WINDOW_SIZE, WINDOW_SIZE); + return 0; + } } window->age = mdl->current_age++; @@ -372,7 +516,7 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, for (current_cluster = start_cluster; current_cluster < end_cluster; current_cluster++) { - ret |= single_check_metadata_overlap(s->metadata_list, ign, + ret |= single_check_metadata_overlap(bs, s->metadata_list, ign, current_cluster); } @@ -380,16 +524,33 @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset, } int qcow2_create_empty_metadata_list(BlockDriverState *bs, size_t cache_size, - Error **errp) + size_t max_total_mem_size, Error **errp) { BDRVQcowState *s = bs->opaque; int ret; size_t cache_entries, i; + if (max_total_mem_size < sizeof(Qcow2MetadataList)) { + error_setg(errp, "Cannot allocate metadata list"); + ret = -ENOMEM; + goto fail; + } + s->metadata_list = g_new0(Qcow2MetadataList, 1); + s->metadata_list->max_mem_usage = max_total_mem_size; + s->metadata_list->mem_usage = sizeof(Qcow2MetadataList); + s->metadata_list->nb_windows = DIV_ROUND_UP(bdrv_nb_sectors(bs->file), (uint64_t)s->cluster_sectors * WINDOW_SIZE); + if (!increase_mdl_mem_usage(s->metadata_list, sizeof(Qcow2MetadataWindow), + s->metadata_list->nb_windows)) + { + error_setg(errp, "Cannot allocate metadata overlap check windows"); + ret = -ENOMEM; + goto fail; + } + s->metadata_list->windows = g_try_new0(Qcow2MetadataWindow, s->metadata_list->nb_windows); if (s->metadata_list->nb_windows && !s->metadata_list->windows) { @@ -408,6 +569,12 @@ int qcow2_create_empty_metadata_list(BlockDriverState *bs, size_t cache_size, } s->metadata_list->nb_cached_windows = cache_entries; + if (!increase_mdl_mem_usage(s->metadata_list, sizeof(int), cache_entries)) { + error_setg(errp, "Cannot allocate metadata overlap cache pointers"); + ret = -ENOMEM; + goto fail; + } + s->metadata_list->cached_windows = g_try_new(int, cache_entries); if (!s->metadata_list->cached_windows) { error_setg(errp, "Could not allocate metadata overlap cache pointers"); diff --git a/block/qcow2.c b/block/qcow2.c index 0e7b646..d1257b1 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -768,7 +768,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags, if (s->overlap_check) { /* TODO: Let the user override this default */ - ret = qcow2_create_empty_metadata_list(bs, 65536, errp); + ret = qcow2_create_empty_metadata_list(bs, 65536, SIZE_MAX, errp); if (ret < 0) { goto fail; } diff --git a/block/qcow2.h b/block/qcow2.h index e48dcdc..7f4059e 100644 --- a/block/qcow2.h +++ b/block/qcow2.h @@ -595,7 +595,7 @@ void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table); /* qcow2-overlap.c functions */ int qcow2_create_empty_metadata_list(BlockDriverState *bs, size_t cache_size, - Error **errp); + size_t max_total_mem_size, Error **errp); void qcow2_metadata_list_destroy(BlockDriverState *bs); void qcow2_metadata_list_enter(BlockDriverState *bs, uint64_t offset, int nb_clusters, QCow2MetadataOverlap type); -- 2.4.6