For storing requests use an rbtree, here are add basic operations on the rbtree to work with cache nodes.
Signed-off-by: Pavel Butsykin <pbutsy...@virtuozzo.com> --- block/pcache.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 189 insertions(+), 1 deletion(-) diff --git a/block/pcache.c b/block/pcache.c index 7f221d6..f5022f9 100644 --- a/block/pcache.c +++ b/block/pcache.c @@ -27,6 +27,7 @@ #include "block/raw-aio.h" #include "qapi/error.h" #include "qapi/qmp/qstring.h" +#include "qemu/rbtree.h" #define PCACHE_DEBUG @@ -37,9 +38,53 @@ #define DPRINTF(fmt, ...) do { } while (0) #endif +typedef struct RbNodeKey { + uint64_t num; + uint32_t size; +} RbNodeKey; + +typedef struct BlockNode { + struct RbNode rb_node; + union { + RbNodeKey key; + struct { + uint64_t sector_num; + uint32_t nb_sectors; + }; + }; + QTAILQ_ENTRY(BlockNode) entry; +} BlockNode; + +typedef struct PCNode { + BlockNode cm; + + uint8_t *data; +} PCNode; + +typedef struct ReqStor { + struct { + struct RbRoot root; + CoMutex lock; + } tree; + + uint32_t curr_size; +} ReqStor; + +typedef struct BDRVPCacheState { + BlockDriverState **bs; + + ReqStor pcache; + + struct { + QTAILQ_HEAD(pcache_head, BlockNode) head; + CoMutex lock; + } list; +} BDRVPCacheState; + typedef struct PrefCacheAIOCB { BlockAIOCB common; + BDRVPCacheState *s; QEMUIOVector *qiov; uint64_t sector_num; uint32_t nb_sectors; @@ -64,6 +109,124 @@ static QemuOptsList runtime_opts = { }, }; +#define PCNODE(_n) ((PCNode *)(_n)) + +static int pcache_key_cmp(const RbNodeKey *key1, const RbNodeKey *key2) +{ + assert(key1 != NULL); + assert(key2 != NULL); + + if (key1->num >= key2->num + key2->size) { + return 1; + } + if (key1->num + key1->size <= key2->num) { + return -1; + } + + return 0; +} + +static void *node_insert(struct RbRoot *root, BlockNode *node) +{ + struct RbNode **new = &(root->rb_node), *parent = NULL; + + /* Figure out where to put new node */ + while (*new) { + BlockNode *this = container_of(*new, BlockNode, rb_node); + int result = pcache_key_cmp(&node->key, &this->key); + if (result == 0) { + return this; + } + parent = *new; + new = result < 0 ? &((*new)->rb_left) : &((*new)->rb_right); + } + /* Add new node and rebalance tree. */ + rb_link_node(&node->rb_node, parent, new); + rb_insert_color(&node->rb_node, root); + + return node; +} + +static inline PCNode *pcache_node_insert(struct RbRoot *root, PCNode *node) +{ + return node_insert(root, &node->cm); +} + +static inline void pcache_node_free(PCNode *node) +{ + g_free(node->data); + g_slice_free1(sizeof(*node), node); +} + +static inline void *pcache_node_alloc(RbNodeKey* key) +{ + PCNode *node = g_slice_alloc(sizeof(*node)); + + node->cm.sector_num = key->num; + node->cm.nb_sectors = key->size; + node->data = g_malloc(node->cm.nb_sectors << BDRV_SECTOR_BITS); + + return node; +} + +static bool pcache_node_find_and_create(PrefCacheAIOCB *acb, RbNodeKey *key, + PCNode **out_node) +{ + BDRVPCacheState *s = acb->s; + PCNode *new_node = pcache_node_alloc(key); + PCNode *found; + + qemu_co_mutex_lock(&s->pcache.tree.lock); + found = pcache_node_insert(&s->pcache.tree.root, new_node); + qemu_co_mutex_unlock(&s->pcache.tree.lock); + if (found != new_node) { + pcache_node_free(new_node); + *out_node = found; + return false; + } + atomic_add(&s->pcache.curr_size, new_node->cm.nb_sectors); + + qemu_co_mutex_lock(&s->list.lock); + QTAILQ_INSERT_HEAD(&s->list.head, &new_node->cm, entry); + qemu_co_mutex_unlock(&s->list.lock); + + *out_node = new_node; + return true; +} + +static inline void prefetch_init_key(PrefCacheAIOCB *acb, RbNodeKey* key) +{ + key->num = acb->sector_num; + key->size = acb->nb_sectors; +} + +enum { + PREFETCH_NEW_NODE = 0, + PREFETCH_FULL_UP = 1, + PREFETCH_PART_UP = 2 +}; + +static int32_t pcache_prefetch(PrefCacheAIOCB *acb) +{ + RbNodeKey key; + PCNode *node = NULL; + + prefetch_init_key(acb, &key); + if (pcache_node_find_and_create(acb, &key, &node)) { + return PREFETCH_NEW_NODE; + } + + /* Node covers the whole request */ + if (node->cm.sector_num <= acb->sector_num && + node->cm.sector_num + node->cm.nb_sectors >= acb->sector_num + + acb->nb_sectors) + { + return PREFETCH_FULL_UP; + } + + return PREFETCH_PART_UP; +} + static void pcache_aio_cb(void *opaque, int ret) { PrefCacheAIOCB *acb = opaque; @@ -80,6 +243,7 @@ static PrefCacheAIOCB *pcache_aio_get(BlockDriverState *bs, int64_t sector_num, { PrefCacheAIOCB *acb = qemu_aio_get(&pcache_aiocb_info, bs, cb, opaque); + acb->s = bs->opaque; acb->sector_num = sector_num; acb->nb_sectors = nb_sectors; acb->qiov = qiov; @@ -99,6 +263,8 @@ static BlockAIOCB *pcache_aio_readv(BlockDriverState *bs, PrefCacheAIOCB *acb = pcache_aio_get(bs, sector_num, qiov, nb_sectors, cb, opaque, QEMU_AIO_READ); + pcache_prefetch(acb); + bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, pcache_aio_cb, acb); return &acb->common; @@ -119,6 +285,17 @@ static BlockAIOCB *pcache_aio_writev(BlockDriverState *bs, return &acb->common; } +static void pcache_state_init(QemuOpts *opts, BDRVPCacheState *s) +{ + DPRINTF("pcache configure:\n"); + + s->pcache.tree.root = RB_ROOT; + qemu_co_mutex_init(&s->pcache.tree.lock); + QTAILQ_INIT(&s->list.head); + qemu_co_mutex_init(&s->list.lock); + s->pcache.curr_size = 0; +} + static int pcache_file_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -140,7 +317,9 @@ static int pcache_file_open(BlockDriverState *bs, QDict *options, int flags, if (local_err) { ret = -EINVAL; error_propagate(errp, local_err); + goto fail; } + pcache_state_init(opts, bs->opaque); fail: qemu_opts_del(opts); return ret; @@ -148,6 +327,15 @@ fail: static void pcache_close(BlockDriverState *bs) { + uint32_t cnt = 0; + BDRVPCacheState *s = bs->opaque; + BlockNode *node, *next; + QTAILQ_FOREACH_SAFE(node, &s->list.head, entry, next) { + QTAILQ_REMOVE(&s->list.head, node, entry); + pcache_node_free(PCNODE(node)); + cnt++; + } + DPRINTF("used %d nodes\n", cnt); } static void pcache_parse_filename(const char *filename, QDict *options, @@ -170,7 +358,7 @@ static bool pcache_recurse_is_first_non_filter(BlockDriverState *bs, static BlockDriver bdrv_pcache = { .format_name = "pcache", .protocol_name = "pcache", - .instance_size = 0, + .instance_size = sizeof(BDRVPCacheState), .bdrv_parse_filename = pcache_parse_filename, .bdrv_file_open = pcache_file_open, -- 2.8.3