[RFC,17/22] block/pcache: skip readahead for non-sequential requests

Message ID 20160825134421.20231-18-pbutsykin@virtuozzo.com (mailing list archive)
State New, archived

Commit Message

Pavel Butsykin Aug. 25, 2016, 1:44 p.m. UTC
Random reads generate a lot of readahead, which results in a loss of
performance. To avoid this, a check on the sequence of requests was added
before doing the readahead. It also makes no sense to cache such random
requests, because a cache hit on this data is very unlikely.
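
In short, readahead is only started when the window immediately preceding the
current request (one readahead size back) was itself requested recently;
otherwise the read is treated as random. Below is a minimal, self-contained
sketch of that check. It replaces the patch's rbtree/LRU "lreq" pool with a
plain ring buffer of recent ranges, and all names used here (remember_request,
should_readahead, READAHEAD_SECTORS, ...) are illustrative only, not taken
from the patch.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define READAHEAD_SECTORS 256   /* readahead window in sectors (assumed value) */
    #define HISTORY_LEN       64    /* how many recent requests we remember */

    /* A recently seen request range, in sectors. */
    typedef struct {
        uint64_t sector_num;
        uint32_t nb_sectors;
    } RecentRange;

    static RecentRange history[HISTORY_LEN];
    static unsigned history_pos;

    /* Remember a request range; the ring buffer stands in for the
     * rbtree + LRU "lreq" pool used by the patch. */
    static void remember_request(uint64_t sector_num, uint32_t nb_sectors)
    {
        history[history_pos % HISTORY_LEN] =
            (RecentRange){ .sector_num = sector_num, .nb_sectors = nb_sectors };
        history_pos++;
    }

    /* Does any remembered range overlap [sector_num, sector_num + nb_sectors)? */
    static bool was_recently_requested(uint64_t sector_num, uint32_t nb_sectors)
    {
        for (unsigned i = 0; i < HISTORY_LEN; i++) {
            RecentRange *r = &history[i];
            if (r->nb_sectors == 0) {
                continue;   /* unused slot */
            }
            if (r->sector_num < sector_num + nb_sectors &&
                sector_num < r->sector_num + r->nb_sectors) {
                return true;
            }
        }
        return false;
    }

    /* Readahead only makes sense if the window just before the current
     * request was read recently, i.e. the stream looks sequential. */
    static bool should_readahead(uint64_t sector_num)
    {
        if (sector_num <= READAHEAD_SECTORS) {
            return false;
        }
        return was_recently_requested(sector_num - READAHEAD_SECTORS,
                                      READAHEAD_SECTORS);
    }

    int main(void)
    {
        remember_request(0, 256);
        remember_request(256, 256);
        printf("sequential: %d\n", should_readahead(512));    /* 1: previous window was read */
        printf("random:     %d\n", should_readahead(100000)); /* 0: no preceding request */
        return 0;
    }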

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
---
 block/pcache.c | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 138 insertions(+), 3 deletions(-)

Patch

diff --git a/block/pcache.c b/block/pcache.c
index ae7ac8d..7a317fc 100644
--- a/block/pcache.c
+++ b/block/pcache.c
@@ -73,6 +73,10 @@  typedef struct PCNode {
     CoMutex                  lock;
 } PCNode;
 
+typedef struct LRNode {
+    BlockNode cm;
+} LRNode;
+
 typedef struct ReqStor {
     struct {
         struct RbRoot root;
@@ -91,10 +95,12 @@  typedef struct BDRVPCacheState {
     BlockDriverState **bs;
 
     ReqStor pcache;
+    ReqStor lreq;
 
     struct {
         uint32_t cache_size;
         uint32_t readahead_size;
+        uint32_t lreq_pool_size;
     } cfg;
 
 #ifdef PCACHE_DEBUG
@@ -166,6 +172,7 @@  static QemuOptsList runtime_opts = {
 #define MB_BITS 20
 #define PCACHE_DEFAULT_CACHE_SIZE (4 << MB_BITS)
 #define PCACHE_DEFAULT_READAHEAD_SIZE (128 << KB_BITS)
+#define PCACHE_DEFAULT_POOL_STAT_SIZE (1 << MB_BITS)
 
 enum {
     NODE_SUCCESS_STATUS = 0,
@@ -181,6 +188,7 @@  enum {
 };
 
 #define PCNODE(_n) ((PCNode *)(_n))
+#define LRNODE(_n) ((LRNode *)(_n))
 
 static inline void pcache_node_unref(BDRVPCacheState *s, PCNode *node)
 {
@@ -262,6 +270,11 @@  static PCNode *pcache_node_search(struct RbRoot *root, RbNodeKey *key)
     return node == NULL ? NULL : pcache_node_ref(node);
 }
 
+static inline LRNode *lreq_node_search(struct RbRoot *root, RbNodeKey *key)
+{
+    return node_search(root, key);
+}
+
 static void *node_insert(struct RbRoot *root, BlockNode *node)
 {
     struct RbNode **new = &(root->rb_node), *parent = NULL;
@@ -288,6 +301,11 @@  static inline PCNode *pcache_node_insert(struct RbRoot *root, PCNode *node)
     return pcache_node_ref(node_insert(root, &node->cm));
 }
 
+static inline LRNode *lreq_node_insert(struct RbRoot *root, LRNode *node)
+{
+    return node_insert(root, &node->cm);
+}
+
 static inline void *pcache_node_alloc(RbNodeKey* key)
 {
     PCNode *node = g_slice_alloc(sizeof(*node));
@@ -364,6 +382,34 @@  static void pcache_try_shrink(BDRVPCacheState *s)
     }
 }
 
+static void lreq_try_shrink(BDRVPCacheState *s)
+{
+    while (s->lreq.curr_size > s->cfg.lreq_pool_size) {
+        LRNode *rmv_node;
+        /* XXX: need to filter large requests */
+        if (QTAILQ_EMPTY(&s->lreq.lru.list)) {
+            DPRINTF("lru lreq list is empty, but curr_size: %d\n",
+                    s->lreq.curr_size);
+            break;
+        }
+
+        qemu_co_mutex_lock(&s->lreq.lru.lock);
+        rmv_node = LRNODE(QTAILQ_LAST(&s->lreq.lru.list, lru_head));
+        qemu_co_mutex_unlock(&s->lreq.lru.lock);
+
+        atomic_sub(&s->lreq.curr_size, rmv_node->cm.nb_sectors);
+
+        qemu_co_mutex_lock(&s->lreq.lru.lock);
+        QTAILQ_REMOVE(&s->lreq.lru.list, &rmv_node->cm, entry);
+        qemu_co_mutex_unlock(&s->lreq.lru.lock);
+
+        qemu_co_mutex_lock(&s->lreq.tree.lock);
+        rb_erase(&rmv_node->cm.rb_node, &s->lreq.tree.root);
+        qemu_co_mutex_unlock(&s->lreq.tree.lock);
+        g_slice_free1(sizeof(*rmv_node), rmv_node);
+    }
+}
+
 static PrefCachePartReq *pcache_req_get(PrefCacheAIOCB *acb, PCNode *node)
 {
     PrefCachePartReq *req = g_slice_alloc(sizeof(*req));
@@ -437,6 +483,34 @@  static inline PCNode *pcache_node_add(PrefCacheAIOCB *acb, RbNodeKey *key)
     return node;
 }
 
+static LRNode *lreq_node_add(PrefCacheAIOCB *acb, RbNodeKey *key)
+{
+    BDRVPCacheState *s = acb->s;
+    LRNode *new_node = g_slice_alloc(sizeof(*new_node));
+    LRNode *found;
+
+    new_node->cm.sector_num = key->num;
+    new_node->cm.nb_sectors = key->size;
+
+    qemu_co_mutex_lock(&s->lreq.tree.lock);
+    found = lreq_node_insert(&s->lreq.tree.root, new_node);
+    qemu_co_mutex_unlock(&s->lreq.tree.lock);
+    if (found != new_node) {
+        g_slice_free1(sizeof(*new_node), new_node);
+        return NULL;
+    }
+
+    atomic_add(&s->lreq.curr_size, new_node->cm.nb_sectors);
+
+    lreq_try_shrink(s);
+
+    qemu_co_mutex_lock(&s->lreq.lru.lock);
+    QTAILQ_INSERT_HEAD(&s->lreq.lru.list, &new_node->cm, entry);
+    qemu_co_mutex_unlock(&s->lreq.lru.lock);
+
+    return new_node;
+}
+
 static uint64_t ranges_overlap_size(uint64_t node1, uint32_t size1,
                                     uint64_t node2, uint32_t size2)
 {
@@ -552,13 +626,24 @@  enum {
 
 static int32_t pcache_prefetch(PrefCacheAIOCB *acb)
 {
+    BDRVPCacheState *s = acb->s;
     RbNodeKey key;
-    PCNode *node = NULL;
+    PCNode *node;
 
     prefetch_init_key(acb, &key);
-    if (pcache_node_find_and_create(acb, &key, &node)) {
+
+    /* add request statistics */
+    lreq_node_add(acb, &key);
+
+    qemu_co_mutex_lock(&s->pcache.tree.lock); /* XXX: use get_next_node */
+    node = pcache_node_search(&s->pcache.tree.root, &key);
+    qemu_co_mutex_unlock(&s->pcache.tree.lock);
+    if (node == NULL) {
         return PREFETCH_NEW_NODE;
     }
+    if (node->status == NODE_SUCCESS_STATUS) {
+        pcache_lru_node_up(s, node);
+    }
 
     /* Node covers the whole request */
     if (node->cm.sector_num <= acb->sector_num &&
@@ -795,6 +880,30 @@  static bool check_allocated_blocks(BlockDriverState *bs, int64_t sector_num,
     return true;
 }
 
+static bool check_lreq_sequence(BDRVPCacheState *s, uint64_t sector_num)
+{
+    RbNodeKey key;
+    LRNode *node;
+    uint32_t cache_line_sz = s->cfg.readahead_size;
+
+    if (sector_num <= cache_line_sz) {
+        return false;
+    }
+    /* check whether the previous cache line was requested */
+    key.num = sector_num - cache_line_sz;
+    key.size = cache_line_sz;
+
+    qemu_co_mutex_lock(&s->lreq.tree.lock);
+    node = lreq_node_search(&s->lreq.tree.root, &key);
+    qemu_co_mutex_unlock(&s->lreq.tree.lock);
+    if (node == NULL) { /* the requests aren't sequential, so readahead
+                         * most likely makes no sense.
+                         */
+        return false;
+    }
+    return node->cm.sector_num <= key.num;
+}
+
 static void pcache_readahead_request(BlockDriverState *bs, PrefCacheAIOCB *acb)
 {
     BDRVPCacheState *s = acb->s;
@@ -803,6 +912,9 @@  static void pcache_readahead_request(BlockDriverState *bs, PrefCacheAIOCB *acb)
     uint64_t total_sectors = bdrv_nb_sectors(bs);
     PCNode *node = NULL;
 
+    if (!check_lreq_sequence(acb->s, acb->sector_num)) {
+        return;
+    }
     prefetch_init_key(acb, &key);
 
     key.num = key.num + key.size;
@@ -841,7 +953,13 @@  static BlockAIOCB *pcache_aio_readv(BlockDriverState *bs,
     PrefCacheAIOCB *acb = pcache_aio_get(bs, sector_num, qiov, nb_sectors, cb,
                                          opaque, PCACHE_AIO_READ);
     int32_t status = pcache_prefetch(acb);
-    if (status == PREFETCH_FULL_UP) {
+    if (status == PREFETCH_NEW_NODE) {
+        BlockAIOCB *ret = bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
+                                         cb, opaque);
+        pcache_readahead_request(bs, acb);
+        qemu_aio_unref(acb); /* XXX: fix superfluous alloc */
+        return ret;
+    } else if (status == PREFETCH_FULL_UP) {
         assert(acb->requests.cnt == 0);
         complete_aio_request(acb);
     } else {
@@ -885,8 +1003,15 @@  static void pcache_state_init(QemuOpts *opts, BDRVPCacheState *s)
     qemu_co_mutex_init(&s->pcache.lru.lock);
     s->pcache.curr_size = 0;
 
+    s->lreq.tree.root = RB_ROOT;
+    qemu_co_mutex_init(&s->lreq.tree.lock);
+    QTAILQ_INIT(&s->lreq.lru.list);
+    qemu_co_mutex_init(&s->lreq.lru.lock);
+    s->lreq.curr_size = 0;
+
     s->cfg.cache_size = cache_size >> BDRV_SECTOR_BITS;
     s->cfg.readahead_size = readahead_size >> BDRV_SECTOR_BITS;
+    s->cfg.lreq_pool_size = PCACHE_DEFAULT_POOL_STAT_SIZE >> BDRV_SECTOR_BITS;
 
 #ifdef PCACHE_DEBUG
     QTAILQ_INIT(&s->death_node_list);
@@ -946,6 +1071,16 @@  static void pcache_close(BlockDriverState *bs)
     }
     DPRINTF("used %d nodes\n", cnt);
 
+    cnt = 0;
+    if (!QTAILQ_EMPTY(&s->lreq.lru.list)) {
+        QTAILQ_FOREACH_SAFE(node, &s->lreq.lru.list, entry, next) {
+            QTAILQ_REMOVE(&s->lreq.lru.list, node, entry);
+            g_slice_free1(sizeof(*node), node);
+            cnt++;
+        }
+    }
+    DPRINTF("used %d lreq nodes\n", cnt);
+
 #ifdef PCACHE_DEBUG
     if (!QTAILQ_EMPTY(&s->death_node_list)) {
         cnt = 0;