diff mbox series

[v5,2/6] Discard blocks while copy-on-read

Message ID 1546200557-774583-3-git-send-email-andrey.shinkevich@virtuozzo.com (mailing list archive)
State New, archived
Headers show
Series Discard blocks during block-stream operation | expand

Commit Message

Andrey Shinkevich Dec. 30, 2018, 8:09 p.m. UTC
Discards the block duplicated in an intermediate backing file
after the block has been copied into the active layer during
a QMP block-stream operation.
This saves disk space while merging external snapshots.

Signed-off-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
---
 block/stream.c | 207 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 195 insertions(+), 12 deletions(-)
diff mbox series

Patch

diff --git a/block/stream.c b/block/stream.c
index 20e768e..af2eebf 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -12,6 +12,7 @@ 
  */
 
 #include "qemu/osdep.h"
+#include "qemu/cutils.h"
 #include "trace.h"
 #include "block/block_int.h"
 #include "block/blockjob_int.h"
@@ -37,14 +38,68 @@  typedef struct StreamBlockJob {
     char *backing_file_str;
     bool bs_read_only;
     BlockDriverState *cor_filter_bs;
+    bool discard;
+    GSList *im_nodes;
 } StreamBlockJob;
 
+typedef struct IntermediateNode {
+    BlockBackend *blk;
+    bool bs_read_only;
+} IntermediateNode;
+
 static BlockDriverState *child_file_bs(BlockDriverState *bs)
 {
     return bs->file ? bs->file->bs : NULL;
 }
 
-static int coroutine_fn stream_populate(BlockBackend *blk,
+static void restore_all_im_nodes(StreamBlockJob *s)
+{
+    GSList *l;
+    BlockDriverState *bs_active;
+    BlockDriverState *bs_im;
+    IntermediateNode *im_node;
+    QDict *opts;
+    BlockReopenQueue *queue = NULL;
+    Error *local_err = NULL;
+
+    assert(s->cor_filter_bs);
+    bs_active = child_file_bs(s->cor_filter_bs);
+    assert(bs_active && backing_bs(bs_active));
+
+    bdrv_subtree_drained_begin(backing_bs(bs_active));
+
+    for (l = s->im_nodes; l; l = l->next) {
+        im_node = l->data;
+        if (im_node->blk) {
+            bs_im = blk_bs(im_node->blk);
+
+            if (im_node->bs_read_only && bs_im && !bdrv_is_read_only(bs_im)) {
+                opts = qdict_new();
+                qdict_put_bool(opts, BDRV_OPT_READ_ONLY, true);
+                queue = bdrv_reopen_queue(queue, bs_im, opts);
+            }
+            /* Give up write permissions before making it read-only */
+            blk_set_perm(im_node->blk, 0, BLK_PERM_ALL, &error_abort);
+            blk_unref(im_node->blk);
+            bdrv_unref(bs_im);
+        }
+        g_free(im_node);
+    }
+    g_slist_free(s->im_nodes);
+    s->im_nodes = NULL;
+
+    if (queue) {
+        bdrv_reopen_multiple(bdrv_get_aio_context(bs_active), queue,
+                             &local_err);
+        if (local_err != NULL) {
+            error_report_err(local_err);
+        }
+    }
+
+    bdrv_subtree_drained_end(backing_bs(bs_active));
+}
+
+static int coroutine_fn stream_populate(const StreamBlockJob *s,
                                         int64_t offset, uint64_t bytes,
                                         void *buf)
 {
@@ -53,12 +108,28 @@  static int coroutine_fn stream_populate(BlockBackend *blk,
         .iov_len  = bytes,
     };
     QEMUIOVector qiov;
+    GSList *l;
+    IntermediateNode *im_node;
+    int ret;
 
+    assert(s);
     assert(bytes < SIZE_MAX);
     qemu_iovec_init_external(&qiov, &iov, 1);
 
     /* Copy-on-read the unallocated clusters */
-    return blk_co_preadv(blk, offset, qiov.size, &qiov, BDRV_REQ_COPY_ON_READ);
+    ret = blk_co_preadv(s->common.blk, offset, qiov.size, &qiov,
+                        BDRV_REQ_COPY_ON_READ);
+
+    if (ret < 0 || !s->discard) {
+        return ret;
+    }
+
+    for (l = s->im_nodes; l; l = l->next) {
+        im_node = l->data;
+        blk_co_pdiscard(im_node->blk, offset, bytes);
+    }
+
+    return ret;
 }
 
 static int stream_change_backing_file(Job *job)
@@ -109,6 +180,8 @@  static void stream_exit(Job *job)
     if (s->cor_filter_bs == NULL) {
         return;
     }
+    /* Reopen intermediate images back in read-only mode */
+    restore_all_im_nodes(s);
     /* Remove the filter driver from the graph */
     remove_filter(s->cor_filter_bs);
     s->cor_filter_bs = NULL;
@@ -145,7 +218,6 @@  static void stream_clean(Job *job)
 static int coroutine_fn stream_run(Job *job, Error **errp)
 {
     StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
-    BlockBackend *blk = s->common.blk;
     BlockDriverState *bs = child_file_bs(s->cor_filter_bs);
     BlockDriverState *base = s->base;
     int64_t len;
@@ -209,7 +281,7 @@  static int coroutine_fn stream_run(Job *job, Error **errp)
         }
         trace_stream_one_iteration(s, offset, n, ret);
         if (copy) {
-            ret = stream_populate(blk, offset, n, buf);
+            ret = stream_populate(s, offset, n, buf);
         }
         if (ret < 0) {
             BlockErrorAction action =
@@ -250,22 +322,27 @@  out:
     return ret;
 }
 
-static BlockDriverState *create_filter_node(BlockDriverState *bs, Error **errp)
+static BlockDriverState *create_filter_node(BlockDriverState *bs, bool discard,
+                                            Error **errp)
 {
     QDict *opts = qdict_new();
 
     qdict_put_str(opts, "driver", "copy-on-read");
     qdict_put_str(opts, "file", bdrv_get_node_name(bs));
+    if (discard) {
+        qdict_put_bool(opts, "driver.discard", true);
+    }
 
     return bdrv_open(NULL, NULL, opts, BDRV_O_RDWR, errp);
 }
 
-static BlockDriverState *insert_filter(BlockDriverState *bs, Error **errp)
+static BlockDriverState *insert_filter(BlockDriverState *bs, bool discard,
+                                       Error **errp)
 {
     BlockDriverState *cor_filter_bs;
     Error *local_err = NULL;
 
-    cor_filter_bs = create_filter_node(bs, errp);
+    cor_filter_bs = create_filter_node(bs, discard, errp);
     if (cor_filter_bs == NULL) {
         error_prepend(errp, "Could not create filter node: ");
         return NULL;
@@ -286,6 +363,92 @@  static BlockDriverState *insert_filter(BlockDriverState *bs, Error **errp)
     return cor_filter_bs;
 }
 
+/* Makes intermediate block chain writable */
+static int init_intermediate_nodes(StreamBlockJob *s,
+                                   BlockDriverState *bs,
+                                   BlockDriverState *base, Error **errp)
+{
+    BlockDriverState *iter;
+    bool bs_read_only;
+    IntermediateNode *im_node;
+    BlockBackend *blk;
+    QDict *opts;
+    BlockReopenQueue *queue = NULL;
+    Error *local_err = NULL;
+    int ret;
+
+    /* Sanity check */
+    if (!backing_bs(bs)) {
+        error_setg(errp, "Top BDS does not have a backing file.");
+        return -EINVAL;
+    }
+    if (base && !bdrv_chain_contains(bs, base)) {
+        error_setg(errp, "The backing chain does not contain the base file.");
+        return -EINVAL;
+    }
+
+    /* Reopen intermediate images in read-write mode */
+    bdrv_subtree_drained_begin(backing_bs(bs));
+
+    for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
+        bs_read_only = bdrv_is_read_only(iter);
+        im_node = g_new0(IntermediateNode, 1);
+        im_node->blk = NULL;
+        im_node->bs_read_only = bs_read_only;
+        bdrv_ref(iter);
+        s->im_nodes = g_slist_prepend(s->im_nodes, im_node);
+
+        if (bs_read_only) {
+            opts = qdict_new();
+            qdict_put_bool(opts, BDRV_OPT_READ_ONLY, false);
+            queue = bdrv_reopen_queue(queue, iter, opts);
+        }
+    }
+
+    if (queue) {
+        ret = bdrv_reopen_multiple(bdrv_get_aio_context(bs), queue, &local_err);
+        if (local_err != NULL) {
+            error_propagate(errp, local_err);
+            bdrv_subtree_drained_end(backing_bs(bs));
+            restore_all_im_nodes(s);
+            return -1;
+        }
+    }
+
+    bdrv_subtree_drained_end(backing_bs(bs));
+
+    s->im_nodes = g_slist_reverse(s->im_nodes);
+    GSList *l = s->im_nodes;
+
+    for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
+        blk = blk_new(BLK_PERM_WRITE, BLK_PERM_CONSISTENT_READ |
+                      BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED |
+                      BLK_PERM_GRAPH_MOD);
+        if (!blk) {
+            error_setg(errp,
+                       "Block Stream: failed to create new Block Backend.");
+            goto fail;
+        }
+
+        ret = blk_insert_bs(blk, iter, errp);
+        if (ret < 0) {
+            goto fail;
+        }
+
+        assert(l);
+        im_node = l->data;
+        im_node->blk = blk;
+        l = l->next;
+    }
+
+    return 0;
+
+fail:
+    restore_all_im_nodes(s);
+
+    return -1;
+}
+
 static const BlockJobDriver stream_job_driver = {
     .job_driver = {
         .instance_size = sizeof(StreamBlockJob),
@@ -308,6 +471,9 @@  void stream_start(const char *job_id, BlockDriverState *bs,
     StreamBlockJob *s;
     BlockDriverState *iter;
     bool bs_read_only;
+    const bool discard = false;
+    int node_shared_flags = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
+    int ret;
 
     /* Make sure that the image is opened in read-write mode */
     bs_read_only = bdrv_is_read_only(bs);
@@ -330,21 +496,34 @@  void stream_start(const char *job_id, BlockDriverState *bs,
         goto fail;
     }
 
-    /* Block all intermediate nodes between bs and base, because they will
+    /*
+     * Block all intermediate nodes between bs and base, because they will
      * disappear from the chain after this operation. The streaming job reads
-     * every block only once, assuming that it doesn't change, so block writes
-     * and resizes. */
+     * every block only once, assuming that it doesn't change, so forbid writes
+     * and resizes. Allow writing in case of discard.
+     */
+    if (discard) {
+        node_shared_flags |= BLK_PERM_WRITE;
+    }
     for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) {
         block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
-                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED,
+                           node_shared_flags,
                            &error_abort);
     }
 
-    s->cor_filter_bs = insert_filter(bs, errp);
+    s->cor_filter_bs = insert_filter(bs, discard, errp);
     if (s->cor_filter_bs == NULL) {
         goto fail;
     }
 
+    if (discard) {
+        ret = init_intermediate_nodes(s, bs, base, errp);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
+    s->discard = discard;
     s->base = base;
     s->backing_file_str = g_strdup(backing_file_str);
     s->bs_read_only = bs_read_only;
@@ -355,6 +534,10 @@  void stream_start(const char *job_id, BlockDriverState *bs,
     return;
 
 fail:
+    if (s && s->cor_filter_bs) {
+        remove_filter(s->cor_filter_bs);
+        job_early_fail(&s->common.job);
+    }
     if (bs_read_only) {
         bdrv_reopen_set_read_only(bs, true, NULL);
     }