diff mbox

[v3,5/5] replay: introduce block devices record/replay

Message ID 20160301110803.10104.77305.stgit@PASHA-ISP (mailing list archive)
State New, archived
Headers show

Commit Message

Pavel Dovgalyuk March 1, 2016, 11:08 a.m. UTC
This patch introduces block driver that implement recording
and replaying of block devices' operations.
All block completion operations are added to the queue.
Queue is flushed at checkpoints and information about processed requests
is recorded to the log. In replay phase the queue is matched with
events read from the log. Therefore block devices requests are processed
deterministically.

Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
---
 block/Makefile.objs      |    2 -
 block/blkreplay.c        |  156 ++++++++++++++++++++++++++++++++++++++++++++++
 docs/replay.txt          |   20 ++++++
 include/sysemu/replay.h  |    2 +
 replay/replay-events.c   |   24 ++++++-
 replay/replay-internal.h |    1 
 stubs/replay.c           |    4 +
 7 files changed, 206 insertions(+), 3 deletions(-)
 create mode 100755 block/blkreplay.c

Comments

Kevin Wolf March 9, 2016, 11:43 a.m. UTC | #1
Am 01.03.2016 um 12:08 hat Pavel Dovgalyuk geschrieben:
> This patch introduces block driver that implement recording
> and replaying of block devices' operations.
> All block completion operations are added to the queue.
> Queue is flushed at checkpoints and information about processed requests
> is recorded to the log. In replay phase the queue is matched with
> events read from the log. Therefore block devices requests are processed
> deterministically.
> 
> Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>

I like this new version much better than the old one. :-)

> +static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
> +                          Error **errp)
> +{
> +    Error *local_err = NULL;
> +    int ret;
> +
> +    /* Open the image file */
> +    bs->file = bdrv_open_child(NULL, options, "image",
> +                               bs, &child_file, false, &local_err);
> +    if (local_err) {
> +        ret = -EINVAL;
> +        error_propagate(errp, local_err);
> +        goto fail;
> +    }
> +
> +    ret = 0;
> +fail:
> +    if (ret < 0) {
> +        bdrv_unref_child(bs, bs->file);

This is unnecessary because in error cases, bdrv_open_child() returns
NULL. On the other hand, bdrv_unref_child() accepts a NULL child and
just doesn't do anything then, so it's harmless.

> +    }
> +    return ret;
> +}
> +
> +static void blkreplay_close(BlockDriverState *bs)
> +{
> +}
> +
> +static int64_t blkreplay_getlength(BlockDriverState *bs)
> +{
> +    return bdrv_getlength(bs->file->bs);
> +}
> +
> +static void blkreplay_bh_cb(void *opaque)
> +{
> +    Request *req = opaque;
> +    qemu_coroutine_enter(req->co, NULL);
> +    qemu_bh_delete(req->bh);
> +    g_free(req);
> +}

Maybe add a comment here that explains why the BH exists (forcing the
coroutine into its I/O thread, if I understand correctly).

> +static void block_request_create(uint64_t reqid, BlockDriverState *bs,
> +                                 Coroutine *co)
> +{
> +    Request *req = g_malloc0(sizeof(Request));
> +    req->co = co;
> +    req->bh = aio_bh_new(bdrv_get_aio_context(bs), blkreplay_bh_cb, req);

g_malloc0() zeroes all fields and then you immediately overwrite them,
so this is useless work. If you just want to be protected when adding
new fields, you could consider this instead:

Request *req = g_new(Request, 1);
*req = (Request) {
    .co = co,
    .bh = aio_bh_new(bdrv_get_aio_context(bs), blkreplay_bh_cb, req),
};

It's just a suggestion, though, feel free to change it or leave it.

> +    replay_block_event(req->bh, reqid);
> +}
> +
> +static int coroutine_fn blkreplay_co_readv(BlockDriverState *bs,
> +    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
> +{
> +    uint64_t reqid = request_id++;
> +
> +    bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
> +    block_request_create(reqid, bs, qemu_coroutine_self());
> +    qemu_coroutine_yield();
> +
> +    return 0;
> +}

Error handling is broken here. You need to forward the return value of
bdrv_co_readv() to the caller.

> +
> +static int coroutine_fn blkreplay_co_writev(BlockDriverState *bs,
> +    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
> +{
> +    uint64_t reqid = request_id++;
> +
> +    bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
> +    block_request_create(reqid, bs, qemu_coroutine_self());
> +    qemu_coroutine_yield();
> +
> +    return 0;
> +}

Same here.

> +static int coroutine_fn blkreplay_co_write_zeroes(BlockDriverState *bs,
> +    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
> +{
> +    uint64_t reqid = request_id++;
> +
> +    bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
> +    block_request_create(reqid, bs, qemu_coroutine_self());
> +    qemu_coroutine_yield();
> +
> +    return 0;
> +}

And here.

> +static int coroutine_fn blkreplay_co_discard(BlockDriverState *bs,
> +    int64_t sector_num, int nb_sectors)
> +{
> +    uint64_t reqid = request_id++;
> +
> +    bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
> +    block_request_create(reqid, bs, qemu_coroutine_self());
> +    qemu_coroutine_yield();
> +
> +    return 0;
> +}

Here, too.

> +static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs)
> +{
> +    uint64_t reqid = request_id++;
> +
> +    bdrv_co_flush(bs->file->bs);
> +    block_request_create(reqid, bs, qemu_coroutine_self());
> +    qemu_coroutine_yield();
> +
> +    return 0;
> +}

And finally here as well.

Kevin
Pavel Dovgalyuk March 10, 2016, 6:15 a.m. UTC | #2
> From: Kevin Wolf [mailto:kwolf@redhat.com]
> Am 01.03.2016 um 12:08 hat Pavel Dovgalyuk geschrieben:
> > This patch introduces block driver that implement recording
> > and replaying of block devices' operations.
> > All block completion operations are added to the queue.
> > Queue is flushed at checkpoints and information about processed requests
> > is recorded to the log. In replay phase the queue is matched with
> > events read from the log. Therefore block devices requests are processed
> > deterministically.
> >
> > Signed-off-by: Pavel Dovgalyuk <pavel.dovgaluk@ispras.ru>
> 
> I like this new version much better than the old one. :-)

Thanks for reviewing!

> > +static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
> > +                          Error **errp)
> > +{
> > +    Error *local_err = NULL;
> > +    int ret;
> > +
> > +    /* Open the image file */
> > +    bs->file = bdrv_open_child(NULL, options, "image",
> > +                               bs, &child_file, false, &local_err);
> > +    if (local_err) {
> > +        ret = -EINVAL;
> > +        error_propagate(errp, local_err);
> > +        goto fail;
> > +    }
> > +
> > +    ret = 0;
> > +fail:
> > +    if (ret < 0) {
> > +        bdrv_unref_child(bs, bs->file);
> 
> This is unnecessary because in error cases, bdrv_open_child() returns
> NULL. On the other hand, bdrv_unref_child() accepts a NULL child and
> just doesn't do anything then, so it's harmless.

Right, but this may be useful in case of future changes (if something else
will be initialized here).

Pavel Dovgalyuk
diff mbox

Patch

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 58ef2ef..38fea16 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -4,7 +4,7 @@  block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
 block-obj-y += quorum.o
-block-obj-y += parallels.o blkdebug.o blkverify.o
+block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
 block-obj-y += block-backend.o snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/blkreplay.c b/block/blkreplay.c
new file mode 100755
index 0000000..96c189a
--- /dev/null
+++ b/block/blkreplay.c
@@ -0,0 +1,156 @@ 
+/*
+ * Block protocol for record/replay
+ *
+ * Copyright (c) 2010-2016 Institute for System Programming
+ *                         of the Russian Academy of Sciences.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "sysemu/replay.h"
+
+typedef struct Request {
+    Coroutine *co;
+    QEMUBH *bh;
+} Request;
+
+/* Next request id.
+   This counter is global, because requests from different
+   block devices should not get overlapping ids. */
+static uint64_t request_id;
+
+static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
+                          Error **errp)
+{
+    Error *local_err = NULL;
+    int ret;
+
+    /* Open the image file */
+    bs->file = bdrv_open_child(NULL, options, "image",
+                               bs, &child_file, false, &local_err);
+    if (local_err) {
+        ret = -EINVAL;
+        error_propagate(errp, local_err);
+        goto fail;
+    }
+
+    ret = 0;
+fail:
+    if (ret < 0) {
+        bdrv_unref_child(bs, bs->file);
+    }
+    return ret;
+}
+
+static void blkreplay_close(BlockDriverState *bs)
+{
+}
+
+static int64_t blkreplay_getlength(BlockDriverState *bs)
+{
+    return bdrv_getlength(bs->file->bs);
+}
+
+static void blkreplay_bh_cb(void *opaque)
+{
+    Request *req = opaque;
+    qemu_coroutine_enter(req->co, NULL);
+    qemu_bh_delete(req->bh);
+    g_free(req);
+}
+
+static void block_request_create(uint64_t reqid, BlockDriverState *bs,
+                                 Coroutine *co)
+{
+    Request *req = g_malloc0(sizeof(Request));
+    req->co = co;
+    req->bh = aio_bh_new(bdrv_get_aio_context(bs), blkreplay_bh_cb, req);
+    replay_block_event(req->bh, reqid);
+}
+
+static int coroutine_fn blkreplay_co_readv(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+    uint64_t reqid = request_id++;
+
+    bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
+    block_request_create(reqid, bs, qemu_coroutine_self());
+    qemu_coroutine_yield();
+
+    return 0;
+}
+
+static int coroutine_fn blkreplay_co_writev(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+    uint64_t reqid = request_id++;
+
+    bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
+    block_request_create(reqid, bs, qemu_coroutine_self());
+    qemu_coroutine_yield();
+
+    return 0;
+}
+
+static int coroutine_fn blkreplay_co_write_zeroes(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+{
+    uint64_t reqid = request_id++;
+
+    bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
+    block_request_create(reqid, bs, qemu_coroutine_self());
+    qemu_coroutine_yield();
+
+    return 0;
+}
+
+static int coroutine_fn blkreplay_co_discard(BlockDriverState *bs,
+    int64_t sector_num, int nb_sectors)
+{
+    uint64_t reqid = request_id++;
+
+    bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
+    block_request_create(reqid, bs, qemu_coroutine_self());
+    qemu_coroutine_yield();
+
+    return 0;
+}
+
+static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs)
+{
+    uint64_t reqid = request_id++;
+
+    bdrv_co_flush(bs->file->bs);
+    block_request_create(reqid, bs, qemu_coroutine_self());
+    qemu_coroutine_yield();
+
+    return 0;
+}
+
+static BlockDriver bdrv_blkreplay = {
+    .format_name            = "blkreplay",
+    .protocol_name          = "blkreplay",
+    .instance_size          = 0,
+
+    .bdrv_file_open         = blkreplay_open,
+    .bdrv_close             = blkreplay_close,
+    .bdrv_getlength         = blkreplay_getlength,
+
+    .bdrv_co_readv          = blkreplay_co_readv,
+    .bdrv_co_writev         = blkreplay_co_writev,
+
+    .bdrv_co_write_zeroes   = blkreplay_co_write_zeroes,
+    .bdrv_co_discard        = blkreplay_co_discard,
+    .bdrv_co_flush          = blkreplay_co_flush,
+};
+
+static void bdrv_blkreplay_init(void)
+{
+    bdrv_register(&bdrv_blkreplay);
+}
+
+block_init(bdrv_blkreplay_init);
diff --git a/docs/replay.txt b/docs/replay.txt
index 149727e..acf5031 100644
--- a/docs/replay.txt
+++ b/docs/replay.txt
@@ -166,3 +166,23 @@  Sometimes the block layer uses asynchronous callbacks for its internal purposes
 (like reading or writing VM snapshots or disk image cluster tables). In this
 case bottom halves are not marked as "replayable" and do not saved
 into the log.
+
+Block devices
+-------------
+
+Block devices record/replay module intercepts calls of
+bdrv coroutine functions at the top of block drivers stack.
+To record and replay block operations the drive must be configured
+as following:
+ -drive file=disk.qcow,if=none,id=img-direct
+ -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay
+ -device ide-hd,drive=img-blkreplay
+
+blkreplay driver should be inserted between disk image and virtual driver
+controller. Therefore all disk requests may be recorded and replayed.
+
+All block completion operations are added to the queue in the coroutines.
+Queue is flushed at checkpoints and information about processed requests
+is recorded to the log. In replay phase the queue is matched with
+events read from the log. Therefore block devices requests are processed
+deterministically.
diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index c879231..0817dc5 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -117,6 +117,8 @@  void replay_bh_schedule_event(QEMUBH *bh);
 void replay_input_event(QemuConsole *src, InputEvent *evt);
 /*! Adds input sync event to the queue */
 void replay_input_sync_event(void);
+/*! Adds block layer event to the queue */
+void replay_block_event(QEMUBH *bh, uint64_t id);
 
 /* Character device */
 
diff --git a/replay/replay-events.c b/replay/replay-events.c
index 59d467f..5a7042b 100644
--- a/replay/replay-events.c
+++ b/replay/replay-events.c
@@ -51,6 +51,9 @@  static void replay_run_event(Event *event)
     case REPLAY_ASYNC_EVENT_CHAR:
         replay_event_char_run(event->opaque);
         break;
+    case REPLAY_ASYNC_EVENT_BLOCK:
+        aio_bh_call(event->opaque);
+        break;
     default:
         error_report("Replay: invalid async event ID (%d) in the queue",
                     event->event_kind);
@@ -135,7 +138,7 @@  void replay_add_event(ReplayAsyncEventKind event_kind,
 
 void replay_bh_schedule_event(QEMUBH *bh)
 {
-    if (replay_mode != REPLAY_MODE_NONE) {
+    if (replay_mode != REPLAY_MODE_NONE && events_enabled) {
         uint64_t id = replay_get_current_step();
         replay_add_event(REPLAY_ASYNC_EVENT_BH, bh, NULL, id);
     } else {
@@ -153,6 +156,15 @@  void replay_add_input_sync_event(void)
     replay_add_event(REPLAY_ASYNC_EVENT_INPUT_SYNC, NULL, NULL, 0);
 }
 
+void replay_block_event(QEMUBH *bh, uint64_t id)
+{
+    if (replay_mode != REPLAY_MODE_NONE && events_enabled) {
+        replay_add_event(REPLAY_ASYNC_EVENT_BLOCK, bh, NULL, id);
+    } else {
+        qemu_bh_schedule(bh);
+    }
+}
+
 static void replay_save_event(Event *event, int checkpoint)
 {
     if (replay_mode != REPLAY_MODE_PLAY) {
@@ -174,8 +186,11 @@  static void replay_save_event(Event *event, int checkpoint)
         case REPLAY_ASYNC_EVENT_CHAR:
             replay_event_char_save(event->opaque);
             break;
+        case REPLAY_ASYNC_EVENT_BLOCK:
+            replay_put_qword(event->id);
+            break;
         default:
-            error_report("Unknown ID %d of replay event", read_event_kind);
+            error_report("Unknown ID %d of replay event", event->id);
             exit(1);
         }
     }
@@ -232,6 +247,11 @@  static Event *replay_read_event(int checkpoint)
         event->event_kind = read_event_kind;
         event->opaque = replay_event_char_read();
         return event;
+    case REPLAY_ASYNC_EVENT_BLOCK:
+        if (read_id == -1) {
+            read_id = replay_get_qword();
+        }
+        break;
     default:
         error_report("Unknown ID %d of replay event", read_event_kind);
         exit(1);
diff --git a/replay/replay-internal.h b/replay/replay-internal.h
index 34eee5b..0363bb5 100644
--- a/replay/replay-internal.h
+++ b/replay/replay-internal.h
@@ -47,6 +47,7 @@  enum ReplayAsyncEventKind {
     REPLAY_ASYNC_EVENT_INPUT,
     REPLAY_ASYNC_EVENT_INPUT_SYNC,
     REPLAY_ASYNC_EVENT_CHAR,
+    REPLAY_ASYNC_EVENT_BLOCK,
     REPLAY_ASYNC_COUNT
 };
 
diff --git a/stubs/replay.c b/stubs/replay.c
index 00ca01f..6f4a8e8 100644
--- a/stubs/replay.c
+++ b/stubs/replay.c
@@ -29,3 +29,7 @@  bool replay_events_enabled(void)
 void replay_finish(void)
 {
 }
+
+void replay_block_event(QEMUBH *bh, uint64_t id)
+{
+}