diff mbox

[10/20] block: Move enable_write_cache to BB level

Message ID 1458325289-17848-11-git-send-email-kwolf@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Kevin Wolf March 18, 2016, 6:21 p.m. UTC
Whether a write cache is used or not is a decision that concerns the
user (e.g. the guest device) rather than the backend. It was already
logically part of the BB level as bdrv_move_feature_fields() always kept
it on top of the BDS tree; with this patch, the core of it (the actual
flag and the additional flushes) is also implemented there.

Direct callers of bdrv_open() must pass BDRV_O_CACHE_WB now if bs
doesn't have a BlockBackend attached.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c                    | 26 +++++++++++++++++---------
 block/block-backend.c      | 42 +++++++++++++++++++++++++++---------------
 block/io.c                 |  2 +-
 block/iscsi.c              |  2 +-
 include/block/block.h      |  1 +
 include/block/block_int.h  |  3 ---
 tests/qemu-iotests/142     |  4 ++--
 tests/qemu-iotests/142.out |  8 ++++----
 8 files changed, 53 insertions(+), 35 deletions(-)

Comments

Max Reitz March 26, 2016, 7:54 p.m. UTC | #1
On 18.03.2016 19:21, Kevin Wolf wrote:
> Whether a write cache is used or not is a decision that concerns the
> user (e.g. the guest device) rather than the backend. It was already
> logically part of the BB level as bdrv_move_feature_fields() always kept
> it on top of the BDS tree; with this patch, the core of it (the actual
> flag and the additional flushes) is also implemented there.
> 
> Direct callers of bdrv_open() must pass BDRV_O_CACHE_WB now if bs
> doesn't have a BlockBackend attached.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  block.c                    | 26 +++++++++++++++++---------
>  block/block-backend.c      | 42 +++++++++++++++++++++++++++---------------
>  block/io.c                 |  2 +-
>  block/iscsi.c              |  2 +-
>  include/block/block.h      |  1 +
>  include/block/block_int.h  |  3 ---
>  tests/qemu-iotests/142     |  4 ++--
>  tests/qemu-iotests/142.out |  8 ++++----
>  8 files changed, 53 insertions(+), 35 deletions(-)

Reviewed-by: Max Reitz <mreitz@redhat.com>

I'm not so sure about the state that bdrv_enable_write_cache() and
bdrv_set_enable_write_cache() are in after this patch (e.g. the NBD
client will always think the write cache is enabled, and
bdrv_set_enable_write_cache() can be used to unset BDRV_O_CACHE_WB on
BDSs), but judging by the titles of the following patches, they will
clear that up.

It appears to me that multiwrite will ignore the writethrough status,
but then again, qemu-io seems to be the only multiwrite user.

> diff --git a/block.c b/block.c
> index 172f865..9271dbb 100644
> --- a/block.c
> +++ b/block.c

[...]

> @@ -3618,8 +3626,8 @@ void bdrv_img_create(const char *filename, const char *fmt,
>              }
>  
>              /* backing files always opened read-only */
> -            back_flags =
> -                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
> +            back_flags = flags | BDRV_O_CACHE_WB;
> +            back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);

Actually, this is the only thing the @flags parameter of
bdrv_img_create() is used for. Maybe the parameter can be dropped, since
we already regulate back_flags pretty strictly.

>  
>              if (backing_fmt) {
>                  backing_options = qdict_new();
diff mbox

Patch

diff --git a/block.c b/block.c
index 172f865..9271dbb 100644
--- a/block.c
+++ b/block.c
@@ -2038,6 +2038,11 @@  int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
             goto error;
         }
     }
+    if (!reopen_state->bs->blk && !(reopen_state->flags & BDRV_O_CACHE_WB)) {
+        error_setg(errp, "Cannot disable cache.writeback: No BlockBackend");
+        ret = -EINVAL;
+        goto error;
+    }
 
     /* node-name and driver must be unchanged. Put them back into the QDict, so
      * that they are checked at the end of this function. */
@@ -2138,10 +2143,10 @@  void bdrv_reopen_commit(BDRVReopenState *reopen_state)
 
     reopen_state->bs->explicit_options   = reopen_state->explicit_options;
     reopen_state->bs->open_flags         = reopen_state->flags;
-    reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
-                                              BDRV_O_CACHE_WB);
     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
 
+    bdrv_set_enable_write_cache(reopen_state->bs,
+                                !!(reopen_state->flags & BDRV_O_CACHE_WB));
     bdrv_refresh_limits(reopen_state->bs, NULL);
 }
 
@@ -2271,9 +2276,6 @@  static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
                                      BlockDriverState *bs_src)
 {
     /* move some fields that need to stay attached to the device */
-
-    /* dev info */
-    bs_dest->enable_write_cache = bs_src->enable_write_cache;
 }
 
 static void change_parent_backing_link(BlockDriverState *from,
@@ -2753,12 +2755,18 @@  int bdrv_is_sg(BlockDriverState *bs)
 
 int bdrv_enable_write_cache(BlockDriverState *bs)
 {
-    return bs->enable_write_cache;
+    if (bs->blk) {
+        return blk_enable_write_cache(bs->blk);
+    } else {
+        return true;
+    }
 }
 
 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
 {
-    bs->enable_write_cache = wce;
+    if (bs->blk) {
+        blk_set_enable_write_cache(bs->blk, wce);
+    }
 
     /* so a reopen() will preserve wce */
     if (wce) {
@@ -3618,8 +3626,8 @@  void bdrv_img_create(const char *filename, const char *fmt,
             }
 
             /* backing files always opened read-only */
-            back_flags =
-                flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+            back_flags = flags | BDRV_O_CACHE_WB;
+            back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
 
             if (backing_fmt) {
                 backing_options = qdict_new();
diff --git a/block/block-backend.c b/block/block-backend.c
index ffa5856..4ef4b03 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -46,6 +46,8 @@  struct BlockBackend {
      * can be used to restore those options in the new BDS on insert) */
     BlockBackendRootState root_state;
 
+    bool enable_write_cache;
+
     /* I/O stats (display with "info blockstats"). */
     BlockAcctStats stats;
 
@@ -715,11 +717,17 @@  static int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
                                       unsigned int bytes, QEMUIOVector *qiov,
                                       BdrvRequestFlags flags)
 {
-    int ret = blk_check_byte_request(blk, offset, bytes);
+    int ret;
+
+    ret = blk_check_byte_request(blk, offset, bytes);
     if (ret < 0) {
         return ret;
     }
 
+    if (!blk->enable_write_cache) {
+        flags |= BDRV_REQ_FUA;
+    }
+
     return bdrv_co_do_pwritev(blk_bs(blk), offset, bytes, qiov, flags);
 }
 
@@ -1226,26 +1234,19 @@  int blk_is_sg(BlockBackend *blk)
 
 int blk_enable_write_cache(BlockBackend *blk)
 {
-    BlockDriverState *bs = blk_bs(blk);
-
-    if (bs) {
-        return bdrv_enable_write_cache(bs);
-    } else {
-        return !!(blk->root_state.open_flags & BDRV_O_CACHE_WB);
-    }
+    return blk->enable_write_cache;
 }
 
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
 {
-    BlockDriverState *bs = blk_bs(blk);
+    blk->enable_write_cache = wce;
 
-    if (bs) {
-        bdrv_set_enable_write_cache(bs, wce);
-    } else {
+    /* TODO Remove this when BDRV_O_CACHE_WB isn't used any more */
+    if (blk->root) {
         if (wce) {
-            blk->root_state.open_flags |= BDRV_O_CACHE_WB;
+            blk->root->bs->open_flags |= BDRV_O_CACHE_WB;
         } else {
-            blk->root_state.open_flags &= ~BDRV_O_CACHE_WB;
+            blk->root->bs->open_flags &= ~BDRV_O_CACHE_WB;
         }
     }
 }
@@ -1508,11 +1509,22 @@  int blk_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors)
 int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
                      int64_t pos, int size)
 {
+    int ret;
+
     if (!blk_is_available(blk)) {
         return -ENOMEDIUM;
     }
 
-    return bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
+    ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
+    if (ret < 0) {
+        return ret;
+    }
+
+    if (ret == size && !blk->enable_write_cache) {
+        ret = bdrv_flush(blk_bs(blk));
+    }
+
+    return ret < 0 ? ret : size;
 }
 
 int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
diff --git a/block/io.c b/block/io.c
index 575da22..14f12c8 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1156,7 +1156,7 @@  static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
     }
     bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
 
-    if (ret == 0 && !bs->enable_write_cache) {
+    if (ret == 0 && (flags & BDRV_REQ_FUA)) {
         ret = bdrv_co_flush(bs);
     }
 
diff --git a/block/iscsi.c b/block/iscsi.c
index 128ea79..3b54536 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -476,7 +476,7 @@  static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
     num_sectors = sector_qemu2lun(nb_sectors, iscsilun);
     iscsi_co_init_iscsitask(iscsilun, &iTask);
 retry:
-    fua = iscsilun->dpofua && !bs->enable_write_cache;
+    fua = iscsilun->dpofua && !bdrv_enable_write_cache(bs);
     iTask.force_next_flush = !fua;
     if (iscsilun->use_16_for_rw) {
         iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
diff --git a/include/block/block.h b/include/block/block.h
index 823f4d7..1b8a1c7 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -64,6 +64,7 @@  typedef enum {
      */
     BDRV_REQ_MAY_UNMAP          = 0x4,
     BDRV_REQ_NO_SERIALISING     = 0x8,
+    BDRV_REQ_FUA                = 0x10,
 } BdrvRequestFlags;
 
 typedef struct BlockSizes {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index ba6e9ac..ce9d764 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -435,9 +435,6 @@  struct BlockDriverState {
     /* Alignment requirement for offset/length of I/O requests */
     unsigned int request_alignment;
 
-    /* do we need to tell the quest if we have a volatile write cache? */
-    int enable_write_cache;
-
     /* the following member gives a name to every node on the bs graph. */
     char node_name[32];
     /* element of the list of named nodes building the graph */
diff --git a/tests/qemu-iotests/142 b/tests/qemu-iotests/142
index 80834b5..517fb30 100755
--- a/tests/qemu-iotests/142
+++ b/tests/qemu-iotests/142
@@ -338,8 +338,8 @@  echo
 # TODO Implement node-name support for 'qemu-io' HMP command for -c
 # Can use only -o to access child node options for now
 
-hmp_cmds="qemu-io none0 \"reopen -o file.cache.writeback=off,file.cache.direct=off,file.cache.no-flush=off\"
-qemu-io none0 \"reopen -o backing.file.cache.writeback=on,backing.file.cache.direct=off,backing.file.cache.no-flush=on\"
+hmp_cmds="qemu-io none0 \"reopen -o file.cache.direct=off,file.cache.no-flush=off\"
+qemu-io none0 \"reopen -o backing.file.cache.direct=off,backing.file.cache.no-flush=on\"
 qemu-io none0 \"reopen -c none\"
 info block image
 info block file
diff --git a/tests/qemu-iotests/142.out b/tests/qemu-iotests/142.out
index 5dd5bd0..32dc802 100644
--- a/tests/qemu-iotests/142.out
+++ b/tests/qemu-iotests/142.out
@@ -132,7 +132,7 @@  cache.direct=on on backing-file
 
 cache.writeback=off on none0
     Cache mode:       writethrough
-    Cache mode:       writethrough
+    Cache mode:       writeback
     Cache mode:       writeback
     Cache mode:       writeback
     Cache mode:       writeback
@@ -342,7 +342,7 @@  cache.direct=on on backing-file
 
 cache.writeback=off on none0
     Cache mode:       writeback, direct
-    Cache mode:       writethrough
+    Cache mode:       writeback
     Cache mode:       writeback
     Cache mode:       writeback
     Cache mode:       writeback
@@ -503,7 +503,7 @@  cache.direct=on on backing-file
 
 cache.writeback=off on blk
     Cache mode:       writethrough
-    Cache mode:       writethrough
+    Cache mode:       writeback
     Cache mode:       writeback
     Cache mode:       writeback
     Cache mode:       writeback
@@ -707,7 +707,7 @@  cache.no-flush=on on backing-file
 --- Change cache mode after reopening child ---
 
     Cache mode:       writeback, direct
-    Cache mode:       writethrough
+    Cache mode:       writeback
     Cache mode:       writeback, direct
     Cache mode:       writeback, ignore flushes
 *** done