diff mbox series

block/io: accept NULL qiov in bdrv_pad_request

Message ID 20240319091341.303414-1-f.ebner@proxmox.com (mailing list archive)
State New, archived
Headers show
Series block/io: accept NULL qiov in bdrv_pad_request | expand

Commit Message

Fiona Ebner March 19, 2024, 9:13 a.m. UTC
From: Stefan Reiter <s.reiter@proxmox.com>

Some operations, e.g. block-stream, perform reads while discarding the
results (only copy-on-read matters). In this case, they will pass NULL
as the target QEMUIOVector, which will however trip bdrv_pad_request,
since it wants to extend its passed vector. In particular, this is the
case for the blk_co_preadv() call in stream_populate().

If there is no qiov, no operation can be done with it, but the bytes
and offset still need to be updated, so the subsequent aligned read
will actually be aligned and not run into an assertion failure.

In particular, this can happen when the request alignment of the top
node is larger than the allocated part of the bottom node, in which
case padding becomes necessary. For example:

> ./qemu-img create /tmp/backing.qcow2 -f qcow2 64M -o cluster_size=32768
> ./qemu-io -c "write -P42 0x0 0x1" /tmp/backing.qcow2
> ./qemu-img create /tmp/top.qcow2 -f qcow2 64M -b /tmp/backing.qcow2 -F qcow2
> ./qemu-system-x86_64 --qmp stdio \
> --blockdev qcow2,node-name=node0,file.driver=file,file.filename=/tmp/top.qcow2 \
> <<EOF
> {"execute": "qmp_capabilities"}
> {"execute": "blockdev-add", "arguments": { "driver": "compress", "file": "node0", "node-name": "node1" } }
> {"execute": "block-stream", "arguments": { "job-id": "stream0", "device": "node1" } }
> EOF

Originally-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: do update bytes and offset in any case
     add reproducer to commit message]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
 block/io.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

Comments

Stefan Hajnoczi March 19, 2024, 6:50 p.m. UTC | #1
On Tue, Mar 19, 2024 at 10:13:41AM +0100, Fiona Ebner wrote:
> From: Stefan Reiter <s.reiter@proxmox.com>
> 
> Some operations, e.g. block-stream, perform reads while discarding the
> results (only copy-on-read matters). In this case, they will pass NULL
> as the target QEMUIOVector, which will however trip bdrv_pad_request,
> since it wants to extend its passed vector. In particular, this is the
> case for the blk_co_preadv() call in stream_populate().
> 
> If there is no qiov, no operation can be done with it, but the bytes
> and offset still need to be updated, so the subsequent aligned read
> will actually be aligned and not run into an assertion failure.
> 
> In particular, this can happen when the request alignment of the top
> node is larger than the allocated part of the bottom node, in which
> case padding becomes necessary. For example:
> 
> > ./qemu-img create /tmp/backing.qcow2 -f qcow2 64M -o cluster_size=32768
> > ./qemu-io -c "write -P42 0x0 0x1" /tmp/backing.qcow2
> > ./qemu-img create /tmp/top.qcow2 -f qcow2 64M -b /tmp/backing.qcow2 -F qcow2
> > ./qemu-system-x86_64 --qmp stdio \
> > --blockdev qcow2,node-name=node0,file.driver=file,file.filename=/tmp/top.qcow2 \
> > <<EOF
> > {"execute": "qmp_capabilities"}
> > {"execute": "blockdev-add", "arguments": { "driver": "compress", "file": "node0", "node-name": "node1" } }
> > {"execute": "block-stream", "arguments": { "job-id": "stream0", "device": "node1" } }
> > EOF

Hi Fiona,
Can you add a qemu-iotests test case for this issue?

Thanks,
Stefan

> 
> Originally-by: Stefan Reiter <s.reiter@proxmox.com>
> Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
> [FE: do update bytes and offset in any case
>      add reproducer to commit message]
> Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
> ---
>  block/io.c | 31 +++++++++++++++++++------------
>  1 file changed, 19 insertions(+), 12 deletions(-)
> 
> diff --git a/block/io.c b/block/io.c
> index 33150c0359..395bea3bac 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -1726,22 +1726,29 @@ static int bdrv_pad_request(BlockDriverState *bs,
>          return 0;
>      }
>  
> -    sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
> -                                  &sliced_head, &sliced_tail,
> -                                  &sliced_niov);
> +    /*
> +     * For prefetching in stream_populate(), no qiov is passed along, because
> +     * only copy-on-read matters.
> +     */
> +    if (qiov && *qiov) {
> +        sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
> +                                      &sliced_head, &sliced_tail,
> +                                      &sliced_niov);
>  
> -    /* Guaranteed by bdrv_check_request32() */
> -    assert(*bytes <= SIZE_MAX);
> -    ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
> -                                  sliced_head, *bytes);
> -    if (ret < 0) {
> -        bdrv_padding_finalize(pad);
> -        return ret;
> +        /* Guaranteed by bdrv_check_request32() */
> +        assert(*bytes <= SIZE_MAX);
> +        ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
> +                                      sliced_head, *bytes);
> +        if (ret < 0) {
> +            bdrv_padding_finalize(pad);
> +            return ret;
> +        }
> +        *qiov = &pad->local_qiov;
> +        *qiov_offset = 0;
>      }
> +
>      *bytes += pad->head + pad->tail;
>      *offset -= pad->head;
> -    *qiov = &pad->local_qiov;
> -    *qiov_offset = 0;
>      if (padded) {
>          *padded = true;
>      }
> -- 
> 2.39.2
> 
>
diff mbox series

Patch

diff --git a/block/io.c b/block/io.c
index 33150c0359..395bea3bac 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1726,22 +1726,29 @@  static int bdrv_pad_request(BlockDriverState *bs,
         return 0;
     }
 
-    sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
-                                  &sliced_head, &sliced_tail,
-                                  &sliced_niov);
+    /*
+     * For prefetching in stream_populate(), no qiov is passed along, because
+     * only copy-on-read matters.
+     */
+    if (qiov && *qiov) {
+        sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
+                                      &sliced_head, &sliced_tail,
+                                      &sliced_niov);
 
-    /* Guaranteed by bdrv_check_request32() */
-    assert(*bytes <= SIZE_MAX);
-    ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
-                                  sliced_head, *bytes);
-    if (ret < 0) {
-        bdrv_padding_finalize(pad);
-        return ret;
+        /* Guaranteed by bdrv_check_request32() */
+        assert(*bytes <= SIZE_MAX);
+        ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
+                                      sliced_head, *bytes);
+        if (ret < 0) {
+            bdrv_padding_finalize(pad);
+            return ret;
+        }
+        *qiov = &pad->local_qiov;
+        *qiov_offset = 0;
     }
+
     *bytes += pad->head + pad->tail;
     *offset -= pad->head;
-    *qiov = &pad->local_qiov;
-    *qiov_offset = 0;
     if (padded) {
         *padded = true;
     }