diff mbox series

[v2,1/4] block: nbd: Fix convert qcow2 compressed to nbd

Message ID 20200727215846.395443-2-nsoffer@redhat.com (mailing list archive)
State New, archived
Headers show
Series Fix convert to qcow2 compressed to NBD | expand

Commit Message

Nir Soffer July 27, 2020, 9:58 p.m. UTC
When converting to qcow2 compressed format, the last step is a special
zero length compressed write, ending in call to bdrv_co_truncate(). This
call always fails for the nbd driver since it does not implement
bdrv_co_truncate().

For block devices, which have the same limits, the call succeeds since
file driver implements bdrv_co_truncate(). If the caller asked to
truncate to the same or smaller size with exact=false, the truncate
succeeds. Implement the same logic for nbd.

Example failing without this change:

In one shell starts qemu-nbd:

$ truncate -s 1g test.tar
$ qemu-nbd --socket=/tmp/nbd.sock --persistent --format=raw --offset 1536 test.tar

In another shell convert an image to qcow2 compressed via NBD:

$ echo "disk data" > disk.raw
$ truncate -s 1g disk.raw
$ qemu-img convert -f raw -O qcow2 -c disk1.raw nbd+unix:///?socket=/tmp/nbd.sock; echo $?
1

qemu-img failed, but the conversion was successful:

$ qemu-img info nbd+unix:///?socket=/tmp/nbd.sock
image: nbd+unix://?socket=/tmp/nbd.sock
file format: qcow2
virtual size: 1 GiB (1073741824 bytes)
...

$ qemu-img check nbd+unix:///?socket=/tmp/nbd.sock
No errors were found on the image.
1/16384 = 0.01% allocated, 100.00% fragmented, 100.00% compressed clusters
Image end offset: 393216

$ qemu-img compare disk.raw nbd+unix:///?socket=/tmp/nbd.sock
Images are identical.

Fixes: https://bugzilla.redhat.com/1860627
Signed-off-by: Nir Soffer <nsoffer@redhat.com>
---
 block/nbd.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

Comments

Vladimir Sementsov-Ogievskiy July 28, 2020, 1:15 p.m. UTC | #1
28.07.2020 00:58, Nir Soffer wrote:
> When converting to qcow2 compressed format, the last step is a special
> zero length compressed write, ending in call to bdrv_co_truncate(). This
> call always fails for the nbd driver since it does not implement
> bdrv_co_truncate().
> 
> For block devices, which have the same limits, the call succeeds since
> file driver implements bdrv_co_truncate(). If the caller asked to
> truncate to the same or smaller size with exact=false, the truncate
> succeeds. Implement the same logic for nbd.
> 
> Example failing without this change:
> 
> In one shell starts qemu-nbd:
> 
> $ truncate -s 1g test.tar
> $ qemu-nbd --socket=/tmp/nbd.sock --persistent --format=raw --offset 1536 test.tar
> 
> In another shell convert an image to qcow2 compressed via NBD:
> 
> $ echo "disk data" > disk.raw
> $ truncate -s 1g disk.raw
> $ qemu-img convert -f raw -O qcow2 -c disk1.raw nbd+unix:///?socket=/tmp/nbd.sock; echo $?
> 1
> 
> qemu-img failed, but the conversion was successful:
> 
> $ qemu-img info nbd+unix:///?socket=/tmp/nbd.sock
> image: nbd+unix://?socket=/tmp/nbd.sock
> file format: qcow2
> virtual size: 1 GiB (1073741824 bytes)
> ...
> 
> $ qemu-img check nbd+unix:///?socket=/tmp/nbd.sock
> No errors were found on the image.
> 1/16384 = 0.01% allocated, 100.00% fragmented, 100.00% compressed clusters
> Image end offset: 393216
> 
> $ qemu-img compare disk.raw nbd+unix:///?socket=/tmp/nbd.sock
> Images are identical.
> 
> Fixes: https://bugzilla.redhat.com/1860627
> Signed-off-by: Nir Soffer <nsoffer@redhat.com>
> ---
>   block/nbd.c | 30 ++++++++++++++++++++++++++++++
>   1 file changed, 30 insertions(+)
> 
> diff --git a/block/nbd.c b/block/nbd.c
> index 65a4f56924..dcb0b03641 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -1966,6 +1966,33 @@ static void nbd_close(BlockDriverState *bs)
>       nbd_clear_bdrvstate(s);
>   }
>   
> +/*
> + * NBD cannot truncate, but if the caller asks to truncate to the same size, or
> + * to a smaller size with exact=false, there is no reason to fail the
> + * operation.
> + *
> + * Preallocation mode is ignored since it does not seems useful to fail when
> + * when never change anything.
> + */
> +static int coroutine_fn nbd_co_truncate(BlockDriverState *bs, int64_t offset,
> +                                        bool exact, PreallocMode prealloc,
> +                                        BdrvRequestFlags flags, Error **errp)
> +{
> +    BDRVNBDState *s = bs->opaque;
> +
> +    if (offset != s->info.size && exact) {
> +        error_setg(errp, "Cannot resize NBD nodes");
> +        return -ENOTSUP;
> +    }
> +
> +    if (offset > s->info.size) {
> +        error_setg(errp, "Cannot grow NBD nodes");
> +        return -EINVAL;
> +    }

I think that ENOTSUP actually is valid error code for both cases.. NBD protocol has experimental extension NBD_CMD_RESIZE, so one day we'll implement this. So, I think, it's not invalid, but just not supported yet. Still, not a big deal, so with ENOTSUP or EINVAL:
Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Also, may be better to manage it in generic layer:
If driver doesn't implement bdrv_co_truncate handler (or return ENOTSUP), and we are truncating to the same size, or to smaller size with exact=false, we just report success? Then we'll drop same code from file-posix.c

> +
> +    return 0;
> +}
> +
>   static int64_t nbd_getlength(BlockDriverState *bs)
>   {
>       BDRVNBDState *s = bs->opaque;
> @@ -2045,6 +2072,7 @@ static BlockDriver bdrv_nbd = {
>       .bdrv_co_flush_to_os        = nbd_co_flush,
>       .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
>       .bdrv_refresh_limits        = nbd_refresh_limits,
> +    .bdrv_co_truncate           = nbd_co_truncate,
>       .bdrv_getlength             = nbd_getlength,
>       .bdrv_detach_aio_context    = nbd_client_detach_aio_context,
>       .bdrv_attach_aio_context    = nbd_client_attach_aio_context,
> @@ -2072,6 +2100,7 @@ static BlockDriver bdrv_nbd_tcp = {
>       .bdrv_co_flush_to_os        = nbd_co_flush,
>       .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
>       .bdrv_refresh_limits        = nbd_refresh_limits,
> +    .bdrv_co_truncate           = nbd_co_truncate,
>       .bdrv_getlength             = nbd_getlength,
>       .bdrv_detach_aio_context    = nbd_client_detach_aio_context,
>       .bdrv_attach_aio_context    = nbd_client_attach_aio_context,
> @@ -2099,6 +2128,7 @@ static BlockDriver bdrv_nbd_unix = {
>       .bdrv_co_flush_to_os        = nbd_co_flush,
>       .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
>       .bdrv_refresh_limits        = nbd_refresh_limits,
> +    .bdrv_co_truncate           = nbd_co_truncate,
>       .bdrv_getlength             = nbd_getlength,
>       .bdrv_detach_aio_context    = nbd_client_detach_aio_context,
>       .bdrv_attach_aio_context    = nbd_client_attach_aio_context,
>
Eric Blake July 28, 2020, 2:28 p.m. UTC | #2
On 7/27/20 4:58 PM, Nir Soffer wrote:
> When converting to qcow2 compressed format, the last step is a special
> zero length compressed write, ending in call to bdrv_co_truncate(). This

in a call

> call always fails for the nbd driver since it does not implement
> bdrv_co_truncate().
> 
> For block devices, which have the same limits, the call succeeds since
> file driver implements bdrv_co_truncate(). If the caller asked to

since the file

> truncate to the same or smaller size with exact=false, the truncate
> succeeds. Implement the same logic for nbd.
> 
> Example failing without this change:
> 
> In one shell starts qemu-nbd:

start

> 
> $ truncate -s 1g test.tar
> $ qemu-nbd --socket=/tmp/nbd.sock --persistent --format=raw --offset 1536 test.tar
> 
> In another shell convert an image to qcow2 compressed via NBD:
> 
> $ echo "disk data" > disk.raw
> $ truncate -s 1g disk.raw
> $ qemu-img convert -f raw -O qcow2 -c disk1.raw nbd+unix:///?socket=/tmp/nbd.sock; echo $?
> 1
> 
> qemu-img failed, but the conversion was successful:
> 
> $ qemu-img info nbd+unix:///?socket=/tmp/nbd.sock
> image: nbd+unix://?socket=/tmp/nbd.sock
> file format: qcow2
> virtual size: 1 GiB (1073741824 bytes)
> ...
> 
> $ qemu-img check nbd+unix:///?socket=/tmp/nbd.sock
> No errors were found on the image.
> 1/16384 = 0.01% allocated, 100.00% fragmented, 100.00% compressed clusters
> Image end offset: 393216
> 
> $ qemu-img compare disk.raw nbd+unix:///?socket=/tmp/nbd.sock
> Images are identical.
> 
> Fixes: https://bugzilla.redhat.com/1860627
> Signed-off-by: Nir Soffer <nsoffer@redhat.com>
> ---
>   block/nbd.c | 30 ++++++++++++++++++++++++++++++
>   1 file changed, 30 insertions(+)

Reviewed-by: Eric Blake <eblake@redhat.com>

> 
> diff --git a/block/nbd.c b/block/nbd.c
> index 65a4f56924..dcb0b03641 100644
> --- a/block/nbd.c
> +++ b/block/nbd.c
> @@ -1966,6 +1966,33 @@ static void nbd_close(BlockDriverState *bs)
>       nbd_clear_bdrvstate(s);
>   }
>   
> +/*
> + * NBD cannot truncate, but if the caller asks to truncate to the same size, or
> + * to a smaller size with exact=false, there is no reason to fail the
> + * operation.
> + *
> + * Preallocation mode is ignored since it does not seems useful to fail when
> + * when never change anything.

s/when when/when we/

I tested with a quick hack to qemu-io-cmds.c:

diff --git i/qemu-io-cmds.c w/qemu-io-cmds.c
index 851f07e8f8b9..baeae86d8c85 100644
--- i/qemu-io-cmds.c
+++ w/qemu-io-cmds.c
@@ -1715,7 +1715,7 @@ static int truncate_f(BlockBackend *blk, int argc, 
char **argv)
       * exact=true.  It is better to err on the "emit more errors" side
       * than to be overly permissive.
       */
-    ret = blk_truncate(blk, offset, true, PREALLOC_MODE_OFF, 0, 
&local_err);
+    ret = blk_truncate(blk, offset, false, PREALLOC_MODE_OFF, 0, 
&local_err);
      if (ret < 0) {
          error_report_err(local_err);
          return ret;

[We should _really_ improve that command to take options, so you can 
control whether to be exact and what prealloc mode on the fly rather 
than hard-coded, but that's a different patch for a later day]

and with that hack in place, I observed:
$ truncate --size=3m file
$ ./qemu-nbd -f raw file
$ ./qemu-io -f raw nbd://localhost:10809
qemu-io> length
3 MiB
qemu-io> truncate 2m
qemu-io> length
3 MiB
qemu-io> truncate 4m
qemu-io: Cannot grow NBD nodes
qemu-io> length
3 MiB

so my initial worry that qemu would see the shrunken size, and therefore 
a later resize back up to the actual NBD limit would have to pay 
attention to preallocation mode for that portion of the file, appears to 
not matter.  But if we can find a case where it does matter, I guess 
it's still appropriate for a followup for -rc3.  Meanwhile, I'm queuing 
this for -rc2.
Eric Blake July 28, 2020, 2:30 p.m. UTC | #3
On 7/28/20 8:15 AM, Vladimir Sementsov-Ogievskiy wrote:
> 28.07.2020 00:58, Nir Soffer wrote:
>> When converting to qcow2 compressed format, the last step is a special
>> zero length compressed write, ending in call to bdrv_co_truncate(). This
>> call always fails for the nbd driver since it does not implement
>> bdrv_co_truncate().
>>

>> +static int coroutine_fn nbd_co_truncate(BlockDriverState *bs, int64_t 
>> offset,
>> +                                        bool exact, PreallocMode 
>> prealloc,
>> +                                        BdrvRequestFlags flags, Error 
>> **errp)
>> +{
>> +    BDRVNBDState *s = bs->opaque;
>> +
>> +    if (offset != s->info.size && exact) {
>> +        error_setg(errp, "Cannot resize NBD nodes");
>> +        return -ENOTSUP;
>> +    }
>> +
>> +    if (offset > s->info.size) {
>> +        error_setg(errp, "Cannot grow NBD nodes");
>> +        return -EINVAL;
>> +    }
> 
> I think that ENOTSUP actually is valid error code for both cases.. NBD 
> protocol has experimental extension NBD_CMD_RESIZE, so one day we'll 
> implement this. So, I think, it's not invalid, but just not supported 
> yet. Still, not a big deal, so with ENOTSUP or EINVAL:
> Reviewed-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Indeed, if we ever get around to fully specifying how NBD_CMD_RESIZE 
should even work, we'll be revisiting this code to implement that.

> 
> Also, may be better to manage it in generic layer:
> If driver doesn't implement bdrv_co_truncate handler (or return 
> ENOTSUP), and we are truncating to the same size, or to smaller size 
> with exact=false, we just report success? Then we'll drop same code from 
> file-posix.c

That was also my question on v1; but given the closeness to the release, 
this is a minimal change appropriate for -rc2, while changing the 
generic layer may have unintended consequences.
diff mbox series

Patch

diff --git a/block/nbd.c b/block/nbd.c
index 65a4f56924..dcb0b03641 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -1966,6 +1966,33 @@  static void nbd_close(BlockDriverState *bs)
     nbd_clear_bdrvstate(s);
 }
 
+/*
+ * NBD cannot truncate, but if the caller asks to truncate to the same size, or
+ * to a smaller size with exact=false, there is no reason to fail the
+ * operation.
+ *
+ * Preallocation mode is ignored since it does not seems useful to fail when
+ * when never change anything.
+ */
+static int coroutine_fn nbd_co_truncate(BlockDriverState *bs, int64_t offset,
+                                        bool exact, PreallocMode prealloc,
+                                        BdrvRequestFlags flags, Error **errp)
+{
+    BDRVNBDState *s = bs->opaque;
+
+    if (offset != s->info.size && exact) {
+        error_setg(errp, "Cannot resize NBD nodes");
+        return -ENOTSUP;
+    }
+
+    if (offset > s->info.size) {
+        error_setg(errp, "Cannot grow NBD nodes");
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
 static int64_t nbd_getlength(BlockDriverState *bs)
 {
     BDRVNBDState *s = bs->opaque;
@@ -2045,6 +2072,7 @@  static BlockDriver bdrv_nbd = {
     .bdrv_co_flush_to_os        = nbd_co_flush,
     .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
     .bdrv_refresh_limits        = nbd_refresh_limits,
+    .bdrv_co_truncate           = nbd_co_truncate,
     .bdrv_getlength             = nbd_getlength,
     .bdrv_detach_aio_context    = nbd_client_detach_aio_context,
     .bdrv_attach_aio_context    = nbd_client_attach_aio_context,
@@ -2072,6 +2100,7 @@  static BlockDriver bdrv_nbd_tcp = {
     .bdrv_co_flush_to_os        = nbd_co_flush,
     .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
     .bdrv_refresh_limits        = nbd_refresh_limits,
+    .bdrv_co_truncate           = nbd_co_truncate,
     .bdrv_getlength             = nbd_getlength,
     .bdrv_detach_aio_context    = nbd_client_detach_aio_context,
     .bdrv_attach_aio_context    = nbd_client_attach_aio_context,
@@ -2099,6 +2128,7 @@  static BlockDriver bdrv_nbd_unix = {
     .bdrv_co_flush_to_os        = nbd_co_flush,
     .bdrv_co_pdiscard           = nbd_client_co_pdiscard,
     .bdrv_refresh_limits        = nbd_refresh_limits,
+    .bdrv_co_truncate           = nbd_co_truncate,
     .bdrv_getlength             = nbd_getlength,
     .bdrv_detach_aio_context    = nbd_client_detach_aio_context,
     .bdrv_attach_aio_context    = nbd_client_attach_aio_context,