diff mbox series

[v5,11/12] nbd: Expose actual depth in qemu:allocation-depth

Message ID 20201023183652.478921-12-eblake@redhat.com (mailing list archive)
State New, archived
Headers show
Series Exposing backing-chain allocation over NBD | expand

Commit Message

Eric Blake Oct. 23, 2020, 6:36 p.m. UTC
Preserve the tri-state encoding in the low bits, as that still remains
a valuable way to utilize qemu-img map with x-dirty-bitmap for
accessing quick information without needing a third-party NBD client.
But now that the block layer gives us an actual depth, we can easily
expose it in the remaining bits of our metadata context (leaving two
bits reserved, to make it easier to read depth out of a raw hex
number).  This assumes no one runs a backing chain larger than 256M
elements.

iotest 309 remains unchanged (an example of the above-mentioned
x-dirty-bitmap hack); actually testing the new bits requires libnbd or
a similar client, and I didn't want to make iotests depend on libnbd
at this point in time; rather, see the libnbd project for interop
tests that exercise this new feature.

Signed-off-by: Eric Blake <eblake@redhat.com>
---
 docs/interop/nbd.txt |  6 +++++-
 include/block/nbd.h  |  2 ++
 nbd/server.c         | 27 +++++++++++++++------------
 3 files changed, 22 insertions(+), 13 deletions(-)

Comments

Vladimir Sementsov-Ogievskiy Oct. 24, 2020, 9:59 a.m. UTC | #1
23.10.2020 21:36, Eric Blake wrote:
> Preserve the tri-state encoding in the low bits, as that still remains
> a valuable way to utilize qemu-img map with x-dirty-bitmap for
> accessing quick information without needing a third-party NBD client.

Hmm.. that doesn't sound like a good reason for redundant information in the protocol. Previously, a good reason was the additional effort needed to implement the server part, but you've implemented it. And if we export depth anyway, it seems better to hack nbd_client_co_block_status a bit to convert extent.flags appropriately if the metadata context is "qemu:allocation-depth" (to keep x-dirty-bitmap working), than to have a workaround at the protocol layer.


> But now that the block layer gives us an actual depth, we can easily
> expose it in the remaining bits of our metadata context (leaving two
> bits reserved, to make it easier to read depth out of a raw hex
> number).  This assumes no one runs a backing chain larger than 256M
> elements.
> 
> iotest 309 remains unchanged (an example of the above-mentioned
> x-dirty-bitmap hack); actually testing the new bits requires libnbd or
> a similar client, and I didn't want to make iotests depend on libnbd
> at this point in time; rather, see the libnbd project for interop
> tests that exercise this new feature.
> 
> Signed-off-by: Eric Blake <eblake@redhat.com>
> ---
>   docs/interop/nbd.txt |  6 +++++-
>   include/block/nbd.h  |  2 ++
>   nbd/server.c         | 27 +++++++++++++++------------
>   3 files changed, 22 insertions(+), 13 deletions(-)
> 
> diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt
> index 7e948bd42218..d90723ffe991 100644
> --- a/docs/interop/nbd.txt
> +++ b/docs/interop/nbd.txt
> @@ -34,7 +34,8 @@ the image, with a single metadata context named:
> 
>       qemu:allocation-depth
> 
> -In the allocation depth context, bits 0 and 1 form a tri-state value:
> +In the allocation depth context, bits 0 and 1 form a tri-state value,
> +along with 28 bits giving an actual depth:
> 
>       bits 0-1: 00: NBD_STATE_DEPTH_UNALLOC, the extent is unallocated
>                 01: NBD_STATE_DEPTH_LOCAL, the extent is allocated in the
> @@ -42,6 +43,9 @@ In the allocation depth context, bits 0 and 1 form a tri-state value:
>                 10: NBD_STATE_DEPTH_BACKING, the extent is inherited from a
>                     backing layer
>                 11: invalid, never returned
> +    bits 2-3: reserved, always 0
> +    bits 4-31: NBD_STATE_DEPTH_RAW, the backing layer depth (0 if
> +               UNALLOC, 1 for LOCAL, 2 or more for BACKING)
> 
>   For NBD_OPT_LIST_META_CONTEXT the following queries are supported
>   in addition to the specific "qemu:allocation-depth" and
> diff --git a/include/block/nbd.h b/include/block/nbd.h
> index 956687f5c368..3c0692aec642 100644
> --- a/include/block/nbd.h
> +++ b/include/block/nbd.h
> @@ -264,6 +264,8 @@ enum {
>   #define NBD_STATE_DEPTH_UNALLOC      0x0
>   #define NBD_STATE_DEPTH_LOCAL        0x1
>   #define NBD_STATE_DEPTH_BACKING      0x2
> +#define NBD_STATE_DEPTH_RAW_MASK     0xfffffff0
> +#define NBD_STATE_DEPTH_RAW_SHIFT    4
> 
>   static inline bool nbd_reply_type_is_error(int type)
>   {
> diff --git a/nbd/server.c b/nbd/server.c
> index 53526090b0a2..afa79e63a7a6 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -2037,22 +2037,25 @@ static int blockalloc_to_extents(BlockDriverState *bs, uint64_t offset,
>       while (bytes) {
>           uint32_t flags;
>           int64_t num;
> -        int ret = bdrv_is_allocated(bs, offset, bytes, &num);
> +        int depth = bdrv_is_allocated_above(bs, NULL, false, offset, bytes,
> +                                            &num);
> 
> -        if (ret < 0) {
> -            return ret;
> -        }
> -
> -        if (ret == 1) {
> +        switch (depth) {
> +        case 0:
> +            flags = NBD_STATE_DEPTH_UNALLOC;
> +            break;
> +        case 1:
>               flags = NBD_STATE_DEPTH_LOCAL;
> -        } else {
> -            ret = bdrv_is_allocated_above(bs, NULL, false, offset, num,
> -                                          &num);
> -            if (ret < 0) {
> -                return ret;
> +            break;
> +        default:
> +            if (depth < 0) {
> +                return depth;
>               }
> -            flags = ret ? NBD_STATE_DEPTH_BACKING : NBD_STATE_DEPTH_UNALLOC;
> +            flags = NBD_STATE_DEPTH_BACKING;
> +            break;
>           }
> +        assert(depth <= UINT32_MAX >> NBD_STATE_DEPTH_RAW_SHIFT);
> +        flags |= depth << NBD_STATE_DEPTH_RAW_SHIFT;
> 
>           if (nbd_extent_array_add(ea, num, flags) < 0) {
>               return 0;
>
Eric Blake Oct. 26, 2020, 12:31 p.m. UTC | #2
On 10/24/20 4:59 AM, Vladimir Sementsov-Ogievskiy wrote:
> 23.10.2020 21:36, Eric Blake wrote:
>> Preserve the tri-state encoding in the low bits, as that still remains
>> a valuable way to utilize qemu-img map with x-dirty-bitmap for
>> accessing quick information without needing a third-party NBD client.
> 
> Hmm.. that doesn't sound like a good reason for redundant information in
> the protocol. Previously, a good reason was the additional effort needed
> to implement the server part, but you've implemented it. And if we export
> depth anyway, it seems better to hack nbd_client_co_block_status a bit to
> convert extent.flags appropriately if the metadata context is
> "qemu:allocation-depth" (to keep x-dirty-bitmap working), than to have a
> workaround at the protocol layer.

I'm happy to respin this to expose JUST a depth rather than redundant
information, but time is short if we want it in 5.2 (as soft freeze is
this week).  I'll see what I can get to today; I'll rearrange the series
to put multiple bitmap exports first (as that appears ready), while
saving 'qemu-nbd -A' until we're happy with the qemu:allocation-depth
semantics.  After all, once we release something, we've committed to
that user interface.
diff mbox series

Patch

diff --git a/docs/interop/nbd.txt b/docs/interop/nbd.txt
index 7e948bd42218..d90723ffe991 100644
--- a/docs/interop/nbd.txt
+++ b/docs/interop/nbd.txt
@@ -34,7 +34,8 @@  the image, with a single metadata context named:

     qemu:allocation-depth

-In the allocation depth context, bits 0 and 1 form a tri-state value:
+In the allocation depth context, bits 0 and 1 form a tri-state value,
+along with 28 bits giving an actual depth:

     bits 0-1: 00: NBD_STATE_DEPTH_UNALLOC, the extent is unallocated
               01: NBD_STATE_DEPTH_LOCAL, the extent is allocated in the
@@ -42,6 +43,9 @@  In the allocation depth context, bits 0 and 1 form a tri-state value:
               10: NBD_STATE_DEPTH_BACKING, the extent is inherited from a
                   backing layer
               11: invalid, never returned
+    bits 2-3: reserved, always 0
+    bits 4-31: NBD_STATE_DEPTH_RAW, the backing layer depth (0 if
+               UNALLOC, 1 for LOCAL, 2 or more for BACKING)

 For NBD_OPT_LIST_META_CONTEXT the following queries are supported
 in addition to the specific "qemu:allocation-depth" and
diff --git a/include/block/nbd.h b/include/block/nbd.h
index 956687f5c368..3c0692aec642 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -264,6 +264,8 @@  enum {
 #define NBD_STATE_DEPTH_UNALLOC      0x0
 #define NBD_STATE_DEPTH_LOCAL        0x1
 #define NBD_STATE_DEPTH_BACKING      0x2
+#define NBD_STATE_DEPTH_RAW_MASK     0xfffffff0
+#define NBD_STATE_DEPTH_RAW_SHIFT    4

 static inline bool nbd_reply_type_is_error(int type)
 {
diff --git a/nbd/server.c b/nbd/server.c
index 53526090b0a2..afa79e63a7a6 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -2037,22 +2037,25 @@  static int blockalloc_to_extents(BlockDriverState *bs, uint64_t offset,
     while (bytes) {
         uint32_t flags;
         int64_t num;
-        int ret = bdrv_is_allocated(bs, offset, bytes, &num);
+        int depth = bdrv_is_allocated_above(bs, NULL, false, offset, bytes,
+                                            &num);

-        if (ret < 0) {
-            return ret;
-        }
-
-        if (ret == 1) {
+        switch (depth) {
+        case 0:
+            flags = NBD_STATE_DEPTH_UNALLOC;
+            break;
+        case 1:
             flags = NBD_STATE_DEPTH_LOCAL;
-        } else {
-            ret = bdrv_is_allocated_above(bs, NULL, false, offset, num,
-                                          &num);
-            if (ret < 0) {
-                return ret;
+            break;
+        default:
+            if (depth < 0) {
+                return depth;
             }
-            flags = ret ? NBD_STATE_DEPTH_BACKING : NBD_STATE_DEPTH_UNALLOC;
+            flags = NBD_STATE_DEPTH_BACKING;
+            break;
         }
+        assert(depth <= UINT32_MAX >> NBD_STATE_DEPTH_RAW_SHIFT);
+        flags |= depth << NBD_STATE_DEPTH_RAW_SHIFT;

         if (nbd_extent_array_add(ea, num, flags) < 0) {
             return 0;