diff mbox

[RFC,1/8] block: Introduce bdrv_co_map_range API

Message ID 20180329110914.20888-2-famz@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Fam Zheng March 29, 2018, 11:09 a.m. UTC
A little similar to bdrv_co_block_status but with the capability to do
allocation depending on the BDRV_REQ_ALLOCATE flag, this API is the
building block needed by copy offloading to work for format drivers.

The idea is the format drivers return "where" to copy from/to in its
underlying "file", then the block layer will drill down until it reaches
a protocol layer that can do offloaded copying.

Signed-off-by: Fam Zheng <famz@redhat.com>
---
 block/io.c                | 44 ++++++++++++++++++++++++++++++++++++++++++++
 include/block/block.h     |  8 +++++++-
 include/block/block_int.h | 21 +++++++++++++++++++++
 3 files changed, 72 insertions(+), 1 deletion(-)

Comments

Stefan Hajnoczi April 4, 2018, 12:57 p.m. UTC | #1
On Thu, Mar 29, 2018 at 07:09:07PM +0800, Fam Zheng wrote:
> diff --git a/block/io.c b/block/io.c
> index bd9a19a9c4..1b4cfcacb1 100644
> --- a/block/io.c
> +++ b/block/io.c
> @@ -2826,3 +2826,47 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host)
>          bdrv_unregister_buf(child->bs, host);
>      }
>  }
> +
> +int coroutine_fn bdrv_co_map_range(BdrvChild *child, int64_t offset,
> +                                   int64_t bytes, int64_t *pnum, int64_t *map,

s/map/file_offset/ is clearer, since this function has self-expanatory
'offset' and 'file' arguments.  This is the "offset within the file" ->
file_offset.

> +    /**
> +     * Get or create offset mappings for a virtual range.
> +     *
> +     * @offset: the offset of the beginning of the range.

Adding the units and avoiding reusing "offset" in the description:

"byte position of the beginning of the range"

> +     * @bytes: the maximum bytes of the range to map.
> +     * @pnum: at return, this will point to the number of bytes in the returned
> +     * mapping status
> +     * @map: at return, this will point to the offset which the range maps to
> +     * @file: at return, this will point to the file where the data is mapped.
> +     * If it is set to bs, it means the mapping is terminal and the result can
> +     * be used for read/write and copy_range; if it is NULL, it means the range
> +     * doesn't map to a file, or no I/O to any file is necessary; otherwise, it
> +     * means the query should be passed down to an underlying format/protocol.

What does "the query should be passed down to an underlying
format/protocol" mean?

> +     * @flags: flags for the request. BDRV_REQ_ALLOCATE means the range
> +     * should be allocated if necessary.
> +     */

What are return values?

Please include a statement describing how long the mapping is considered
stable.  For example, a qcow2 copy-on-write could "move" the data
somewhere else.  Therefore it's not safe to store the value of "map" and
use it again later on.

Additionally, please explain what the caller is allowed to do with the
returned information.  Using qcow2 copy-on-write as an example again, it
is not okay to write to "map" since that bypasses qcow2's COW mechanism.
Stefan Hajnoczi April 4, 2018, 1:01 p.m. UTC | #2
On Thu, Mar 29, 2018 at 07:09:07PM +0800, Fam Zheng wrote:
> +int coroutine_fn bdrv_co_map_range(BdrvChild *child, int64_t offset,
> +                                   int64_t bytes, int64_t *pnum, int64_t *map,

I just noticed that .bdrv_co_block_status() also calls the argument
"map".  In that case, let's keep it "map" unless you want to rename
"map" to "file_offset" in .bdrv_co_block_status() too.

Stefan
diff mbox

Patch

diff --git a/block/io.c b/block/io.c
index bd9a19a9c4..1b4cfcacb1 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2826,3 +2826,47 @@  void bdrv_unregister_buf(BlockDriverState *bs, void *host)
         bdrv_unregister_buf(child->bs, host);
     }
 }
+
+int coroutine_fn bdrv_co_map_range(BdrvChild *child, int64_t offset,
+                                   int64_t bytes, int64_t *pnum, int64_t *map,
+                                   BlockDriverState **file,
+                                   int flags)
+{
+    BlockDriverState *bs = child->bs;
+
+    if (bs->encrypted) {
+        return -ENOTSUP;
+    }
+    while (bs && bs->drv && bs->drv->bdrv_co_map_range) {
+        int64_t cur_pnum, cur_map;
+        BlockDriverState *cur_file = NULL;
+        int r = bs->drv->bdrv_co_map_range(bs, offset, bytes,
+                                           &cur_pnum, &cur_map, &cur_file,
+                                           flags);
+        if (r < 0) {
+            return r;
+        }
+        offset = cur_map;
+        bytes = cur_pnum;
+        if (pnum) {
+            *pnum = cur_pnum;
+        }
+        if (map) {
+            *map = cur_map;
+        }
+        if (!(r & BDRV_BLOCK_OFFSET_VALID)) {
+            return -ENOTSUP;
+        }
+        if (file) {
+            *file = cur_file;
+        }
+        if (!cur_file) {
+            return -ENOTSUP;
+        }
+        if (cur_file == bs) {
+            return r;
+        }
+        bs = cur_file;
+    }
+    return -ENOTSUP;
+}
diff --git a/include/block/block.h b/include/block/block.h
index cdec3639a3..21d3e9db32 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -53,9 +53,10 @@  typedef enum {
     BDRV_REQ_NO_SERIALISING     = 0x8,
     BDRV_REQ_FUA                = 0x10,
     BDRV_REQ_WRITE_COMPRESSED   = 0x20,
+    BDRV_REQ_ALLOCATE           = 0x40,
 
     /* Mask of valid flags */
-    BDRV_REQ_MASK               = 0x3f,
+    BDRV_REQ_MASK               = 0x7f,
 } BdrvRequestFlags;
 
 typedef struct BlockSizes {
@@ -604,4 +605,9 @@  bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
  */
 void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
 void bdrv_unregister_buf(BlockDriverState *bs, void *host);
+
+int bdrv_co_map_range(BdrvChild *child, int64_t offset,
+                      int64_t bytes, int64_t *pnum, int64_t *map,
+                      BlockDriverState **file,
+                      int flags);
 #endif
diff --git a/include/block/block_int.h b/include/block/block_int.h
index c4dd1d4bb8..3f3f6f3efd 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -475,6 +475,27 @@  struct BlockDriver {
      */
     void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
     void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host);
+
+    /**
+     * Get or create offset mappings for a virtual range.
+     *
+     * @offset: the offset of the beginning of the range.
+     * @bytes: the maximum bytes of the range to map.
+     * @pnum: at return, this will point to the number of bytes in the returned
+     * mapping status
+     * @map: at return, this will point to the offset which the range maps to
+     * @file: at return, this will point to the file where the data is mapped.
+     * If it is set to bs, it means the mapping is terminal and the result can
+     * be used for read/write and copy_range; if it is NULL, it means the range
+     * doesn't map to a file, or no I/O to any file is necessary; otherwise, it
+     * means the query should be passed down to an underlying format/protocol.
+     * @flags: flags for the request. BDRV_REQ_ALLOCATE means the range
+     * should be allocated if necessary.
+     */
+    int (*bdrv_co_map_range)(BlockDriverState *bs, int64_t offset,
+                             int64_t bytes, int64_t *pnum, int64_t *map,
+                             BlockDriverState **file,
+                             int flags);
     QLIST_ENTRY(BlockDriver) list;
 };