diff mbox

[2/5] qcow2: Implement .bdrv_co_preadv()

Message ID 1464974478-23598-3-git-send-email-kwolf@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Kevin Wolf June 3, 2016, 5:21 p.m. UTC
Reading from qcow2 images now works at byte granularity.

Most of the affected code in qcow2 actually gets simpler with this
change. The only exception is encryption, which works on fixed 512-byte
blocks; in order to keep this working, bs->request_alignment is set for
encrypted images.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2-cluster.c |  18 ++++-----
 block/qcow2.c         | 108 +++++++++++++++++++++++++++-----------------------
 block/qcow2.h         |   2 +-
 3 files changed, 67 insertions(+), 61 deletions(-)

Comments

Eric Blake June 3, 2016, 7:18 p.m. UTC | #1
On 06/03/2016 11:21 AM, Kevin Wolf wrote:
> Reading from qcow2 images is now byte granularity.
> 
> Most of the affected code in qcow2 actually gets simpler with this
> change. The only exception is encryption, which is fixed on 512 bytes
> blocks; in order to keep this working, bs->request_alignment is set for
> encrypted images.
> 
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  block/qcow2-cluster.c |  18 ++++-----
>  block/qcow2.c         | 108 +++++++++++++++++++++++++++-----------------------
>  block/qcow2.h         |   2 +-
>  3 files changed, 67 insertions(+), 61 deletions(-)
> 

> @@ -467,16 +468,16 @@ out:
>   * For a given offset of the disk image, find the cluster offset in
>   * qcow2 file. The offset is stored in *cluster_offset.
>   *
> - * on entry, *num is the number of contiguous sectors we'd like to
> + * on entry, *bytes is the number of contiguous bytes we'd like to

maybe s/number/maximum number/

>   * access following offset.
>   *
> - * on exit, *num is the number of contiguous sectors we can read.
> + * on exit, *bytes is the number of contiguous bytes we can read.

maybe s/we can read/with the same cluster type/

>   *
>   * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
>   * cases.
>   */
>  int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
> -    int *num, uint64_t *cluster_offset)
> +                             unsigned int *bytes, uint64_t *cluster_offset)
>  {
>      BDRVQcow2State *s = bs->opaque;
>      unsigned int l2_index;
> @@ -485,12 +486,9 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
>      unsigned int offset_in_cluster, nb_clusters;
>      uint64_t bytes_available, bytes_needed;
>      int ret;
> -    unsigned int bytes;
> -
> -    bytes = *num * BDRV_SECTOR_SIZE;

One potential overflow gone...

>  
>      offset_in_cluster = offset_into_cluster(s, offset);
> -    bytes_needed = bytes + offset_in_cluster;
> +    bytes_needed = *bytes + offset_in_cluster;

...but not the other.  Looks like your callers limit their input 'bytes'
to at most INT_MAX, and therefore it happens to not overflow unsigned
int in practice, but you may want an assertion?

> +++ b/block/qcow2.c
> @@ -975,6 +975,9 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
>          }
>  
>          bs->encrypted = 1;
> +
> +        /* Encryption works on a sector granularity */
> +        bs->request_alignment = BDRV_SECTOR_SIZE;

Trivial conflict with my patch 5/5 that moves request_alignment into
BlockLimits (if we even want that, since I still have to find why my
patch makes qemu-iotests 77 hang)

>  
> -static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
> -                          int remaining_sectors, QEMUIOVector *qiov)
> +static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
> +                                        uint64_t bytes, QEMUIOVector *qiov,
> +                                        int flags)

Wait a minute.  .bdrv_co_preadv() takes uint64_t bytes, while
bdrv_co_preadv() takes only unsigned int bytes?  Eww.  We've got some
more scrubbing work to do.  At least it is going to get easier to
universally turn on full 64-bit byte interfaces everywhere, especially
once my patches for auto-fragmenting at max_transfer_length land (which
in turn won't be posted before your conversion of bdrv_aligned_preadv()
to a byte interface).  So no impact to this patch.

>  {
>      BDRVQcow2State *s = bs->opaque;
> -    int index_in_cluster, n1;
> +    int offset_in_cluster, n1;
>      int ret;
> -    int cur_nr_sectors; /* number of sectors in current iteration */
> +    unsigned int cur_bytes; /* number of sectors in current iteration */

comment is stale now

>      uint64_t cluster_offset = 0;
>      uint64_t bytes_done = 0;
>      QEMUIOVector hd_qiov;
> @@ -1389,26 +1402,24 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
>  
>      qemu_co_mutex_lock(&s->lock);
>  
> -    while (remaining_sectors != 0) {
> +    while (bytes != 0) {
>  
>          /* prepare next request */
> -        cur_nr_sectors = remaining_sectors;
> +        cur_bytes = MIN(bytes, INT_MAX);
>          if (s->cipher) {
> -            cur_nr_sectors = MIN(cur_nr_sectors,
> -                QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
> +            cur_bytes = MIN(cur_bytes,
> +                            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);

Again, my work on auto-fragmenting at the block layer should make it so
that we can eventually further simplify this part to just assert that
bytes doesn't exceed max_transfer_length, rather than having to fragment
it at INT_MAX ourselves.

Couple of tweaks to fix as pointed out above, but mostly looks sane.
diff mbox

Patch

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index b2405b1..9fb7f9f 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -424,7 +424,8 @@  static int coroutine_fn copy_sectors(BlockDriverState *bs,
      * interface.  This avoids double I/O throttling and request tracking,
      * which can lead to deadlock when block layer copy-on-read is enabled.
      */
-    ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
+    ret = bs->drv->bdrv_co_preadv(bs, (start_sect + n_start) * BDRV_SECTOR_SIZE,
+                                  n * BDRV_SECTOR_SIZE, &qiov, 0);
     if (ret < 0) {
         goto out;
     }
@@ -467,16 +468,16 @@  out:
  * For a given offset of the disk image, find the cluster offset in
  * qcow2 file. The offset is stored in *cluster_offset.
  *
- * on entry, *num is the number of contiguous sectors we'd like to
+ * on entry, *bytes is the number of contiguous bytes we'd like to
  * access following offset.
  *
- * on exit, *num is the number of contiguous sectors we can read.
+ * on exit, *bytes is the number of contiguous bytes we can read.
  *
  * Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
  * cases.
  */
 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int *num, uint64_t *cluster_offset)
+                             unsigned int *bytes, uint64_t *cluster_offset)
 {
     BDRVQcow2State *s = bs->opaque;
     unsigned int l2_index;
@@ -485,12 +486,9 @@  int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     unsigned int offset_in_cluster, nb_clusters;
     uint64_t bytes_available, bytes_needed;
     int ret;
-    unsigned int bytes;
-
-    bytes = *num * BDRV_SECTOR_SIZE;
 
     offset_in_cluster = offset_into_cluster(s, offset);
-    bytes_needed = bytes + offset_in_cluster;
+    bytes_needed = *bytes + offset_in_cluster;
 
     l1_bits = s->l2_bits + s->cluster_bits;
 
@@ -594,9 +592,7 @@  out:
         bytes_available = bytes_needed;
     }
 
-    bytes = bytes_available - offset_in_cluster;
-    assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
-    *num = bytes >> BDRV_SECTOR_BITS;
+    *bytes = bytes_available - offset_in_cluster;
 
     return ret;
 
diff --git a/block/qcow2.c b/block/qcow2.c
index 6f5fb81..b498753 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -975,6 +975,9 @@  static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
         }
 
         bs->encrypted = 1;
+
+        /* Encryption works on a sector granularity */
+        bs->request_alignment = BDRV_SECTOR_SIZE;
     }
 
     s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
@@ -1331,16 +1334,20 @@  static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
     BDRVQcow2State *s = bs->opaque;
     uint64_t cluster_offset;
     int index_in_cluster, ret;
+    unsigned int bytes;
     int64_t status = 0;
 
-    *pnum = nb_sectors;
+    bytes = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE);
     qemu_co_mutex_lock(&s->lock);
-    ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
+    ret = qcow2_get_cluster_offset(bs, sector_num << 9, &bytes,
+                                   &cluster_offset);
     qemu_co_mutex_unlock(&s->lock);
     if (ret < 0) {
         return ret;
     }
 
+    *pnum = bytes >> BDRV_SECTOR_BITS;
+
     if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
         !s->cipher) {
         index_in_cluster = sector_num & (s->cluster_sectors - 1);
@@ -1358,28 +1365,34 @@  static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
 
 /* handle reading after the end of the backing file */
 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
-                  int64_t sector_num, int nb_sectors)
+                        int64_t offset, int bytes)
 {
+    uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE;
     int n1;
-    if ((sector_num + nb_sectors) <= bs->total_sectors)
-        return nb_sectors;
-    if (sector_num >= bs->total_sectors)
+
+    if ((offset + bytes) <= bs_size) {
+        return bytes;
+    }
+
+    if (offset >= bs_size) {
         n1 = 0;
-    else
-        n1 = bs->total_sectors - sector_num;
+    } else {
+        n1 = bs_size - offset;
+    }
 
-    qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
+    qemu_iovec_memset(qiov, n1, 0, bytes - n1);
 
     return n1;
 }
 
-static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
-                          int remaining_sectors, QEMUIOVector *qiov)
+static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
+                                        uint64_t bytes, QEMUIOVector *qiov,
+                                        int flags)
 {
     BDRVQcow2State *s = bs->opaque;
-    int index_in_cluster, n1;
+    int offset_in_cluster, n1;
     int ret;
-    int cur_nr_sectors; /* number of sectors in current iteration */
+    unsigned int cur_bytes; /* number of bytes in current iteration */
     uint64_t cluster_offset = 0;
     uint64_t bytes_done = 0;
     QEMUIOVector hd_qiov;
@@ -1389,26 +1402,24 @@  static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
 
     qemu_co_mutex_lock(&s->lock);
 
-    while (remaining_sectors != 0) {
+    while (bytes != 0) {
 
         /* prepare next request */
-        cur_nr_sectors = remaining_sectors;
+        cur_bytes = MIN(bytes, INT_MAX);
         if (s->cipher) {
-            cur_nr_sectors = MIN(cur_nr_sectors,
-                QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+            cur_bytes = MIN(cur_bytes,
+                            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
         }
 
-        ret = qcow2_get_cluster_offset(bs, sector_num << 9,
-            &cur_nr_sectors, &cluster_offset);
+        ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
         if (ret < 0) {
             goto fail;
         }
 
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
+        offset_in_cluster = offset_into_cluster(s, offset);
 
         qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
-            cur_nr_sectors * 512);
+        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
 
         switch (ret) {
         case QCOW2_CLUSTER_UNALLOCATED:
@@ -1416,18 +1427,17 @@  static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
             if (bs->backing) {
                 /* read from the base image */
                 n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
-                    sector_num, cur_nr_sectors);
+                                         offset, cur_bytes);
                 if (n1 > 0) {
                     QEMUIOVector local_qiov;
 
                     qemu_iovec_init(&local_qiov, hd_qiov.niov);
-                    qemu_iovec_concat(&local_qiov, &hd_qiov, 0,
-                                      n1 * BDRV_SECTOR_SIZE);
+                    qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1);
 
                     BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                     qemu_co_mutex_unlock(&s->lock);
-                    ret = bdrv_co_readv(bs->backing->bs, sector_num,
-                                        n1, &local_qiov);
+                    ret = bdrv_co_preadv(bs->backing->bs, offset, n1,
+                                         &local_qiov, 0);
                     qemu_co_mutex_lock(&s->lock);
 
                     qemu_iovec_destroy(&local_qiov);
@@ -1438,12 +1448,12 @@  static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                 }
             } else {
                 /* Note: in this case, no need to wait */
-                qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+                qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
             }
             break;
 
         case QCOW2_CLUSTER_ZERO:
-            qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+            qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
             break;
 
         case QCOW2_CLUSTER_COMPRESSED:
@@ -1454,8 +1464,8 @@  static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
             }
 
             qemu_iovec_from_buf(&hd_qiov, 0,
-                s->cluster_cache + index_in_cluster * 512,
-                512 * cur_nr_sectors);
+                                s->cluster_cache + offset_in_cluster,
+                                cur_bytes);
             break;
 
         case QCOW2_CLUSTER_NORMAL:
@@ -1482,34 +1492,34 @@  static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                     }
                 }
 
-                assert(cur_nr_sectors <=
-                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+                assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
                 qemu_iovec_reset(&hd_qiov);
-                qemu_iovec_add(&hd_qiov, cluster_data,
-                    512 * cur_nr_sectors);
+                qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
             }
 
             BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
             qemu_co_mutex_unlock(&s->lock);
-            ret = bdrv_co_readv(bs->file->bs,
-                                (cluster_offset >> 9) + index_in_cluster,
-                                cur_nr_sectors, &hd_qiov);
+            ret = bdrv_co_preadv(bs->file->bs,
+                                 cluster_offset + offset_in_cluster,
+                                 cur_bytes, &hd_qiov, 0);
             qemu_co_mutex_lock(&s->lock);
             if (ret < 0) {
                 goto fail;
             }
             if (bs->encrypted) {
                 assert(s->cipher);
+                assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+                assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
                 Error *err = NULL;
-                if (qcow2_encrypt_sectors(s, sector_num,  cluster_data,
-                                          cluster_data, cur_nr_sectors, false,
-                                          &err) < 0) {
+                if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS,
+                                          cluster_data, cluster_data,
+                                          cur_bytes >> BDRV_SECTOR_BITS,
+                                          false, &err) < 0) {
                     error_free(err);
                     ret = -EIO;
                     goto fail;
                 }
-                qemu_iovec_from_buf(qiov, bytes_done,
-                    cluster_data, 512 * cur_nr_sectors);
+                qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
             }
             break;
 
@@ -1519,9 +1529,9 @@  static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
             goto fail;
         }
 
-        remaining_sectors -= cur_nr_sectors;
-        sector_num += cur_nr_sectors;
-        bytes_done += cur_nr_sectors * 512;
+        bytes -= cur_bytes;
+        offset += cur_bytes;
+        bytes_done += cur_bytes;
     }
     ret = 0;
 
@@ -2435,7 +2445,7 @@  static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
     if (head || tail) {
         int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS;
         uint64_t off;
-        int nr;
+        unsigned int nr;
 
         assert(head + count <= s->cluster_size);
 
@@ -2452,7 +2462,7 @@  static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
         /* We can have new write after previous check */
         offset = cl_start << BDRV_SECTOR_BITS;
         count = s->cluster_size;
-        nr = s->cluster_sectors;
+        nr = s->cluster_size;
         ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
         if (ret != QCOW2_CLUSTER_UNALLOCATED && ret != QCOW2_CLUSTER_ZERO) {
             qemu_co_mutex_unlock(&s->lock);
@@ -3368,7 +3378,7 @@  BlockDriver bdrv_qcow2 = {
     .bdrv_co_get_block_status = qcow2_co_get_block_status,
     .bdrv_set_key       = qcow2_set_key,
 
-    .bdrv_co_readv          = qcow2_co_readv,
+    .bdrv_co_preadv         = qcow2_co_preadv,
     .bdrv_co_writev         = qcow2_co_writev,
     .bdrv_co_flush_to_os    = qcow2_co_flush_to_os,
 
diff --git a/block/qcow2.h b/block/qcow2.h
index 7db9795..e2c42d5 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -544,7 +544,7 @@  int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
                           int nb_sectors, bool enc, Error **errp);
 
 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
-    int *num, uint64_t *cluster_offset);
+                             unsigned int *bytes, uint64_t *cluster_offset);
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
     int *num, uint64_t *host_offset, QCowL2Meta **m);
 uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,