diff mbox

[v7,04/16] qcow2: add qcow2_co_pwritev_compressed

Message ID 1469175475-15420-5-git-send-email-den@openvz.org (mailing list archive)
State New, archived
Headers show

Commit Message

Denis V. Lunev July 22, 2016, 8:17 a.m. UTC
From: Pavel Butsykin <pbutsykin@virtuozzo.com>

Added implementation of the qcow2_co_pwritev_compressed function that
will allow us to safely use compressed writes for the qcow2 from running
VMs.

Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Jeff Cody <jcody@redhat.com>
CC: Markus Armbruster <armbru@redhat.com>
CC: Eric Blake <eblake@redhat.com>
CC: John Snow <jsnow@redhat.com>
CC: Stefan Hajnoczi <stefanha@redhat.com>
CC: Kevin Wolf <kwolf@redhat.com>
---
 block/qcow2.c | 124 +++++++++++++++++++++++-----------------------------------
 1 file changed, 50 insertions(+), 74 deletions(-)

Comments

Kevin Wolf Aug. 8, 2016, 1:44 p.m. UTC | #1
Am 22.07.2016 um 10:17 hat Denis V. Lunev geschrieben:
> From: Pavel Butsykin <pbutsykin@virtuozzo.com>
> 
> Added implementation of the qcow2_co_pwritev_compressed function that
> will allow us to safely use compressed writes for the qcow2 from running
> VMs.
> 
> Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> Signed-off-by: Denis V. Lunev <den@openvz.org>
> CC: Jeff Cody <jcody@redhat.com>
> CC: Markus Armbruster <armbru@redhat.com>
> CC: Eric Blake <eblake@redhat.com>
> CC: John Snow <jsnow@redhat.com>
> CC: Stefan Hajnoczi <stefanha@redhat.com>
> CC: Kevin Wolf <kwolf@redhat.com>

>  /* XXX: put compressed sectors first, then all the cluster aligned
>     tables to avoid losing bytes in alignment */
> -static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
> -                                  const uint8_t *buf, int nb_sectors)
> +static coroutine_fn int
> +qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
> +                            uint64_t bytes, QEMUIOVector *qiov)
>  {
>      BDRVQcow2State *s = bs->opaque;
> +    QEMUIOVector hd_qiov;
> +    struct iovec iov;
>      z_stream strm;
>      int ret, out_len;
> -    uint8_t *out_buf;
> +    uint8_t *buf, *out_buf;
>      uint64_t cluster_offset;
>  
> -    if (nb_sectors == 0) {
> +    if (bytes == 0) {
>          /* align end of file to a sector boundary to ease reading with
>             sector based I/Os */
>          cluster_offset = bdrv_getlength(bs->file->bs);
>          return bdrv_truncate(bs->file->bs, cluster_offset);
>      }
>  
> -    if (nb_sectors != s->cluster_sectors) {
> +    if (bytes != s->cluster_size) {
>          ret = -EINVAL;
>  
>          /* Zero-pad last write if image size is not cluster aligned */
> -        if (sector_num + nb_sectors == bs->total_sectors &&
> -            nb_sectors < s->cluster_sectors) {
> +        if (offset + bytes == bs->total_sectors << BDRV_SECTOR_BITS &&
> +            bytes < s->cluster_size)
> +        {
>              uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
>              memset(pad_buf, 0, s->cluster_size);
> -            memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
> -            ret = qcow2_write_compressed(bs, sector_num,
> -                                         pad_buf, s->cluster_sectors);
> +            qemu_iovec_to_buf(qiov, 0, pad_buf, s->cluster_size);

I think, bytes would be more exact than s->cluster_size here. Anyway,
shouldn't hurt because we stop at the end of the qiov anyway and its
size should match bytes.

> +            iov = (struct iovec) {
> +                .iov_base   = pad_buf,
> +                .iov_len    = s->cluster_size,
> +            };
> +            qemu_iovec_init_external(&hd_qiov, &iov, 1);
> +            ret = qcow2_co_pwritev_compressed(bs, offset, bytes, &hd_qiov);
>              qemu_vfree(pad_buf);
>          }
>          return ret;
>      }

Kevin
Pavel Butsykin Aug. 15, 2016, 8:53 a.m. UTC | #2
On 08.08.2016 16:44, Kevin Wolf wrote:
> Am 22.07.2016 um 10:17 hat Denis V. Lunev geschrieben:
>> From: Pavel Butsykin <pbutsykin@virtuozzo.com>
>>
>> Added implementation of the qcow2_co_pwritev_compressed function that
>> will allow us to safely use compressed writes for the qcow2 from running
>> VMs.
>>
>> Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>
>> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
>> Signed-off-by: Denis V. Lunev <den@openvz.org>
>> CC: Jeff Cody <jcody@redhat.com>
>> CC: Markus Armbruster <armbru@redhat.com>
>> CC: Eric Blake <eblake@redhat.com>
>> CC: John Snow <jsnow@redhat.com>
>> CC: Stefan Hajnoczi <stefanha@redhat.com>
>> CC: Kevin Wolf <kwolf@redhat.com>
>
>>   /* XXX: put compressed sectors first, then all the cluster aligned
>>      tables to avoid losing bytes in alignment */
>> -static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
>> -                                  const uint8_t *buf, int nb_sectors)
>> +static coroutine_fn int
>> +qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
>> +                            uint64_t bytes, QEMUIOVector *qiov)
>>   {
>>       BDRVQcow2State *s = bs->opaque;
>> +    QEMUIOVector hd_qiov;
>> +    struct iovec iov;
>>       z_stream strm;
>>       int ret, out_len;
>> -    uint8_t *out_buf;
>> +    uint8_t *buf, *out_buf;
>>       uint64_t cluster_offset;
>>
>> -    if (nb_sectors == 0) {
>> +    if (bytes == 0) {
>>           /* align end of file to a sector boundary to ease reading with
>>              sector based I/Os */
>>           cluster_offset = bdrv_getlength(bs->file->bs);
>>           return bdrv_truncate(bs->file->bs, cluster_offset);
>>       }
>>
>> -    if (nb_sectors != s->cluster_sectors) {
>> +    if (bytes != s->cluster_size) {
>>           ret = -EINVAL;
>>
>>           /* Zero-pad last write if image size is not cluster aligned */
>> -        if (sector_num + nb_sectors == bs->total_sectors &&
>> -            nb_sectors < s->cluster_sectors) {
>> +        if (offset + bytes == bs->total_sectors << BDRV_SECTOR_BITS &&
>> +            bytes < s->cluster_size)
>> +        {
>>               uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
>>               memset(pad_buf, 0, s->cluster_size);
>> -            memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
>> -            ret = qcow2_write_compressed(bs, sector_num,
>> -                                         pad_buf, s->cluster_sectors);
>> +            qemu_iovec_to_buf(qiov, 0, pad_buf, s->cluster_size);
>
> I think, bytes would be more exact than s->cluster_size here. Anyway,
> shouldn't hurt because we stop at the end of the qiov anyway and its
> size should match bytes.

Yes, I can send follow-up patches.
>
>> +            iov = (struct iovec) {
>> +                .iov_base   = pad_buf,
>> +                .iov_len    = s->cluster_size,
>> +            };
>> +            qemu_iovec_init_external(&hd_qiov, &iov, 1);
>> +            ret = qcow2_co_pwritev_compressed(bs, offset, bytes, &hd_qiov);
>>               qemu_vfree(pad_buf);
>>           }
>>           return ret;
>>       }
>
> Kevin
>
Kevin Wolf Aug. 15, 2016, 10:11 a.m. UTC | #3
Am 15.08.2016 um 11:39 hat Pavel Butsykin geschrieben:
> Use bytes as the size would be more exact than s->cluster_size.  Although
> qemu_iovec_to_buf() will not allow to go beyond the qiov.
> 
> Signed-off-by: Pavel Butsykin <pbutsykin@virtuozzo.com>

Thanks, applied to block-next.

Kevin
diff mbox

Patch

diff --git a/block/qcow2.c b/block/qcow2.c
index d620d0a..b5c69df 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2533,84 +2533,49 @@  static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
     return 0;
 }
 
-typedef struct Qcow2WriteCo {
-    BlockDriverState *bs;
-    int64_t sector_num;
-    const uint8_t *buf;
-    int nb_sectors;
-    int ret;
-} Qcow2WriteCo;
-
-static void qcow2_write_co_entry(void *opaque)
-{
-    Qcow2WriteCo *co = opaque;
-    QEMUIOVector qiov;
-    uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
-    uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
-
-    struct iovec iov = (struct iovec) {
-        .iov_base   = (uint8_t*) co->buf,
-        .iov_len    = bytes,
-    };
-    qemu_iovec_init_external(&qiov, &iov, 1);
-
-    co->ret = qcow2_co_pwritev(co->bs, offset, bytes, &qiov, 0);
-}
-
-/* Wrapper for non-coroutine contexts */
-static int qcow2_write(BlockDriverState *bs, int64_t sector_num,
-                       const uint8_t *buf, int nb_sectors)
-{
-    Coroutine *co;
-    AioContext *aio_context = bdrv_get_aio_context(bs);
-    Qcow2WriteCo data = {
-        .bs         = bs,
-        .sector_num = sector_num,
-        .buf        = buf,
-        .nb_sectors = nb_sectors,
-        .ret        = -EINPROGRESS,
-    };
-    co = qemu_coroutine_create(qcow2_write_co_entry, &data);
-    qemu_coroutine_enter(co);
-    while (data.ret == -EINPROGRESS) {
-        aio_poll(aio_context, true);
-    }
-    return data.ret;
-}
-
 /* XXX: put compressed sectors first, then all the cluster aligned
    tables to avoid losing bytes in alignment */
-static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
-                                  const uint8_t *buf, int nb_sectors)
+static coroutine_fn int
+qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
+                            uint64_t bytes, QEMUIOVector *qiov)
 {
     BDRVQcow2State *s = bs->opaque;
+    QEMUIOVector hd_qiov;
+    struct iovec iov;
     z_stream strm;
     int ret, out_len;
-    uint8_t *out_buf;
+    uint8_t *buf, *out_buf;
     uint64_t cluster_offset;
 
-    if (nb_sectors == 0) {
+    if (bytes == 0) {
         /* align end of file to a sector boundary to ease reading with
            sector based I/Os */
         cluster_offset = bdrv_getlength(bs->file->bs);
         return bdrv_truncate(bs->file->bs, cluster_offset);
     }
 
-    if (nb_sectors != s->cluster_sectors) {
+    if (bytes != s->cluster_size) {
         ret = -EINVAL;
 
         /* Zero-pad last write if image size is not cluster aligned */
-        if (sector_num + nb_sectors == bs->total_sectors &&
-            nb_sectors < s->cluster_sectors) {
+        if (offset + bytes == bs->total_sectors << BDRV_SECTOR_BITS &&
+            bytes < s->cluster_size)
+        {
             uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
             memset(pad_buf, 0, s->cluster_size);
-            memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
-            ret = qcow2_write_compressed(bs, sector_num,
-                                         pad_buf, s->cluster_sectors);
+            qemu_iovec_to_buf(qiov, 0, pad_buf, s->cluster_size);
+            iov = (struct iovec) {
+                .iov_base   = pad_buf,
+                .iov_len    = s->cluster_size,
+            };
+            qemu_iovec_init_external(&hd_qiov, &iov, 1);
+            ret = qcow2_co_pwritev_compressed(bs, offset, bytes, &hd_qiov);
             qemu_vfree(pad_buf);
         }
         return ret;
     }
+    buf = qemu_blockalign(bs, s->cluster_size);
+    qemu_iovec_to_buf(qiov, 0, buf, s->cluster_size);
 
     out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
 
@@ -2641,33 +2606,44 @@  static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
 
     if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
         /* could not compress: write normal cluster */
-        ret = qcow2_write(bs, sector_num, buf, s->cluster_sectors);
+        ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
         if (ret < 0) {
             goto fail;
         }
-    } else {
-        cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
-            sector_num << 9, out_len);
-        if (!cluster_offset) {
-            ret = -EIO;
-            goto fail;
-        }
-        cluster_offset &= s->cluster_offset_mask;
+        goto success;
+    }
 
-        ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len);
-        if (ret < 0) {
-            goto fail;
-        }
+    qemu_co_mutex_lock(&s->lock);
+    cluster_offset =
+        qcow2_alloc_compressed_cluster_offset(bs, offset, out_len);
+    if (!cluster_offset) {
+        qemu_co_mutex_unlock(&s->lock);
+        ret = -EIO;
+        goto fail;
+    }
+    cluster_offset &= s->cluster_offset_mask;
 
-        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
-        ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
-        if (ret < 0) {
-            goto fail;
-        }
+    ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len);
+    qemu_co_mutex_unlock(&s->lock);
+    if (ret < 0) {
+        goto fail;
     }
 
+    iov = (struct iovec) {
+        .iov_base   = out_buf,
+        .iov_len    = out_len,
+    };
+    qemu_iovec_init_external(&hd_qiov, &iov, 1);
+
+    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
+    ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0);
+    if (ret < 0) {
+        goto fail;
+    }
+success:
     ret = 0;
 fail:
+    qemu_vfree(buf);
     g_free(out_buf);
     return ret;
 }
@@ -3412,7 +3388,7 @@  BlockDriver bdrv_qcow2 = {
     .bdrv_co_pwrite_zeroes  = qcow2_co_pwrite_zeroes,
     .bdrv_co_pdiscard       = qcow2_co_pdiscard,
     .bdrv_truncate          = qcow2_truncate,
-    .bdrv_write_compressed  = qcow2_write_compressed,
+    .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
     .bdrv_make_empty        = qcow2_make_empty,
 
     .bdrv_snapshot_create   = qcow2_snapshot_create,