diff mbox

[v3] migration/block: use blk_pwrite_zeroes for each zero cluster

Message ID 1491741460-10308-1-git-send-email-lidongchen@tencent.com (mailing list archive)
State New, archived
Headers show

Commit Message

858585 jemmy April 9, 2017, 12:37 p.m. UTC
From: Lidong Chen <lidongchen@tencent.com>

BLOCK_SIZE is (1 << 20), while the qcow2 cluster size is 65536 by default,
so the qcow2 file may end up bigger after migration.
This patch checks each cluster and uses blk_pwrite_zeroes for each
zero cluster.

Signed-off-by: Lidong Chen <lidongchen@tencent.com>
---
 migration/block.c | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

Comments

Fam Zheng April 10, 2017, 12:51 a.m. UTC | #1
On Sun, 04/09 20:37, jemmy858585@gmail.com wrote:
> From: Lidong Chen <lidongchen@tencent.com>
> 
> BLOCK_SIZE is (1 << 20), qcow2 cluster size is 65536 by default,
> this maybe cause the qcow2 file size is bigger after migration.
> This patch check each cluster, use blk_pwrite_zeroes for each
> zero cluster.
> 
> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
> ---
>  migration/block.c | 38 ++++++++++++++++++++++++++++++++++++--
>  1 file changed, 36 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/block.c b/migration/block.c
> index 7734ff7..fe613db 100644
> --- a/migration/block.c
> +++ b/migration/block.c
> @@ -885,6 +885,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>      int64_t total_sectors = 0;
>      int nr_sectors;
>      int ret;
> +    BlockDriverInfo bdi;
> +    int cluster_size;
>  
>      do {
>          addr = qemu_get_be64(f);
> @@ -934,8 +936,40 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>              } else {
>                  buf = g_malloc(BLOCK_SIZE);
>                  qemu_get_buffer(f, buf, BLOCK_SIZE);
> -                ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf,
> -                                 nr_sectors * BDRV_SECTOR_SIZE, 0);
> +
> +                ret = bdrv_get_info(blk_bs(blk), &bdi);
> +                cluster_size = bdi.cluster_size;
> +
> +                if (ret == 0 && cluster_size > 0 &&
> +                    cluster_size <= BLOCK_SIZE &&
> +                    BLOCK_SIZE % cluster_size == 0) {
> +                    int i;
> +                    int64_t cur_addr;
> +                    uint8_t *cur_buf;
> +
> +                    for (i = 0; i < BLOCK_SIZE / cluster_size; i++) {
> +                        cur_addr = addr * BDRV_SECTOR_SIZE
> +                                        + i * cluster_size;
> +                        cur_buf = buf + i * cluster_size;
> +
> +                        if (buffer_is_zero(cur_buf, cluster_size)) {
> +                            ret = blk_pwrite_zeroes(blk, cur_addr,
> +                                                    cluster_size,
> +                                                    BDRV_REQ_MAY_UNMAP);
> +                        } else {
> +                             ret = blk_pwrite(blk, cur_addr, cur_buf,
> +                                              cluster_size, 0);
> +                        }
> +
> +                        if (ret < 0) {
> +                            g_free(buf);
> +                            return ret;
> +                        }

This if block is not necessary because...

> +                    }
> +                } else {
> +                    ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf,
> +                                     nr_sectors * BDRV_SECTOR_SIZE, 0);
> +                }
>                  g_free(buf);
...


            if (ret < 0) {
                return ret;
            }
>              }
>  
> -- 
> 1.8.3.1
> 
> 

If you remove that:

Reviewed-by: Fam Zheng <famz@redhat.com>
858585 jemmy April 10, 2017, 1:10 a.m. UTC | #2
On Mon, Apr 10, 2017 at 8:51 AM, Fam Zheng <famz@redhat.com> wrote:
> On Sun, 04/09 20:37, jemmy858585@gmail.com wrote:
>> From: Lidong Chen <lidongchen@tencent.com>
>>
>> BLOCK_SIZE is (1 << 20), qcow2 cluster size is 65536 by default,
>> this maybe cause the qcow2 file size is bigger after migration.
>> This patch check each cluster, use blk_pwrite_zeroes for each
>> zero cluster.
>>
>> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
>> ---
>>  migration/block.c | 38 ++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 36 insertions(+), 2 deletions(-)
>>
>> diff --git a/migration/block.c b/migration/block.c
>> index 7734ff7..fe613db 100644
>> --- a/migration/block.c
>> +++ b/migration/block.c
>> @@ -885,6 +885,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>>      int64_t total_sectors = 0;
>>      int nr_sectors;
>>      int ret;
>> +    BlockDriverInfo bdi;
>> +    int cluster_size;
>>
>>      do {
>>          addr = qemu_get_be64(f);
>> @@ -934,8 +936,40 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>>              } else {
>>                  buf = g_malloc(BLOCK_SIZE);
>>                  qemu_get_buffer(f, buf, BLOCK_SIZE);
>> -                ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf,
>> -                                 nr_sectors * BDRV_SECTOR_SIZE, 0);
>> +
>> +                ret = bdrv_get_info(blk_bs(blk), &bdi);
>> +                cluster_size = bdi.cluster_size;
>> +
>> +                if (ret == 0 && cluster_size > 0 &&
>> +                    cluster_size <= BLOCK_SIZE &&
>> +                    BLOCK_SIZE % cluster_size == 0) {
>> +                    int i;
>> +                    int64_t cur_addr;
>> +                    uint8_t *cur_buf;
>> +
>> +                    for (i = 0; i < BLOCK_SIZE / cluster_size; i++) {
>> +                        cur_addr = addr * BDRV_SECTOR_SIZE
>> +                                        + i * cluster_size;
>> +                        cur_buf = buf + i * cluster_size;
>> +
>> +                        if (buffer_is_zero(cur_buf, cluster_size)) {
>> +                            ret = blk_pwrite_zeroes(blk, cur_addr,
>> +                                                    cluster_size,
>> +                                                    BDRV_REQ_MAY_UNMAP);
>> +                        } else {
>> +                             ret = blk_pwrite(blk, cur_addr, cur_buf,
>> +                                              cluster_size, 0);
>> +                        }
>> +
>> +                        if (ret < 0) {
>> +                            g_free(buf);
>> +                            return ret;
>> +                        }
>
> This if block is not necessary because...

Hi Fam:
      It's necessary to check that each cluster is written successfully.
      If we remove this if block, some errors may be ignored, and only
      the result of the last cluster would be checked.
      Thanks.

>
>> +                    }
>> +                } else {
>> +                    ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf,
>> +                                     nr_sectors * BDRV_SECTOR_SIZE, 0);
>> +                }
>>                  g_free(buf);
> ...
>
>
>             if (ret < 0) {
>                 return ret;
>             }
>>              }
>>
>> --
>> 1.8.3.1
>>
>>
>
> If you remove that:
>
> Reviewed-by: Fam Zheng <famz@redhat.com>
Fam Zheng April 10, 2017, 1:44 a.m. UTC | #3
On Mon, 04/10 09:10, 858585 jemmy wrote:
> >> +                        if (ret < 0) {
> >> +                            g_free(buf);
> >> +                            return ret;
> >> +                        }
> >
> > This if block is not necessary because...
> 
> Hi Fam:
>       It's necessary to check each cluster is written successfully.
>       if we remove this if block, it maybe ignore some error, and only check
>       the last cluster.
>       Thanks.

Yes, I missed the fact that it is in the loop body. My bad. Thanks for pointing that out:

Reviewed-by: Fam Zheng <famz@redhat.com>
Stefan Hajnoczi April 10, 2017, 4 p.m. UTC | #4
On Sun, Apr 09, 2017 at 08:37:40PM +0800, jemmy858585@gmail.com wrote:
> From: Lidong Chen <lidongchen@tencent.com>
> 
> BLOCK_SIZE is (1 << 20), qcow2 cluster size is 65536 by default,
> this maybe cause the qcow2 file size is bigger after migration.
> This patch check each cluster, use blk_pwrite_zeroes for each
> zero cluster.
> 
> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
> ---
>  migration/block.c | 38 ++++++++++++++++++++++++++++++++++++--
>  1 file changed, 36 insertions(+), 2 deletions(-)
> 
> diff --git a/migration/block.c b/migration/block.c
> index 7734ff7..fe613db 100644
> --- a/migration/block.c
> +++ b/migration/block.c
> @@ -885,6 +885,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>      int64_t total_sectors = 0;
>      int nr_sectors;
>      int ret;
> +    BlockDriverInfo bdi;
> +    int cluster_size;
>  
>      do {
>          addr = qemu_get_be64(f);
> @@ -934,8 +936,40 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>              } else {
>                  buf = g_malloc(BLOCK_SIZE);
>                  qemu_get_buffer(f, buf, BLOCK_SIZE);
> -                ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf,
> -                                 nr_sectors * BDRV_SECTOR_SIZE, 0);
> +
> +                ret = bdrv_get_info(blk_bs(blk), &bdi);
> +                cluster_size = bdi.cluster_size;
> +
> +                if (ret == 0 && cluster_size > 0 &&
> +                    cluster_size <= BLOCK_SIZE &&
> +                    BLOCK_SIZE % cluster_size == 0) {

How about:

  if (blk != blk_prev) {
      blk_prev = blk;
      total_sectors = blk_nb_sectors(blk);
      if (total_sectors <= 0) {
          error_report("Error getting length of block device %s",
                       device_name);
          return -EINVAL;
      }

      blk_invalidate_cache(blk, &local_err);
      if (local_err) {
          error_report_err(local_err);
          return -EINVAL;
      }

+     ret = bdrv_get_info(blk_bs(blk), &bdi);
+     if (ret == 0 && cluster_size > 0 && cluster_size <= BLOCK_SIZE &&
+         BLOCK_SIZE % cluster_size == 0) {
+         zero_cluster_size = bdi.cluster_size;
+     } else {
+         zero_cluster_size = 0;
+     }
  }

That way we only fetch the cluster size once per device.

When processing a block, we can then detect zeroes without repeatedly
fetching the cluster size:

  if (zero_cluster_size) {
     ...detect zeroes...
  }

> +                    int i;
> +                    int64_t cur_addr;
> +                    uint8_t *cur_buf;
> +
> +                    for (i = 0; i < BLOCK_SIZE / cluster_size; i++) {
> +                        cur_addr = addr * BDRV_SECTOR_SIZE
> +                                        + i * cluster_size;
> +                        cur_buf = buf + i * cluster_size;
> +
> +                        if (buffer_is_zero(cur_buf, cluster_size)) {
> +                            ret = blk_pwrite_zeroes(blk, cur_addr,
> +                                                    cluster_size,
> +                                                    BDRV_REQ_MAY_UNMAP);
> +                        } else {
> +                             ret = blk_pwrite(blk, cur_addr, cur_buf,
> +                                              cluster_size, 0);

Indentation is off here.

> +                        }
> +
> +                        if (ret < 0) {
> +                            g_free(buf);
> +                            return ret;
> +                        }
> +                    }
> +                } else {
> +                    ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf,
> +                                     nr_sectors * BDRV_SECTOR_SIZE, 0);
> +                }
>                  g_free(buf);
>              }
>  
> -- 
> 1.8.3.1
>
858585 jemmy April 11, 2017, 1:19 a.m. UTC | #5
On Tue, Apr 11, 2017 at 12:00 AM, Stefan Hajnoczi <stefanha@redhat.com> wrote:
> On Sun, Apr 09, 2017 at 08:37:40PM +0800, jemmy858585@gmail.com wrote:
>> From: Lidong Chen <lidongchen@tencent.com>
>>
>> BLOCK_SIZE is (1 << 20), qcow2 cluster size is 65536 by default,
>> this maybe cause the qcow2 file size is bigger after migration.
>> This patch check each cluster, use blk_pwrite_zeroes for each
>> zero cluster.
>>
>> Signed-off-by: Lidong Chen <lidongchen@tencent.com>
>> ---
>>  migration/block.c | 38 ++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 36 insertions(+), 2 deletions(-)
>>
>> diff --git a/migration/block.c b/migration/block.c
>> index 7734ff7..fe613db 100644
>> --- a/migration/block.c
>> +++ b/migration/block.c
>> @@ -885,6 +885,8 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>>      int64_t total_sectors = 0;
>>      int nr_sectors;
>>      int ret;
>> +    BlockDriverInfo bdi;
>> +    int cluster_size;
>>
>>      do {
>>          addr = qemu_get_be64(f);
>> @@ -934,8 +936,40 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
>>              } else {
>>                  buf = g_malloc(BLOCK_SIZE);
>>                  qemu_get_buffer(f, buf, BLOCK_SIZE);
>> -                ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf,
>> -                                 nr_sectors * BDRV_SECTOR_SIZE, 0);
>> +
>> +                ret = bdrv_get_info(blk_bs(blk), &bdi);
>> +                cluster_size = bdi.cluster_size;
>> +
>> +                if (ret == 0 && cluster_size > 0 &&
>> +                    cluster_size <= BLOCK_SIZE &&
>> +                    BLOCK_SIZE % cluster_size == 0) {
>
> How about:
>
>   if (blk != blk_prev) {
>       blk_prev = blk;
>       total_sectors = blk_nb_sectors(blk);
>       if (total_sectors <= 0) {
>           error_report("Error getting length of block device %s",
>                        device_name);
>           return -EINVAL;
>       }
>
>       blk_invalidate_cache(blk, &local_err);
>       if (local_err) {
>           error_report_err(local_err);
>           return -EINVAL;
>       }
>
> +     ret = bdrv_get_info(blk_bs(blk), &bdi);
> +     if (ret == 0 && cluster_size > 0 && cluster_size <= BLOCK_SIZE &&
> +         BLOCK_SIZE % cluster_size == 0) {
> +         zero_cluster_size = bdi.cluster_size;
> +     } else {
> +         zero_cluster_size = 0;
> +     }
>   }
>
> That way we only fetch the cluster size once per device.
>
> When processing a block we do without repeatedly fetching the cluster
> size:
>
>   if (zero_cluster_size) {
>      ...detect zeroes...
>   }

Good idea, I will test the patch again with this change.

>
>> +                    int i;
>> +                    int64_t cur_addr;
>> +                    uint8_t *cur_buf;
>> +
>> +                    for (i = 0; i < BLOCK_SIZE / cluster_size; i++) {
>> +                        cur_addr = addr * BDRV_SECTOR_SIZE
>> +                                        + i * cluster_size;
>> +                        cur_buf = buf + i * cluster_size;
>> +
>> +                        if (buffer_is_zero(cur_buf, cluster_size)) {
>> +                            ret = blk_pwrite_zeroes(blk, cur_addr,
>> +                                                    cluster_size,
>> +                                                    BDRV_REQ_MAY_UNMAP);
>> +                        } else {
>> +                             ret = blk_pwrite(blk, cur_addr, cur_buf,
>> +                                              cluster_size, 0);
>
> Indentation is off here.
>
>> +                        }
>> +
>> +                        if (ret < 0) {
>> +                            g_free(buf);
>> +                            return ret;
>> +                        }
>> +                    }
>> +                } else {
>> +                    ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf,
>> +                                     nr_sectors * BDRV_SECTOR_SIZE, 0);
>> +                }
>>                  g_free(buf);
>>              }
>>
>> --
>> 1.8.3.1
>>
diff mbox

Patch

diff --git a/migration/block.c b/migration/block.c
index 7734ff7..fe613db 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -885,6 +885,8 @@  static int block_load(QEMUFile *f, void *opaque, int version_id)
     int64_t total_sectors = 0;
     int nr_sectors;
     int ret;
+    BlockDriverInfo bdi;
+    int cluster_size;
 
     do {
         addr = qemu_get_be64(f);
@@ -934,8 +936,40 @@  static int block_load(QEMUFile *f, void *opaque, int version_id)
             } else {
                 buf = g_malloc(BLOCK_SIZE);
                 qemu_get_buffer(f, buf, BLOCK_SIZE);
-                ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf,
-                                 nr_sectors * BDRV_SECTOR_SIZE, 0);
+
+                ret = bdrv_get_info(blk_bs(blk), &bdi);
+                cluster_size = bdi.cluster_size;
+
+                if (ret == 0 && cluster_size > 0 &&
+                    cluster_size <= BLOCK_SIZE &&
+                    BLOCK_SIZE % cluster_size == 0) {
+                    int i;
+                    int64_t cur_addr;
+                    uint8_t *cur_buf;
+
+                    for (i = 0; i < BLOCK_SIZE / cluster_size; i++) {
+                        cur_addr = addr * BDRV_SECTOR_SIZE
+                                        + i * cluster_size;
+                        cur_buf = buf + i * cluster_size;
+
+                        if (buffer_is_zero(cur_buf, cluster_size)) {
+                            ret = blk_pwrite_zeroes(blk, cur_addr,
+                                                    cluster_size,
+                                                    BDRV_REQ_MAY_UNMAP);
+                        } else {
+                             ret = blk_pwrite(blk, cur_addr, cur_buf,
+                                              cluster_size, 0);
+                        }
+
+                        if (ret < 0) {
+                            g_free(buf);
+                            return ret;
+                        }
+                    }
+                } else {
+                    ret = blk_pwrite(blk, addr * BDRV_SECTOR_SIZE, buf,
+                                     nr_sectors * BDRV_SECTOR_SIZE, 0);
+                }
                 g_free(buf);
             }