diff mbox

[06/22] hbitmap: load/store

Message ID 1458072268-53705-7-git-send-email-vsementsov@virtuozzo.com (mailing list archive)
State New, archived
Headers show

Commit Message

Vladimir Sementsov-Ogievskiy March 15, 2016, 8:04 p.m. UTC
Add functions for load/store HBitmap to BDS, using clusters table:
Last level of the bitmap is split into chunks of 'cluster_size'
size. Each cell of the table contains offset in bds, to load/store
corresponding chunk.

Also,
    0 in cell means all-zeroes-chunk (should not be saved)
    1 in cell means all-ones-chunk (should not be saved)
    hbitmap_prepare_store() fills table with
      0 for all-zeroes chunks
      1 for all-ones chunks
      2 for others

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
---
 block/dirty-bitmap.c         |  23 +++++
 include/block/dirty-bitmap.h |  11 +++
 include/qemu/hbitmap.h       |  12 +++
 util/hbitmap.c               | 209 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 255 insertions(+)

Comments

John Snow March 21, 2016, 10:42 p.m. UTC | #1
On 03/15/2016 04:04 PM, Vladimir Sementsov-Ogievskiy wrote:
> Add functions for load/store HBitmap to BDS, using clusters table:
> Last level of the bitmap is splitted into chunks of 'cluster_size'
> size. Each cell of the table contains offset in bds, to load/store
> corresponding chunk.
> 
> Also,
>     0 in cell means all-zeroes-chunk (should not be saved)
>     1 in cell means all-ones-chunk (should not be saved)
>     hbitmap_prepare_store() fills table with
>       0 for all-zeroes chunks
>       1 for all-ones chunks
>       2 for others
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
>  block/dirty-bitmap.c         |  23 +++++
>  include/block/dirty-bitmap.h |  11 +++
>  include/qemu/hbitmap.h       |  12 +++
>  util/hbitmap.c               | 209 +++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 255 insertions(+)
> 
> diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
> index e68c177..816c6ee 100644
> --- a/block/dirty-bitmap.c
> +++ b/block/dirty-bitmap.c
> @@ -396,3 +396,26 @@ int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
>  {
>      return hbitmap_count(bitmap->bitmap);
>  }
> +
> +int bdrv_dirty_bitmap_load(BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
> +                           const uint64_t *table, uint32_t table_size,
> +                           uint32_t cluster_size)
> +{
> +    return hbitmap_load(bitmap->bitmap, bs, table, table_size, cluster_size);
> +}
> +
> +int bdrv_dirty_bitmap_prepare_store(const BdrvDirtyBitmap *bitmap,
> +                                    uint32_t cluster_size,
> +                                    uint64_t *table,
> +                                    uint32_t *table_size)
> +{
> +    return hbitmap_prepare_store(bitmap->bitmap, cluster_size,
> +                                 table, table_size);
> +}
> +
> +int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
> +                            const uint64_t *table, uint32_t table_size,
> +                            uint32_t cluster_size)
> +{
> +    return hbitmap_store(bitmap->bitmap, bs, table, table_size, cluster_size);
> +}
> diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
> index 27515af..20cb540 100644
> --- a/include/block/dirty-bitmap.h
> +++ b/include/block/dirty-bitmap.h
> @@ -43,4 +43,15 @@ void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
>  int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
>  void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
>  
> +int bdrv_dirty_bitmap_load(BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
> +                           const uint64_t *table, uint32_t table_size,
> +                           uint32_t cluster_size);
> +int bdrv_dirty_bitmap_prepare_store(const BdrvDirtyBitmap *bitmap,
> +                                    uint32_t cluster_size,
> +                                    uint64_t *table,
> +                                    uint32_t *table_size);
> +int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
> +                            const uint64_t *table, uint32_t table_size,
> +                            uint32_t cluster_size);
> +
>  #endif
> diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
> index 6d1da4d..d83bb79 100644
> --- a/include/qemu/hbitmap.h
> +++ b/include/qemu/hbitmap.h
> @@ -241,5 +241,17 @@ static inline size_t hbitmap_iter_next_word(HBitmapIter *hbi, unsigned long *p_c
>      return hbi->pos;
>  }
>  
> +typedef struct BlockDriverState BlockDriverState;
> +
> +int hbitmap_load(HBitmap *bitmap, BlockDriverState *bs,
> +                 const uint64_t *table, uint32_t table_size,
> +                 uint32_t cluster_size);
> +
> +int hbitmap_prepare_store(const HBitmap *bitmap, uint32_t cluster_size,
> +                          uint64_t *table, uint32_t *table_size);
> +
> +int hbitmap_store(HBitmap *bitmap, BlockDriverState *bs,
> +                  const uint64_t *table, uint32_t table_size,
> +                  uint32_t cluster_size);
>  
>  #endif
> diff --git a/util/hbitmap.c b/util/hbitmap.c
> index 28595fb..1960e4f 100644
> --- a/util/hbitmap.c
> +++ b/util/hbitmap.c
> @@ -15,6 +15,8 @@
>  #include "qemu/host-utils.h"
>  #include "trace.h"
>  
> +#include "block/block.h"
> +

This is a bit of a red flag -- we shouldn't need block layer specifics
in the subcomponent-agnostic HBitmap utility.

Further, by relying on these facilities here in hbitmap.c, "make check"
no longer can compile the relevant hbitmap tests.

Make sure that each intermediate commit here passes these necessary
tests, test-hbitmap in particular for each, and a "make check" overall
at the end of your series.

--js

>  /* HBitmaps provides an array of bits.  The bits are stored as usual in an
>   * array of unsigned longs, but HBitmap is also optimized to provide fast
>   * iteration over set bits; going from one bit to the next is O(logB n)
> @@ -499,3 +501,210 @@ char *hbitmap_md5(const HBitmap *bitmap)
>      const guchar *data = (const guchar *)bitmap->levels[HBITMAP_LEVELS - 1];
>      return g_compute_checksum_for_data(G_CHECKSUM_MD5, data, size);
>  }
> +
> +/* hb_restore_levels()
> + * Using the last level restore all other levels
> + */
> +static void hb_restore_levels(HBitmap *bitmap)
> +{
> +    int64_t i, size, prev_size;
> +    int lev;
> +
> +    /* restore levels starting from penultimate to zero level, assuming
> +     * that the last level is ok */
> +    size = MAX((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
> +    for (lev = HBITMAP_LEVELS - 1; lev-- > 0; ) {
> +        prev_size = size;
> +        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
> +        memset(bitmap->levels[lev], 0, size * sizeof(unsigned long));
> +
> +        for (i = 0; i < prev_size; ++i) {
> +            if (bitmap->levels[lev + 1][i]) {
> +                bitmap->levels[lev][i >> BITS_PER_LEVEL] |=
> +                    1UL << (i & (BITS_PER_LONG - 1));
> +            }
> +        }
> +    }
> +
> +    bitmap->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
> +}
> +
> +/* load_bitmap()
> + * Load dirty bitmap from file, using table with cluster offsets.
> + * Table entries are assumed to be in little endian format.
> + */
> +int hbitmap_load(HBitmap *bitmap, BlockDriverState *bs,
> +                 const uint64_t *table, uint32_t table_size,
> +                 uint32_t cluster_size)
> +{
> +    uint32_t i;
> +    uint8_t *cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
> +    uint8_t *end = cur + ((bitmap->size + 7) >> 3);
> +
> +    hbitmap_reset_all(bitmap);
> +
> +    for (i = 0; i < table_size && cur < end; ++i) {
> +        uint64_t offset = table[i];
> +        uint64_t count = MIN(cluster_size, end - cur);
> +
> +        /* Zero offset means zero region, offset = 1 means filled region.
> +         * Cluster is not allocated in both cases. */
> +        if (offset == 1) {
> +            memset(cur, 0xff, count);
> +        } else if (offset) {
> +            int ret = bdrv_pread(bs, offset, cur, count);
> +            if (ret < 0) {
> +                return ret;
> +            }
> +        }
> +
> +        cur += cluster_size;
> +    }
> +
> +    cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
> +    while (cur < end) {
> +        if (BITS_PER_LONG == 32) {
> +            le32_to_cpus((uint32_t *)cur);
> +        } else {
> +            le64_to_cpus((uint64_t *)cur);
> +        }
> +
> +        cur += sizeof(unsigned long);
> +    }
> +
> +    hb_restore_levels(bitmap);
> +
> +    return 0;
> +}
> +
> +static bool buffer_is_all_ones(void *buf, size_t len)
> +{
> +    /* FIXME */
> +    return false;
> +}
> +
> +/* hbitmap_prepare_store()
> + * Devide bitmap data into clusters, and then,
> + * for zero cluster: table[i] = 0
> + * for all-ones cluster: table[i] = 1
> + * for other clusters: table[i] = 2
> + */
> +int hbitmap_prepare_store(const HBitmap *bitmap,
> +                          uint32_t cluster_size,
> +                          uint64_t *table,
> +                          uint32_t *table_size)
> +{
> +    HBitmapIter hbi;
> +    hbitmap_iter_init(&hbi, bitmap, 0);
> +    uint64_t nb_bits_in_cl = (uint64_t)cluster_size << 3;
> +    uint32_t need_table_size =
> +            (bitmap->size + nb_bits_in_cl - 1) / nb_bits_in_cl;
> +
> +    if (table == NULL && *table_size == 0) {
> +        *table_size = need_table_size;
> +        return 0;
> +    }
> +
> +    if (*table_size < need_table_size) {
> +        return -ENOMEM;
> +    }
> +
> +    memset(table, 0, *table_size * sizeof(table[0]));
> +
> +    for (;;) {
> +        unsigned long cur;
> +        size_t pos = hbitmap_iter_next_word(&hbi, &cur);
> +        size_t byte = pos * sizeof(unsigned long);
> +        uint64_t bit = byte << 3;
> +        uint64_t nbits = MIN(cluster_size << 3, bitmap->size - bit), next_bit;
> +        size_t i = byte / cluster_size;
> +
> +        if (pos == -1) {
> +            break;
> +        }
> +
> +        if (pos % cluster_size != 0) {
> +            table[i] = 2;
> +        } else if (buffer_is_all_ones(&bitmap->levels[HBITMAP_LEVELS - 1][pos],
> +                               nbits >> 3)) {
> +            table[i] = 1;
> +            if (nbits & 7) {
> +                uint8_t last_byte =
> +                        *(((uint8_t *)&bitmap->levels[HBITMAP_LEVELS - 1][pos])
> +                                + (nbits >> 3));
> +                if (last_byte != ((1 << (nbits & 7)) - 1)) {
> +                    table[i] = 2;
> +                }
> +            }
> +        } else {
> +            table[i] = 2;
> +        }
> +
> +        next_bit = (i + 1) * cluster_size << 3;
> +
> +        if (next_bit >= bitmap->size) {
> +            break;
> +        }
> +
> +        hbitmap_iter_init(&hbi, bitmap, next_bit << bitmap->granularity);
> +    }
> +
> +    return 0;
> +}
> +
> +static void longs_to_le(unsigned long *longs, size_t count)
> +{
> +    unsigned long *end = longs + count;
> +    while (longs < end) {
> +        if (BITS_PER_LONG == 32) {
> +            cpu_to_le32s((uint32_t *)longs);
> +        } else {
> +            cpu_to_le64s((uint64_t *)longs);
> +        }
> +
> +        longs++;
> +    }
> +}
> +
> +/* store_bitmap()
> + * update bitmap table by storing bitmap to it.
> + * bitmap table entries are assumed to be in big endian format
> + * On the error, the resulting bitmap table is valid for clearing, but
> + * may contain invalid bitmap */
> +int hbitmap_store(HBitmap *bitmap, BlockDriverState *bs,
> +                  const uint64_t *table, uint32_t table_size,
> +                  uint32_t cluster_size)
> +{
> +    int i;
> +    uint8_t *cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
> +    uint8_t *end = cur +
> +        ((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL) * (sizeof(long));
> +
> +    for (i = 0; i < table_size && cur < end; ++i) {
> +        uint64_t offset = table[i];
> +        uint64_t count = MIN(cluster_size, end - cur);
> +
> +        /* Zero offset means zero region, offset = 1 means filled region.
> +         * Cluster is not allocated in both cases. */
> +        if (offset > 1) {
> +            int ret;
> +            if (cpu_to_le16(1) == 1) {
> +                ret = bdrv_pwrite(bs, offset, cur, count);
> +            } else {
> +                void *tmp = g_memdup(cur, count);
> +                longs_to_le((unsigned long *)cur,
> +                            count / sizeof(unsigned long));
> +                ret = bdrv_pwrite(bs, offset, tmp, count);
> +                g_free(tmp);
> +            }
> +
> +            if (ret < 0) {
> +                return ret;
> +            }
> +        }
> +
> +        cur += cluster_size;
> +    }
> +
> +    return 0;
> +}
>
Vladimir Sementsov-Ogievskiy March 22, 2016, 10:47 a.m. UTC | #2
On 22.03.2016 01:42, John Snow wrote:
>
> On 03/15/2016 04:04 PM, Vladimir Sementsov-Ogievskiy wrote:
>> Add functions for load/store HBitmap to BDS, using clusters table:
>> Last level of the bitmap is splitted into chunks of 'cluster_size'
>> size. Each cell of the table contains offset in bds, to load/store
>> corresponding chunk.
>>
>> Also,
>>      0 in cell means all-zeroes-chunk (should not be saved)
>>      1 in cell means all-ones-chunk (should not be saved)
>>      hbitmap_prepare_store() fills table with
>>        0 for all-zeroes chunks
>>        1 for all-ones chunks
>>        2 for others
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
>>   block/dirty-bitmap.c         |  23 +++++
>>   include/block/dirty-bitmap.h |  11 +++
>>   include/qemu/hbitmap.h       |  12 +++
>>   util/hbitmap.c               | 209 +++++++++++++++++++++++++++++++++++++++++++
>>   4 files changed, 255 insertions(+)
>>
>> diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
>> index e68c177..816c6ee 100644
>> --- a/block/dirty-bitmap.c
>> +++ b/block/dirty-bitmap.c
>> @@ -396,3 +396,26 @@ int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
>>   {
>>       return hbitmap_count(bitmap->bitmap);
>>   }
>> +
>> +int bdrv_dirty_bitmap_load(BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
>> +                           const uint64_t *table, uint32_t table_size,
>> +                           uint32_t cluster_size)
>> +{
>> +    return hbitmap_load(bitmap->bitmap, bs, table, table_size, cluster_size);
>> +}
>> +
>> +int bdrv_dirty_bitmap_prepare_store(const BdrvDirtyBitmap *bitmap,
>> +                                    uint32_t cluster_size,
>> +                                    uint64_t *table,
>> +                                    uint32_t *table_size)
>> +{
>> +    return hbitmap_prepare_store(bitmap->bitmap, cluster_size,
>> +                                 table, table_size);
>> +}
>> +
>> +int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
>> +                            const uint64_t *table, uint32_t table_size,
>> +                            uint32_t cluster_size)
>> +{
>> +    return hbitmap_store(bitmap->bitmap, bs, table, table_size, cluster_size);
>> +}
>> diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
>> index 27515af..20cb540 100644
>> --- a/include/block/dirty-bitmap.h
>> +++ b/include/block/dirty-bitmap.h
>> @@ -43,4 +43,15 @@ void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
>>   int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
>>   void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
>>   
>> +int bdrv_dirty_bitmap_load(BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
>> +                           const uint64_t *table, uint32_t table_size,
>> +                           uint32_t cluster_size);
>> +int bdrv_dirty_bitmap_prepare_store(const BdrvDirtyBitmap *bitmap,
>> +                                    uint32_t cluster_size,
>> +                                    uint64_t *table,
>> +                                    uint32_t *table_size);
>> +int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
>> +                            const uint64_t *table, uint32_t table_size,
>> +                            uint32_t cluster_size);
>> +
>>   #endif
>> diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
>> index 6d1da4d..d83bb79 100644
>> --- a/include/qemu/hbitmap.h
>> +++ b/include/qemu/hbitmap.h
>> @@ -241,5 +241,17 @@ static inline size_t hbitmap_iter_next_word(HBitmapIter *hbi, unsigned long *p_c
>>       return hbi->pos;
>>   }
>>   
>> +typedef struct BlockDriverState BlockDriverState;
>> +
>> +int hbitmap_load(HBitmap *bitmap, BlockDriverState *bs,
>> +                 const uint64_t *table, uint32_t table_size,
>> +                 uint32_t cluster_size);
>> +
>> +int hbitmap_prepare_store(const HBitmap *bitmap, uint32_t cluster_size,
>> +                          uint64_t *table, uint32_t *table_size);
>> +
>> +int hbitmap_store(HBitmap *bitmap, BlockDriverState *bs,
>> +                  const uint64_t *table, uint32_t table_size,
>> +                  uint32_t cluster_size);
>>   
>>   #endif
>> diff --git a/util/hbitmap.c b/util/hbitmap.c
>> index 28595fb..1960e4f 100644
>> --- a/util/hbitmap.c
>> +++ b/util/hbitmap.c
>> @@ -15,6 +15,8 @@
>>   #include "qemu/host-utils.h"
>>   #include "trace.h"
>>   
>> +#include "block/block.h"
>> +
> This is a bit of a red flag -- we shouldn't need block layer specifics
> in the subcomponent-agnostic HBitmap utility.
>
> Further, by relying on these facilities here in hbitmap.c, "make check"
> no longer can compile the relevant hbitmap tests.

Thanks. Locally I've already fixed it (added an entry to the tests Makefile). 
But in fact, it may be better to move these functions into 
block/dirty-bitmap.c or into a new file like block/hbitmap.c.

>
> Make sure that each intermediate commit here passes these necessary
> tests, test-hbitmap in particular for each, and a "make check" overall
> at the end of your series.
>
> --js
>
>>   /* HBitmaps provides an array of bits.  The bits are stored as usual in an
>>    * array of unsigned longs, but HBitmap is also optimized to provide fast
>>    * iteration over set bits; going from one bit to the next is O(logB n)
>> @@ -499,3 +501,210 @@ char *hbitmap_md5(const HBitmap *bitmap)
>>       const guchar *data = (const guchar *)bitmap->levels[HBITMAP_LEVELS - 1];
>>       return g_compute_checksum_for_data(G_CHECKSUM_MD5, data, size);
>>   }
>> +
>> +/* hb_restore_levels()
>> + * Using the last level restore all other levels
>> + */
>> +static void hb_restore_levels(HBitmap *bitmap)
>> +{
>> +    int64_t i, size, prev_size;
>> +    int lev;
>> +
>> +    /* restore levels starting from penultimate to zero level, assuming
>> +     * that the last level is ok */
>> +    size = MAX((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
>> +    for (lev = HBITMAP_LEVELS - 1; lev-- > 0; ) {
>> +        prev_size = size;
>> +        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
>> +        memset(bitmap->levels[lev], 0, size * sizeof(unsigned long));
>> +
>> +        for (i = 0; i < prev_size; ++i) {
>> +            if (bitmap->levels[lev + 1][i]) {
>> +                bitmap->levels[lev][i >> BITS_PER_LEVEL] |=
>> +                    1UL << (i & (BITS_PER_LONG - 1));
>> +            }
>> +        }
>> +    }
>> +
>> +    bitmap->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
>> +}
>> +
>> +/* load_bitmap()
>> + * Load dirty bitmap from file, using table with cluster offsets.
>> + * Table entries are assumed to be in little endian format.
>> + */
>> +int hbitmap_load(HBitmap *bitmap, BlockDriverState *bs,
>> +                 const uint64_t *table, uint32_t table_size,
>> +                 uint32_t cluster_size)
>> +{
>> +    uint32_t i;
>> +    uint8_t *cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
>> +    uint8_t *end = cur + ((bitmap->size + 7) >> 3);
>> +
>> +    hbitmap_reset_all(bitmap);
>> +
>> +    for (i = 0; i < table_size && cur < end; ++i) {
>> +        uint64_t offset = table[i];
>> +        uint64_t count = MIN(cluster_size, end - cur);
>> +
>> +        /* Zero offset means zero region, offset = 1 means filled region.
>> +         * Cluster is not allocated in both cases. */
>> +        if (offset == 1) {
>> +            memset(cur, 0xff, count);
>> +        } else if (offset) {
>> +            int ret = bdrv_pread(bs, offset, cur, count);
>> +            if (ret < 0) {
>> +                return ret;
>> +            }
>> +        }
>> +
>> +        cur += cluster_size;
>> +    }
>> +
>> +    cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
>> +    while (cur < end) {
>> +        if (BITS_PER_LONG == 32) {
>> +            le32_to_cpus((uint32_t *)cur);
>> +        } else {
>> +            le64_to_cpus((uint64_t *)cur);
>> +        }
>> +
>> +        cur += sizeof(unsigned long);
>> +    }
>> +
>> +    hb_restore_levels(bitmap);
>> +
>> +    return 0;
>> +}
>> +
>> +static bool buffer_is_all_ones(void *buf, size_t len)
>> +{
>> +    /* FIXME */
>> +    return false;
>> +}
>> +
>> +/* hbitmap_prepare_store()
>> + * Devide bitmap data into clusters, and then,
>> + * for zero cluster: table[i] = 0
>> + * for all-ones cluster: table[i] = 1
>> + * for other clusters: table[i] = 2
>> + */
>> +int hbitmap_prepare_store(const HBitmap *bitmap,
>> +                          uint32_t cluster_size,
>> +                          uint64_t *table,
>> +                          uint32_t *table_size)
>> +{
>> +    HBitmapIter hbi;
>> +    hbitmap_iter_init(&hbi, bitmap, 0);
>> +    uint64_t nb_bits_in_cl = (uint64_t)cluster_size << 3;
>> +    uint32_t need_table_size =
>> +            (bitmap->size + nb_bits_in_cl - 1) / nb_bits_in_cl;
>> +
>> +    if (table == NULL && *table_size == 0) {
>> +        *table_size = need_table_size;
>> +        return 0;
>> +    }
>> +
>> +    if (*table_size < need_table_size) {
>> +        return -ENOMEM;
>> +    }
>> +
>> +    memset(table, 0, *table_size * sizeof(table[0]));
>> +
>> +    for (;;) {
>> +        unsigned long cur;
>> +        size_t pos = hbitmap_iter_next_word(&hbi, &cur);
>> +        size_t byte = pos * sizeof(unsigned long);
>> +        uint64_t bit = byte << 3;
>> +        uint64_t nbits = MIN(cluster_size << 3, bitmap->size - bit), next_bit;
>> +        size_t i = byte / cluster_size;
>> +
>> +        if (pos == -1) {
>> +            break;
>> +        }
>> +
>> +        if (pos % cluster_size != 0) {
>> +            table[i] = 2;
>> +        } else if (buffer_is_all_ones(&bitmap->levels[HBITMAP_LEVELS - 1][pos],
>> +                               nbits >> 3)) {
>> +            table[i] = 1;
>> +            if (nbits & 7) {
>> +                uint8_t last_byte =
>> +                        *(((uint8_t *)&bitmap->levels[HBITMAP_LEVELS - 1][pos])
>> +                                + (nbits >> 3));
>> +                if (last_byte != ((1 << (nbits & 7)) - 1)) {
>> +                    table[i] = 2;
>> +                }
>> +            }
>> +        } else {
>> +            table[i] = 2;
>> +        }
>> +
>> +        next_bit = (i + 1) * cluster_size << 3;
>> +
>> +        if (next_bit >= bitmap->size) {
>> +            break;
>> +        }
>> +
>> +        hbitmap_iter_init(&hbi, bitmap, next_bit << bitmap->granularity);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static void longs_to_le(unsigned long *longs, size_t count)
>> +{
>> +    unsigned long *end = longs + count;
>> +    while (longs < end) {
>> +        if (BITS_PER_LONG == 32) {
>> +            cpu_to_le32s((uint32_t *)longs);
>> +        } else {
>> +            cpu_to_le64s((uint64_t *)longs);
>> +        }
>> +
>> +        longs++;
>> +    }
>> +}
>> +
>> +/* store_bitmap()
>> + * update bitmap table by storing bitmap to it.
>> + * bitmap table entries are assumed to be in big endian format
>> + * On the error, the resulting bitmap table is valid for clearing, but
>> + * may contain invalid bitmap */
>> +int hbitmap_store(HBitmap *bitmap, BlockDriverState *bs,
>> +                  const uint64_t *table, uint32_t table_size,
>> +                  uint32_t cluster_size)
>> +{
>> +    int i;
>> +    uint8_t *cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
>> +    uint8_t *end = cur +
>> +        ((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL) * (sizeof(long));
>> +
>> +    for (i = 0; i < table_size && cur < end; ++i) {
>> +        uint64_t offset = table[i];
>> +        uint64_t count = MIN(cluster_size, end - cur);
>> +
>> +        /* Zero offset means zero region, offset = 1 means filled region.
>> +         * Cluster is not allocated in both cases. */
>> +        if (offset > 1) {
>> +            int ret;
>> +            if (cpu_to_le16(1) == 1) {
>> +                ret = bdrv_pwrite(bs, offset, cur, count);
>> +            } else {
>> +                void *tmp = g_memdup(cur, count);
>> +                longs_to_le((unsigned long *)cur,
>> +                            count / sizeof(unsigned long));
>> +                ret = bdrv_pwrite(bs, offset, tmp, count);
>> +                g_free(tmp);
>> +            }
>> +
>> +            if (ret < 0) {
>> +                return ret;
>> +            }
>> +        }
>> +
>> +        cur += cluster_size;
>> +    }
>> +
>> +    return 0;
>> +}
>>
John Snow March 22, 2016, 9:49 p.m. UTC | #3
On 03/15/2016 04:04 PM, Vladimir Sementsov-Ogievskiy wrote:
> Add functions for load/store HBitmap to BDS, using clusters table:
> Last level of the bitmap is splitted into chunks of 'cluster_size'
> size. Each cell of the table contains offset in bds, to load/store
> corresponding chunk.
> 
> Also,
>     0 in cell means all-zeroes-chunk (should not be saved)
>     1 in cell means all-ones-chunk (should not be saved)
>     hbitmap_prepare_store() fills table with
>       0 for all-zeroes chunks
>       1 for all-ones chunks
>       2 for others
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
> ---
>  block/dirty-bitmap.c         |  23 +++++
>  include/block/dirty-bitmap.h |  11 +++
>  include/qemu/hbitmap.h       |  12 +++
>  util/hbitmap.c               | 209 +++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 255 insertions(+)
> 
> diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
> index e68c177..816c6ee 100644
> --- a/block/dirty-bitmap.c
> +++ b/block/dirty-bitmap.c
> @@ -396,3 +396,26 @@ int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
>  {
>      return hbitmap_count(bitmap->bitmap);
>  }
> +
> +int bdrv_dirty_bitmap_load(BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
> +                           const uint64_t *table, uint32_t table_size,
> +                           uint32_t cluster_size)
> +{
> +    return hbitmap_load(bitmap->bitmap, bs, table, table_size, cluster_size);
> +}
> +
> +int bdrv_dirty_bitmap_prepare_store(const BdrvDirtyBitmap *bitmap,
> +                                    uint32_t cluster_size,
> +                                    uint64_t *table,
> +                                    uint32_t *table_size)
> +{
> +    return hbitmap_prepare_store(bitmap->bitmap, cluster_size,
> +                                 table, table_size);
> +}
> +
> +int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
> +                            const uint64_t *table, uint32_t table_size,
> +                            uint32_t cluster_size)
> +{
> +    return hbitmap_store(bitmap->bitmap, bs, table, table_size, cluster_size);
> +}
> diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
> index 27515af..20cb540 100644
> --- a/include/block/dirty-bitmap.h
> +++ b/include/block/dirty-bitmap.h
> @@ -43,4 +43,15 @@ void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
>  int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
>  void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
>  
> +int bdrv_dirty_bitmap_load(BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
> +                           const uint64_t *table, uint32_t table_size,
> +                           uint32_t cluster_size);
> +int bdrv_dirty_bitmap_prepare_store(const BdrvDirtyBitmap *bitmap,
> +                                    uint32_t cluster_size,
> +                                    uint64_t *table,
> +                                    uint32_t *table_size);
> +int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
> +                            const uint64_t *table, uint32_t table_size,
> +                            uint32_t cluster_size);
> +
>  #endif
> diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
> index 6d1da4d..d83bb79 100644
> --- a/include/qemu/hbitmap.h
> +++ b/include/qemu/hbitmap.h
> @@ -241,5 +241,17 @@ static inline size_t hbitmap_iter_next_word(HBitmapIter *hbi, unsigned long *p_c
>      return hbi->pos;
>  }
>  
> +typedef struct BlockDriverState BlockDriverState;
> +
> +int hbitmap_load(HBitmap *bitmap, BlockDriverState *bs,
> +                 const uint64_t *table, uint32_t table_size,
> +                 uint32_t cluster_size);
> +
> +int hbitmap_prepare_store(const HBitmap *bitmap, uint32_t cluster_size,
> +                          uint64_t *table, uint32_t *table_size);
> +
> +int hbitmap_store(HBitmap *bitmap, BlockDriverState *bs,
> +                  const uint64_t *table, uint32_t table_size,
> +                  uint32_t cluster_size);
>  
>  #endif
> diff --git a/util/hbitmap.c b/util/hbitmap.c
> index 28595fb..1960e4f 100644
> --- a/util/hbitmap.c
> +++ b/util/hbitmap.c
> @@ -15,6 +15,8 @@
>  #include "qemu/host-utils.h"
>  #include "trace.h"
>  
> +#include "block/block.h"
> +
>  /* HBitmaps provides an array of bits.  The bits are stored as usual in an
>   * array of unsigned longs, but HBitmap is also optimized to provide fast
>   * iteration over set bits; going from one bit to the next is O(logB n)
> @@ -499,3 +501,210 @@ char *hbitmap_md5(const HBitmap *bitmap)
>      const guchar *data = (const guchar *)bitmap->levels[HBITMAP_LEVELS - 1];
>      return g_compute_checksum_for_data(G_CHECKSUM_MD5, data, size);
>  }
> +
> +/* hb_restore_levels()
> + * Using the last level restore all other levels
> + */
> +static void hb_restore_levels(HBitmap *bitmap)
> +{
> +    int64_t i, size, prev_size;
> +    int lev;
> +
> +    /* restore levels starting from penultimate to zero level, assuming
> +     * that the last level is ok */
> +    size = MAX((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
> +    for (lev = HBITMAP_LEVELS - 1; lev-- > 0; ) {
> +        prev_size = size;
> +        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
> +        memset(bitmap->levels[lev], 0, size * sizeof(unsigned long));
> +
> +        for (i = 0; i < prev_size; ++i) {
> +            if (bitmap->levels[lev + 1][i]) {
> +                bitmap->levels[lev][i >> BITS_PER_LEVEL] |=
> +                    1UL << (i & (BITS_PER_LONG - 1));
> +            }
> +        }
> +    }
> +
> +    bitmap->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
> +}
> +
> +/* load_bitmap()
> + * Load dirty bitmap from file, using table with cluster offsets.
> + * Table entries are assumed to be in little endian format.
> + */
> +int hbitmap_load(HBitmap *bitmap, BlockDriverState *bs,
> +                 const uint64_t *table, uint32_t table_size,
> +                 uint32_t cluster_size)
> +{
> +    uint32_t i;
> +    uint8_t *cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
> +    uint8_t *end = cur + ((bitmap->size + 7) >> 3);
> +
> +    hbitmap_reset_all(bitmap);
> +
> +    for (i = 0; i < table_size && cur < end; ++i) {
> +        uint64_t offset = table[i];
> +        uint64_t count = MIN(cluster_size, end - cur);
> +
> +        /* Zero offset means zero region, offset = 1 means filled region.
> +         * Cluster is not allocated in both cases. */
> +        if (offset == 1) {
> +            memset(cur, 0xff, count);
> +        } else if (offset) {
> +            int ret = bdrv_pread(bs, offset, cur, count);
> +            if (ret < 0) {
> +                return ret;
> +            }
> +        }
> +
> +        cur += cluster_size;
> +    }
> +
> +    cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
> +    while (cur < end) {
> +        if (BITS_PER_LONG == 32) {
> +            le32_to_cpus((uint32_t *)cur);
> +        } else {
> +            le64_to_cpus((uint64_t *)cur);
> +        }
> +
> +        cur += sizeof(unsigned long);
> +    }
> +
> +    hb_restore_levels(bitmap);
> +
> +    return 0;
> +}
> +
> +static bool buffer_is_all_ones(void *buf, size_t len)
> +{
> +    /* FIXME */
> +    return false;
> +}
> +
> +/* hbitmap_prepare_store()
> + * Devide bitmap data into clusters, and then,
> + * for zero cluster: table[i] = 0
> + * for all-ones cluster: table[i] = 1
> + * for other clusters: table[i] = 2
> + */
> +int hbitmap_prepare_store(const HBitmap *bitmap,
> +                          uint32_t cluster_size,
> +                          uint64_t *table,
> +                          uint32_t *table_size)
> +{
> +    HBitmapIter hbi;
> +    hbitmap_iter_init(&hbi, bitmap, 0);
> +    uint64_t nb_bits_in_cl = (uint64_t)cluster_size << 3;
> +    uint32_t need_table_size =
> +            (bitmap->size + nb_bits_in_cl - 1) / nb_bits_in_cl;
> +
> +    if (table == NULL && *table_size == 0) {
> +        *table_size = need_table_size;
> +        return 0;
> +    }
> +
> +    if (*table_size < need_table_size) {
> +        return -ENOMEM;
> +    }
> +
> +    memset(table, 0, *table_size * sizeof(table[0]));
> +
> +    for (;;) {
> +        unsigned long cur;
> +        size_t pos = hbitmap_iter_next_word(&hbi, &cur);
> +        size_t byte = pos * sizeof(unsigned long);
> +        uint64_t bit = byte << 3;
> +        uint64_t nbits = MIN(cluster_size << 3, bitmap->size - bit), next_bit;
> +        size_t i = byte / cluster_size;
> +
> +        if (pos == -1) {
> +            break;
> +        }
> +
> +        if (pos % cluster_size != 0) {
> +            table[i] = 2;
> +        } else if (buffer_is_all_ones(&bitmap->levels[HBITMAP_LEVELS - 1][pos],
> +                               nbits >> 3)) {
> +            table[i] = 1;
> +            if (nbits & 7) {
> +                uint8_t last_byte =
> +                        *(((uint8_t *)&bitmap->levels[HBITMAP_LEVELS - 1][pos])
> +                                + (nbits >> 3));
> +                if (last_byte != ((1 << (nbits & 7)) - 1)) {
> +                    table[i] = 2;
> +                }
> +            }
> +        } else {
> +            table[i] = 2;
> +        }
> +
> +        next_bit = (i + 1) * cluster_size << 3;
> +
> +        if (next_bit >= bitmap->size) {
> +            break;
> +        }
> +
> +        hbitmap_iter_init(&hbi, bitmap, next_bit << bitmap->granularity);
> +    }
> +
> +    return 0;
> +}
> +
> +static void longs_to_le(unsigned long *longs, size_t count)
> +{
> +    unsigned long *end = longs + count;
> +    while (longs < end) {
> +        if (BITS_PER_LONG == 32) {
> +            cpu_to_le32s((uint32_t *)longs);
> +        } else {
> +            cpu_to_le64s((uint64_t *)longs);
> +        }
> +
> +        longs++;
> +    }
> +}
> +
> +/* store_bitmap()
> + * update bitmap table by storing bitmap to it.
> + * bitmap table entries are assumed to be in big endian format
> + * On the error, the resulting bitmap table is valid for clearing, but
> + * may contain invalid bitmap */
> +int hbitmap_store(HBitmap *bitmap, BlockDriverState *bs,
> +                  const uint64_t *table, uint32_t table_size,
> +                  uint32_t cluster_size)
> +{
> +    int i;
> +    uint8_t *cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
> +    uint8_t *end = cur +
> +        ((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL) * (sizeof(long));
> +
> +    for (i = 0; i < table_size && cur < end; ++i) {
> +        uint64_t offset = table[i];
> +        uint64_t count = MIN(cluster_size, end - cur);
> +
> +        /* Zero offset means zero region, offset = 1 means filled region.
> +         * Cluster is not allocated in both cases. */
> +        if (offset > 1) {
> +            int ret;
> +            if (cpu_to_le16(1) == 1) {
> +                ret = bdrv_pwrite(bs, offset, cur, count);

I suspect that the reason you're using bdrv_pwrite down in here is to
avoid a buffered copy where hbitmap prepares a buffer and then the file
format layer decides what to do with it, opting instead to allow the
hbitmap layer itself to do a direct write.

I don't think this is going to work, though -- hbitmaps are a generic
utility and shouldn't have block layer access.

 I think the standard, naive design is the right one:

(1) qcow2-bitmap.c calls
bdrv_dirty_bitmap_serialize(bdrvdirtybitmap *bitmap, void *out);
and serialization primitives for hbitmap are used to construct a buffer
placed in *out.

(2) qcow2-bitmap.c writes this buffer itself where it needs it, using
bdrv_pwrite.

If this proves to be too slow, we can optimize it later.

> +            } else {
> +                void *tmp = g_memdup(cur, count);
> +                longs_to_le((unsigned long *)cur,
> +                            count / sizeof(unsigned long));
> +                ret = bdrv_pwrite(bs, offset, tmp, count);
> +                g_free(tmp);
> +            }
> +
> +            if (ret < 0) {
> +                return ret;
> +            }
> +        }
> +
> +        cur += cluster_size;
> +    }
> +
> +    return 0;
> +}
>
Vladimir Sementsov-Ogievskiy March 23, 2016, 8:22 a.m. UTC | #4
On 23.03.2016 00:49, John Snow wrote:
>
> On 03/15/2016 04:04 PM, Vladimir Sementsov-Ogievskiy wrote:
>> Add functions for load/store HBitmap to BDS, using clusters table:
>> Last level of the bitmap is splitted into chunks of 'cluster_size'
>> size. Each cell of the table contains offset in bds, to load/store
>> corresponding chunk.
>>
>> Also,
>>      0 in cell means all-zeroes-chunk (should not be saved)
>>      1 in cell means all-ones-chunk (should not be saved)
>>      hbitmap_prepare_store() fills table with
>>        0 for all-zeroes chunks
>>        1 for all-ones chunks
>>        2 for others
>>
>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>> ---
>>   block/dirty-bitmap.c         |  23 +++++
>>   include/block/dirty-bitmap.h |  11 +++
>>   include/qemu/hbitmap.h       |  12 +++
>>   util/hbitmap.c               | 209 +++++++++++++++++++++++++++++++++++++++++++
>>   4 files changed, 255 insertions(+)
>>
>> diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
>> index e68c177..816c6ee 100644
>> --- a/block/dirty-bitmap.c
>> +++ b/block/dirty-bitmap.c
>> @@ -396,3 +396,26 @@ int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
>>   {
>>       return hbitmap_count(bitmap->bitmap);
>>   }
>> +
>> +int bdrv_dirty_bitmap_load(BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
>> +                           const uint64_t *table, uint32_t table_size,
>> +                           uint32_t cluster_size)
>> +{
>> +    return hbitmap_load(bitmap->bitmap, bs, table, table_size, cluster_size);
>> +}
>> +
>> +int bdrv_dirty_bitmap_prepare_store(const BdrvDirtyBitmap *bitmap,
>> +                                    uint32_t cluster_size,
>> +                                    uint64_t *table,
>> +                                    uint32_t *table_size)
>> +{
>> +    return hbitmap_prepare_store(bitmap->bitmap, cluster_size,
>> +                                 table, table_size);
>> +}
>> +
>> +int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
>> +                            const uint64_t *table, uint32_t table_size,
>> +                            uint32_t cluster_size)
>> +{
>> +    return hbitmap_store(bitmap->bitmap, bs, table, table_size, cluster_size);
>> +}
>> diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
>> index 27515af..20cb540 100644
>> --- a/include/block/dirty-bitmap.h
>> +++ b/include/block/dirty-bitmap.h
>> @@ -43,4 +43,15 @@ void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
>>   int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
>>   void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
>>   
>> +int bdrv_dirty_bitmap_load(BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
>> +                           const uint64_t *table, uint32_t table_size,
>> +                           uint32_t cluster_size);
>> +int bdrv_dirty_bitmap_prepare_store(const BdrvDirtyBitmap *bitmap,
>> +                                    uint32_t cluster_size,
>> +                                    uint64_t *table,
>> +                                    uint32_t *table_size);
>> +int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
>> +                            const uint64_t *table, uint32_t table_size,
>> +                            uint32_t cluster_size);
>> +
>>   #endif
>> diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
>> index 6d1da4d..d83bb79 100644
>> --- a/include/qemu/hbitmap.h
>> +++ b/include/qemu/hbitmap.h
>> @@ -241,5 +241,17 @@ static inline size_t hbitmap_iter_next_word(HBitmapIter *hbi, unsigned long *p_c
>>       return hbi->pos;
>>   }
>>   
>> +typedef struct BlockDriverState BlockDriverState;
>> +
>> +int hbitmap_load(HBitmap *bitmap, BlockDriverState *bs,
>> +                 const uint64_t *table, uint32_t table_size,
>> +                 uint32_t cluster_size);
>> +
>> +int hbitmap_prepare_store(const HBitmap *bitmap, uint32_t cluster_size,
>> +                          uint64_t *table, uint32_t *table_size);
>> +
>> +int hbitmap_store(HBitmap *bitmap, BlockDriverState *bs,
>> +                  const uint64_t *table, uint32_t table_size,
>> +                  uint32_t cluster_size);
>>   
>>   #endif
>> diff --git a/util/hbitmap.c b/util/hbitmap.c
>> index 28595fb..1960e4f 100644
>> --- a/util/hbitmap.c
>> +++ b/util/hbitmap.c
>> @@ -15,6 +15,8 @@
>>   #include "qemu/host-utils.h"
>>   #include "trace.h"
>>   
>> +#include "block/block.h"
>> +
>>   /* HBitmaps provides an array of bits.  The bits are stored as usual in an
>>    * array of unsigned longs, but HBitmap is also optimized to provide fast
>>    * iteration over set bits; going from one bit to the next is O(logB n)
>> @@ -499,3 +501,210 @@ char *hbitmap_md5(const HBitmap *bitmap)
>>       const guchar *data = (const guchar *)bitmap->levels[HBITMAP_LEVELS - 1];
>>       return g_compute_checksum_for_data(G_CHECKSUM_MD5, data, size);
>>   }
>> +
>> +/* hb_restore_levels()
>> + * Using the last level restore all other levels
>> + */
>> +static void hb_restore_levels(HBitmap *bitmap)
>> +{
>> +    int64_t i, size, prev_size;
>> +    int lev;
>> +
>> +    /* restore levels starting from penultimate to zero level, assuming
>> +     * that the last level is ok */
>> +    size = MAX((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
>> +    for (lev = HBITMAP_LEVELS - 1; lev-- > 0; ) {
>> +        prev_size = size;
>> +        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
>> +        memset(bitmap->levels[lev], 0, size * sizeof(unsigned long));
>> +
>> +        for (i = 0; i < prev_size; ++i) {
>> +            if (bitmap->levels[lev + 1][i]) {
>> +                bitmap->levels[lev][i >> BITS_PER_LEVEL] |=
>> +                    1UL << (i & (BITS_PER_LONG - 1));
>> +            }
>> +        }
>> +    }
>> +
>> +    bitmap->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
>> +}
>> +
>> +/* load_bitmap()
>> + * Load dirty bitmap from file, using table with cluster offsets.
>> + * Table entries are assumed to be in little endian format.
>> + */
>> +int hbitmap_load(HBitmap *bitmap, BlockDriverState *bs,
>> +                 const uint64_t *table, uint32_t table_size,
>> +                 uint32_t cluster_size)
>> +{
>> +    uint32_t i;
>> +    uint8_t *cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
>> +    uint8_t *end = cur + ((bitmap->size + 7) >> 3);
>> +
>> +    hbitmap_reset_all(bitmap);
>> +
>> +    for (i = 0; i < table_size && cur < end; ++i) {
>> +        uint64_t offset = table[i];
>> +        uint64_t count = MIN(cluster_size, end - cur);
>> +
>> +        /* Zero offset means zero region, offset = 1 means filled region.
>> +         * Cluster is not allocated in both cases. */
>> +        if (offset == 1) {
>> +            memset(cur, 0xff, count);
>> +        } else if (offset) {
>> +            int ret = bdrv_pread(bs, offset, cur, count);
>> +            if (ret < 0) {
>> +                return ret;
>> +            }
>> +        }
>> +
>> +        cur += cluster_size;
>> +    }
>> +
>> +    cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
>> +    while (cur < end) {
>> +        if (BITS_PER_LONG == 32) {
>> +            le32_to_cpus((uint32_t *)cur);
>> +        } else {
>> +            le64_to_cpus((uint64_t *)cur);
>> +        }
>> +
>> +        cur += sizeof(unsigned long);
>> +    }
>> +
>> +    hb_restore_levels(bitmap);
>> +
>> +    return 0;
>> +}
>> +
>> +static bool buffer_is_all_ones(void *buf, size_t len)
>> +{
>> +    /* FIXME */
>> +    return false;
>> +}
>> +
>> +/* hbitmap_prepare_store()
>> + * Devide bitmap data into clusters, and then,
>> + * for zero cluster: table[i] = 0
>> + * for all-ones cluster: table[i] = 1
>> + * for other clusters: table[i] = 2
>> + */
>> +int hbitmap_prepare_store(const HBitmap *bitmap,
>> +                          uint32_t cluster_size,
>> +                          uint64_t *table,
>> +                          uint32_t *table_size)
>> +{
>> +    HBitmapIter hbi;
>> +    hbitmap_iter_init(&hbi, bitmap, 0);
>> +    uint64_t nb_bits_in_cl = (uint64_t)cluster_size << 3;
>> +    uint32_t need_table_size =
>> +            (bitmap->size + nb_bits_in_cl - 1) / nb_bits_in_cl;
>> +
>> +    if (table == NULL && *table_size == 0) {
>> +        *table_size = need_table_size;
>> +        return 0;
>> +    }
>> +
>> +    if (*table_size < need_table_size) {
>> +        return -ENOMEM;
>> +    }
>> +
>> +    memset(table, 0, *table_size * sizeof(table[0]));
>> +
>> +    for (;;) {
>> +        unsigned long cur;
>> +        size_t pos = hbitmap_iter_next_word(&hbi, &cur);
>> +        size_t byte = pos * sizeof(unsigned long);
>> +        uint64_t bit = byte << 3;
>> +        uint64_t nbits = MIN(cluster_size << 3, bitmap->size - bit), next_bit;
>> +        size_t i = byte / cluster_size;
>> +
>> +        if (pos == -1) {
>> +            break;
>> +        }
>> +
>> +        if (pos % cluster_size != 0) {
>> +            table[i] = 2;
>> +        } else if (buffer_is_all_ones(&bitmap->levels[HBITMAP_LEVELS - 1][pos],
>> +                               nbits >> 3)) {
>> +            table[i] = 1;
>> +            if (nbits & 7) {
>> +                uint8_t last_byte =
>> +                        *(((uint8_t *)&bitmap->levels[HBITMAP_LEVELS - 1][pos])
>> +                                + (nbits >> 3));
>> +                if (last_byte != ((1 << (nbits & 7)) - 1)) {
>> +                    table[i] = 2;
>> +                }
>> +            }
>> +        } else {
>> +            table[i] = 2;
>> +        }
>> +
>> +        next_bit = (i + 1) * cluster_size << 3;
>> +
>> +        if (next_bit >= bitmap->size) {
>> +            break;
>> +        }
>> +
>> +        hbitmap_iter_init(&hbi, bitmap, next_bit << bitmap->granularity);
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static void longs_to_le(unsigned long *longs, size_t count)
>> +{
>> +    unsigned long *end = longs + count;
>> +    while (longs < end) {
>> +        if (BITS_PER_LONG == 32) {
>> +            cpu_to_le32s((uint32_t *)longs);
>> +        } else {
>> +            cpu_to_le64s((uint64_t *)longs);
>> +        }
>> +
>> +        longs++;
>> +    }
>> +}
>> +
>> +/* store_bitmap()
>> + * update bitmap table by storing bitmap to it.
>> + * bitmap table entries are assumed to be in big endian format
>> + * On the error, the resulting bitmap table is valid for clearing, but
>> + * may contain invalid bitmap */
>> +int hbitmap_store(HBitmap *bitmap, BlockDriverState *bs,
>> +                  const uint64_t *table, uint32_t table_size,
>> +                  uint32_t cluster_size)
>> +{
>> +    int i;
>> +    uint8_t *cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
>> +    uint8_t *end = cur +
>> +        ((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL) * (sizeof(long));
>> +
>> +    for (i = 0; i < table_size && cur < end; ++i) {
>> +        uint64_t offset = table[i];
>> +        uint64_t count = MIN(cluster_size, end - cur);
>> +
>> +        /* Zero offset means zero region, offset = 1 means filled region.
>> +         * Cluster is not allocated in both cases. */
>> +        if (offset > 1) {
>> +            int ret;
>> +            if (cpu_to_le16(1) == 1) {
>> +                ret = bdrv_pwrite(bs, offset, cur, count);
> I suspect that the reason you're using bdrv_pwrite down in here is to
> avoid a buffered copy where hbitmap prepares a buffer and then the file
> format layer decides what to do with it, opting instead to allow the
> hbitmap layer itself to do a direct write.
>
> I don't think this is going to work, though -- hbitmaps are a generic
> utility and shouldn't have block layer access.
>
>   I think the standard, naive design is the right one:
>
> (1) qcow2-bitmap.c calls
> bdrv_dirty_bitmap_serialize(bdrvdirtybitmap *bitmap, void *out);
> and serialization primitives for hbitmap are used to construct a buffer
> placed in *out.
>
> (2) qcow2-bitmap.c writes this buffer itself where it needs it, using
> bdrv_pwrite.
>
> If this proves to be too slow, we can optimize it later.

Maybe just pass a writing function to hbitmap_store, like int
(*pwrite_func)(void *opaque, int64_t offset, const void *buf, int bytes)?
Serialization is superfluous here. It is needed for migration, where
we are forced to work with data chunks at the protocol layer. But why
split the bitmap here, dance with granularity and block layer wrappers,
and make an additional copy of the data?

>
>> +            } else {
>> +                void *tmp = g_memdup(cur, count);
>> +                longs_to_le((unsigned long *)cur,
>> +                            count / sizeof(unsigned long));
>> +                ret = bdrv_pwrite(bs, offset, tmp, count);
>> +                g_free(tmp);
>> +            }
>> +
>> +            if (ret < 0) {
>> +                return ret;
>> +            }
>> +        }
>> +
>> +        cur += cluster_size;
>> +    }
>> +
>> +    return 0;
>> +}
>>
John Snow March 28, 2016, 8:20 p.m. UTC | #5
On 03/23/2016 04:22 AM, Vladimir Sementsov-Ogievskiy wrote:
> On 23.03.2016 00:49, John Snow wrote:
>>
>> On 03/15/2016 04:04 PM, Vladimir Sementsov-Ogievskiy wrote:
>>> Add functions for load/store HBitmap to BDS, using clusters table:
>>> Last level of the bitmap is splitted into chunks of 'cluster_size'
>>> size. Each cell of the table contains offset in bds, to load/store
>>> corresponding chunk.
>>>
>>> Also,
>>>      0 in cell means all-zeroes-chunk (should not be saved)
>>>      1 in cell means all-ones-chunk (should not be saved)
>>>      hbitmap_prepare_store() fills table with
>>>        0 for all-zeroes chunks
>>>        1 for all-ones chunks
>>>        2 for others
>>>
>>> Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
>>> ---
>>>   block/dirty-bitmap.c         |  23 +++++
>>>   include/block/dirty-bitmap.h |  11 +++
>>>   include/qemu/hbitmap.h       |  12 +++
>>>   util/hbitmap.c               | 209
>>> +++++++++++++++++++++++++++++++++++++++++++
>>>   4 files changed, 255 insertions(+)
>>>
>>> diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
>>> index e68c177..816c6ee 100644
>>> --- a/block/dirty-bitmap.c
>>> +++ b/block/dirty-bitmap.c
>>> @@ -396,3 +396,26 @@ int64_t bdrv_get_dirty_count(BdrvDirtyBitmap
>>> *bitmap)
>>>   {
>>>       return hbitmap_count(bitmap->bitmap);
>>>   }
>>> +
>>> +int bdrv_dirty_bitmap_load(BdrvDirtyBitmap *bitmap, BlockDriverState
>>> *bs,
>>> +                           const uint64_t *table, uint32_t table_size,
>>> +                           uint32_t cluster_size)
>>> +{
>>> +    return hbitmap_load(bitmap->bitmap, bs, table, table_size,
>>> cluster_size);
>>> +}
>>> +
>>> +int bdrv_dirty_bitmap_prepare_store(const BdrvDirtyBitmap *bitmap,
>>> +                                    uint32_t cluster_size,
>>> +                                    uint64_t *table,
>>> +                                    uint32_t *table_size)
>>> +{
>>> +    return hbitmap_prepare_store(bitmap->bitmap, cluster_size,
>>> +                                 table, table_size);
>>> +}
>>> +
>>> +int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap,
>>> BlockDriverState *bs,
>>> +                            const uint64_t *table, uint32_t table_size,
>>> +                            uint32_t cluster_size)
>>> +{
>>> +    return hbitmap_store(bitmap->bitmap, bs, table, table_size,
>>> cluster_size);
>>> +}
>>> diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
>>> index 27515af..20cb540 100644
>>> --- a/include/block/dirty-bitmap.h
>>> +++ b/include/block/dirty-bitmap.h
>>> @@ -43,4 +43,15 @@ void bdrv_set_dirty_iter(struct HBitmapIter *hbi,
>>> int64_t offset);
>>>   int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
>>>   void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
>>>   +int bdrv_dirty_bitmap_load(BdrvDirtyBitmap *bitmap,
>>> BlockDriverState *bs,
>>> +                           const uint64_t *table, uint32_t table_size,
>>> +                           uint32_t cluster_size);
>>> +int bdrv_dirty_bitmap_prepare_store(const BdrvDirtyBitmap *bitmap,
>>> +                                    uint32_t cluster_size,
>>> +                                    uint64_t *table,
>>> +                                    uint32_t *table_size);
>>> +int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap,
>>> BlockDriverState *bs,
>>> +                            const uint64_t *table, uint32_t table_size,
>>> +                            uint32_t cluster_size);
>>> +
>>>   #endif
>>> diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
>>> index 6d1da4d..d83bb79 100644
>>> --- a/include/qemu/hbitmap.h
>>> +++ b/include/qemu/hbitmap.h
>>> @@ -241,5 +241,17 @@ static inline size_t
>>> hbitmap_iter_next_word(HBitmapIter *hbi, unsigned long *p_c
>>>       return hbi->pos;
>>>   }
>>>   +typedef struct BlockDriverState BlockDriverState;
>>> +
>>> +int hbitmap_load(HBitmap *bitmap, BlockDriverState *bs,
>>> +                 const uint64_t *table, uint32_t table_size,
>>> +                 uint32_t cluster_size);
>>> +
>>> +int hbitmap_prepare_store(const HBitmap *bitmap, uint32_t cluster_size,
>>> +                          uint64_t *table, uint32_t *table_size);
>>> +
>>> +int hbitmap_store(HBitmap *bitmap, BlockDriverState *bs,
>>> +                  const uint64_t *table, uint32_t table_size,
>>> +                  uint32_t cluster_size);
>>>     #endif
>>> diff --git a/util/hbitmap.c b/util/hbitmap.c
>>> index 28595fb..1960e4f 100644
>>> --- a/util/hbitmap.c
>>> +++ b/util/hbitmap.c
>>> @@ -15,6 +15,8 @@
>>>   #include "qemu/host-utils.h"
>>>   #include "trace.h"
>>>   +#include "block/block.h"
>>> +
>>>   /* HBitmaps provides an array of bits.  The bits are stored as
>>> usual in an
>>>    * array of unsigned longs, but HBitmap is also optimized to
>>> provide fast
>>>    * iteration over set bits; going from one bit to the next is
>>> O(logB n)
>>> @@ -499,3 +501,210 @@ char *hbitmap_md5(const HBitmap *bitmap)
>>>       const guchar *data = (const guchar
>>> *)bitmap->levels[HBITMAP_LEVELS - 1];
>>>       return g_compute_checksum_for_data(G_CHECKSUM_MD5, data, size);
>>>   }
>>> +
>>> +/* hb_restore_levels()
>>> + * Using the last level restore all other levels
>>> + */
>>> +static void hb_restore_levels(HBitmap *bitmap)
>>> +{
>>> +    int64_t i, size, prev_size;
>>> +    int lev;
>>> +
>>> +    /* restore levels starting from penultimate to zero level, assuming
>>> +     * that the last level is ok */
>>> +    size = MAX((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL,
>>> 1);
>>> +    for (lev = HBITMAP_LEVELS - 1; lev-- > 0; ) {
>>> +        prev_size = size;
>>> +        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
>>> +        memset(bitmap->levels[lev], 0, size * sizeof(unsigned long));
>>> +
>>> +        for (i = 0; i < prev_size; ++i) {
>>> +            if (bitmap->levels[lev + 1][i]) {
>>> +                bitmap->levels[lev][i >> BITS_PER_LEVEL] |=
>>> +                    1UL << (i & (BITS_PER_LONG - 1));
>>> +            }
>>> +        }
>>> +    }
>>> +
>>> +    bitmap->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
>>> +}
>>> +
>>> +/* load_bitmap()
>>> + * Load dirty bitmap from file, using table with cluster offsets.
>>> + * Table entries are assumed to be in little endian format.
>>> + */
>>> +int hbitmap_load(HBitmap *bitmap, BlockDriverState *bs,
>>> +                 const uint64_t *table, uint32_t table_size,
>>> +                 uint32_t cluster_size)
>>> +{
>>> +    uint32_t i;
>>> +    uint8_t *cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
>>> +    uint8_t *end = cur + ((bitmap->size + 7) >> 3);
>>> +
>>> +    hbitmap_reset_all(bitmap);
>>> +
>>> +    for (i = 0; i < table_size && cur < end; ++i) {
>>> +        uint64_t offset = table[i];
>>> +        uint64_t count = MIN(cluster_size, end - cur);
>>> +
>>> +        /* Zero offset means zero region, offset = 1 means filled
>>> region.
>>> +         * Cluster is not allocated in both cases. */
>>> +        if (offset == 1) {
>>> +            memset(cur, 0xff, count);
>>> +        } else if (offset) {
>>> +            int ret = bdrv_pread(bs, offset, cur, count);
>>> +            if (ret < 0) {
>>> +                return ret;
>>> +            }
>>> +        }
>>> +
>>> +        cur += cluster_size;
>>> +    }
>>> +
>>> +    cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
>>> +    while (cur < end) {
>>> +        if (BITS_PER_LONG == 32) {
>>> +            le32_to_cpus((uint32_t *)cur);
>>> +        } else {
>>> +            le64_to_cpus((uint64_t *)cur);
>>> +        }
>>> +
>>> +        cur += sizeof(unsigned long);
>>> +    }
>>> +
>>> +    hb_restore_levels(bitmap);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static bool buffer_is_all_ones(void *buf, size_t len)
>>> +{
>>> +    /* FIXME */
>>> +    return false;
>>> +}
>>> +
>>> +/* hbitmap_prepare_store()
>>> + * Devide bitmap data into clusters, and then,
>>> + * for zero cluster: table[i] = 0
>>> + * for all-ones cluster: table[i] = 1
>>> + * for other clusters: table[i] = 2
>>> + */
>>> +int hbitmap_prepare_store(const HBitmap *bitmap,
>>> +                          uint32_t cluster_size,
>>> +                          uint64_t *table,
>>> +                          uint32_t *table_size)
>>> +{
>>> +    HBitmapIter hbi;
>>> +    hbitmap_iter_init(&hbi, bitmap, 0);
>>> +    uint64_t nb_bits_in_cl = (uint64_t)cluster_size << 3;
>>> +    uint32_t need_table_size =
>>> +            (bitmap->size + nb_bits_in_cl - 1) / nb_bits_in_cl;
>>> +
>>> +    if (table == NULL && *table_size == 0) {
>>> +        *table_size = need_table_size;
>>> +        return 0;
>>> +    }
>>> +
>>> +    if (*table_size < need_table_size) {
>>> +        return -ENOMEM;
>>> +    }
>>> +
>>> +    memset(table, 0, *table_size * sizeof(table[0]));
>>> +
>>> +    for (;;) {
>>> +        unsigned long cur;
>>> +        size_t pos = hbitmap_iter_next_word(&hbi, &cur);
>>> +        size_t byte = pos * sizeof(unsigned long);
>>> +        uint64_t bit = byte << 3;
>>> +        uint64_t nbits = MIN(cluster_size << 3, bitmap->size - bit),
>>> next_bit;
>>> +        size_t i = byte / cluster_size;
>>> +
>>> +        if (pos == -1) {
>>> +            break;
>>> +        }
>>> +
>>> +        if (pos % cluster_size != 0) {
>>> +            table[i] = 2;
>>> +        } else if (buffer_is_all_ones(&bitmap->levels[HBITMAP_LEVELS
>>> - 1][pos],
>>> +                               nbits >> 3)) {
>>> +            table[i] = 1;
>>> +            if (nbits & 7) {
>>> +                uint8_t last_byte =
>>> +                        *(((uint8_t *)&bitmap->levels[HBITMAP_LEVELS
>>> - 1][pos])
>>> +                                + (nbits >> 3));
>>> +                if (last_byte != ((1 << (nbits & 7)) - 1)) {
>>> +                    table[i] = 2;
>>> +                }
>>> +            }
>>> +        } else {
>>> +            table[i] = 2;
>>> +        }
>>> +
>>> +        next_bit = (i + 1) * cluster_size << 3;
>>> +
>>> +        if (next_bit >= bitmap->size) {
>>> +            break;
>>> +        }
>>> +
>>> +        hbitmap_iter_init(&hbi, bitmap, next_bit <<
>>> bitmap->granularity);
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static void longs_to_le(unsigned long *longs, size_t count)
>>> +{
>>> +    unsigned long *end = longs + count;
>>> +    while (longs < end) {
>>> +        if (BITS_PER_LONG == 32) {
>>> +            cpu_to_le32s((uint32_t *)longs);
>>> +        } else {
>>> +            cpu_to_le64s((uint64_t *)longs);
>>> +        }
>>> +
>>> +        longs++;
>>> +    }
>>> +}
>>> +
>>> +/* store_bitmap()
>>> + * update bitmap table by storing bitmap to it.
>>> + * bitmap table entries are assumed to be in big endian format
>>> + * On the error, the resulting bitmap table is valid for clearing, but
>>> + * may contain invalid bitmap */
>>> +int hbitmap_store(HBitmap *bitmap, BlockDriverState *bs,
>>> +                  const uint64_t *table, uint32_t table_size,
>>> +                  uint32_t cluster_size)
>>> +{
>>> +    int i;
>>> +    uint8_t *cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
>>> +    uint8_t *end = cur +
>>> +        ((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL) *
>>> (sizeof(long));
>>> +
>>> +    for (i = 0; i < table_size && cur < end; ++i) {
>>> +        uint64_t offset = table[i];
>>> +        uint64_t count = MIN(cluster_size, end - cur);
>>> +
>>> +        /* Zero offset means zero region, offset = 1 means filled
>>> region.
>>> +         * Cluster is not allocated in both cases. */
>>> +        if (offset > 1) {
>>> +            int ret;
>>> +            if (cpu_to_le16(1) == 1) {
>>> +                ret = bdrv_pwrite(bs, offset, cur, count);
>> I suspect that the reason you're using bdrv_pwrite down in here is to
>> avoid a buffered copy where hbitmap prepares a buffer and then the file
>> format layer decides what to do with it, opting instead to allow the
>> hbitmap layer itself to do a direct write.
>>
>> I don't think this is going to work, though -- hbitmaps are a generic
>> utility and shouldn't have block layer access.
>>
>>   I think the standard, naive design is the right one:
>>
>> (1) qcow2-bitmap.c calls
>> bdrv_dirty_bitmap_serialize(bdrvdirtybitmap *bitmap, void *out);
>> and serialization primitives for hbitmap are used to construct a buffer
>> placed in *out.
>>
>> (2) qcow2-bitmap.c writes this buffer itself where it needs it, using
>> bdrv_pwrite.
>>
>> If this proves to be too slow, we can optimize it later.
> 
> May be just pass writing function to hbitmap_store, like int
> (*pwrite_func)(void *opaque, int64_t offset, const void *buf, int bytes)
> ? Serialization is superfluous here. It is needed for migration, where
> we forced to work with data chunks protocol layer. But why to split here
> the bitmap, dance with granularity and block layer wrappers, and make an
> additional copy of the data?
> 

I hate to say it, but I think this is premature optimization. If you can
prove there is a meaningful difference between a buffered serialization
and a zero-copy approach, I'll go along with it -- but I think until we are
benchmarking quite large bitmaps, we won't see much of a difference.

This should be easy enough to improve later if we need to.

>>
>>> +            } else {
>>> +                void *tmp = g_memdup(cur, count);
>>> +                longs_to_le((unsigned long *)cur,
>>> +                            count / sizeof(unsigned long));
>>> +                ret = bdrv_pwrite(bs, offset, tmp, count);
>>> +                g_free(tmp);
>>> +            }
>>> +
>>> +            if (ret < 0) {
>>> +                return ret;
>>> +            }
>>> +        }
>>> +
>>> +        cur += cluster_size;
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>>
> 
>
diff mbox

Patch

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index e68c177..816c6ee 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -396,3 +396,26 @@  int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
 {
     return hbitmap_count(bitmap->bitmap);
 }
+
+/* Load the content of @bitmap from @bs using the cluster offset @table;
+ * thin wrapper around hbitmap_load(). */
+int bdrv_dirty_bitmap_load(BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
+                           const uint64_t *table, uint32_t table_size,
+                           uint32_t cluster_size)
+{
+    return hbitmap_load(bitmap->bitmap, bs, table, table_size, cluster_size);
+}
+
+/* Classify each cluster of @bitmap in @table (0 = all-zeroes, 1 = all-ones,
+ * 2 = must be stored); thin wrapper around hbitmap_prepare_store(). */
+int bdrv_dirty_bitmap_prepare_store(const BdrvDirtyBitmap *bitmap,
+                                    uint32_t cluster_size,
+                                    uint64_t *table,
+                                    uint32_t *table_size)
+{
+    return hbitmap_prepare_store(bitmap->bitmap, cluster_size,
+                                 table, table_size);
+}
+
+/* Write the content of @bitmap to @bs at the offsets given in @table;
+ * thin wrapper around hbitmap_store(). */
+int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
+                            const uint64_t *table, uint32_t table_size,
+                            uint32_t cluster_size)
+{
+    return hbitmap_store(bitmap->bitmap, bs, table, table_size, cluster_size);
+}
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 27515af..20cb540 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -43,4 +43,15 @@  void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset);
 int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap);
 void bdrv_dirty_bitmap_truncate(BlockDriverState *bs);
 
+int bdrv_dirty_bitmap_load(BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
+                           const uint64_t *table, uint32_t table_size,
+                           uint32_t cluster_size);
+int bdrv_dirty_bitmap_prepare_store(const BdrvDirtyBitmap *bitmap,
+                                    uint32_t cluster_size,
+                                    uint64_t *table,
+                                    uint32_t *table_size);
+int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap, BlockDriverState *bs,
+                            const uint64_t *table, uint32_t table_size,
+                            uint32_t cluster_size);
+
 #endif
diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h
index 6d1da4d..d83bb79 100644
--- a/include/qemu/hbitmap.h
+++ b/include/qemu/hbitmap.h
@@ -241,5 +241,17 @@  static inline size_t hbitmap_iter_next_word(HBitmapIter *hbi, unsigned long *p_c
     return hbi->pos;
 }
 
+typedef struct BlockDriverState BlockDriverState;
+
+int hbitmap_load(HBitmap *bitmap, BlockDriverState *bs,
+                 const uint64_t *table, uint32_t table_size,
+                 uint32_t cluster_size);
+
+int hbitmap_prepare_store(const HBitmap *bitmap, uint32_t cluster_size,
+                          uint64_t *table, uint32_t *table_size);
+
+int hbitmap_store(HBitmap *bitmap, BlockDriverState *bs,
+                  const uint64_t *table, uint32_t table_size,
+                  uint32_t cluster_size);
 
 #endif
diff --git a/util/hbitmap.c b/util/hbitmap.c
index 28595fb..1960e4f 100644
--- a/util/hbitmap.c
+++ b/util/hbitmap.c
@@ -15,6 +15,8 @@ 
 #include "qemu/host-utils.h"
 #include "trace.h"
 
+#include "block/block.h"
+
 /* HBitmaps provides an array of bits.  The bits are stored as usual in an
  * array of unsigned longs, but HBitmap is also optimized to provide fast
  * iteration over set bits; going from one bit to the next is O(logB n)
@@ -499,3 +501,210 @@  char *hbitmap_md5(const HBitmap *bitmap)
     const guchar *data = (const guchar *)bitmap->levels[HBITMAP_LEVELS - 1];
     return g_compute_checksum_for_data(G_CHECKSUM_MD5, data, size);
 }
+
+/* hb_restore_levels()
+ * Rebuild every upper level of the bitmap from the last (finest) level,
+ * which is assumed to be valid.  Called after the last level has been
+ * (re)loaded wholesale, e.g. by hbitmap_load().
+ */
+static void hb_restore_levels(HBitmap *bitmap)
+{
+    int64_t i, size, prev_size;
+    int lev;
+
+    /* restore levels starting from penultimate to zero level, assuming
+     * that the last level is ok */
+    size = MAX((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
+    for (lev = HBITMAP_LEVELS - 1; lev-- > 0; ) {
+        prev_size = size;
+        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
+        memset(bitmap->levels[lev], 0, size * sizeof(unsigned long));
+
+        /* Set one bit in level 'lev' for every non-zero word in the level
+         * below it. */
+        for (i = 0; i < prev_size; ++i) {
+            if (bitmap->levels[lev + 1][i]) {
+                bitmap->levels[lev][i >> BITS_PER_LEVEL] |=
+                    1UL << (i & (BITS_PER_LONG - 1));
+            }
+        }
+    }
+
+    /* NOTE(review): presumably this restores the sentinel bit that hbitmap
+     * keeps in the topmost level for iterator termination -- confirm
+     * against hbitmap_alloc(). */
+    bitmap->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
+}
+
+/* hbitmap_load()
+ * Load the dirty bitmap content from @bs, using @table of cluster offsets.
+ * Table entries are assumed to be in little endian format.
+ *
+ * A table entry of 0 denotes an all-zeroes cluster and 1 denotes an
+ * all-ones cluster (no cluster allocated in either case); any other value
+ * is the byte offset in @bs to read the cluster from.  On-disk data is
+ * little endian; it is byte-swapped in place to host order before the
+ * upper levels are rebuilt.
+ *
+ * Returns 0 on success, a negative errno from bdrv_pread() on failure.
+ */
+int hbitmap_load(HBitmap *bitmap, BlockDriverState *bs,
+                 const uint64_t *table, uint32_t table_size,
+                 uint32_t cluster_size)
+{
+    uint32_t i;
+    /* 'cur' walks the last (finest) level, which holds the actual bits. */
+    uint8_t *cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
+    uint8_t *end = cur + ((bitmap->size + 7) >> 3);
+
+    hbitmap_reset_all(bitmap);
+
+    for (i = 0; i < table_size && cur < end; ++i) {
+        uint64_t offset = table[i];
+        uint64_t count = MIN(cluster_size, end - cur);
+
+        /* Zero offset means zero region, offset = 1 means filled region.
+         * Cluster is not allocated in both cases. */
+        if (offset == 1) {
+            memset(cur, 0xff, count);
+        } else if (offset) {
+            int ret = bdrv_pread(bs, offset, cur, count);
+            if (ret < 0) {
+                return ret;
+            }
+        }
+
+        cur += cluster_size;
+    }
+
+    /* Convert the loaded words from little endian to host byte order,
+     * one unsigned long at a time. */
+    cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
+    while (cur < end) {
+        if (BITS_PER_LONG == 32) {
+            le32_to_cpus((uint32_t *)cur);
+        } else {
+            le64_to_cpus((uint64_t *)cur);
+        }
+
+        cur += sizeof(unsigned long);
+    }
+
+    hb_restore_levels(bitmap);
+
+    return 0;
+}
+
+/* buffer_is_all_ones()
+ * Return true iff every byte of the @len bytes at @buf is 0xff.
+ * Used to detect all-ones clusters, which need no storage (they are
+ * encoded as table entry 1 by hbitmap_prepare_store()).
+ *
+ * Was a FIXME stub that always returned false, which silently disabled
+ * the all-ones-cluster optimization.
+ */
+static bool buffer_is_all_ones(void *buf, size_t len)
+{
+    const uint8_t *p = buf;
+    size_t i;
+
+    for (i = 0; i < len; i++) {
+        if (p[i] != 0xff) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/* hbitmap_prepare_store()
+ * Divide the last-level bitmap data into clusters of @cluster_size bytes
+ * and classify each cluster in @table:
+ *   table[i] = 0  all-zeroes cluster (should not be stored)
+ *   table[i] = 1  all-ones cluster (should not be stored)
+ *   table[i] = 2  mixed cluster, must be written out
+ *
+ * If @table is NULL and *@table_size is 0, only the required table size
+ * is reported in *@table_size.  Returns 0 on success, -ENOMEM if the
+ * provided table is too small.
+ */
+int hbitmap_prepare_store(const HBitmap *bitmap,
+                          uint32_t cluster_size,
+                          uint64_t *table,
+                          uint32_t *table_size)
+{
+    HBitmapIter hbi;
+    hbitmap_iter_init(&hbi, bitmap, 0);
+    uint64_t nb_bits_in_cl = (uint64_t)cluster_size << 3;
+    uint32_t need_table_size =
+            (bitmap->size + nb_bits_in_cl - 1) / nb_bits_in_cl;
+
+    /* Size query only. */
+    if (table == NULL && *table_size == 0) {
+        *table_size = need_table_size;
+        return 0;
+    }
+
+    if (*table_size < need_table_size) {
+        return -ENOMEM;
+    }
+
+    /* All-zeroes by default; the iterator below only visits words that
+     * contain set bits. */
+    memset(table, 0, *table_size * sizeof(table[0]));
+
+    for (;;) {
+        unsigned long cur;
+        size_t pos = hbitmap_iter_next_word(&hbi, &cur);
+        size_t byte, i;
+        uint64_t bit, nbits, next_bit;
+
+        /* Check for end of iteration before deriving anything from pos. */
+        if (pos == -1) {
+            break;
+        }
+
+        byte = pos * sizeof(unsigned long);
+        bit = byte << 3;
+        nbits = MIN(nb_bits_in_cl, bitmap->size - bit);
+        i = byte / cluster_size;
+
+        if (byte % cluster_size != 0) {
+            /* The first set word is not at a cluster boundary, so the
+             * cluster has a leading all-zeroes gap: it is mixed.  (Compare
+             * byte offsets, not word indices, against cluster_size.) */
+            table[i] = 2;
+        } else if (buffer_is_all_ones(&bitmap->levels[HBITMAP_LEVELS - 1][pos],
+                               nbits >> 3)) {
+            table[i] = 1;
+            if (nbits & 7) {
+                /* Also check the trailing partial byte of the bitmap. */
+                uint8_t last_byte =
+                        *(((uint8_t *)&bitmap->levels[HBITMAP_LEVELS - 1][pos])
+                                + (nbits >> 3));
+                if (last_byte != ((1 << (nbits & 7)) - 1)) {
+                    table[i] = 2;
+                }
+            }
+        } else {
+            table[i] = 2;
+        }
+
+        /* Jump the iterator to the first bit of the next cluster; use the
+         * 64-bit nb_bits_in_cl to avoid 32-bit overflow. */
+        next_bit = (i + 1) * nb_bits_in_cl;
+
+        if (next_bit >= bitmap->size) {
+            break;
+        }
+
+        hbitmap_iter_init(&hbi, bitmap, next_bit << bitmap->granularity);
+    }
+
+    return 0;
+}
+
+/* longs_to_le()
+ * Convert @count unsigned longs at @longs from host byte order to little
+ * endian, in place.
+ */
+static void longs_to_le(unsigned long *longs, size_t count)
+{
+    size_t i;
+
+    for (i = 0; i < count; i++) {
+        if (BITS_PER_LONG == 32) {
+            cpu_to_le32s((uint32_t *)&longs[i]);
+        } else {
+            cpu_to_le64s((uint64_t *)&longs[i]);
+        }
+    }
+}
+
+/* hbitmap_store()
+ * Write the last-level bitmap data to @bs, cluster by cluster, at the
+ * offsets given in @table (as classified by hbitmap_prepare_store() and
+ * allocated by the caller).  Data is stored in little endian format.
+ *
+ * Table entries 0 (all-zeroes) and 1 (all-ones) have no cluster allocated
+ * and are skipped.
+ *
+ * On error the bitmap table is still valid for clearing, but may refer to
+ * a partially stored bitmap.  Returns 0 on success, a negative errno from
+ * bdrv_pwrite() on failure.
+ */
+int hbitmap_store(HBitmap *bitmap, BlockDriverState *bs,
+                  const uint64_t *table, uint32_t table_size,
+                  uint32_t cluster_size)
+{
+    uint32_t i;
+    uint8_t *cur = (uint8_t *)bitmap->levels[HBITMAP_LEVELS - 1];
+    uint8_t *end = cur +
+        ((bitmap->size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL) * (sizeof(long));
+
+    for (i = 0; i < table_size && cur < end; ++i) {
+        uint64_t offset = table[i];
+        uint64_t count = MIN(cluster_size, end - cur);
+
+        /* Zero offset means zero region, offset = 1 means filled region.
+         * Cluster is not allocated in both cases. */
+        if (offset > 1) {
+            int ret;
+            if (cpu_to_le16(1) == 1) {
+                /* Little endian host: write the bitmap data directly. */
+                ret = bdrv_pwrite(bs, offset, cur, count);
+            } else {
+                /* Big endian host: byte-swap a copy so the live in-memory
+                 * bitmap is left untouched.  (Previously 'cur' was swapped
+                 * in place while the unswapped 'tmp' was written out,
+                 * corrupting the bitmap AND storing wrong data.) */
+                void *tmp = g_memdup(cur, count);
+                longs_to_le((unsigned long *)tmp,
+                            count / sizeof(unsigned long));
+                ret = bdrv_pwrite(bs, offset, tmp, count);
+                g_free(tmp);
+            }
+
+            if (ret < 0) {
+                return ret;
+            }
+        }
+
+        cur += cluster_size;
+    }
+
+    return 0;
+}