diff mbox series

[v3,05/10] migration/dirtyrate: Record hash results for each sampled page

Message ID 1597634433-18809-6-git-send-email-zhengchuan@huawei.com (mailing list archive)
State New, archived
Headers show
Series *** A Method for evaluating dirty page rate *** | expand

Commit Message

Zheng Chuan Aug. 17, 2020, 3:20 a.m. UTC
Record hash results for each sampled page.

Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
Signed-off-by: YanYing Zhuang <ann.zhuangyanying@huawei.com>
---
 migration/dirtyrate.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++
 migration/dirtyrate.h |   7 +++
 2 files changed, 151 insertions(+)

Comments

Dr. David Alan Gilbert Aug. 20, 2020, 5:30 p.m. UTC | #1
* Chuan Zheng (zhengchuan@huawei.com) wrote:
> Record hash results for each sampled page.
> 
> Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
> Signed-off-by: YanYing Zhuang <ann.zhuangyanying@huawei.com>
> ---
>  migration/dirtyrate.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  migration/dirtyrate.h |   7 +++
>  2 files changed, 151 insertions(+)
> 
> diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
> index c4304ef..62b6f69 100644
> --- a/migration/dirtyrate.c
> +++ b/migration/dirtyrate.c
> @@ -25,6 +25,7 @@
>  #include "dirtyrate.h"
>  
>  CalculatingDirtyRateState CalculatingState = CAL_DIRTY_RATE_INIT;
> +static unsigned long int qcrypto_hash_len = QCRYPTO_HASH_LEN;

Why do we need this static rather than just using the QCRYPTO_HASH_LEN ?
It's never going to change is it?
(and anyway it's just a MD5 len?)

>  static struct DirtyRateStat dirty_stat;
>  
>  static int dirty_rate_set_state(int new_state)
> @@ -71,6 +72,149 @@ static void update_dirtyrate(uint64_t msec)
>      dirty_stat.dirty_rate = dirty_rate;
>  }
>  
> +/*
> + * get hash result for the sampled memory with length of 4K byte in ramblock,
> + * which starts from ramblock base address.
> + */
> +static int get_ramblock_vfn_hash(struct RamblockDirtyInfo *info,
> +                                 unsigned long vfn, uint8_t **md)
> +{
> +    struct iovec iov_array;
> +    int ret = 0;
> +    int nkey = 1;
> +    size_t hash_len = qcrypto_hash_len;
> +
> +    iov_array.iov_base = info->ramblock_addr +
> +                         vfn * DIRTYRATE_SAMPLE_PAGE_SIZE;
> +    iov_array.iov_len = DIRTYRATE_SAMPLE_PAGE_SIZE;
> +
> +    if (qcrypto_hash_bytesv(QCRYPTO_HASH_ALG_MD5,
> +                            &iov_array, nkey,
> +                            md, &hash_len, NULL) < 0) {
> +        ret = -1;
> +    }
> +
> +    return ret;
> +}
> +
> +static int save_ramblock_hash(struct RamblockDirtyInfo *info)
> +{
> +    unsigned int sample_pages_count;
> +    uint8_t *md = NULL;
> +    int i;
> +    int ret = -1;
> +    GRand *rand = g_rand_new();
> +
> +    sample_pages_count = info->sample_pages_count;
> +
> +    /* ramblock size less than one page, return success to skip this ramblock */
> +    if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) {
> +        ret = 0;
> +        goto out;
> +    }
> +
> +    info->hash_result = g_try_malloc0_n(sample_pages_count,
> +                                        sizeof(uint8_t) * qcrypto_hash_len);
> +    if (!info->hash_result) {
> +        ret = -1;
> +        goto out;
> +    }
> +
> +    info->sample_page_vfn = g_try_malloc0_n(sample_pages_count,
> +                                            sizeof(unsigned long));
> +    if (!info->sample_page_vfn) {
> +        g_free(info->hash_result);
> +        ret = -1;
> +        goto out;
> +    }
> +
> +    for (i = 0; i < sample_pages_count; i++) {
> +        md = info->hash_result + i * qcrypto_hash_len;
> +        info->sample_page_vfn[i] = g_rand_int_range(rand, 0,
> +                                                    info->ramblock_pages - 1);
> +        ret = get_ramblock_vfn_hash(info, info->sample_page_vfn[i], &md);
> +        if (ret < 0) {
> +            goto out;
> +        }
> +    }
> +    ret = 0;
> +
> +out:
> +    g_rand_free(rand);
> +    return ret;
> +}
> +
> +static void get_ramblock_dirty_info(RAMBlock *block,
> +                                    struct RamblockDirtyInfo *info,
> +                                    struct DirtyRateConfig *config)
> +{
> +    uint64_t sample_pages_per_gigabytes = config->sample_pages_per_gigabytes;
> +
> +    /* Right shift 30 bits to calc block size in GB */
> +    info->sample_pages_count = (qemu_ram_get_used_length(block)
> +                                * sample_pages_per_gigabytes) >> 30;
> +
> +    /* Right shift 12 bits to calc page count in 4KB */
> +    info->ramblock_pages = qemu_ram_get_used_length(block) >> 12;

Is this really >> 12 ?  Should it actually be 
   / DIRTYRATE_SAMPLE_PAGE_SIZE ?

(and should you need that or just use TARGET_PAGE_SIZE?)

> +    info->ramblock_addr = qemu_ram_get_host_addr(block);
> +    strcpy(info->idstr, qemu_ram_get_idstr(block));
> +}
> +
> +static struct RamblockDirtyInfo *
> +alloc_ramblock_dirty_info(int *block_index,
> +                          struct RamblockDirtyInfo *block_dinfo)
> +{
> +    struct RamblockDirtyInfo *info = NULL;
> +    int index = *block_index;
> +
> +    if (!block_dinfo) {
> +        block_dinfo = g_try_new(struct RamblockDirtyInfo, 1);
> +        index = 0;
> +    } else {
> +        index++;
> +        block_dinfo = g_try_realloc(block_dinfo, (index + 1) *
> +                                    sizeof(struct RamblockDirtyInfo));
> +    }
> +    if (!block_dinfo) {
> +        return NULL;
> +    }
> +
> +    info = &block_dinfo[index];
> +    memset(info, 0, sizeof(struct RamblockDirtyInfo));
> +
> +    *block_index = index;
> +    return block_dinfo;
> +}
> +
> +static int record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo,
> +                                     struct DirtyRateConfig config,
> +                                     int *block_index)
> +{
> +    struct RamblockDirtyInfo *info = NULL;
> +    struct RamblockDirtyInfo *dinfo = NULL;
> +    RAMBlock *block = NULL;
> +    int index = 0;
> +
> +    RAMBLOCK_FOREACH_MIGRATABLE(block) {
> +        dinfo = alloc_ramblock_dirty_info(&index, dinfo);
> +        if (dinfo == NULL) {
> +            return -1;
> +        }
> +        info = &dinfo[index];
> +        get_ramblock_dirty_info(block, info, &config);
> +        if (save_ramblock_hash(info) < 0) {
> +            *block_dinfo = dinfo;
> +            *block_index = index;
> +            return -1;
> +        }
> +    }
> +
> +    *block_dinfo = dinfo;
> +    *block_index = index;
> +
> +    return 0;

You should add some trace_ calls at various places to make this easier
to debug.

Dave

> +}
> +
>  static void calculate_dirtyrate(struct DirtyRateConfig config)
>  {
>      /* todo */
> diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h
> index af57c80..0812b16 100644
> --- a/migration/dirtyrate.h
> +++ b/migration/dirtyrate.h
> @@ -20,10 +20,17 @@
>  #define DIRTYRATE_DEFAULT_SAMPLE_PAGES            256
>  
>  /*
> + * Sample page size 4K as default.
> + */
> +#define DIRTYRATE_SAMPLE_PAGE_SIZE                4096
> +
> +/*
>   * Record ramblock idstr
>   */
>  #define RAMBLOCK_INFO_MAX_LEN                     256
>  
> +#define QCRYPTO_HASH_LEN                          16
> +
>  /* Take 1s as default for calculation duration */
>  #define DEFAULT_FETCH_DIRTYRATE_TIME_SEC          1
>  
> -- 
> 1.8.3.1
>
Daniel P. Berrangé Aug. 20, 2020, 5:51 p.m. UTC | #2
On Thu, Aug 20, 2020 at 06:30:09PM +0100, Dr. David Alan Gilbert wrote:
> * Chuan Zheng (zhengchuan@huawei.com) wrote:
> > Record hash results for each sampled page.
> > 
> > Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
> > Signed-off-by: YanYing Zhuang <ann.zhuangyanying@huawei.com>
> > ---
> >  migration/dirtyrate.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >  migration/dirtyrate.h |   7 +++
> >  2 files changed, 151 insertions(+)
> > 
> > diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
> > index c4304ef..62b6f69 100644
> > --- a/migration/dirtyrate.c
> > +++ b/migration/dirtyrate.c
> > @@ -25,6 +25,7 @@
> >  #include "dirtyrate.h"
> >  
> >  CalculatingDirtyRateState CalculatingState = CAL_DIRTY_RATE_INIT;
> > +static unsigned long int qcrypto_hash_len = QCRYPTO_HASH_LEN;
> 
> Why do we need this static rather than just using the QCRYPTO_HASH_LEN ?
> It's never going to change is it?
> (and anyway it's just a MD5 len?)

I wouldn't want to bet on that given that this is use of MD5. We might
claim this isn't security critical, but surprises happen, and we will
certainly be dinged on security audits for introducing new use of MD5
no matter what.

If a cryptographic hash is required, then sha256 should be the choice
for any new code that doesn't have back compat requirements.

If a cryptographic hash is not required then how about crc32 

IOW, it doesn't make a whole lot of sense to say we need a cryptographic
hash, but then pick the most insecure one.

sha256 is slower than md5, but it is conceivable that in future we might
gain support for something like Blake2b which is similar security level
to SHA3, while being faster than MD5.

Overall I'm pretty unethusiastic about use of MD5 being introduced and
worse, being hardcoded as the only option.

Regards,
Daniel
Dr. David Alan Gilbert Aug. 20, 2020, 5:55 p.m. UTC | #3
* Daniel P. Berrangé (berrange@redhat.com) wrote:
> On Thu, Aug 20, 2020 at 06:30:09PM +0100, Dr. David Alan Gilbert wrote:
> > * Chuan Zheng (zhengchuan@huawei.com) wrote:
> > > Record hash results for each sampled page.
> > > 
> > > Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
> > > Signed-off-by: YanYing Zhuang <ann.zhuangyanying@huawei.com>
> > > ---
> > >  migration/dirtyrate.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++
> > >  migration/dirtyrate.h |   7 +++
> > >  2 files changed, 151 insertions(+)
> > > 
> > > diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
> > > index c4304ef..62b6f69 100644
> > > --- a/migration/dirtyrate.c
> > > +++ b/migration/dirtyrate.c
> > > @@ -25,6 +25,7 @@
> > >  #include "dirtyrate.h"
> > >  
> > >  CalculatingDirtyRateState CalculatingState = CAL_DIRTY_RATE_INIT;
> > > +static unsigned long int qcrypto_hash_len = QCRYPTO_HASH_LEN;
> > 
> > Why do we need this static rather than just using the QCRYPTO_HASH_LEN ?
> > It's never going to change is it?
> > (and anyway it's just a MD5 len?)
> 
> I wouldn't want to bet on that given that this is use of MD5. We might
> claim this isn't security critical, but surprises happen, and we will
> certainly be dinged on security audits for introducing new use of MD5
> no matter what.
> 
> If a cryptographic hash is required, then sha256 should be the choice
> for any new code that doesn't have back compat requirements.
> 
> If a cryptographic hash is not required then how about crc32 

It doesn't need to be cryptographic; is crc32 the fastest reasonable hash for use
in large areas?

Dave

> IOW, it doesn't make a whole lot of sense to say we need a cryptographic
> hash, but then pick the most insecure one.
> 
> sha256 is slower than md5, but it is conceivable that in future we might
> gain support for something like Blake2b which is similar security level
> to SHA3, while being faster than MD5.
> 
> Overall I'm pretty unethusiastic about use of MD5 being introduced and
> worse, being hardcoded as the only option.
> 
> Regards,
> Daniel
> -- 
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
Zheng Chuan Aug. 21, 2020, 12:22 p.m. UTC | #4
On 2020/8/21 1:55, Dr. David Alan Gilbert wrote:
> * Daniel P. Berrangé (berrange@redhat.com) wrote:
>> On Thu, Aug 20, 2020 at 06:30:09PM +0100, Dr. David Alan Gilbert wrote:
>>> * Chuan Zheng (zhengchuan@huawei.com) wrote:
>>>> Record hash results for each sampled page.
>>>>
>>>> Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
>>>> Signed-off-by: YanYing Zhuang <ann.zhuangyanying@huawei.com>
>>>> ---
>>>>  migration/dirtyrate.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>  migration/dirtyrate.h |   7 +++
>>>>  2 files changed, 151 insertions(+)
>>>>
>>>> diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
>>>> index c4304ef..62b6f69 100644
>>>> --- a/migration/dirtyrate.c
>>>> +++ b/migration/dirtyrate.c
>>>> @@ -25,6 +25,7 @@
>>>>  #include "dirtyrate.h"
>>>>  
>>>>  CalculatingDirtyRateState CalculatingState = CAL_DIRTY_RATE_INIT;
>>>> +static unsigned long int qcrypto_hash_len = QCRYPTO_HASH_LEN;
>>>
>>> Why do we need this static rather than just using the QCRYPTO_HASH_LEN ?
>>> It's never going to change is it?
>>> (and anyway it's just a MD5 len?)
>>
>> I wouldn't want to bet on that given that this is use of MD5. We might
>> claim this isn't security critical, but surprises happen, and we will
>> certainly be dinged on security audits for introducing new use of MD5
>> no matter what.
>>
>> If a cryptographic hash is required, then sha256 should be the choice
>> for any new code that doesn't have back compat requirements.
>>
>> If a cryptographic hash is not required then how about crc32 
> 
> It doesn't need to be cryptographic; is crc32 the fastest reasonable hash for use
> in large areas?
> 
> Dave
> 
>> IOW, it doesn't make a whole lot of sense to say we need a cryptographic
>> hash, but then pick the most insecure one.
>>
>> sha256 is slower than md5, but it is conceivable that in future we might
>> gain support for something like Blake2b which is similar security level
>> to SHA3, while being faster than MD5.
>>
>> Overall I'm pretty unethusiastic about use of MD5 being introduced and
>> worse, being hardcoded as the only option.
>>
>> Regards,
>> Daniel
>> -- 
>> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
>> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
>> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|

Hi, Daniel, Dave.

I do compare MD5 and SHA256 with vm memory of 128G under mempress of 100G.

1. Calculation speed
1) MD5 takes about 500ms to sample and hash all pages by record_ramblock_hash_info().
2)  SHA256 takes about 750ms to sample all pages by record_ramblock_hash_info().

2. CPU Consumption
1)  MD5 may have instant rise up to 48% for dirtyrate thread
2)  SHA256 may have instant rise up to 75% for dirtyrate thread

3. Memory Consumption
SHA256 may need twice memory than MD5 due to its HASH_LEN.

I am trying to consider if crc32 is more faster and takes less memory and is more safer than MD5?
For now, i am trying to do how to implement crc32 instead of using qcrypto_hash_bytesv() to hash sampled page memory.
Daniel P. Berrangé Aug. 21, 2020, 12:30 p.m. UTC | #5
On Fri, Aug 21, 2020 at 08:22:06PM +0800, Zheng Chuan wrote:
> 
> 
> On 2020/8/21 1:55, Dr. David Alan Gilbert wrote:
> > * Daniel P. Berrangé (berrange@redhat.com) wrote:
> >> On Thu, Aug 20, 2020 at 06:30:09PM +0100, Dr. David Alan Gilbert wrote:
> >>> * Chuan Zheng (zhengchuan@huawei.com) wrote:
> >>>> Record hash results for each sampled page.
> >>>>
> >>>> Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
> >>>> Signed-off-by: YanYing Zhuang <ann.zhuangyanying@huawei.com>
> >>>> ---
> >>>>  migration/dirtyrate.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >>>>  migration/dirtyrate.h |   7 +++
> >>>>  2 files changed, 151 insertions(+)
> >>>>
> >>>> diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
> >>>> index c4304ef..62b6f69 100644
> >>>> --- a/migration/dirtyrate.c
> >>>> +++ b/migration/dirtyrate.c
> >>>> @@ -25,6 +25,7 @@
> >>>>  #include "dirtyrate.h"
> >>>>  
> >>>>  CalculatingDirtyRateState CalculatingState = CAL_DIRTY_RATE_INIT;
> >>>> +static unsigned long int qcrypto_hash_len = QCRYPTO_HASH_LEN;
> >>>
> >>> Why do we need this static rather than just using the QCRYPTO_HASH_LEN ?
> >>> It's never going to change is it?
> >>> (and anyway it's just a MD5 len?)
> >>
> >> I wouldn't want to bet on that given that this is use of MD5. We might
> >> claim this isn't security critical, but surprises happen, and we will
> >> certainly be dinged on security audits for introducing new use of MD5
> >> no matter what.
> >>
> >> If a cryptographic hash is required, then sha256 should be the choice
> >> for any new code that doesn't have back compat requirements.
> >>
> >> If a cryptographic hash is not required then how about crc32 
> > 
> > It doesn't need to be cryptographic; is crc32 the fastest reasonable hash for use
> > in large areas?
> > 
> > Dave
> > 
> >> IOW, it doesn't make a whole lot of sense to say we need a cryptographic
> >> hash, but then pick the most insecure one.
> >>
> >> sha256 is slower than md5, but it is conceivable that in future we might
> >> gain support for something like Blake2b which is similar security level
> >> to SHA3, while being faster than MD5.
> >>
> >> Overall I'm pretty unethusiastic about use of MD5 being introduced and
> >> worse, being hardcoded as the only option.
> >>
> >> Regards,
> >> Daniel
> >> -- 
> >> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> >> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> >> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
> 
> Hi, Daniel, Dave.
> 
> I do compare MD5 and SHA256 with vm memory of 128G under mempress of 100G.
> 
> 1. Calculation speed
> 1) MD5 takes about 500ms to sample and hash all pages by record_ramblock_hash_info().
> 2)  SHA256 takes about 750ms to sample all pages by record_ramblock_hash_info().
> 
> 2. CPU Consumption
> 1)  MD5 may have instant rise up to 48% for dirtyrate thread
> 2)  SHA256 may have instant rise up to 75% for dirtyrate thread
> 
> 3. Memory Consumption
> SHA256 may need twice memory than MD5 due to its HASH_LEN.
> 
> I am trying to consider if crc32 is more faster and takes less memory and is more safer than MD5?

No, crc32 is absolutely *weaker* than MD5. It is NOT a cryptographic
hash so does not try to guarantee collision resistance. It only has
2^32 possible outputs.

MD5 does try to guarantee collision resistance, but MD5 is considered
broken these days, so a malicious attacker can cause collisions if they
are motivated enough.

IOW if you need collision resistance that SHA256 should be used.


Regards,
Daniel
Dr. David Alan Gilbert Aug. 21, 2020, 12:39 p.m. UTC | #6
* Daniel P. Berrangé (berrange@redhat.com) wrote:
> On Fri, Aug 21, 2020 at 08:22:06PM +0800, Zheng Chuan wrote:
> > 
> > 
> > On 2020/8/21 1:55, Dr. David Alan Gilbert wrote:
> > > * Daniel P. Berrangé (berrange@redhat.com) wrote:
> > >> On Thu, Aug 20, 2020 at 06:30:09PM +0100, Dr. David Alan Gilbert wrote:
> > >>> * Chuan Zheng (zhengchuan@huawei.com) wrote:
> > >>>> Record hash results for each sampled page.
> > >>>>
> > >>>> Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
> > >>>> Signed-off-by: YanYing Zhuang <ann.zhuangyanying@huawei.com>
> > >>>> ---
> > >>>>  migration/dirtyrate.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++
> > >>>>  migration/dirtyrate.h |   7 +++
> > >>>>  2 files changed, 151 insertions(+)
> > >>>>
> > >>>> diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
> > >>>> index c4304ef..62b6f69 100644
> > >>>> --- a/migration/dirtyrate.c
> > >>>> +++ b/migration/dirtyrate.c
> > >>>> @@ -25,6 +25,7 @@
> > >>>>  #include "dirtyrate.h"
> > >>>>  
> > >>>>  CalculatingDirtyRateState CalculatingState = CAL_DIRTY_RATE_INIT;
> > >>>> +static unsigned long int qcrypto_hash_len = QCRYPTO_HASH_LEN;
> > >>>
> > >>> Why do we need this static rather than just using the QCRYPTO_HASH_LEN ?
> > >>> It's never going to change is it?
> > >>> (and anyway it's just a MD5 len?)
> > >>
> > >> I wouldn't want to bet on that given that this is use of MD5. We might
> > >> claim this isn't security critical, but surprises happen, and we will
> > >> certainly be dinged on security audits for introducing new use of MD5
> > >> no matter what.
> > >>
> > >> If a cryptographic hash is required, then sha256 should be the choice
> > >> for any new code that doesn't have back compat requirements.
> > >>
> > >> If a cryptographic hash is not required then how about crc32 
> > > 
> > > It doesn't need to be cryptographic; is crc32 the fastest reasonable hash for use
> > > in large areas?
> > > 
> > > Dave
> > > 
> > >> IOW, it doesn't make a whole lot of sense to say we need a cryptographic
> > >> hash, but then pick the most insecure one.
> > >>
> > >> sha256 is slower than md5, but it is conceivable that in future we might
> > >> gain support for something like Blake2b which is similar security level
> > >> to SHA3, while being faster than MD5.
> > >>
> > >> Overall I'm pretty unethusiastic about use of MD5 being introduced and
> > >> worse, being hardcoded as the only option.
> > >>
> > >> Regards,
> > >> Daniel
> > >> -- 
> > >> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> > >> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> > >> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
> > 
> > Hi, Daniel, Dave.
> > 
> > I do compare MD5 and SHA256 with vm memory of 128G under mempress of 100G.
> > 
> > 1. Calculation speed
> > 1) MD5 takes about 500ms to sample and hash all pages by record_ramblock_hash_info().
> > 2)  SHA256 takes about 750ms to sample all pages by record_ramblock_hash_info().
> > 
> > 2. CPU Consumption
> > 1)  MD5 may have instant rise up to 48% for dirtyrate thread
> > 2)  SHA256 may have instant rise up to 75% for dirtyrate thread
> > 
> > 3. Memory Consumption
> > SHA256 may need twice memory than MD5 due to its HASH_LEN.
> > 
> > I am trying to consider if crc32 is more faster and takes less memory and is more safer than MD5?
> 
> No, crc32 is absolutely *weaker* than MD5. It is NOT a cryptographic
> hash so does not try to guarantee collision resistance. It only has
> 2^32 possible outputs.
> 
> MD5 does try to guarantee collision resistance, but MD5 is considered
> broken these days, so a malicious attacker can cause collisions if they
> are motivated enough.
> 
> IOW if you need collision resistance that SHA256 should be used.

There's no need to guard against malicious behaviour here - this is just
a stat to guide migration.
If CRC32 is likely to be faster than md5 I suspect it's enough.

Dave

> 
> Regards,
> Daniel
> -- 
> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
Zheng Chuan Aug. 21, 2020, 1:07 p.m. UTC | #7
On 2020/8/21 20:39, Dr. David Alan Gilbert wrote:
> * Daniel P. Berrangé (berrange@redhat.com) wrote:
>> On Fri, Aug 21, 2020 at 08:22:06PM +0800, Zheng Chuan wrote:
>>>
>>>
>>> On 2020/8/21 1:55, Dr. David Alan Gilbert wrote:
>>>> * Daniel P. Berrangé (berrange@redhat.com) wrote:
>>>>> On Thu, Aug 20, 2020 at 06:30:09PM +0100, Dr. David Alan Gilbert wrote:
>>>>>> * Chuan Zheng (zhengchuan@huawei.com) wrote:
>>>>>>> Record hash results for each sampled page.
>>>>>>>
>>>>>>> Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
>>>>>>> Signed-off-by: YanYing Zhuang <ann.zhuangyanying@huawei.com>
>>>>>>> ---
>>>>>>>  migration/dirtyrate.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>>  migration/dirtyrate.h |   7 +++
>>>>>>>  2 files changed, 151 insertions(+)
>>>>>>>
>>>>>>> diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
>>>>>>> index c4304ef..62b6f69 100644
>>>>>>> --- a/migration/dirtyrate.c
>>>>>>> +++ b/migration/dirtyrate.c
>>>>>>> @@ -25,6 +25,7 @@
>>>>>>>  #include "dirtyrate.h"
>>>>>>>  
>>>>>>>  CalculatingDirtyRateState CalculatingState = CAL_DIRTY_RATE_INIT;
>>>>>>> +static unsigned long int qcrypto_hash_len = QCRYPTO_HASH_LEN;
>>>>>>
>>>>>> Why do we need this static rather than just using the QCRYPTO_HASH_LEN ?
>>>>>> It's never going to change is it?
>>>>>> (and anyway it's just a MD5 len?)
>>>>>
>>>>> I wouldn't want to bet on that given that this is use of MD5. We might
>>>>> claim this isn't security critical, but surprises happen, and we will
>>>>> certainly be dinged on security audits for introducing new use of MD5
>>>>> no matter what.
>>>>>
>>>>> If a cryptographic hash is required, then sha256 should be the choice
>>>>> for any new code that doesn't have back compat requirements.
>>>>>
>>>>> If a cryptographic hash is not required then how about crc32 
>>>>
>>>> It doesn't need to be cryptographic; is crc32 the fastest reasonable hash for use
>>>> in large areas?
>>>>
>>>> Dave
>>>>
>>>>> IOW, it doesn't make a whole lot of sense to say we need a cryptographic
>>>>> hash, but then pick the most insecure one.
>>>>>
>>>>> sha256 is slower than md5, but it is conceivable that in future we might
>>>>> gain support for something like Blake2b which is similar security level
>>>>> to SHA3, while being faster than MD5.
>>>>>
>>>>> Overall I'm pretty unethusiastic about use of MD5 being introduced and
>>>>> worse, being hardcoded as the only option.
>>>>>
>>>>> Regards,
>>>>> Daniel
>>>>> -- 
>>>>> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
>>>>> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
>>>>> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
>>>
>>> Hi, Daniel, Dave.
>>>
>>> I do compare MD5 and SHA256 with vm memory of 128G under mempress of 100G.
>>>
>>> 1. Calculation speed
>>> 1) MD5 takes about 500ms to sample and hash all pages by record_ramblock_hash_info().
>>> 2)  SHA256 takes about 750ms to sample all pages by record_ramblock_hash_info().
>>>
>>> 2. CPU Consumption
>>> 1)  MD5 may have instant rise up to 48% for dirtyrate thread
>>> 2)  SHA256 may have instant rise up to 75% for dirtyrate thread
>>>
>>> 3. Memory Consumption
>>> SHA256 may need twice memory than MD5 due to its HASH_LEN.
>>>
>>> I am trying to consider if crc32 is more faster and takes less memory and is more safer than MD5?
>>
>> No, crc32 is absolutely *weaker* than MD5. It is NOT a cryptographic
>> hash so does not try to guarantee collision resistance. It only has
>> 2^32 possible outputs.
>>
>> MD5 does try to guarantee collision resistance, but MD5 is considered
>> broken these days, so a malicious attacker can cause collisions if they
>> are motivated enough.
>>
>> IOW if you need collision resistance that SHA256 should be used.
> 
> There's no need to guard against malicious behaviour here - this is just
> a stat to guide migration.
> If CRC32 is likely to be faster than md5 I suspect it's enough.
> 
> Dave
> 
OK, i'll take a test by crc32.

>>
>> Regards,
>> Daniel
>> -- 
>> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
>> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
>> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
Zheng Chuan Aug. 22, 2020, 6:25 a.m. UTC | #8
On 2020/8/21 20:39, Dr. David Alan Gilbert wrote:
> * Daniel P. Berrangé (berrange@redhat.com) wrote:
>> On Fri, Aug 21, 2020 at 08:22:06PM +0800, Zheng Chuan wrote:
>>>
>>>
>>> On 2020/8/21 1:55, Dr. David Alan Gilbert wrote:
>>>> * Daniel P. Berrangé (berrange@redhat.com) wrote:
>>>>> On Thu, Aug 20, 2020 at 06:30:09PM +0100, Dr. David Alan Gilbert wrote:
>>>>>> * Chuan Zheng (zhengchuan@huawei.com) wrote:
>>>>>>> Record hash results for each sampled page.
>>>>>>>
>>>>>>> Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
>>>>>>> Signed-off-by: YanYing Zhuang <ann.zhuangyanying@huawei.com>
>>>>>>> ---
>>>>>>>  migration/dirtyrate.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>>  migration/dirtyrate.h |   7 +++
>>>>>>>  2 files changed, 151 insertions(+)
>>>>>>>
>>>>>>> diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
>>>>>>> index c4304ef..62b6f69 100644
>>>>>>> --- a/migration/dirtyrate.c
>>>>>>> +++ b/migration/dirtyrate.c
>>>>>>> @@ -25,6 +25,7 @@
>>>>>>>  #include "dirtyrate.h"
>>>>>>>  
>>>>>>>  CalculatingDirtyRateState CalculatingState = CAL_DIRTY_RATE_INIT;
>>>>>>> +static unsigned long int qcrypto_hash_len = QCRYPTO_HASH_LEN;
>>>>>>
>>>>>> Why do we need this static rather than just using the QCRYPTO_HASH_LEN ?
>>>>>> It's never going to change is it?
>>>>>> (and anyway it's just a MD5 len?)
>>>>>
>>>>> I wouldn't want to bet on that given that this is use of MD5. We might
>>>>> claim this isn't security critical, but surprises happen, and we will
>>>>> certainly be dinged on security audits for introducing new use of MD5
>>>>> no matter what.
>>>>>
>>>>> If a cryptographic hash is required, then sha256 should be the choice
>>>>> for any new code that doesn't have back compat requirements.
>>>>>
>>>>> If a cryptographic hash is not required then how about crc32 
>>>>
>>>> It doesn't need to be cryptographic; is crc32 the fastest reasonable hash for use
>>>> in large areas?
>>>>
>>>> Dave
>>>>
>>>>> IOW, it doesn't make a whole lot of sense to say we need a cryptographic
>>>>> hash, but then pick the most insecure one.
>>>>>
>>>>> sha256 is slower than md5, but it is conceivable that in future we might
>>>>> gain support for something like Blake2b which is similar security level
>>>>> to SHA3, while being faster than MD5.
>>>>>
>>>>> Overall I'm pretty unethusiastic about use of MD5 being introduced and
>>>>> worse, being hardcoded as the only option.
>>>>>
>>>>> Regards,
>>>>> Daniel
>>>>> -- 
>>>>> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
>>>>> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
>>>>> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
>>>
>>> Hi, Daniel, Dave.
>>>
>>> I do compare MD5 and SHA256 with vm memory of 128G under mempress of 100G.
>>>
>>> 1. Calculation speed
>>> 1) MD5 takes about 500ms to sample and hash all pages by record_ramblock_hash_info().
>>> 2)  SHA256 takes about 750ms to sample all pages by record_ramblock_hash_info().
>>>
>>> 2. CPU Consumption
>>> 1)  MD5 may have instant rise up to 48% for dirtyrate thread
>>> 2)  SHA256 may have instant rise up to 75% for dirtyrate thread
>>>
>>> 3. Memory Consumption
>>> SHA256 may need twice memory than MD5 due to its HASH_LEN.
>>>
>>> I am trying to consider if crc32 is more faster and takes less memory and is more safer than MD5?
>>
>> No, crc32 is absolutely *weaker* than MD5. It is NOT a cryptographic
>> hash so does not try to guarantee collision resistance. It only has
>> 2^32 possible outputs.
>>
>> MD5 does try to guarantee collision resistance, but MD5 is considered
>> broken these days, so a malicious attacker can cause collisions if they
>> are motivated enough.
>>
>> IOW if you need collision resistance that SHA256 should be used.
> 
> There's no need to guard against malicious behaviour here - this is just
> a stat to guide migration.
> If CRC32 is likely to be faster than md5 I suspect it's enough.
> 
> Dave
> 
Hi,Dave, Daniel.

I did test by crc32,it is much faster than MD5 and SHA256:)

As for 128G vm it takes only about 50ms to sample and hash all pages by record_ramblock_hash_info().
And the dirtyrate calculation is still good enough:)
++++++++++++++++++++++++++++++++++++++++++
|                      |    dirtyrate    |
++++++++++++++++++++++++++++++++++++++++++
| no mempress          |     4MB/s       |
------------------------------------------
| mempress 4096 1024   |    1248MB/s     |
++++++++++++++++++++++++++++++++++++++++++
| mempress 4096 4096   |    4060MB/s     |
++++++++++++++++++++++++++++++++++++++++++

I will take crc32 in PatchV4, is that OK from the perspective of safety?

In my opinion, it should be safe.
The crc32 is only for compare and the recorder will be free after calculation is over.
The output is just dirtyrate for user to guide migration.

What's more, i consider increase DIRTYRATE_DEFAULT_SAMPLE_PAGES from 256 to 512
which may takes about 75ms to sample and hash all pages by record_ramblock_hash_info().

>>
>> Regards,
>> Daniel
>> -- 
>> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
>> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
>> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
Dr. David Alan Gilbert Aug. 24, 2020, 8:57 a.m. UTC | #9
* Zheng Chuan (zhengchuan@huawei.com) wrote:
> 
> 
> On 2020/8/21 20:39, Dr. David Alan Gilbert wrote:
> > * Daniel P. Berrangé (berrange@redhat.com) wrote:
> >> On Fri, Aug 21, 2020 at 08:22:06PM +0800, Zheng Chuan wrote:
> >>>
> >>>
> >>> On 2020/8/21 1:55, Dr. David Alan Gilbert wrote:
> >>>> * Daniel P. Berrangé (berrange@redhat.com) wrote:
> >>>>> On Thu, Aug 20, 2020 at 06:30:09PM +0100, Dr. David Alan Gilbert wrote:
> >>>>>> * Chuan Zheng (zhengchuan@huawei.com) wrote:
> >>>>>>> Record hash results for each sampled page.
> >>>>>>>
> >>>>>>> Signed-off-by: Chuan Zheng <zhengchuan@huawei.com>
> >>>>>>> Signed-off-by: YanYing Zhuang <ann.zhuangyanying@huawei.com>
> >>>>>>> ---
> >>>>>>>  migration/dirtyrate.c | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >>>>>>>  migration/dirtyrate.h |   7 +++
> >>>>>>>  2 files changed, 151 insertions(+)
> >>>>>>>
> >>>>>>> diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
> >>>>>>> index c4304ef..62b6f69 100644
> >>>>>>> --- a/migration/dirtyrate.c
> >>>>>>> +++ b/migration/dirtyrate.c
> >>>>>>> @@ -25,6 +25,7 @@
> >>>>>>>  #include "dirtyrate.h"
> >>>>>>>  
> >>>>>>>  CalculatingDirtyRateState CalculatingState = CAL_DIRTY_RATE_INIT;
> >>>>>>> +static unsigned long int qcrypto_hash_len = QCRYPTO_HASH_LEN;
> >>>>>>
> >>>>>> Why do we need this static rather than just using the QCRYPTO_HASH_LEN ?
> >>>>>> It's never going to change is it?
> >>>>>> (and anyway it's just a MD5 len?)
> >>>>>
> >>>>> I wouldn't want to bet on that given that this is use of MD5. We might
> >>>>> claim this isn't security critical, but surprises happen, and we will
> >>>>> certainly be dinged on security audits for introducing new use of MD5
> >>>>> no matter what.
> >>>>>
> >>>>> If a cryptographic hash is required, then sha256 should be the choice
> >>>>> for any new code that doesn't have back compat requirements.
> >>>>>
> >>>>> If a cryptographic hash is not required then how about crc32 
> >>>>
> >>>> It doesn't need to be cryptographic; is crc32 the fastest reasonable hash for use
> >>>> in large areas?
> >>>>
> >>>> Dave
> >>>>
> >>>>> IOW, it doesn't make a whole lot of sense to say we need a cryptographic
> >>>>> hash, but then pick the most insecure one.
> >>>>>
> >>>>> sha256 is slower than md5, but it is conceivable that in future we might
> >>>>> gain support for something like Blake2b which is similar security level
> >>>>> to SHA3, while being faster than MD5.
> >>>>>
> >>>>> Overall I'm pretty unethusiastic about use of MD5 being introduced and
> >>>>> worse, being hardcoded as the only option.
> >>>>>
> >>>>> Regards,
> >>>>> Daniel
> >>>>> -- 
> >>>>> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> >>>>> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> >>>>> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
> >>>
> >>> Hi, Daniel, Dave.
> >>>
> >>> I do compare MD5 and SHA256 with vm memory of 128G under mempress of 100G.
> >>>
> >>> 1. Calculation speed
> >>> 1) MD5 takes about 500ms to sample and hash all pages by record_ramblock_hash_info().
> >>> 2)  SHA256 takes about 750ms to sample all pages by record_ramblock_hash_info().
> >>>
> >>> 2. CPU Consumption
> >>> 1)  MD5 may have instant rise up to 48% for dirtyrate thread
> >>> 2)  SHA256 may have instant rise up to 75% for dirtyrate thread
> >>>
> >>> 3. Memory Consumption
> >>> SHA256 may need twice memory than MD5 due to its HASH_LEN.
> >>>
> >>> I am trying to consider if crc32 is more faster and takes less memory and is more safer than MD5?
> >>
> >> No, crc32 is absolutely *weaker* than MD5. It is NOT a cryptographic
> >> hash so does not try to guarantee collision resistance. It only has
> >> 2^32 possible outputs.
> >>
> >> MD5 does try to guarantee collision resistance, but MD5 is considered
> >> broken these days, so a malicious attacker can cause collisions if they
> >> are motivated enough.
> >>
> >> IOW if you need collision resistance that SHA256 should be used.
> > 
> > There's no need to guard against malicious behaviour here - this is just
> > a stat to guide migration.
> > If CRC32 is likely to be faster than md5 I suspect it's enough.
> > 
> > Dave
> > 
> Hi,Dave, Daniel.
> 
> I did test by crc32,it is much faster than MD5 and SHA256:)
> 
> As for 128G vm it takes only about 50ms to sample and hash all pages by record_ramblock_hash_info().
> And the dirtyrate calculation is still good enough:)
> ++++++++++++++++++++++++++++++++++++++++++
> |                      |    dirtyrate    |
> ++++++++++++++++++++++++++++++++++++++++++
> | no mempress          |     4MB/s       |
> ------------------------------------------
> | mempress 4096 1024   |    1248MB/s     |
> ++++++++++++++++++++++++++++++++++++++++++
> | mempress 4096 4096   |    4060MB/s     |
> ++++++++++++++++++++++++++++++++++++++++++
> 
> I will take crc32 in PatchV4, is that OK from the perspective of safety?

Yes, it's fine since it's only a heuristic anyway.

Dave

> In my opinion, it should be safe.
> The crc32 is only for compare and the recorder will be free after calculation is over.
> The output is just dirtyrate for user to guide migration.
> 
> What's more, i consider increase DIRTYRATE_DEFAULT_SAMPLE_PAGES from 256 to 512
> which may takes about 75ms to sample and hash all pages by record_ramblock_hash_info().
> 
> >>
> >> Regards,
> >> Daniel
> >> -- 
> >> |: https://berrange.com      -o-    https://www.flickr.com/photos/dberrange :|
> >> |: https://libvirt.org         -o-            https://fstop138.berrange.com :|
> >> |: https://entangle-photo.org    -o-    https://www.instagram.com/dberrange :|
>
diff mbox series

Patch

diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
index c4304ef..62b6f69 100644
--- a/migration/dirtyrate.c
+++ b/migration/dirtyrate.c
@@ -25,6 +25,7 @@ 
 #include "dirtyrate.h"
 
 CalculatingDirtyRateState CalculatingState = CAL_DIRTY_RATE_INIT;
+static unsigned long int qcrypto_hash_len = QCRYPTO_HASH_LEN;
 static struct DirtyRateStat dirty_stat;
 
 static int dirty_rate_set_state(int new_state)
@@ -71,6 +72,149 @@  static void update_dirtyrate(uint64_t msec)
     dirty_stat.dirty_rate = dirty_rate;
 }
 
+/*
+ * get hash result for the sampled memory with length of 4K byte in ramblock,
+ * which starts from ramblock base address.
+ */
+static int get_ramblock_vfn_hash(struct RamblockDirtyInfo *info,
+                                 unsigned long vfn, uint8_t **md)
+{
+    struct iovec iov_array;
+    int ret = 0;
+    int nkey = 1;
+    size_t hash_len = qcrypto_hash_len;
+
+    iov_array.iov_base = info->ramblock_addr +
+                         vfn * DIRTYRATE_SAMPLE_PAGE_SIZE;
+    iov_array.iov_len = DIRTYRATE_SAMPLE_PAGE_SIZE;
+
+    if (qcrypto_hash_bytesv(QCRYPTO_HASH_ALG_MD5,
+                            &iov_array, nkey,
+                            md, &hash_len, NULL) < 0) {
+        ret = -1;
+    }
+
+    return ret;
+}
+
+static int save_ramblock_hash(struct RamblockDirtyInfo *info)
+{
+    unsigned int sample_pages_count;
+    uint8_t *md = NULL;
+    int i;
+    int ret = -1;
+    GRand *rand = g_rand_new();
+
+    sample_pages_count = info->sample_pages_count;
+
+    /* ramblock size less than one page, return success to skip this ramblock */
+    if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) {
+        ret = 0;
+        goto out;
+    }
+
+    info->hash_result = g_try_malloc0_n(sample_pages_count,
+                                        sizeof(uint8_t) * qcrypto_hash_len);
+    if (!info->hash_result) {
+        ret = -1;
+        goto out;
+    }
+
+    info->sample_page_vfn = g_try_malloc0_n(sample_pages_count,
+                                            sizeof(unsigned long));
+    if (!info->sample_page_vfn) {
+        g_free(info->hash_result);
+        ret = -1;
+        goto out;
+    }
+
+    for (i = 0; i < sample_pages_count; i++) {
+        md = info->hash_result + i * qcrypto_hash_len;
+        info->sample_page_vfn[i] = g_rand_int_range(rand, 0,
+                                                    info->ramblock_pages - 1);
+        ret = get_ramblock_vfn_hash(info, info->sample_page_vfn[i], &md);
+        if (ret < 0) {
+            goto out;
+        }
+    }
+    ret = 0;
+
+out:
+    g_rand_free(rand);
+    return ret;
+}
+
+static void get_ramblock_dirty_info(RAMBlock *block,
+                                    struct RamblockDirtyInfo *info,
+                                    struct DirtyRateConfig *config)
+{
+    uint64_t sample_pages_per_gigabytes = config->sample_pages_per_gigabytes;
+
+    /* Right shift 30 bits to calc block size in GB */
+    info->sample_pages_count = (qemu_ram_get_used_length(block)
+                                * sample_pages_per_gigabytes) >> 30;
+
+    /* Right shift 12 bits to calc page count in 4KB */
+    info->ramblock_pages = qemu_ram_get_used_length(block) >> 12;
+    info->ramblock_addr = qemu_ram_get_host_addr(block);
+    strcpy(info->idstr, qemu_ram_get_idstr(block));
+}
+
+static struct RamblockDirtyInfo *
+alloc_ramblock_dirty_info(int *block_index,
+                          struct RamblockDirtyInfo *block_dinfo)
+{
+    struct RamblockDirtyInfo *info = NULL;
+    int index = *block_index;
+
+    if (!block_dinfo) {
+        block_dinfo = g_try_new(struct RamblockDirtyInfo, 1);
+        index = 0;
+    } else {
+        index++;
+        block_dinfo = g_try_realloc(block_dinfo, (index + 1) *
+                                    sizeof(struct RamblockDirtyInfo));
+    }
+    if (!block_dinfo) {
+        return NULL;
+    }
+
+    info = &block_dinfo[index];
+    memset(info, 0, sizeof(struct RamblockDirtyInfo));
+
+    *block_index = index;
+    return block_dinfo;
+}
+
+static int record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo,
+                                     struct DirtyRateConfig config,
+                                     int *block_index)
+{
+    struct RamblockDirtyInfo *info = NULL;
+    struct RamblockDirtyInfo *dinfo = NULL;
+    RAMBlock *block = NULL;
+    int index = 0;
+
+    RAMBLOCK_FOREACH_MIGRATABLE(block) {
+        dinfo = alloc_ramblock_dirty_info(&index, dinfo);
+        if (dinfo == NULL) {
+            return -1;
+        }
+        info = &dinfo[index];
+        get_ramblock_dirty_info(block, info, &config);
+        if (save_ramblock_hash(info) < 0) {
+            *block_dinfo = dinfo;
+            *block_index = index;
+            return -1;
+        }
+    }
+
+    *block_dinfo = dinfo;
+    *block_index = index;
+
+    return 0;
+}
+
 static void calculate_dirtyrate(struct DirtyRateConfig config)
 {
     /* todo */
diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h
index af57c80..0812b16 100644
--- a/migration/dirtyrate.h
+++ b/migration/dirtyrate.h
@@ -20,10 +20,17 @@ 
 #define DIRTYRATE_DEFAULT_SAMPLE_PAGES            256
 
 /*
+ * Sample page size 4K as default.
+ */
+#define DIRTYRATE_SAMPLE_PAGE_SIZE                4096
+
+/*
  * Record ramblock idstr
  */
 #define RAMBLOCK_INFO_MAX_LEN                     256
 
+#define QCRYPTO_HASH_LEN                          16
+
 /* Take 1s as default for calculation duration */
 #define DEFAULT_FETCH_DIRTYRATE_TIME_SEC          1