diff mbox

[v2,3/8] migration: show the statistics of compression

Message ID 20180719121520.30026-4-xiaoguangrong@tencent.com (mailing list archive)
State New, archived
Headers show

Commit Message

Xiao Guangrong July 19, 2018, 12:15 p.m. UTC
From: Xiao Guangrong <xiaoguangrong@tencent.com>

Currently, it includes:
pages: amount of pages compressed and transferred to the target VM
busy: count of times that no free thread was available to compress data
busy-rate: ratio of compression-thread busy time
reduced-size: amount of bytes reduced by compression
compression-rate: ratio of compressed size to original size

Signed-off-by: Xiao Guangrong <xiaoguangrong@tencent.com>
---
 hmp.c                 | 13 +++++++++++++
 migration/migration.c | 11 +++++++++++
 migration/ram.c       | 37 +++++++++++++++++++++++++++++++++++++
 migration/ram.h       |  1 +
 qapi/migration.json   | 26 +++++++++++++++++++++++++-
 5 files changed, 87 insertions(+), 1 deletion(-)

Comments

Peter Xu July 23, 2018, 4:36 a.m. UTC | #1
On Thu, Jul 19, 2018 at 08:15:15PM +0800, guangrong.xiao@gmail.com wrote:
> @@ -1597,6 +1608,24 @@ static void migration_update_rates(RAMState *rs, int64_t end_time)
>              rs->xbzrle_cache_miss_prev) / iter_count;
>          rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
>      }
> +
> +    if (migrate_use_compression()) {
> +        uint64_t comp_pages;
> +
> +        compression_counters.busy_rate = (double)(compression_counters.busy -
> +            rs->compress_thread_busy_prev) / iter_count;

Here I'm not sure it's correct...

"iter_count" stands for ramstate.iterations.  It's increased per
ram_find_and_save_block(), so IMHO it might contain multiple guest
pages.  However compression_counters.busy should be per guest page.

> +        rs->compress_thread_busy_prev = compression_counters.busy;
> +
> +        comp_pages = compression_counters.pages - rs->compress_pages_prev;
> +        if (comp_pages) {
> +            compression_counters.compression_rate =
> +                (double)(compression_counters.reduced_size -
> +                rs->compress_reduced_size_prev) /
> +                (comp_pages * TARGET_PAGE_SIZE);
> +            rs->compress_pages_prev = compression_counters.pages;
> +            rs->compress_reduced_size_prev = compression_counters.reduced_size;
> +        }
> +    }
>  }
>  
>  static void migration_bitmap_sync(RAMState *rs)
> @@ -1872,6 +1901,9 @@ static void flush_compressed_data(RAMState *rs)
>          qemu_mutex_lock(&comp_param[idx].mutex);
>          if (!comp_param[idx].quit) {
>              len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
> +            /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
> +            compression_counters.reduced_size += TARGET_PAGE_SIZE - len + 8;

I would agree with Dave here - why we store the "reduced size" instead
of the size of the compressed data (which I think should be len - 8)?

Meanwhile, would a helper be nicer? Like:

        void migrate_compressed_page_stats_update(int xfer_size)
        {
                /* Removing the offset header in save_page_header() */
                compression_counters.compressed_size = xfer_size - 8;
                compression_counters.pages++;
                ram_counters.transferred += bytes_xmit;
        }

> +            compression_counters.pages++;
>              ram_counters.transferred += len;
>          }
>          qemu_mutex_unlock(&comp_param[idx].mutex);
> @@ -1903,6 +1935,10 @@ retry:
>              qemu_cond_signal(&comp_param[idx].cond);
>              qemu_mutex_unlock(&comp_param[idx].mutex);
>              pages = 1;
> +            /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
> +            compression_counters.reduced_size += TARGET_PAGE_SIZE -
> +                                                 bytes_xmit + 8;
> +            compression_counters.pages++;
>              ram_counters.transferred += bytes_xmit;
>              break;
>          }

Regards,
Xiao Guangrong July 23, 2018, 7:39 a.m. UTC | #2
On 07/23/2018 12:36 PM, Peter Xu wrote:
> On Thu, Jul 19, 2018 at 08:15:15PM +0800, guangrong.xiao@gmail.com wrote:
>> @@ -1597,6 +1608,24 @@ static void migration_update_rates(RAMState *rs, int64_t end_time)
>>               rs->xbzrle_cache_miss_prev) / iter_count;
>>           rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
>>       }
>> +
>> +    if (migrate_use_compression()) {
>> +        uint64_t comp_pages;
>> +
>> +        compression_counters.busy_rate = (double)(compression_counters.busy -
>> +            rs->compress_thread_busy_prev) / iter_count;
> 
> Here I'm not sure it's correct...
> 
> "iter_count" stands for ramstate.iterations.  It's increased per
> ram_find_and_save_block(), so IMHO it might contain multiple guest

ram_find_and_save_block() returns once a page is successfully posted, and
it only posts 1 page out at a time.

> pages.  However compression_counters.busy should be per guest page.
> 

Actually, it's derived from xbzrle_counters.cache_miss_rate:
         xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
             rs->xbzrle_cache_miss_prev) / iter_count;

>> +        rs->compress_thread_busy_prev = compression_counters.busy;
>> +
>> +        comp_pages = compression_counters.pages - rs->compress_pages_prev;
>> +        if (comp_pages) {
>> +            compression_counters.compression_rate =
>> +                (double)(compression_counters.reduced_size -
>> +                rs->compress_reduced_size_prev) /
>> +                (comp_pages * TARGET_PAGE_SIZE);
>> +            rs->compress_pages_prev = compression_counters.pages;
>> +            rs->compress_reduced_size_prev = compression_counters.reduced_size;
>> +        }
>> +    }
>>   }
>>   
>>   static void migration_bitmap_sync(RAMState *rs)
>> @@ -1872,6 +1901,9 @@ static void flush_compressed_data(RAMState *rs)
>>           qemu_mutex_lock(&comp_param[idx].mutex);
>>           if (!comp_param[idx].quit) {
>>               len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
>> +            /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
>> +            compression_counters.reduced_size += TARGET_PAGE_SIZE - len + 8;
> 
> I would agree with Dave here - why we store the "reduced size" instead
> of the size of the compressed data (which I think should be len - 8)?
> 

len-8 is the size of the data after compression rather than the amount saved
by compression, which is not straightforward for the user to see how much
the improvement is from applying compression.

Hmm... but it is not a big deal to me... :)

> Meanwhile, would a helper be nicer? Like:

Yup, that's nicer indeed.
Peter Xu July 23, 2018, 8:05 a.m. UTC | #3
On Mon, Jul 23, 2018 at 03:39:18PM +0800, Xiao Guangrong wrote:
> 
> 
> On 07/23/2018 12:36 PM, Peter Xu wrote:
> > On Thu, Jul 19, 2018 at 08:15:15PM +0800, guangrong.xiao@gmail.com wrote:
> > > @@ -1597,6 +1608,24 @@ static void migration_update_rates(RAMState *rs, int64_t end_time)
> > >               rs->xbzrle_cache_miss_prev) / iter_count;
> > >           rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
> > >       }
> > > +
> > > +    if (migrate_use_compression()) {
> > > +        uint64_t comp_pages;
> > > +
> > > +        compression_counters.busy_rate = (double)(compression_counters.busy -
> > > +            rs->compress_thread_busy_prev) / iter_count;
> > 
> > Here I'm not sure it's correct...
> > 
> > "iter_count" stands for ramstate.iterations.  It's increased per
> > ram_find_and_save_block(), so IMHO it might contain multiple guest
> 
> ram_find_and_save_block() returns if a page is successfully posted and
> it only posts 1 page out at one time.

ram_find_and_save_block() calls ram_save_host_page(), and we should be
sending multiple guest pages in ram_save_host_page() if the host page
is a huge page?

> 
> > pages.  However compression_counters.busy should be per guest page.
> > 
> 
> Actually, it's derived from xbzrle_counters.cache_miss_rate:
>         xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
>             rs->xbzrle_cache_miss_prev) / iter_count;

Then this is suspecious to me too...

> 
> > > +        rs->compress_thread_busy_prev = compression_counters.busy;
> > > +
> > > +        comp_pages = compression_counters.pages - rs->compress_pages_prev;
> > > +        if (comp_pages) {
> > > +            compression_counters.compression_rate =
> > > +                (double)(compression_counters.reduced_size -
> > > +                rs->compress_reduced_size_prev) /
> > > +                (comp_pages * TARGET_PAGE_SIZE);
> > > +            rs->compress_pages_prev = compression_counters.pages;
> > > +            rs->compress_reduced_size_prev = compression_counters.reduced_size;
> > > +        }
> > > +    }
> > >   }
> > >   static void migration_bitmap_sync(RAMState *rs)
> > > @@ -1872,6 +1901,9 @@ static void flush_compressed_data(RAMState *rs)
> > >           qemu_mutex_lock(&comp_param[idx].mutex);
> > >           if (!comp_param[idx].quit) {
> > >               len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
> > > +            /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
> > > +            compression_counters.reduced_size += TARGET_PAGE_SIZE - len + 8;
> > 
> > I would agree with Dave here - why we store the "reduced size" instead
> > of the size of the compressed data (which I think should be len - 8)?
> > 
> 
> len-8 is the size of data after compressed rather than the data improved
> by compression that is not straightforward for the user to see how much
> the improvement is by applying compression.
> 
> Hmm... but it is not a big deal to me... :)

Yeah it might be a personal preference indeed. :)

It's just natural to do that this way for me since AFAIU the
compression ratio is defined as:

                           compressed data size
  compression ratio =    ------------------------
                            original data size

> 
> > Meanwhile, would a helper be nicer? Like:
> 
> Yup, that's nicer indeed.

Regards,
Xiao Guangrong July 23, 2018, 8:40 a.m. UTC | #4
On 07/23/2018 04:05 PM, Peter Xu wrote:
> On Mon, Jul 23, 2018 at 03:39:18PM +0800, Xiao Guangrong wrote:
>>
>>
>> On 07/23/2018 12:36 PM, Peter Xu wrote:
>>> On Thu, Jul 19, 2018 at 08:15:15PM +0800, guangrong.xiao@gmail.com wrote:
>>>> @@ -1597,6 +1608,24 @@ static void migration_update_rates(RAMState *rs, int64_t end_time)
>>>>                rs->xbzrle_cache_miss_prev) / iter_count;
>>>>            rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
>>>>        }
>>>> +
>>>> +    if (migrate_use_compression()) {
>>>> +        uint64_t comp_pages;
>>>> +
>>>> +        compression_counters.busy_rate = (double)(compression_counters.busy -
>>>> +            rs->compress_thread_busy_prev) / iter_count;
>>>
>>> Here I'm not sure it's correct...
>>>
>>> "iter_count" stands for ramstate.iterations.  It's increased per
>>> ram_find_and_save_block(), so IMHO it might contain multiple guest
>>
>> ram_find_and_save_block() returns if a page is successfully posted and
>> it only posts 1 page out at one time.
> 
> ram_find_and_save_block() calls ram_save_host_page(), and we should be
> sending multiple guest pages in ram_save_host_page() if the host page
> is a huge page?
> 

You're right, thank you for pointing it out.

So, how about introducing a field, posted_pages, into RAMState that is used
to track total pages posted out.

Then we will use this field to replace 'iter_count' for compression and use
'RAMState.posted_pages - ram_counters.duplicate' to calculate
xbzrle_cache_miss, as the zero page is not handled by xbzrle.

Or introduce a new function, total_posted_pages, which returns the
sum of all page counts:

    static total_posted_pages(void)
    {
        return ram_counters.normal + ram_counters.duplicate + compression_counters.pages
               +  xbzrle_counters.pages;
    }

that would be a bit more complexity...

>>
>>> pages.  However compression_counters.busy should be per guest page.
>>>
>>
>> Actually, it's derived from xbzrle_counters.cache_miss_rate:
>>          xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
>>              rs->xbzrle_cache_miss_prev) / iter_count;
> 
> Then this is suspecious to me too...
> 
>>
>>>> +        rs->compress_thread_busy_prev = compression_counters.busy;
>>>> +
>>>> +        comp_pages = compression_counters.pages - rs->compress_pages_prev;
>>>> +        if (comp_pages) {
>>>> +            compression_counters.compression_rate =
>>>> +                (double)(compression_counters.reduced_size -
>>>> +                rs->compress_reduced_size_prev) /
>>>> +                (comp_pages * TARGET_PAGE_SIZE);
>>>> +            rs->compress_pages_prev = compression_counters.pages;
>>>> +            rs->compress_reduced_size_prev = compression_counters.reduced_size;
>>>> +        }
>>>> +    }
>>>>    }
>>>>    static void migration_bitmap_sync(RAMState *rs)
>>>> @@ -1872,6 +1901,9 @@ static void flush_compressed_data(RAMState *rs)
>>>>            qemu_mutex_lock(&comp_param[idx].mutex);
>>>>            if (!comp_param[idx].quit) {
>>>>                len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
>>>> +            /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
>>>> +            compression_counters.reduced_size += TARGET_PAGE_SIZE - len + 8;
>>>
>>> I would agree with Dave here - why we store the "reduced size" instead
>>> of the size of the compressed data (which I think should be len - 8)?
>>>
>>
>> len-8 is the size of data after compressed rather than the data improved
>> by compression that is not straightforward for the user to see how much
>> the improvement is by applying compression.
>>
>> Hmm... but it is not a big deal to me... :)
> 
> Yeah it might be a personal preference indeed. :)
> 
> It's just natural to do that this way for me since AFAIU the
> compression ratio is defined as:
> 
>                             compressed data size
>    compression ratio =    ------------------------
>                              original data size
> 

Er, we do it as following:
             compression_counters.compression_rate =
                 (double)(compression_counters.reduced_size -
                 rs->compress_reduced_size_prev) /
                 (comp_pages * TARGET_PAGE_SIZE);

We use reduced_size, i.e,:

                              original data size - compressed data size
     compression ratio =    ------------------------
                               original data size

for example, for 100 bytes raw data, if we posted 99 bytes out, then
the compression ratio should be 1%.

So if I understand correctly, the reduced_size is really what you want? :)
Peter Xu July 23, 2018, 9:15 a.m. UTC | #5
On Mon, Jul 23, 2018 at 04:40:29PM +0800, Xiao Guangrong wrote:
> 
> 
> On 07/23/2018 04:05 PM, Peter Xu wrote:
> > On Mon, Jul 23, 2018 at 03:39:18PM +0800, Xiao Guangrong wrote:
> > > 
> > > 
> > > On 07/23/2018 12:36 PM, Peter Xu wrote:
> > > > On Thu, Jul 19, 2018 at 08:15:15PM +0800, guangrong.xiao@gmail.com wrote:
> > > > > @@ -1597,6 +1608,24 @@ static void migration_update_rates(RAMState *rs, int64_t end_time)
> > > > >                rs->xbzrle_cache_miss_prev) / iter_count;
> > > > >            rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
> > > > >        }
> > > > > +
> > > > > +    if (migrate_use_compression()) {
> > > > > +        uint64_t comp_pages;
> > > > > +
> > > > > +        compression_counters.busy_rate = (double)(compression_counters.busy -
> > > > > +            rs->compress_thread_busy_prev) / iter_count;
> > > > 
> > > > Here I'm not sure it's correct...
> > > > 
> > > > "iter_count" stands for ramstate.iterations.  It's increased per
> > > > ram_find_and_save_block(), so IMHO it might contain multiple guest
> > > 
> > > ram_find_and_save_block() returns if a page is successfully posted and
> > > it only posts 1 page out at one time.
> > 
> > ram_find_and_save_block() calls ram_save_host_page(), and we should be
> > sending multiple guest pages in ram_save_host_page() if the host page
> > is a huge page?
> > 
> 
> You're right, thank you for pointing it out.
> 
> So, how about introduce a filed, posted_pages, into RAMState that is used
> to track total pages posted out.
> 
> Then will use this filed to replace 'iter_count' for compression and use
> 'RAMState.posted_pages - ram_counters.duplicate' to calculate
> xbzrle_cache_miss as the zero page is not handled by xbzrle.
> 
> Or introduce a new function, total_posted_pages, which returns the
> sum of all page counts:
> 
>    static total_posted_pages(void)
>    {
>        return ram_counters.normal + ram_counters.duplicate + compression_counters.pages
>               +  xbzrle_counters.pages;
>    }
> 
> that would be a bit more complexity...

If below [1] is wrong too, then I'm thinking whether we could just
move the rs->iterations++ from ram_save_iterate() loop to
ram_save_host_page() loop, then we possibly fix both places...

After all I don't see any other usages of rs->iterations so it seems
fine.  Dave/Juan?

> 
> > > 
> > > > pages.  However compression_counters.busy should be per guest page.
> > > > 
> > > 
> > > Actually, it's derived from xbzrle_counters.cache_miss_rate:
> > >          xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
> > >              rs->xbzrle_cache_miss_prev) / iter_count;
> > 
> > Then this is suspecious to me too...

[1]

> > 
> > > 
> > > > > +        rs->compress_thread_busy_prev = compression_counters.busy;
> > > > > +
> > > > > +        comp_pages = compression_counters.pages - rs->compress_pages_prev;
> > > > > +        if (comp_pages) {
> > > > > +            compression_counters.compression_rate =
> > > > > +                (double)(compression_counters.reduced_size -
> > > > > +                rs->compress_reduced_size_prev) /
> > > > > +                (comp_pages * TARGET_PAGE_SIZE);
> > > > > +            rs->compress_pages_prev = compression_counters.pages;
> > > > > +            rs->compress_reduced_size_prev = compression_counters.reduced_size;
> > > > > +        }
> > > > > +    }
> > > > >    }
> > > > >    static void migration_bitmap_sync(RAMState *rs)
> > > > > @@ -1872,6 +1901,9 @@ static void flush_compressed_data(RAMState *rs)
> > > > >            qemu_mutex_lock(&comp_param[idx].mutex);
> > > > >            if (!comp_param[idx].quit) {
> > > > >                len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
> > > > > +            /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
> > > > > +            compression_counters.reduced_size += TARGET_PAGE_SIZE - len + 8;
> > > > 
> > > > I would agree with Dave here - why we store the "reduced size" instead
> > > > of the size of the compressed data (which I think should be len - 8)?
> > > > 
> > > 
> > > len-8 is the size of data after compressed rather than the data improved
> > > by compression that is not straightforward for the user to see how much
> > > the improvement is by applying compression.
> > > 
> > > Hmm... but it is not a big deal to me... :)
> > 
> > Yeah it might be a personal preference indeed. :)
> > 
> > It's just natural to do that this way for me since AFAIU the
> > compression ratio is defined as:
> > 
> >                             compressed data size
> >    compression ratio =    ------------------------
> >                              original data size
> > 
> 
> Er, we do it as following:
>             compression_counters.compression_rate =
>                 (double)(compression_counters.reduced_size -
>                 rs->compress_reduced_size_prev) /
>                 (comp_pages * TARGET_PAGE_SIZE);
> 
> We use reduced_size, i.e,:
> 
>                              original data size - compressed data size
>     compression ratio =    ------------------------
>                               original data size
> 
> for example, for 100 bytes raw data, if we posted 99 bytes out, then
> the compression ration should be 1%.

I think it should be 99%...

> 
> So if i understand correctly, the reduced_size is really you want? :)
> 

Here's the "official" definition. :)

https://en.wikipedia.org/wiki/Data_compression_ratio

But obviously I inverted the numerator and denominator... So maybe we
can follow what the wiki page said (then I think you'll just store the
summation of len-8)?

Regards,
Xiao Guangrong July 24, 2018, 7:37 a.m. UTC | #6
On 07/23/2018 05:15 PM, Peter Xu wrote:
> On Mon, Jul 23, 2018 at 04:40:29PM +0800, Xiao Guangrong wrote:
>>
>>
>> On 07/23/2018 04:05 PM, Peter Xu wrote:
>>> On Mon, Jul 23, 2018 at 03:39:18PM +0800, Xiao Guangrong wrote:
>>>>
>>>>
>>>> On 07/23/2018 12:36 PM, Peter Xu wrote:
>>>>> On Thu, Jul 19, 2018 at 08:15:15PM +0800, guangrong.xiao@gmail.com wrote:
>>>>>> @@ -1597,6 +1608,24 @@ static void migration_update_rates(RAMState *rs, int64_t end_time)
>>>>>>                 rs->xbzrle_cache_miss_prev) / iter_count;
>>>>>>             rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
>>>>>>         }
>>>>>> +
>>>>>> +    if (migrate_use_compression()) {
>>>>>> +        uint64_t comp_pages;
>>>>>> +
>>>>>> +        compression_counters.busy_rate = (double)(compression_counters.busy -
>>>>>> +            rs->compress_thread_busy_prev) / iter_count;
>>>>>
>>>>> Here I'm not sure it's correct...
>>>>>
>>>>> "iter_count" stands for ramstate.iterations.  It's increased per
>>>>> ram_find_and_save_block(), so IMHO it might contain multiple guest
>>>>
>>>> ram_find_and_save_block() returns if a page is successfully posted and
>>>> it only posts 1 page out at one time.
>>>
>>> ram_find_and_save_block() calls ram_save_host_page(), and we should be
>>> sending multiple guest pages in ram_save_host_page() if the host page
>>> is a huge page?
>>>
>>
>> You're right, thank you for pointing it out.
>>
>> So, how about introduce a filed, posted_pages, into RAMState that is used
>> to track total pages posted out.
>>
>> Then will use this filed to replace 'iter_count' for compression and use
>> 'RAMState.posted_pages - ram_counters.duplicate' to calculate
>> xbzrle_cache_miss as the zero page is not handled by xbzrle.
>>
>> Or introduce a new function, total_posted_pages, which returns the
>> sum of all page counts:
>>
>>     static total_posted_pages(void)
>>     {
>>         return ram_counters.normal + ram_counters.duplicate + compression_counters.pages
>>                +  xbzrle_counters.pages;
>>     }
>>
>> that would be a bit more complexity...
> 
> If below [1] is wrong too, then I'm thinking whether we could just
> move the rs->iterations++ from ram_save_iterate() loop to
> ram_save_host_page() loop, then we possibly fix both places...
> 

Besides that, even if we fix iterations, xbzrle is still painful as
zero pages should not be counted in the cache miss, i.e., xbzrle does
not handle zero pages at all.

Anyway, fixing iterations is a good start. :)

> After all I don't see any other usages of rs->iterations so it seems
> fine.  Dave/Juan?
> 
>>
>>>>
>>>>> pages.  However compression_counters.busy should be per guest page.
>>>>>
>>>>
>>>> Actually, it's derived from xbzrle_counters.cache_miss_rate:
>>>>           xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
>>>>               rs->xbzrle_cache_miss_prev) / iter_count;
>>>
>>> Then this is suspecious to me too...
> 
> [1]
> 
>>>
>>>>
>>>>>> +        rs->compress_thread_busy_prev = compression_counters.busy;
>>>>>> +
>>>>>> +        comp_pages = compression_counters.pages - rs->compress_pages_prev;
>>>>>> +        if (comp_pages) {
>>>>>> +            compression_counters.compression_rate =
>>>>>> +                (double)(compression_counters.reduced_size -
>>>>>> +                rs->compress_reduced_size_prev) /
>>>>>> +                (comp_pages * TARGET_PAGE_SIZE);
>>>>>> +            rs->compress_pages_prev = compression_counters.pages;
>>>>>> +            rs->compress_reduced_size_prev = compression_counters.reduced_size;
>>>>>> +        }
>>>>>> +    }
>>>>>>     }
>>>>>>     static void migration_bitmap_sync(RAMState *rs)
>>>>>> @@ -1872,6 +1901,9 @@ static void flush_compressed_data(RAMState *rs)
>>>>>>             qemu_mutex_lock(&comp_param[idx].mutex);
>>>>>>             if (!comp_param[idx].quit) {
>>>>>>                 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
>>>>>> +            /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
>>>>>> +            compression_counters.reduced_size += TARGET_PAGE_SIZE - len + 8;
>>>>>
>>>>> I would agree with Dave here - why we store the "reduced size" instead
>>>>> of the size of the compressed data (which I think should be len - 8)?
>>>>>
>>>>
>>>> len-8 is the size of data after compressed rather than the data improved
>>>> by compression that is not straightforward for the user to see how much
>>>> the improvement is by applying compression.
>>>>
>>>> Hmm... but it is not a big deal to me... :)
>>>
>>> Yeah it might be a personal preference indeed. :)
>>>
>>> It's just natural to do that this way for me since AFAIU the
>>> compression ratio is defined as:
>>>
>>>                              compressed data size
>>>     compression ratio =    ------------------------
>>>                               original data size
>>>
>>
>> Er, we do it as following:
>>              compression_counters.compression_rate =
>>                  (double)(compression_counters.reduced_size -
>>                  rs->compress_reduced_size_prev) /
>>                  (comp_pages * TARGET_PAGE_SIZE);
>>
>> We use reduced_size, i.e,:
>>
>>                               original data size - compressed data size
>>      compression ratio =    ------------------------
>>                                original data size
>>
>> for example, for 100 bytes raw data, if we posted 99 bytes out, then
>> the compression ration should be 1%.
> 
> I think it should be 99%...
> 
>>
>> So if i understand correctly, the reduced_size is really you want? :)
>>
> 
> Here's the "offical" definition. :)
> 
> https://en.wikipedia.org/wiki/Data_compression_ratio
> 
> But obviously I reverted the molecular and denominator... So maybe we
> can follow what the wiki page said (then I think you'll just store the
> summation of len-8)?

Thank you for fixing my knowledge; what I computed is "space saving" rather
than "compression ratio". As "compression ratio" is the term widely used
in compression benchmarks, your suggestion is fine to me.
Dr. David Alan Gilbert July 25, 2018, 4:44 p.m. UTC | #7
* Peter Xu (peterx@redhat.com) wrote:
> On Mon, Jul 23, 2018 at 03:39:18PM +0800, Xiao Guangrong wrote:
> > 
> > 
> > On 07/23/2018 12:36 PM, Peter Xu wrote:
> > > On Thu, Jul 19, 2018 at 08:15:15PM +0800, guangrong.xiao@gmail.com wrote:
> > > > @@ -1597,6 +1608,24 @@ static void migration_update_rates(RAMState *rs, int64_t end_time)
> > > >               rs->xbzrle_cache_miss_prev) / iter_count;
> > > >           rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
> > > >       }
> > > > +
> > > > +    if (migrate_use_compression()) {
> > > > +        uint64_t comp_pages;
> > > > +
> > > > +        compression_counters.busy_rate = (double)(compression_counters.busy -
> > > > +            rs->compress_thread_busy_prev) / iter_count;
> > > 
> > > Here I'm not sure it's correct...
> > > 
> > > "iter_count" stands for ramstate.iterations.  It's increased per
> > > ram_find_and_save_block(), so IMHO it might contain multiple guest
> > 
> > ram_find_and_save_block() returns if a page is successfully posted and
> > it only posts 1 page out at one time.
> 
> ram_find_and_save_block() calls ram_save_host_page(), and we should be
> sending multiple guest pages in ram_save_host_page() if the host page
> is a huge page?
> 
> > 
> > > pages.  However compression_counters.busy should be per guest page.
> > > 
> > 
> > Actually, it's derived from xbzrle_counters.cache_miss_rate:
> >         xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
> >             rs->xbzrle_cache_miss_prev) / iter_count;
> 
> Then this is suspecious to me too...

Actually; I think this isn't totally wrong;  iter_count is the *difference* in
iterations since the last time it was updated:

   uint64_t iter_count = rs->iterations - rs->iterations_prev;

        xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
            rs->xbzrle_cache_miss_prev) / iter_count;

so this is:
      cache-misses-since-last-update
      ------------------------------
        iterations since last-update

so the 'miss_rate' is ~misses / iteration.
Although that doesn't really correspond to time.

Dave

> > 
> > > > +        rs->compress_thread_busy_prev = compression_counters.busy;
> > > > +
> > > > +        comp_pages = compression_counters.pages - rs->compress_pages_prev;
> > > > +        if (comp_pages) {
> > > > +            compression_counters.compression_rate =
> > > > +                (double)(compression_counters.reduced_size -
> > > > +                rs->compress_reduced_size_prev) /
> > > > +                (comp_pages * TARGET_PAGE_SIZE);
> > > > +            rs->compress_pages_prev = compression_counters.pages;
> > > > +            rs->compress_reduced_size_prev = compression_counters.reduced_size;
> > > > +        }
> > > > +    }
> > > >   }
> > > >   static void migration_bitmap_sync(RAMState *rs)
> > > > @@ -1872,6 +1901,9 @@ static void flush_compressed_data(RAMState *rs)
> > > >           qemu_mutex_lock(&comp_param[idx].mutex);
> > > >           if (!comp_param[idx].quit) {
> > > >               len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
> > > > +            /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
> > > > +            compression_counters.reduced_size += TARGET_PAGE_SIZE - len + 8;
> > > 
> > > I would agree with Dave here - why we store the "reduced size" instead
> > > of the size of the compressed data (which I think should be len - 8)?
> > > 
> > 
> > len-8 is the size of data after compressed rather than the data improved
> > by compression that is not straightforward for the user to see how much
> > the improvement is by applying compression.
> > 
> > Hmm... but it is not a big deal to me... :)
> 
> Yeah it might be a personal preference indeed. :)
> 
> It's just natural to do that this way for me since AFAIU the
> compression ratio is defined as:
> 
>                            compressed data size
>   compression ratio =    ------------------------
>                             original data size
> 
> > 
> > > Meanwhile, would a helper be nicer? Like:
> > 
> > Yup, that's nicer indeed.
> 
> Regards,
> 
> -- 
> Peter Xu
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Peter Xu July 26, 2018, 5:29 a.m. UTC | #8
On Wed, Jul 25, 2018 at 05:44:02PM +0100, Dr. David Alan Gilbert wrote:
> * Peter Xu (peterx@redhat.com) wrote:
> > On Mon, Jul 23, 2018 at 03:39:18PM +0800, Xiao Guangrong wrote:
> > > 
> > > 
> > > On 07/23/2018 12:36 PM, Peter Xu wrote:
> > > > On Thu, Jul 19, 2018 at 08:15:15PM +0800, guangrong.xiao@gmail.com wrote:
> > > > > @@ -1597,6 +1608,24 @@ static void migration_update_rates(RAMState *rs, int64_t end_time)
> > > > >               rs->xbzrle_cache_miss_prev) / iter_count;
> > > > >           rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
> > > > >       }
> > > > > +
> > > > > +    if (migrate_use_compression()) {
> > > > > +        uint64_t comp_pages;
> > > > > +
> > > > > +        compression_counters.busy_rate = (double)(compression_counters.busy -
> > > > > +            rs->compress_thread_busy_prev) / iter_count;
> > > > 
> > > > Here I'm not sure it's correct...
> > > > 
> > > > "iter_count" stands for ramstate.iterations.  It's increased per
> > > > ram_find_and_save_block(), so IMHO it might contain multiple guest
> > > 
> > > ram_find_and_save_block() returns if a page is successfully posted and
> > > it only posts 1 page out at one time.
> > 
> > ram_find_and_save_block() calls ram_save_host_page(), and we should be
> > sending multiple guest pages in ram_save_host_page() if the host page
> > is a huge page?
> > 
> > > 
> > > > pages.  However compression_counters.busy should be per guest page.
> > > > 
> > > 
> > > Actually, it's derived from xbzrle_counters.cache_miss_rate:
> > >         xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
> > >             rs->xbzrle_cache_miss_prev) / iter_count;
> > 
> > Then this is suspecious to me too...
> 
> Actually; I think this isn't totally wrong;  iter_count is the *difference* in
> iterations since the last time it was updated:
> 
>    uint64_t iter_count = rs->iterations - rs->iterations_prev;
> 
>         xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
>             rs->xbzrle_cache_miss_prev) / iter_count;
> 
> so this is:
>       cache-misses-since-last-update
>       ------------------------------
>         iterations since last-update
> 
> so the 'miss_rate' is ~misses / iteration.
> Although that doesn't really correspond to time.

I'm not sure I got the idea here, the thing is that I think the
counters are for different granularities which might be problematic:

- xbzrle_counters.cache_miss is done in save_xbzrle_page(), so it's
  per-guest-page granularity

- RAMState.iterations is done for each ram_find_and_save_block(), so
  it's per-host-page granularity

An example is that when we migrate a 2M huge page in the guest, we
will only increase the RAMState.iterations by 1 (since
ram_find_and_save_block() will be called once), but we might increase
xbzrle_counters.cache_miss for 2M/4K=512 times (we'll call
save_xbzrle_page() that many times) if all the pages got cache miss.
Then IMHO the cache miss rate will be 512/1=51200% (while it should
actually be just 100% cache miss).

Regards,
diff mbox

Patch

diff --git a/hmp.c b/hmp.c
index 47d36e3ccf..a9e2776d60 100644
--- a/hmp.c
+++ b/hmp.c
@@ -271,6 +271,19 @@  void hmp_info_migrate(Monitor *mon, const QDict *qdict)
                        info->xbzrle_cache->overflow);
     }
 
+    if (info->has_compression) {
+        monitor_printf(mon, "compression pages: %" PRIu64 " pages\n",
+                       info->compression->pages);
+        monitor_printf(mon, "compression busy: %" PRIu64 "\n",
+                       info->compression->busy);
+        monitor_printf(mon, "compression busy rate: %0.2f\n",
+                       info->compression->busy_rate);
+        monitor_printf(mon, "compression reduced size: %" PRIu64 "\n",
+                       info->compression->reduced_size);
+        monitor_printf(mon, "compression rate: %0.2f\n",
+                       info->compression->compression_rate);
+    }
+
     if (info->has_cpu_throttle_percentage) {
         monitor_printf(mon, "cpu throttle percentage: %" PRIu64 "\n",
                        info->cpu_throttle_percentage);
diff --git a/migration/migration.c b/migration/migration.c
index 0af75465b3..8fedf13889 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -754,6 +754,17 @@  static void populate_ram_info(MigrationInfo *info, MigrationState *s)
         info->xbzrle_cache->overflow = xbzrle_counters.overflow;
     }
 
+    if (migrate_use_compression()) {
+        info->has_compression = true;
+        info->compression = g_malloc0(sizeof(*info->compression));
+        info->compression->pages = compression_counters.pages;
+        info->compression->busy = compression_counters.busy;
+        info->compression->busy_rate = compression_counters.busy_rate;
+        info->compression->reduced_size = compression_counters.reduced_size;
+        info->compression->compression_rate =
+                                    compression_counters.compression_rate;
+    }
+
     if (cpu_throttle_active()) {
         info->has_cpu_throttle_percentage = true;
         info->cpu_throttle_percentage = cpu_throttle_get_percentage();
diff --git a/migration/ram.c b/migration/ram.c
index 1b016e048d..e68b0e6dec 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -300,6 +300,15 @@  struct RAMState {
     uint64_t num_dirty_pages_period;
     /* xbzrle misses since the beginning of the period */
     uint64_t xbzrle_cache_miss_prev;
+
+    /* compression statistics since the beginning of the period */
+    /* number of times that no free thread was available to compress data */
+    uint64_t compress_thread_busy_prev;
+    /* amount of bytes reduced by compression */
+    uint64_t compress_reduced_size_prev;
+    /* number of compressed pages */
+    uint64_t compress_pages_prev;
+
     /* number of iterations at the beginning of period */
     uint64_t iterations_prev;
     /* Iterations since start */
@@ -337,6 +346,8 @@  struct PageSearchStatus {
 };
 typedef struct PageSearchStatus PageSearchStatus;
 
+CompressionStats compression_counters;
+
 struct CompressParam {
     bool done;
     bool quit;
@@ -1597,6 +1608,24 @@  static void migration_update_rates(RAMState *rs, int64_t end_time)
             rs->xbzrle_cache_miss_prev) / iter_count;
         rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
     }
+
+    if (migrate_use_compression()) {
+        uint64_t comp_pages;
+
+        compression_counters.busy_rate = (double)(compression_counters.busy -
+            rs->compress_thread_busy_prev) / iter_count;
+        rs->compress_thread_busy_prev = compression_counters.busy;
+
+        comp_pages = compression_counters.pages - rs->compress_pages_prev;
+        if (comp_pages) {
+            compression_counters.compression_rate =
+                (double)(compression_counters.reduced_size -
+                rs->compress_reduced_size_prev) /
+                (comp_pages * TARGET_PAGE_SIZE);
+            rs->compress_pages_prev = compression_counters.pages;
+            rs->compress_reduced_size_prev = compression_counters.reduced_size;
+        }
+    }
 }
 
 static void migration_bitmap_sync(RAMState *rs)
@@ -1872,6 +1901,9 @@  static void flush_compressed_data(RAMState *rs)
         qemu_mutex_lock(&comp_param[idx].mutex);
         if (!comp_param[idx].quit) {
             len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
+            /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
+            compression_counters.reduced_size += TARGET_PAGE_SIZE - len + 8;
+            compression_counters.pages++;
             ram_counters.transferred += len;
         }
         qemu_mutex_unlock(&comp_param[idx].mutex);
@@ -1903,6 +1935,10 @@  retry:
             qemu_cond_signal(&comp_param[idx].cond);
             qemu_mutex_unlock(&comp_param[idx].mutex);
             pages = 1;
+            /* 8 means a header with RAM_SAVE_FLAG_CONTINUE. */
+            compression_counters.reduced_size += TARGET_PAGE_SIZE -
+                                                 bytes_xmit + 8;
+            compression_counters.pages++;
             ram_counters.transferred += bytes_xmit;
             break;
         }
@@ -2233,6 +2269,7 @@  static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
         if (res > 0) {
             return res;
         }
+        compression_counters.busy++;
     } else if (migrate_use_multifd()) {
         return ram_save_multifd_page(rs, block, offset);
     }
diff --git a/migration/ram.h b/migration/ram.h
index 457bf54b8c..a139066846 100644
--- a/migration/ram.h
+++ b/migration/ram.h
@@ -36,6 +36,7 @@ 
 
 extern MigrationStats ram_counters;
 extern XBZRLECacheStats xbzrle_counters;
+extern CompressionStats compression_counters;
 
 int xbzrle_cache_resize(int64_t new_size, Error **errp);
 uint64_t ram_bytes_remaining(void);
diff --git a/qapi/migration.json b/qapi/migration.json
index b4f394844b..5a34aa1754 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -75,6 +75,27 @@ 
            'cache-miss': 'int', 'cache-miss-rate': 'number',
            'overflow': 'int' } }
 
+##
+# @CompressionStats:
+#
+# Detailed migration compression statistics
+#
+# @pages: number of pages compressed and transferred to the target VM
+#
+# @busy: count of times that no free thread was available to compress data
+#
+# @busy-rate: rate at which no free thread was available to compress data
+#
+# @reduced-size: number of bytes saved by compression
+#
+# @compression-rate: ratio of the size reduced by compression to the original data size
+#
+# Since: 3.0
+##
+{ 'struct': 'CompressionStats',
+  'data': {'pages': 'int', 'busy': 'int', 'busy-rate': 'number',
+	   'reduced-size': 'int', 'compression-rate': 'number' } }
+
 ##
 # @MigrationStatus:
 #
@@ -172,6 +193,8 @@ 
 #           only present when the postcopy-blocktime migration capability
 #           is enabled. (Since 3.0)
 #
+# @compression: migration compression statistics, only returned if compression
+#           feature is on and status is 'active' or 'completed' (Since 3.0)
 #
 # Since: 0.14.0
 ##
@@ -186,7 +209,8 @@ 
            '*cpu-throttle-percentage': 'int',
            '*error-desc': 'str',
            '*postcopy-blocktime' : 'uint32',
-           '*postcopy-vcpu-blocktime': ['uint32']} }
+           '*postcopy-vcpu-blocktime': ['uint32'],
+           '*compression': 'CompressionStats'} }
 
 ##
 # @query-migrate: