diff mbox series

[v3] migration/dirtyrate: make sample page count configurable

Message ID 1fc52dd5abfa7590f516c3e97878ee263bff105a.1620742417.git.huangy81@chinatelecom.cn (mailing list archive)
State New, archived
Headers show
Series [v3] migration/dirtyrate: make sample page count configurable | expand

Commit Message

Hyman Huang May 11, 2021, 2:21 p.m. UTC
From: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>

introduce optional sample-pages argument in calc-dirty-rate,
making sample page count per GB configurable so that more
accurate dirtyrate can be calculated.

Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
---
 migration/dirtyrate.c | 31 +++++++++++++++++++++++++++----
 migration/dirtyrate.h |  8 +++++++-
 qapi/migration.json   | 13 ++++++++++---
 3 files changed, 44 insertions(+), 8 deletions(-)

Comments

Hyman Huang June 1, 2021, 12:09 p.m. UTC | #1
Ping

though dirtyrate by sampling page may kind of be inaccurate,
it still valuable for those who run qemu on non-x86 or kernel
which does not support dirty ring, this patch is necessary i
think, what would you think of it ?

在 2021/5/11 22:21, huangy81@chinatelecom.cn 写道:
> From: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
> 
> introduce optional sample-pages argument in calc-dirty-rate,
> making sample page count per GB configurable so that more
> accurate dirtyrate can be calculated.
> 
> Signed-off-by: Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
> ---
>   migration/dirtyrate.c | 31 +++++++++++++++++++++++++++----
>   migration/dirtyrate.h |  8 +++++++-
>   qapi/migration.json   | 13 ++++++++++---
>   3 files changed, 44 insertions(+), 8 deletions(-)
> 
> diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
> index ccb9814..2ee3890 100644
> --- a/migration/dirtyrate.c
> +++ b/migration/dirtyrate.c
> @@ -48,6 +48,12 @@ static bool is_sample_period_valid(int64_t sec)
>       return true;
>   }
>   
> +static bool is_sample_pages_valid(int64_t pages)
> +{
> +    return pages >= MIN_SAMPLE_PAGE_COUNT &&
> +           pages <= MAX_SAMPLE_PAGE_COUNT;
> +}
> +
>   static int dirtyrate_set_state(int *state, int old_state, int new_state)
>   {
>       assert(new_state < DIRTY_RATE_STATUS__MAX);
> @@ -72,13 +78,15 @@ static struct DirtyRateInfo *query_dirty_rate_info(void)
>       info->status = CalculatingState;
>       info->start_time = DirtyStat.start_time;
>       info->calc_time = DirtyStat.calc_time;
> +    info->sample_pages = DirtyStat.sample_pages;
>   
>       trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));
>   
>       return info;
>   }
>   
> -static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time)
> +static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time,
> +                                uint64_t sample_pages)
>   {
>       DirtyStat.total_dirty_samples = 0;
>       DirtyStat.total_sample_count = 0;
> @@ -86,6 +94,7 @@ static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time)
>       DirtyStat.dirty_rate = -1;
>       DirtyStat.start_time = start_time;
>       DirtyStat.calc_time = calc_time;
> +    DirtyStat.sample_pages = sample_pages;
>   }
>   
>   static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
> @@ -361,6 +370,7 @@ void *get_dirtyrate_thread(void *arg)
>       int ret;
>       int64_t start_time;
>       int64_t calc_time;
> +    uint64_t sample_pages;
>   
>       ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
>                                 DIRTY_RATE_STATUS_MEASURING);
> @@ -371,7 +381,8 @@ void *get_dirtyrate_thread(void *arg)
>   
>       start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
>       calc_time = config.sample_period_seconds;
> -    init_dirtyrate_stat(start_time, calc_time);
> +    sample_pages = config.sample_pages_per_gigabytes;
> +    init_dirtyrate_stat(start_time, calc_time, sample_pages);
>   
>       calculate_dirtyrate(config);
>   
> @@ -383,7 +394,8 @@ void *get_dirtyrate_thread(void *arg)
>       return NULL;
>   }
>   
> -void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
> +void qmp_calc_dirty_rate(int64_t calc_time, bool has_sample_pages,
> +                         int64_t sample_pages, Error **errp)
>   {
>       static struct DirtyRateConfig config;
>       QemuThread thread;
> @@ -404,6 +416,17 @@ void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
>           return;
>       }
>   
> +    if (has_sample_pages) {
> +        if (!is_sample_pages_valid(sample_pages)) {
> +            error_setg(errp, "sample-pages is out of range[%d, %d].",
> +                            MIN_SAMPLE_PAGE_COUNT,
> +                            MAX_SAMPLE_PAGE_COUNT);
> +            return;
> +        }
> +    } else {
> +        sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
> +    }
> +
>       /*
>        * Init calculation state as unstarted.
>        */
> @@ -415,7 +438,7 @@ void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
>       }
>   
>       config.sample_period_seconds = calc_time;
> -    config.sample_pages_per_gigabytes = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
> +    config.sample_pages_per_gigabytes = sample_pages;
>       qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
>                          (void *)&config, QEMU_THREAD_DETACHED);
>   }
> diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h
> index 6ec4295..e1fd290 100644
> --- a/migration/dirtyrate.h
> +++ b/migration/dirtyrate.h
> @@ -15,7 +15,6 @@
>   
>   /*
>    * Sample 512 pages per GB as default.
> - * TODO: Make it configurable.
>    */
>   #define DIRTYRATE_DEFAULT_SAMPLE_PAGES            512
>   
> @@ -35,6 +34,12 @@
>   #define MIN_FETCH_DIRTYRATE_TIME_SEC              1
>   #define MAX_FETCH_DIRTYRATE_TIME_SEC              60
>   
> +/*
> + * Take 1/16 pages in 1G as the maxmum sample page count
> + */
> +#define MIN_SAMPLE_PAGE_COUNT                     128
> +#define MAX_SAMPLE_PAGE_COUNT                     16384
> +
>   struct DirtyRateConfig {
>       uint64_t sample_pages_per_gigabytes; /* sample pages per GB */
>       int64_t sample_period_seconds; /* time duration between two sampling */
> @@ -63,6 +68,7 @@ struct DirtyRateStat {
>       int64_t dirty_rate; /* dirty rate in MB/s */
>       int64_t start_time; /* calculation start time in units of second */
>       int64_t calc_time; /* time duration of two sampling in units of second */
> +    uint64_t sample_pages; /* sample pages per GB */
>   };
>   
>   void *get_dirtyrate_thread(void *arg);
> diff --git a/qapi/migration.json b/qapi/migration.json
> index 0b17cce..890e745 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -1746,6 +1746,9 @@
>   #
>   # @calc-time: time in units of second for sample dirty pages
>   #
> +# @sample-pages: page count per GB for sample dirty pages
> +#                the default value is 512 (since 6.1)
> +#
>   # Since: 5.2
>   #
>   ##
> @@ -1753,7 +1756,8 @@
>     'data': {'*dirty-rate': 'int64',
>              'status': 'DirtyRateStatus',
>              'start-time': 'int64',
> -           'calc-time': 'int64'} }
> +           'calc-time': 'int64',
> +           'sample-pages': 'uint64'} }
>   
>   ##
>   # @calc-dirty-rate:
> @@ -1762,13 +1766,16 @@
>   #
>   # @calc-time: time in units of second for sample dirty pages
>   #
> +# @sample-pages: page count per GB for sample dirty pages
> +#                the default value is 512 (since 6.1)
> +#
>   # Since: 5.2
>   #
>   # Example:
> -#   {"command": "calc-dirty-rate", "data": {"calc-time": 1} }
> +#   {"command": "calc-dirty-rate", "data": {"calc-time": 1, 'sample-pages': 512} }
>   #
>   ##
> -{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64'} }
> +{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64', '*sample-pages': 'int'} }
>   
>   ##
>   # @query-dirty-rate:
>
Peter Xu June 1, 2021, 2:11 p.m. UTC | #2
On Tue, Jun 01, 2021 at 08:09:34PM +0800, Hyman Huang wrote:
> Ping
> 
> though dirtyrate by sampling page may kind of be inaccurate,
> it still valuable for those who run qemu on non-x86 or kernel
> which does not support dirty ring, this patch is necessary i
> think, what would you think of it ?

Yes I think this patch is okay:

Reviewed-by: Peter Xu <peterx@redhat.com>

Maybe I can pick it up and repost with the hmp cmds as they conflict.

But note that even with this sample_pages parameter, my test still gets this
with a 200MB/s workload:

(qemu) calc_dirty_rate 10 16384
...
(qemu) info dirty_rate
Dirty rate: 21 (MB/s)

I think it means it does not solve the memory locality issue, so it may only be
useful for workload that mostly randomly distributed across all the ram.
However since normally this is used to evaluate "whether this customer VM can
be migrated", it also means maybe the admin has no idea about what type of
workload the guest is running.  Depending on this info will wrongly migrate a
very busy VM as the admin thought it's low loaded.

So I think at last if we want to make this feature to real use, we may need to
depend on either dirty logging or dirty ring to report the real numbers, even
without migration started.
diff mbox series

Patch

diff --git a/migration/dirtyrate.c b/migration/dirtyrate.c
index ccb9814..2ee3890 100644
--- a/migration/dirtyrate.c
+++ b/migration/dirtyrate.c
@@ -48,6 +48,12 @@  static bool is_sample_period_valid(int64_t sec)
     return true;
 }
 
+static bool is_sample_pages_valid(int64_t pages)
+{
+    return pages >= MIN_SAMPLE_PAGE_COUNT &&
+           pages <= MAX_SAMPLE_PAGE_COUNT;
+}
+
 static int dirtyrate_set_state(int *state, int old_state, int new_state)
 {
     assert(new_state < DIRTY_RATE_STATUS__MAX);
@@ -72,13 +78,15 @@  static struct DirtyRateInfo *query_dirty_rate_info(void)
     info->status = CalculatingState;
     info->start_time = DirtyStat.start_time;
     info->calc_time = DirtyStat.calc_time;
+    info->sample_pages = DirtyStat.sample_pages;
 
     trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));
 
     return info;
 }
 
-static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time)
+static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time,
+                                uint64_t sample_pages)
 {
     DirtyStat.total_dirty_samples = 0;
     DirtyStat.total_sample_count = 0;
@@ -86,6 +94,7 @@  static void init_dirtyrate_stat(int64_t start_time, int64_t calc_time)
     DirtyStat.dirty_rate = -1;
     DirtyStat.start_time = start_time;
     DirtyStat.calc_time = calc_time;
+    DirtyStat.sample_pages = sample_pages;
 }
 
 static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
@@ -361,6 +370,7 @@  void *get_dirtyrate_thread(void *arg)
     int ret;
     int64_t start_time;
     int64_t calc_time;
+    uint64_t sample_pages;
 
     ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
                               DIRTY_RATE_STATUS_MEASURING);
@@ -371,7 +381,8 @@  void *get_dirtyrate_thread(void *arg)
 
     start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) / 1000;
     calc_time = config.sample_period_seconds;
-    init_dirtyrate_stat(start_time, calc_time);
+    sample_pages = config.sample_pages_per_gigabytes;
+    init_dirtyrate_stat(start_time, calc_time, sample_pages);
 
     calculate_dirtyrate(config);
 
@@ -383,7 +394,8 @@  void *get_dirtyrate_thread(void *arg)
     return NULL;
 }
 
-void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
+void qmp_calc_dirty_rate(int64_t calc_time, bool has_sample_pages,
+                         int64_t sample_pages, Error **errp)
 {
     static struct DirtyRateConfig config;
     QemuThread thread;
@@ -404,6 +416,17 @@  void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
         return;
     }
 
+    if (has_sample_pages) {
+        if (!is_sample_pages_valid(sample_pages)) {
+            error_setg(errp, "sample-pages is out of range[%d, %d].",
+                            MIN_SAMPLE_PAGE_COUNT,
+                            MAX_SAMPLE_PAGE_COUNT);
+            return;
+        }
+    } else {
+        sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
+    }
+
     /*
      * Init calculation state as unstarted.
      */
@@ -415,7 +438,7 @@  void qmp_calc_dirty_rate(int64_t calc_time, Error **errp)
     }
 
     config.sample_period_seconds = calc_time;
-    config.sample_pages_per_gigabytes = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
+    config.sample_pages_per_gigabytes = sample_pages;
     qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
                        (void *)&config, QEMU_THREAD_DETACHED);
 }
diff --git a/migration/dirtyrate.h b/migration/dirtyrate.h
index 6ec4295..e1fd290 100644
--- a/migration/dirtyrate.h
+++ b/migration/dirtyrate.h
@@ -15,7 +15,6 @@ 
 
 /*
  * Sample 512 pages per GB as default.
- * TODO: Make it configurable.
  */
 #define DIRTYRATE_DEFAULT_SAMPLE_PAGES            512
 
@@ -35,6 +34,12 @@ 
 #define MIN_FETCH_DIRTYRATE_TIME_SEC              1
 #define MAX_FETCH_DIRTYRATE_TIME_SEC              60
 
+/*
+ * Take 1/16 pages in 1G as the maxmum sample page count
+ */
+#define MIN_SAMPLE_PAGE_COUNT                     128
+#define MAX_SAMPLE_PAGE_COUNT                     16384
+
 struct DirtyRateConfig {
     uint64_t sample_pages_per_gigabytes; /* sample pages per GB */
     int64_t sample_period_seconds; /* time duration between two sampling */
@@ -63,6 +68,7 @@  struct DirtyRateStat {
     int64_t dirty_rate; /* dirty rate in MB/s */
     int64_t start_time; /* calculation start time in units of second */
     int64_t calc_time; /* time duration of two sampling in units of second */
+    uint64_t sample_pages; /* sample pages per GB */
 };
 
 void *get_dirtyrate_thread(void *arg);
diff --git a/qapi/migration.json b/qapi/migration.json
index 0b17cce..890e745 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -1746,6 +1746,9 @@ 
 #
 # @calc-time: time in units of second for sample dirty pages
 #
+# @sample-pages: page count per GB for sample dirty pages
+#                the default value is 512 (since 6.1)
+#
 # Since: 5.2
 #
 ##
@@ -1753,7 +1756,8 @@ 
   'data': {'*dirty-rate': 'int64',
            'status': 'DirtyRateStatus',
            'start-time': 'int64',
-           'calc-time': 'int64'} }
+           'calc-time': 'int64',
+           'sample-pages': 'uint64'} }
 
 ##
 # @calc-dirty-rate:
@@ -1762,13 +1766,16 @@ 
 #
 # @calc-time: time in units of second for sample dirty pages
 #
+# @sample-pages: page count per GB for sample dirty pages
+#                the default value is 512 (since 6.1)
+#
 # Since: 5.2
 #
 # Example:
-#   {"command": "calc-dirty-rate", "data": {"calc-time": 1} }
+#   {"command": "calc-dirty-rate", "data": {"calc-time": 1, 'sample-pages': 512} }
 #
 ##
-{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64'} }
+{ 'command': 'calc-dirty-rate', 'data': {'calc-time': 'int64', '*sample-pages': 'int'} }
 
 ##
 # @query-dirty-rate: