diff mbox

[2/5] migration: Fix a potential issue

Message ID 1462333259-3237-3-git-send-email-liang.z.li@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Liang Li May 4, 2016, 3:40 a.m. UTC
At the end of live migration and before vm_start() on the destination
side, we should make sure all the decompression tasks are finished. If
this cannot be guaranteed, the VM may see incorrect memory data, or
updated memory may be overwritten by a decompression thread.
Add code that waits for all decompression threads to finish, to fix
this potential issue.

Suggested-by: David Alan Gilbert <dgilbert@redhat.com>
Signed-off-by: Liang Li <liang.z.li@intel.com>
---
 include/migration/migration.h |  1 +
 migration/migration.c         |  2 +-
 migration/ram.c               | 20 ++++++++++++++++++++
 3 files changed, 22 insertions(+), 1 deletion(-)

Comments

Dr. David Alan Gilbert May 4, 2016, 8:47 a.m. UTC | #1
* Liang Li (liang.z.li@intel.com) wrote:
> At the end of live migration and before vm_start() on the destination
> side, we should make sure all the decompression tasks are finished, if
> this can not be guaranteed, the VM may get the incorrect memory data,
> or the updated memory may be overwritten by the decompression thread.
> Add the code to fix this potential issue.
> 
> Suggested-by: David Alan Gilbert <dgilbert@redhat.com>
> Signed-off-by: Liang Li <liang.z.li@intel.com>
> ---
>  include/migration/migration.h |  1 +
>  migration/migration.c         |  2 +-
>  migration/ram.c               | 20 ++++++++++++++++++++
>  3 files changed, 22 insertions(+), 1 deletion(-)
> 
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index ac2c12c..1c9051e 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -223,6 +223,7 @@ void migrate_compress_threads_create(void);
>  void migrate_compress_threads_join(void);
>  void migrate_decompress_threads_create(void);
>  void migrate_decompress_threads_join(void);
> +void wait_for_decompress_done(void);
>  uint64_t ram_bytes_remaining(void);
>  uint64_t ram_bytes_transferred(void);
>  uint64_t ram_bytes_total(void);
> diff --git a/migration/migration.c b/migration/migration.c
> index 991313a..5228c28 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -347,7 +347,7 @@ static void process_incoming_migration_bh(void *opaque)
>      /* If global state section was not received or we are in running
>         state, we need to obey autostart. Any other state is set with
>         runstate_set. */
> -
> +    wait_for_decompress_done();

I wonder if that's early enough; the order here is that we get:

   ram_load
   devices load
   wait_for_decompress_done()
   start VM

Loading the devices can access guest RAM though (especially virtio); so I
think you need:

   ram_load
   wait_for_decompress_done()
   devices load
   start VM

I think you could do that by placing the wait_for_decompress_done()
at the end of ram_load().

Dave
   
>      if (!global_state_received() ||
>          global_state_get_runstate() == RUN_STATE_RUNNING) {
>          if (autostart) {
> diff --git a/migration/ram.c b/migration/ram.c
> index 7ab6ab5..4459b38 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2220,6 +2220,26 @@ static void *do_data_decompress(void *opaque)
>      return NULL;
>  }
>  
> +void wait_for_decompress_done(void)
> +{
> +    int idx, thread_count;
> +
> +    if (!migrate_use_compression()) {
> +        return;
> +    }
> +    thread_count = migrate_decompress_threads();
> +    for (idx = 0; idx < thread_count; idx++) {
> +        if (!decomp_param[idx].done) {
> +            qemu_mutex_lock(&decomp_done_lock);
> +            while (!decomp_param[idx].done) {
> +                qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
> +            }
> +            qemu_mutex_unlock(&decomp_done_lock);
> +        }
> +    }
> +
> +}
> +
>  void migrate_decompress_threads_create(void)
>  {
>      int i, thread_count;
> -- 
> 1.9.1
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Juan Quintela May 4, 2016, 9:17 a.m. UTC | #2
Liang Li <liang.z.li@intel.com> wrote:
> At the end of live migration and before vm_start() on the destination
> side, we should make sure all the decompression tasks are finished, if
> this can not be guaranteed, the VM may get the incorrect memory data,
> or the updated memory may be overwritten by the decompression thread.
> Add the code to fix this potential issue.
>
> Suggested-by: David Alan Gilbert <dgilbert@redhat.com>
> Signed-off-by: Liang Li <liang.z.li@intel.com>
> ---
>  include/migration/migration.h |  1 +
>  migration/migration.c         |  2 +-
>  migration/ram.c               | 20 ++++++++++++++++++++
>  3 files changed, 22 insertions(+), 1 deletion(-)
>
> diff --git a/include/migration/migration.h b/include/migration/migration.h
> index ac2c12c..1c9051e 100644
> --- a/include/migration/migration.h
> +++ b/include/migration/migration.h
> @@ -223,6 +223,7 @@ void migrate_compress_threads_create(void);
>  void migrate_compress_threads_join(void);
>  void migrate_decompress_threads_create(void);
>  void migrate_decompress_threads_join(void);
> +void wait_for_decompress_done(void);
>  uint64_t ram_bytes_remaining(void);
>  uint64_t ram_bytes_transferred(void);
>  uint64_t ram_bytes_total(void);
> diff --git a/migration/migration.c b/migration/migration.c
> index 991313a..5228c28 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -347,7 +347,7 @@ static void process_incoming_migration_bh(void *opaque)
>      /* If global state section was not received or we are in running
>         state, we need to obey autostart. Any other state is set with
>         runstate_set. */
> -
> +    wait_for_decompress_done();
>      if (!global_state_received() ||
>          global_state_get_runstate() == RUN_STATE_RUNNING) {
>          if (autostart) {
> diff --git a/migration/ram.c b/migration/ram.c
> index 7ab6ab5..4459b38 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -2220,6 +2220,26 @@ static void *do_data_decompress(void *opaque)
>      return NULL;
>  }
>  

why?

> +void wait_for_decompress_done(void)
> +{
> +    int idx, thread_count;
> +
> +    if (!migrate_use_compression()) {
> +        return;
> +    }
> +    thread_count = migrate_decompress_threads();
> +    for (idx = 0; idx < thread_count; idx++) {
> +        if (!decomp_param[idx].done) {
> +            qemu_mutex_lock(&decomp_done_lock);
> +            while (!decomp_param[idx].done) {
> +                qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
> +            }
> +            qemu_mutex_unlock(&decomp_done_lock);
> +        }
> +    }
> +
> +}
> +

    thread_count = migrate_decompress_threads();
    qemu_mutex_lock(&decomp_done_lock);
    for (idx = 0; idx < thread_count; idx++) {
       while (!decomp_param[idx].done) {
           qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
       }
    }
    qemu_mutex_unlock(&decomp_done_lock);

Simpler and correct, no?

Later, Juan.
Liang Li May 4, 2016, 10:05 a.m. UTC | #3
> -----Original Message-----
> From: Dr. David Alan Gilbert [mailto:dgilbert@redhat.com]
> Sent: Wednesday, May 04, 2016 4:48 PM
> To: Li, Liang Z
> Cc: qemu-devel@nongnu.org; quintela@redhat.com;
> amit.shah@redhat.com; berrange@redhat.com
> Subject: Re: [PATCH 2/5] migration: Fix a potential issue
> 
> * Liang Li (liang.z.li@intel.com) wrote:
> > At the end of live migration and before vm_start() on the destination
> > side, we should make sure all the decompression tasks are finished, if
> > this can not be guaranteed, the VM may get the incorrect memory data,
> > or the updated memory may be overwritten by the decompression thread.
> > Add the code to fix this potential issue.
> >
> > Suggested-by: David Alan Gilbert <dgilbert@redhat.com>
> > Signed-off-by: Liang Li <liang.z.li@intel.com>
> > ---
> >  include/migration/migration.h |  1 +
> >  migration/migration.c         |  2 +-
> >  migration/ram.c               | 20 ++++++++++++++++++++
> >  3 files changed, 22 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/migration/migration.h
> > b/include/migration/migration.h index ac2c12c..1c9051e 100644
> > --- a/include/migration/migration.h
> > +++ b/include/migration/migration.h
> > @@ -223,6 +223,7 @@ void migrate_compress_threads_create(void);
> >  void migrate_compress_threads_join(void);
> >  void migrate_decompress_threads_create(void);
> >  void migrate_decompress_threads_join(void);
> > +void wait_for_decompress_done(void);
> >  uint64_t ram_bytes_remaining(void);
> >  uint64_t ram_bytes_transferred(void);  uint64_t
> > ram_bytes_total(void); diff --git a/migration/migration.c
> > b/migration/migration.c index 991313a..5228c28 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -347,7 +347,7 @@ static void process_incoming_migration_bh(void
> *opaque)
> >      /* If global state section was not received or we are in running
> >         state, we need to obey autostart. Any other state is set with
> >         runstate_set. */
> > -
> > +    wait_for_decompress_done();
> 
> I wonder if that's early enough; the order here is that we get:
> 
>    ram_load
>    devices load
>    wait_for_decompress_done()
>    start VM
> 
> Loading the devices can access guest RAM though (especially virtio); so I
> think you need:
> 
>    ram_load
>    wait_for_decompress_done()
>    devices load
>    start VM
> 
> I think you could do that by placing the wait_for_decompress_done() at the
> end of ram_load().
> 
> Dave

You are right, will change. Thanks!

Liang
> 
> >      if (!global_state_received() ||
> >          global_state_get_runstate() == RUN_STATE_RUNNING) {
> >          if (autostart) {
> > diff --git a/migration/ram.c b/migration/ram.c index 7ab6ab5..4459b38
> > 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -2220,6 +2220,26 @@ static void *do_data_decompress(void *opaque)
> >      return NULL;
> >  }
> >
> > +void wait_for_decompress_done(void)
> > +{
> > +    int idx, thread_count;
> > +
> > +    if (!migrate_use_compression()) {
> > +        return;
> > +    }
> > +    thread_count = migrate_decompress_threads();
> > +    for (idx = 0; idx < thread_count; idx++) {
> > +        if (!decomp_param[idx].done) {
> > +            qemu_mutex_lock(&decomp_done_lock);
> > +            while (!decomp_param[idx].done) {
> > +                qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
> > +            }
> > +            qemu_mutex_unlock(&decomp_done_lock);
> > +        }
> > +    }
> > +
> > +}
> > +
> >  void migrate_decompress_threads_create(void)
> >  {
> >      int i, thread_count;
> > --
> > 1.9.1
> >
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Liang Li May 4, 2016, 2:13 p.m. UTC | #4
> -----Original Message-----
> From: Qemu-devel [mailto:qemu-devel-
> bounces+liang.z.li=intel.com@nongnu.org] On Behalf Of Juan Quintela
> Sent: Wednesday, May 04, 2016 5:18 PM
> To: Li, Liang Z
> Cc: amit.shah@redhat.com; qemu-devel@nongnu.org; dgilbert@redhat.com
> Subject: Re: [Qemu-devel] [PATCH 2/5] migration: Fix a potential issue
> 
> Liang Li <liang.z.li@intel.com> wrote:
> > At the end of live migration and before vm_start() on the destination
> > side, we should make sure all the decompression tasks are finished, if
> > this can not be guaranteed, the VM may get the incorrect memory data,
> > or the updated memory may be overwritten by the decompression thread.
> > Add the code to fix this potential issue.
> >
> > Suggested-by: David Alan Gilbert <dgilbert@redhat.com>
> > Signed-off-by: Liang Li <liang.z.li@intel.com>
> > ---
> >  include/migration/migration.h |  1 +
> >  migration/migration.c         |  2 +-
> >  migration/ram.c               | 20 ++++++++++++++++++++
> >  3 files changed, 22 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/migration/migration.h
> > b/include/migration/migration.h index ac2c12c..1c9051e 100644
> > --- a/include/migration/migration.h
> > +++ b/include/migration/migration.h
> > @@ -223,6 +223,7 @@ void migrate_compress_threads_create(void);
> >  void migrate_compress_threads_join(void);
> >  void migrate_decompress_threads_create(void);
> >  void migrate_decompress_threads_join(void);
> > +void wait_for_decompress_done(void);
> >  uint64_t ram_bytes_remaining(void);
> >  uint64_t ram_bytes_transferred(void);  uint64_t
> > ram_bytes_total(void); diff --git a/migration/migration.c
> > b/migration/migration.c index 991313a..5228c28 100644
> > --- a/migration/migration.c
> > +++ b/migration/migration.c
> > @@ -347,7 +347,7 @@ static void process_incoming_migration_bh(void
> *opaque)
> >      /* If global state section was not received or we are in running
> >         state, we need to obey autostart. Any other state is set with
> >         runstate_set. */
> > -
> > +    wait_for_decompress_done();
> >      if (!global_state_received() ||
> >          global_state_get_runstate() == RUN_STATE_RUNNING) {
> >          if (autostart) {
> > diff --git a/migration/ram.c b/migration/ram.c index 7ab6ab5..4459b38
> > 100644
> > --- a/migration/ram.c
> > +++ b/migration/ram.c
> > @@ -2220,6 +2220,26 @@ static void *do_data_decompress(void *opaque)
> >      return NULL;
> >  }
> >
> 
> why?
> 
> > +void wait_for_decompress_done(void)
> > +{
> > +    int idx, thread_count;
> > +
> > +    if (!migrate_use_compression()) {
> > +        return;
> > +    }
> > +    thread_count = migrate_decompress_threads();
> > +    for (idx = 0; idx < thread_count; idx++) {
> > +        if (!decomp_param[idx].done) {
> > +            qemu_mutex_lock(&decomp_done_lock);
> > +            while (!decomp_param[idx].done) {
> > +                qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
> > +            }
> > +            qemu_mutex_unlock(&decomp_done_lock);
> > +        }
> > +    }
> > +
> > +}
> > +
> 
>     thread_count = migrate_decompress_threads();
>     qemu_mutex_lock(&decomp_done_lock);
>     for (idx = 0; idx < thread_count; idx++) {
>        while (!decomp_param[idx].done) {
>            qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
>        }
>     }
>     qemu_mutex_unlock(&decomp_done_lock);
> 
> Simpler and correct, no?
> 
> Later, Juan.


Yes, thanks! 
Again, I think I should clean up the multi-threaded compression code.

Liang
diff mbox

Patch

diff --git a/include/migration/migration.h b/include/migration/migration.h
index ac2c12c..1c9051e 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -223,6 +223,7 @@  void migrate_compress_threads_create(void);
 void migrate_compress_threads_join(void);
 void migrate_decompress_threads_create(void);
 void migrate_decompress_threads_join(void);
+void wait_for_decompress_done(void);
 uint64_t ram_bytes_remaining(void);
 uint64_t ram_bytes_transferred(void);
 uint64_t ram_bytes_total(void);
diff --git a/migration/migration.c b/migration/migration.c
index 991313a..5228c28 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -347,7 +347,7 @@  static void process_incoming_migration_bh(void *opaque)
     /* If global state section was not received or we are in running
        state, we need to obey autostart. Any other state is set with
        runstate_set. */
-
+    wait_for_decompress_done();
     if (!global_state_received() ||
         global_state_get_runstate() == RUN_STATE_RUNNING) {
         if (autostart) {
diff --git a/migration/ram.c b/migration/ram.c
index 7ab6ab5..4459b38 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2220,6 +2220,26 @@  static void *do_data_decompress(void *opaque)
     return NULL;
 }
 
+void wait_for_decompress_done(void)
+{
+    int idx, thread_count;
+
+    if (!migrate_use_compression()) {
+        return;
+    }
+    thread_count = migrate_decompress_threads();
+    for (idx = 0; idx < thread_count; idx++) {
+        if (!decomp_param[idx].done) {
+            qemu_mutex_lock(&decomp_done_lock);
+            while (!decomp_param[idx].done) {
+                qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
+            }
+            qemu_mutex_unlock(&decomp_done_lock);
+        }
+    }
+
+}
+
 void migrate_decompress_threads_create(void)
 {
     int i, thread_count;