
[RFC,v3,3/4] lock to protect memslots

Message ID deaf1c683fd32c28a72853710b9564dedc7d0060.1313076455.git.udeshpan@redhat.com (mailing list archive)
State New, archived

Commit Message

Umesh Deshpande Aug. 11, 2011, 3:32 p.m. UTC
This patch introduces a mutex to protect the migration thread against the
removal of memslots during the guest migration iteration.

Signed-off-by: Umesh Deshpande <udeshpan@redhat.com>
---
 arch_init.c     |   10 ++++++++++
 buffered_file.c |    4 ++++
 cpu-all.h       |    2 ++
 cpus.c          |   12 ++++++++++++
 exec.c          |   10 ++++++++++
 qemu-common.h   |    2 ++
 6 files changed, 40 insertions(+), 0 deletions(-)

Comments

Paolo Bonzini Aug. 11, 2011, 4:20 p.m. UTC | #1
> diff --git a/buffered_file.c b/buffered_file.c
> index b64ada7..5735e18 100644
> --- a/buffered_file.c
> +++ b/buffered_file.c
> @@ -243,7 +243,9 @@ static void *migrate_vm(void *opaque)
>
>       while (!s->closed) {
>           if (s->freeze_output) {
> +            qemu_mutex_unlock_iothread();
>               s->wait_for_unfreeze(s);
> +            qemu_mutex_lock_iothread();
>               s->freeze_output = 0;
>               continue;
>           }
> @@ -255,7 +257,9 @@ static void *migrate_vm(void *opaque)
>           current_time = qemu_get_clock_ms(rt_clock);
>           if (!s->closed && (expire_time > current_time)) {
>               tv.tv_usec = 1000 * (expire_time - current_time);
> +            qemu_mutex_unlock_iothread();
>               select(0, NULL, NULL, NULL, &tv);
> +            qemu_mutex_lock_iothread();
>               continue;
>           }
>

I believe these should already be in patch 1, or in any case in a separate patch.

> diff --git a/cpus.c b/cpus.c
> index de70e02..6090c44 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -666,6 +666,7 @@ int qemu_init_main_loop(void)
>       qemu_cond_init(&qemu_work_cond);
>       qemu_mutex_init(&qemu_fair_mutex);
>       qemu_mutex_init(&qemu_global_mutex);
> +    qemu_mutex_init(&ram_list.mutex);
>       qemu_mutex_lock(&qemu_global_mutex);
>
>       qemu_thread_get_self(&io_thread);

This is only executed when CONFIG_IOTHREAD is set; you can probably move 
it somewhere in exec.c.

> @@ -919,6 +920,17 @@ void qemu_mutex_unlock_iothread(void)
>       qemu_mutex_unlock(&qemu_global_mutex);
>   }
>
> +void qemu_mutex_lock_ramlist(void)
> +{
> +    qemu_mutex_lock(&ram_list.mutex);
> +}
> +
> +void qemu_mutex_unlock_ramlist(void)
> +{
> +    qemu_mutex_unlock(&ram_list.mutex);
> +}
> +
> +

And these too.
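
A sketch of the suggested move, with the init and wrappers relocated to 
exec.c, where they are compiled regardless of CONFIG_IOTHREAD (calling 
the init from cpu_exec_init_all() is an assumption for illustration):

/* exec.c -- sketch only */
static void ram_list_init(void)
{
    /* would be called once, e.g. from cpu_exec_init_all() */
    qemu_mutex_init(&ram_list.mutex);
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}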

>   static int all_vcpus_paused(void)
>   {
>       CPUState *penv = first_cpu;
> diff --git a/exec.c b/exec.c
> index 0e2ce57..7bfb36f 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -2972,6 +2972,8 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
>       }
>       new_block->length = size;
>
> +    qemu_mutex_lock_ramlist();
> +
>       QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
>
>       ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
> @@ -2979,6 +2981,8 @@ ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
>       memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
>              0xff, size >> TARGET_PAGE_BITS);
>
> +    qemu_mutex_unlock_ramlist();
> +
>       if (kvm_enabled())
>           kvm_setup_guest_memory(new_block->host, size);
>
> @@ -2996,7 +3000,9 @@ void qemu_ram_free_from_ptr(ram_addr_t addr)
>
>       QLIST_FOREACH(block, &ram_list.blocks, next) {
>           if (addr == block->offset) {
> +            qemu_mutex_lock_ramlist();
>               QLIST_REMOVE(block, next);
> +            qemu_mutex_unlock_ramlist();
>               qemu_free(block);
>               return;
>           }
> @@ -3009,7 +3015,9 @@ void qemu_ram_free(ram_addr_t addr)
>
>       QLIST_FOREACH(block, &ram_list.blocks, next) {
>           if (addr == block->offset) {
> +            qemu_mutex_lock_ramlist();
>               QLIST_REMOVE(block, next);
> +            qemu_mutex_unlock_ramlist();
>               if (block->flags & RAM_PREALLOC_MASK) {
>                   ;
>               } else if (mem_path) {
> @@ -3117,8 +3125,10 @@ void *qemu_get_ram_ptr(ram_addr_t addr)
>           if (addr - block->offset < block->length) {
>               /* Move this entry to the start of the list.  */
>               if (block != QLIST_FIRST(&ram_list.blocks)) {
> +                qemu_mutex_lock_ramlist();
>                   QLIST_REMOVE(block, next);
>                   QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
> +                qemu_mutex_unlock_ramlist();

Theoretically qemu_get_ram_ptr should be protected.  The problem is not 
just accessing the ramlist, it is accessing the data underneath it 
before anyone frees it.  Luckily we can set aside that problem for now, 
because qemu_ram_free_from_ptr is only used by device assignment and 
device assignment makes VMs unmigratable.

If we support generic RAM hotplug/hotunplug, we would probably need 
something RCU-like to protect accesses, since protecting all uses of 
qemu_get_ram_ptr is not practical.
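
For illustration, an RCU-like scheme might look as follows. This is a 
sketch only: QEMU has no RCU primitives at this point, so 
rcu_read_lock(), rcu_read_unlock() and call_rcu() are hypothetical, as 
are the rcu field in RAMBlock and the ram_block_reclaim callback.

void *qemu_get_ram_ptr_sketch(ram_addr_t addr)
{
    RAMBlock *block;
    void *host = NULL;

    rcu_read_lock();    /* hypothetical: blocks seen here stay alive */
    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            host = block->host + (addr - block->offset);
            break;
        }
    }
    rcu_read_unlock();  /* a deferred free may run after this point, so
                         * a real caller would have to remain inside the
                         * read-side critical section while using host */
    return host;
}

/* The unplug path would unlink the block and defer the actual free
 * until every reader has left its critical section: */
static void ram_block_unplug_sketch(RAMBlock *block)
{
    QLIST_REMOVE(block, next);
    call_rcu(&block->rcu, ram_block_reclaim);   /* both hypothetical */
}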

>               }
>               if (xen_mapcache_enabled()) {
>                   /* We need to check if the requested address is in the RAM
> diff --git a/qemu-common.h b/qemu-common.h
> index abd7a75..b802883 100644
> --- a/qemu-common.h
> +++ b/qemu-common.h
> @@ -212,6 +212,8 @@ char *qemu_strndup(const char *str, size_t size);
>
>   void qemu_mutex_lock_iothread(void);
>   void qemu_mutex_unlock_iothread(void);
> +void qemu_mutex_lock_ramlist(void);
> +void qemu_mutex_unlock_ramlist(void);
>
>   int qemu_open(const char *name, int flags, ...);
>   ssize_t qemu_write_full(int fd, const void *buf, size_t count)

Paolo
Paolo Bonzini Aug. 12, 2011, 6:45 a.m. UTC | #2
On 08/11/2011 06:20 PM, Paolo Bonzini wrote:
>>
>> +                qemu_mutex_lock_ramlist();
>>                   QLIST_REMOVE(block, next);
>>                   QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
>> +                qemu_mutex_unlock_ramlist();
>
> Theoretically qemu_get_ram_ptr should be protected.  The problem is not
> just accessing the ramlist, it is accessing the data underneath it
> before anyone frees it.  Luckily we can set aside that problem for now,
> because qemu_ram_free_from_ptr is only used by device assignment and
> device assignment makes VMs unmigratable.

Hmm, rethinking it, all the loops in exec.c should be protected 
by the mutex.  That's not too good because qemu_get_ram_ptr is a hot 
path for TCG.  Perhaps you can also avoid the mutex entirely, and just 
disable the above most-recently-used-block optimization while migration 
is running.  It's not a complete solution, but it could be 
good enough until we have RAM hot-plug/hot-unplug.
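
A sketch of that alternative, with the move-to-front skipped while a 
migration is in flight (migration_is_active() is a hypothetical 
predicate standing in for whatever flag the migration code would 
expose; the Xen mapcache handling is omitted):

void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    QLIST_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            /* Only reorder the list when no migration thread may be
             * traversing it concurrently. */
            if (!migration_is_active() &&
                block != QLIST_FIRST(&ram_list.blocks)) {
                QLIST_REMOVE(block, next);
                QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
            }
            return block->host + (addr - block->offset);
        }
    }
    return NULL;
}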

Paolo
Umesh Deshpande Aug. 15, 2011, 6:45 a.m. UTC | #3
On 08/12/2011 02:45 AM, Paolo Bonzini wrote:
> On 08/11/2011 06:20 PM, Paolo Bonzini wrote:
>>>
>>> +                qemu_mutex_lock_ramlist();
>>>                   QLIST_REMOVE(block, next);
>>>                   QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
>>> +                qemu_mutex_unlock_ramlist();
>>
>> Theoretically qemu_get_ram_ptr should be protected.  The problem is not
>> just accessing the ramlist, it is accessing the data underneath it
>> before anyone frees it.  Luckily we can set aside that problem for now,
>> because qemu_ram_free_from_ptr is only used by device assignment and
>> device assignment makes VMs unmigratable.
>
> Hmm, rethinking it, all the loops in exec.c should be protected 
> by the mutex. 
The other loops in exec.c only read the ram_list members, and the 
migration thread doesn't modify ram_list.
Also, taking the mutex inside those loops would make the functions 
uncallable from code that already holds the ram_list mutex to protect 
itself against memslot removal (the migration thread, in our case).
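
In other words (a sketch of the self-deadlock with a non-recursive 
mutex):

static void migration_thread_sketch(void)
{
    qemu_mutex_lock_ramlist();
    /* ram_save_live() walks guest RAM here; if an exec.c helper called
     * from this point took the ram_list mutex again, it would block
     * forever on the lock this thread already holds. */
    qemu_mutex_unlock_ramlist();
}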
> That's not too good because qemu_get_ram_ptr is a hot path for TCG. 
It looks like qemu_get_ram_ptr isn't called from the source-side code 
of guest migration.
> Perhaps you can also avoid the mutex entirely, and just disable the 
> above optimization for most-recently-used-block while migration is 
> running.  It's not a complete solution, but it could be good enough 
> until we have RAM hot-plug/hot-unplug.
>
> Paolo

Marcelo Tosatti Aug. 15, 2011, 7:26 a.m. UTC | #4
On Fri, Aug 12, 2011 at 08:45:50AM +0200, Paolo Bonzini wrote:
> On 08/11/2011 06:20 PM, Paolo Bonzini wrote:
> >>
> >>+                qemu_mutex_lock_ramlist();
> >>                  QLIST_REMOVE(block, next);
> >>                  QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
> >>+                qemu_mutex_unlock_ramlist();
> >
> >Theoretically qemu_get_ram_ptr should be protected.  The problem is not
> >just accessing the ramlist, it is accessing the data underneath it
> >before anyone frees it.  Luckily we can set aside that problem for now,
> >because qemu_ram_free_from_ptr is only used by device assignment and
> >device assignment makes VMs unmigratable.
> 
> Hmm, rethinking it, all the loops in exec.c should be
> protected by the mutex.  That's not too good because
> qemu_get_ram_ptr is a hot path for TCG.

Right.

>  Perhaps you can also avoid
> the mutex entirely, and just disable the above optimization for
> most-recently-used-block while migration is running.  It's not a
> complete solution, but it could be good enough until we have RAM
> hot-plug/hot-unplug.

Actually, the previous patchset never traverses the ramlist without 
qemu_mutex held, which is safe with respect to the 
most-recently-used-block optimization.

So, agreed that the new ramlist lock is not necessary for now.

Paolo Bonzini Aug. 15, 2011, 2:10 p.m. UTC | #5
On 08/14/2011 11:45 PM, Umesh Deshpande wrote:
>> That's not too good because qemu_get_ram_ptr is a hot path for TCG.
>
> Looks like qemu_get_ram_ptr isn't called from the source side code of
> guest migration.

Right, but since you are accessing the list from both migration and 
non-migration threads, qemu_get_ram_ptr needs to take the ram_list lock. 
The ram_list and iothread mutexes together protect the "next" member, 
and you need to hold them both when modifying the list.
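
As a sketch of that rule (both helpers are hypothetical, for 
illustration only):

/* Writer: runs in the io thread with the iothread mutex already held,
 * and additionally takes the ram_list mutex around the update, so that
 * both classes of readers are excluded. */
static void ram_list_remove_block(RAMBlock *block)
{
    qemu_mutex_lock_ramlist();
    QLIST_REMOVE(block, next);
    qemu_mutex_unlock_ramlist();
}

/* Reader in the migration thread: it has dropped the iothread mutex,
 * so it must hold the ram_list mutex for the whole traversal.  (A vcpu
 * or io thread holding the iothread mutex could traverse without it.) */
static ram_addr_t ram_list_total_sketch(void)
{
    RAMBlock *block;
    ram_addr_t total = 0;

    qemu_mutex_lock_ramlist();
    QLIST_FOREACH(block, &ram_list.blocks, next) {
        total += block->length;
    }
    qemu_mutex_unlock_ramlist();
    return total;
}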

See the reply to Marcelo for a proposed solution.

Paolo

Patch

diff --git a/arch_init.c b/arch_init.c
index 484b39d..f0ddda6 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -298,6 +298,11 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
     bytes_transferred_last = bytes_transferred;
     bwidth = qemu_get_clock_ns(rt_clock);
 
+    if (stage != 3) {
+        qemu_mutex_lock_ramlist();
+        qemu_mutex_unlock_iothread();
+    }
+
     while (!qemu_file_rate_limit(f)) {
         int bytes_sent;
 
@@ -308,6 +313,11 @@  int ram_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
         }
     }
 
+    if (stage != 3) {
+        qemu_mutex_lock_iothread();
+        qemu_mutex_unlock_ramlist();
+    }
+
     bwidth = qemu_get_clock_ns(rt_clock) - bwidth;
     bwidth = (bytes_transferred - bytes_transferred_last) / bwidth;
 
diff --git a/buffered_file.c b/buffered_file.c
index b64ada7..5735e18 100644
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -243,7 +243,9 @@  static void *migrate_vm(void *opaque)
 
     while (!s->closed) {
         if (s->freeze_output) {
+            qemu_mutex_unlock_iothread();
             s->wait_for_unfreeze(s);
+            qemu_mutex_lock_iothread();
             s->freeze_output = 0;
             continue;
         }
@@ -255,7 +257,9 @@  static void *migrate_vm(void *opaque)
         current_time = qemu_get_clock_ms(rt_clock);
         if (!s->closed && (expire_time > current_time)) {
             tv.tv_usec = 1000 * (expire_time - current_time);
+            qemu_mutex_unlock_iothread();
             select(0, NULL, NULL, NULL, &tv);
+            qemu_mutex_lock_iothread();
             continue;
         }
 
diff --git a/cpu-all.h b/cpu-all.h
index e839100..6a5dbb3 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -21,6 +21,7 @@ 
 
 #include "qemu-common.h"
 #include "cpu-common.h"
+#include "qemu-thread.h"
 
 /* some important defines:
  *
@@ -931,6 +932,7 @@  typedef struct RAMBlock {
 } RAMBlock;
 
 typedef struct RAMList {
+    QemuMutex mutex;
     uint8_t *phys_dirty;
     QLIST_HEAD(ram, RAMBlock) blocks;
 } RAMList;
diff --git a/cpus.c b/cpus.c
index de70e02..6090c44 100644
--- a/cpus.c
+++ b/cpus.c
@@ -666,6 +666,7 @@  int qemu_init_main_loop(void)
     qemu_cond_init(&qemu_work_cond);
     qemu_mutex_init(&qemu_fair_mutex);
     qemu_mutex_init(&qemu_global_mutex);
+    qemu_mutex_init(&ram_list.mutex);
     qemu_mutex_lock(&qemu_global_mutex);
 
     qemu_thread_get_self(&io_thread);
@@ -919,6 +920,17 @@  void qemu_mutex_unlock_iothread(void)
     qemu_mutex_unlock(&qemu_global_mutex);
 }
 
+void qemu_mutex_lock_ramlist(void)
+{
+    qemu_mutex_lock(&ram_list.mutex);
+}
+
+void qemu_mutex_unlock_ramlist(void)
+{
+    qemu_mutex_unlock(&ram_list.mutex);
+}
+
+
 static int all_vcpus_paused(void)
 {
     CPUState *penv = first_cpu;
diff --git a/exec.c b/exec.c
index 0e2ce57..7bfb36f 100644
--- a/exec.c
+++ b/exec.c
@@ -2972,6 +2972,8 @@  ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
     }
     new_block->length = size;
 
+    qemu_mutex_lock_ramlist();
+
     QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
 
     ram_list.phys_dirty = qemu_realloc(ram_list.phys_dirty,
@@ -2979,6 +2981,8 @@  ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
            0xff, size >> TARGET_PAGE_BITS);
 
+    qemu_mutex_unlock_ramlist();
+
     if (kvm_enabled())
         kvm_setup_guest_memory(new_block->host, size);
 
@@ -2996,7 +3000,9 @@  void qemu_ram_free_from_ptr(ram_addr_t addr)
 
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
+            qemu_mutex_lock_ramlist();
             QLIST_REMOVE(block, next);
+            qemu_mutex_unlock_ramlist();
             qemu_free(block);
             return;
         }
@@ -3009,7 +3015,9 @@  void qemu_ram_free(ram_addr_t addr)
 
     QLIST_FOREACH(block, &ram_list.blocks, next) {
         if (addr == block->offset) {
+            qemu_mutex_lock_ramlist();
             QLIST_REMOVE(block, next);
+            qemu_mutex_unlock_ramlist();
             if (block->flags & RAM_PREALLOC_MASK) {
                 ;
             } else if (mem_path) {
@@ -3117,8 +3125,10 @@  void *qemu_get_ram_ptr(ram_addr_t addr)
         if (addr - block->offset < block->length) {
             /* Move this entry to the start of the list.  */
             if (block != QLIST_FIRST(&ram_list.blocks)) {
+                qemu_mutex_lock_ramlist();
                 QLIST_REMOVE(block, next);
                 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
+                qemu_mutex_unlock_ramlist();
             }
             if (xen_mapcache_enabled()) {
                 /* We need to check if the requested address is in the RAM
diff --git a/qemu-common.h b/qemu-common.h
index abd7a75..b802883 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -212,6 +212,8 @@  char *qemu_strndup(const char *str, size_t size);
 
 void qemu_mutex_lock_iothread(void);
 void qemu_mutex_unlock_iothread(void);
+void qemu_mutex_lock_ramlist(void);
+void qemu_mutex_unlock_ramlist(void);
 
 int qemu_open(const char *name, int flags, ...);
 ssize_t qemu_write_full(int fd, const void *buf, size_t count)