diff mbox

execute kvm_init_vcpu in the end of pc_new_cpu

Message ID 1241619033-21416-1-git-send-email-glommer@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Glauber Costa May 6, 2009, 2:10 p.m. UTC
When we create a new vcpu, we need to make sure that
all of the state it is going to use (apic state, for example)
already exists. We can do it nicely by making sure kvm_init_vcpu
is executed after everything else in cpu creation.

After that, the first call to KVM_SET_LAPIC ioctl will not find an
existant vcpu. So we introduce a function that tell us that the vcpu
is already initialized, and is it safe to call the ioctl.

We force the execution of the KVM_SET_LAPIC from within the new
vcpu thread, that will replace this first initialization call.

Signed-off-by: Glauber Costa <glommer@redhat.com>
---
 hw/apic.c            |   21 +++++++++++----------
 hw/pc.c              |    1 +
 qemu-kvm.c           |   10 ++++++++++
 qemu-kvm.h           |    4 ++++
 target-i386/helper.c |    2 --
 5 files changed, 26 insertions(+), 12 deletions(-)

Comments

Jan Kiszka May 6, 2009, 2:56 p.m. UTC | #1
Glauber Costa wrote:
> When we create a new vcpu, we need to make sure that
> all of the state it is going to use (apic state, for example)
> already exists. We can do it nicely by making sure kvm_init_vcpu
> is executed after everything else in cpu creation.
> 
> After that, the first call to KVM_SET_LAPIC ioctl will not find an
> existant vcpu. So we introduce a function that tell us that the vcpu
> is already initialized, and is it safe to call the ioctl.
> 
> We force the execution of the KVM_SET_LAPIC from within the new
> vcpu thread, that will replace this first initialization call.
> 
> Signed-off-by: Glauber Costa <glommer@redhat.com>
> ---
>  hw/apic.c            |   21 +++++++++++----------
>  hw/pc.c              |    1 +
>  qemu-kvm.c           |   10 ++++++++++
>  qemu-kvm.h           |    4 ++++
>  target-i386/helper.c |    2 --
>  5 files changed, 26 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/apic.c b/hw/apic.c
> index 466fb7e..b7cd18e 100644
> --- a/hw/apic.c
> +++ b/hw/apic.c
> @@ -891,6 +891,15 @@ static void kvm_kernel_lapic_load_from_user(APICState *s)
>  
>  #endif
>  
> +void qemu_kvm_load_lapic(CPUState *env)
> +{
> +#ifdef KVM_CAP_IRQCHIP
> +    if (kvm_enabled() && kvm_vcpu_inited(env) && qemu_kvm_irqchip_in_kernel()) {
> +        kvm_kernel_lapic_load_from_user(env->apic_state);
> +    }
> +#endif
> +}
> +
>  static void apic_save(QEMUFile *f, void *opaque)
>  {
>      APICState *s = opaque;
> @@ -965,11 +974,7 @@ static int apic_load(QEMUFile *f, void *opaque, int version_id)
>      if (version_id >= 2)
>          qemu_get_timer(f, s->timer);
>  
> -#ifdef KVM_CAP_IRQCHIP
> -    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
> -        kvm_kernel_lapic_load_from_user(s);
> -    }
> -#endif
> +    qemu_kvm_load_lapic(s->cpu_env);
>  
>      return 0;
>  }
> @@ -991,11 +996,7 @@ static void apic_reset(void *opaque)
>           */
>          s->lvt[APIC_LVT_LINT0] = 0x700;
>      }
> -#ifdef KVM_CAP_IRQCHIP
> -    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
> -        kvm_kernel_lapic_load_from_user(s);
> -    }
> -#endif
> +    qemu_kvm_load_lapic(s->cpu_env);
>  }
>  
>  static CPUReadMemoryFunc *apic_mem_read[3] = {
> diff --git a/hw/pc.c b/hw/pc.c
> index 34a4d25..1675510 100644
> --- a/hw/pc.c
> +++ b/hw/pc.c
> @@ -854,6 +854,7 @@ CPUState *pc_new_cpu(int cpu, const char *cpu_model, int pci_enabled)
>          if (pci_enabled) {
>              apic_init(env);
>          }
> +    qemu_init_vcpu(env);
>  	return env;

Yeah, it always looks funny when you patch mis-formatted code...

>  }
>  
> diff --git a/qemu-kvm.c b/qemu-kvm.c
> index 8c0d463..8fd80c1 100644
> --- a/qemu-kvm.c
> +++ b/qemu-kvm.c
> @@ -435,6 +435,9 @@ static void *ap_main_loop(void *_env)
>      kvm_create_vcpu(kvm_context, env->cpu_index);
>      kvm_qemu_init_env(env);
>  
> +    /* APIC state creation takes place before we get here. So despite the fact that
> +     * apic_reset() (called by apic_init) will also load the apic state, we have to redo it here
> +     */
>  #ifdef USE_KVM_DEVICE_ASSIGNMENT
>      /* do ioperm for io ports of assigned devices */
>      LIST_FOREACH(data, &ioperm_head, entries)
> @@ -446,6 +449,8 @@ static void *ap_main_loop(void *_env)
>      current_env->kvm_cpu_state.created = 1;
>      pthread_cond_signal(&qemu_vcpu_cond);
>  
> +    qemu_kvm_load_lapic(env);
> +

This feels strange after a first glance, I need to look closer... Ah
wait, found one reason for this feeling: APIC is x86 stuff, but you are
patching generic code.

>      /* and wait for machine initialization */
>      while (!qemu_system_ready)
>  	qemu_cond_wait(&qemu_system_cond);
> @@ -463,6 +468,11 @@ void kvm_init_vcpu(CPUState *env)
>  	qemu_cond_wait(&qemu_vcpu_cond);
>  }
>  
> +int kvm_vcpu_inited(CPUState *env)
> +{
> +    return env->kvm_cpu_state.created;
> +}
> +
>  int kvm_init_ap(void)
>  {
>  #ifdef TARGET_I386
> diff --git a/qemu-kvm.h b/qemu-kvm.h
> index c0549df..6fa9d5a 100644
> --- a/qemu-kvm.h
> +++ b/qemu-kvm.h
> @@ -16,6 +16,7 @@ int kvm_main_loop(void);
>  int kvm_qemu_init(void);
>  int kvm_qemu_create_context(void);
>  int kvm_init_ap(void);
> +int kvm_vcpu_inited(CPUState *env);
>  void kvm_qemu_destroy(void);
>  void kvm_load_registers(CPUState *env);
>  void kvm_save_registers(CPUState *env);
> @@ -31,6 +32,9 @@ int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap);
>  int kvm_qemu_init_env(CPUState *env);
>  int kvm_qemu_check_extension(int ext);
>  void kvm_apic_init(CPUState *env);
> +/* called from vcpu initialization */
> +void qemu_kvm_load_lapic(CPUState *env);
> +
>  int kvm_set_irq(int irq, int level, int *status);
>  
>  int kvm_physical_memory_set_dirty_tracking(int enable);
> diff --git a/target-i386/helper.c b/target-i386/helper.c
> index 719e31e..511b48c 100644
> --- a/target-i386/helper.c
> +++ b/target-i386/helper.c
> @@ -1696,7 +1696,5 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
>      kqemu_init(env);
>  #endif
>  
> -    qemu_init_vcpu(env);
> -
>      return env;
>  }

The reordering of qemu_init_vcpu could also simplify reset management (I
have a patch pending that adds a kvm hook to apic reset for solving it
within the existing scheme). But I would suggest to get an ack from
upstream first, or better even merge this pattern there and then adjust
qemu-kvm. The other way around is calling for troubles if qemu sticks
with a different approach.

Jan
Glauber Costa May 6, 2009, 3:17 p.m. UTC | #2
> > diff --git a/qemu-kvm.c b/qemu-kvm.c
> > index 8c0d463..8fd80c1 100644
> > --- a/qemu-kvm.c
> > +++ b/qemu-kvm.c
> > @@ -435,6 +435,9 @@ static void *ap_main_loop(void *_env)
> >      kvm_create_vcpu(kvm_context, env->cpu_index);
> >      kvm_qemu_init_env(env);
> >  
> > +    /* APIC state creation takes place before we get here. So despite the fact that
> > +     * apic_reset() (called by apic_init) will also load the apic state, we have to redo it here
> > +     */
> >  #ifdef USE_KVM_DEVICE_ASSIGNMENT
> >      /* do ioperm for io ports of assigned devices */
> >      LIST_FOREACH(data, &ioperm_head, entries)
> > @@ -446,6 +449,8 @@ static void *ap_main_loop(void *_env)
> >      current_env->kvm_cpu_state.created = 1;
> >      pthread_cond_signal(&qemu_vcpu_cond);
> >  
> > +    qemu_kvm_load_lapic(env);
> > +
> 
> This feels strange after a first glance, I need to look closer... Ah
> wait, found one reason for this feeling: APIC is x86 stuff, but you are
> patching generic code.
Yeah, I don't disagree. I could wrap it inside an ifdef, but I don't see this
as a strong enough reason to create yet another hook. Maybe we could put this
inside kvm_qemu_init_env()? Although it is not exactly creating any env,
at least it is arch specific...

> 
> >      /* and wait for machine initialization */
> >      while (!qemu_system_ready)
> >  	qemu_cond_wait(&qemu_system_cond);
> > @@ -463,6 +468,11 @@ void kvm_init_vcpu(CPUState *env)
> >  	qemu_cond_wait(&qemu_vcpu_cond);
> >  }
> >  
> > +int kvm_vcpu_inited(CPUState *env)
> > +{
> > +    return env->kvm_cpu_state.created;
> > +}
> > +
> >  int kvm_init_ap(void)
> >  {
> >  #ifdef TARGET_I386
> > diff --git a/qemu-kvm.h b/qemu-kvm.h
> > index c0549df..6fa9d5a 100644
> > --- a/qemu-kvm.h
> > +++ b/qemu-kvm.h
> > @@ -16,6 +16,7 @@ int kvm_main_loop(void);
> >  int kvm_qemu_init(void);
> >  int kvm_qemu_create_context(void);
> >  int kvm_init_ap(void);
> > +int kvm_vcpu_inited(CPUState *env);
> >  void kvm_qemu_destroy(void);
> >  void kvm_load_registers(CPUState *env);
> >  void kvm_save_registers(CPUState *env);
> > @@ -31,6 +32,9 @@ int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap);
> >  int kvm_qemu_init_env(CPUState *env);
> >  int kvm_qemu_check_extension(int ext);
> >  void kvm_apic_init(CPUState *env);
> > +/* called from vcpu initialization */
> > +void qemu_kvm_load_lapic(CPUState *env);
> > +
> >  int kvm_set_irq(int irq, int level, int *status);
> >  
> >  int kvm_physical_memory_set_dirty_tracking(int enable);
> > diff --git a/target-i386/helper.c b/target-i386/helper.c
> > index 719e31e..511b48c 100644
> > --- a/target-i386/helper.c
> > +++ b/target-i386/helper.c
> > @@ -1696,7 +1696,5 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
> >      kqemu_init(env);
> >  #endif
> >  
> > -    qemu_init_vcpu(env);
> > -
> >      return env;
> >  }
> 
> The reordering of qemu_init_vcpu could also simplify reset management (I
> have a patch pending that adds a kvm hook to apic reset for solving it
> within the existing scheme). But I would suggest to get an ack from
> upstream first, or better even merge this pattern there and then adjust
> qemu-kvm. The other way around is calling for troubles if qemu sticks
> with a different approach.

I've just sent a couple of patches do upstream qemu that moves everything inside
cpu_x86_init, and only calls kvm_vcpu_init when everything else is already
initialized. This includes reset management.

The reason I sent this patch separatedly, is that we would have to deal with
the fact that the first call to SET_LAPIC would fail anyway, this is qemu-kvm specific.

And upstream qemu does not have pc_new_cpu, so the clash would not be that big.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kiszka May 6, 2009, 3:43 p.m. UTC | #3
Glauber Costa wrote:
>>> diff --git a/qemu-kvm.c b/qemu-kvm.c
>>> index 8c0d463..8fd80c1 100644
>>> --- a/qemu-kvm.c
>>> +++ b/qemu-kvm.c
>>> @@ -435,6 +435,9 @@ static void *ap_main_loop(void *_env)
>>>      kvm_create_vcpu(kvm_context, env->cpu_index);
>>>      kvm_qemu_init_env(env);
>>>  
>>> +    /* APIC state creation takes place before we get here. So despite the fact that
>>> +     * apic_reset() (called by apic_init) will also load the apic state, we have to redo it here
>>> +     */
>>>  #ifdef USE_KVM_DEVICE_ASSIGNMENT
>>>      /* do ioperm for io ports of assigned devices */
>>>      LIST_FOREACH(data, &ioperm_head, entries)
>>> @@ -446,6 +449,8 @@ static void *ap_main_loop(void *_env)
>>>      current_env->kvm_cpu_state.created = 1;
>>>      pthread_cond_signal(&qemu_vcpu_cond);
>>>  
>>> +    qemu_kvm_load_lapic(env);
>>> +
>> This feels strange after a first glance, I need to look closer... Ah
>> wait, found one reason for this feeling: APIC is x86 stuff, but you are
>> patching generic code.
> Yeah, I don't disagree. I could wrap it inside an ifdef, but I don't see this
> as a strong enough reason to create yet another hook. Maybe we could put this
> inside kvm_qemu_init_env()? Although it is not exactly creating any env,
> at least it is arch specific...
> 
>>>      /* and wait for machine initialization */
>>>      while (!qemu_system_ready)
>>>  	qemu_cond_wait(&qemu_system_cond);
>>> @@ -463,6 +468,11 @@ void kvm_init_vcpu(CPUState *env)
>>>  	qemu_cond_wait(&qemu_vcpu_cond);
>>>  }
>>>  
>>> +int kvm_vcpu_inited(CPUState *env)
>>> +{
>>> +    return env->kvm_cpu_state.created;
>>> +}
>>> +
>>>  int kvm_init_ap(void)
>>>  {
>>>  #ifdef TARGET_I386
>>> diff --git a/qemu-kvm.h b/qemu-kvm.h
>>> index c0549df..6fa9d5a 100644
>>> --- a/qemu-kvm.h
>>> +++ b/qemu-kvm.h
>>> @@ -16,6 +16,7 @@ int kvm_main_loop(void);
>>>  int kvm_qemu_init(void);
>>>  int kvm_qemu_create_context(void);
>>>  int kvm_init_ap(void);
>>> +int kvm_vcpu_inited(CPUState *env);
>>>  void kvm_qemu_destroy(void);
>>>  void kvm_load_registers(CPUState *env);
>>>  void kvm_save_registers(CPUState *env);
>>> @@ -31,6 +32,9 @@ int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap);
>>>  int kvm_qemu_init_env(CPUState *env);
>>>  int kvm_qemu_check_extension(int ext);
>>>  void kvm_apic_init(CPUState *env);
>>> +/* called from vcpu initialization */
>>> +void qemu_kvm_load_lapic(CPUState *env);
>>> +
>>>  int kvm_set_irq(int irq, int level, int *status);
>>>  
>>>  int kvm_physical_memory_set_dirty_tracking(int enable);
>>> diff --git a/target-i386/helper.c b/target-i386/helper.c
>>> index 719e31e..511b48c 100644
>>> --- a/target-i386/helper.c
>>> +++ b/target-i386/helper.c
>>> @@ -1696,7 +1696,5 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
>>>      kqemu_init(env);
>>>  #endif
>>>  
>>> -    qemu_init_vcpu(env);
>>> -
>>>      return env;
>>>  }
>> The reordering of qemu_init_vcpu could also simplify reset management (I
>> have a patch pending that adds a kvm hook to apic reset for solving it
>> within the existing scheme). But I would suggest to get an ack from
>> upstream first, or better even merge this pattern there and then adjust
>> qemu-kvm. The other way around is calling for troubles if qemu sticks
>> with a different approach.
> 
> I've just sent a couple of patches do upstream qemu that moves everything inside
> cpu_x86_init, and only calls kvm_vcpu_init when everything else is already
> initialized. This includes reset management.

Missed that, having a look now.

> 
> The reason I sent this patch separatedly, is that we would have to deal with
> the fact that the first call to SET_LAPIC would fail anyway, this is qemu-kvm specific.
> 
> And upstream qemu does not have pc_new_cpu, so the clash would not be that big.

Yes, but it has qemu_vcpu_init. The only meta-difference is that
upstream has no in-kernel LAPIC yet. It also has to update the kvm state
when changing APIC stuff (its base specifically).

Jan
diff mbox

Patch

diff --git a/hw/apic.c b/hw/apic.c
index 466fb7e..b7cd18e 100644
--- a/hw/apic.c
+++ b/hw/apic.c
@@ -891,6 +891,15 @@  static void kvm_kernel_lapic_load_from_user(APICState *s)
 
 #endif
 
+void qemu_kvm_load_lapic(CPUState *env)
+{
+#ifdef KVM_CAP_IRQCHIP
+    if (kvm_enabled() && kvm_vcpu_inited(env) && qemu_kvm_irqchip_in_kernel()) {
+        kvm_kernel_lapic_load_from_user(env->apic_state);
+    }
+#endif
+}
+
 static void apic_save(QEMUFile *f, void *opaque)
 {
     APICState *s = opaque;
@@ -965,11 +974,7 @@  static int apic_load(QEMUFile *f, void *opaque, int version_id)
     if (version_id >= 2)
         qemu_get_timer(f, s->timer);
 
-#ifdef KVM_CAP_IRQCHIP
-    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
-        kvm_kernel_lapic_load_from_user(s);
-    }
-#endif
+    qemu_kvm_load_lapic(s->cpu_env);
 
     return 0;
 }
@@ -991,11 +996,7 @@  static void apic_reset(void *opaque)
          */
         s->lvt[APIC_LVT_LINT0] = 0x700;
     }
-#ifdef KVM_CAP_IRQCHIP
-    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel()) {
-        kvm_kernel_lapic_load_from_user(s);
-    }
-#endif
+    qemu_kvm_load_lapic(s->cpu_env);
 }
 
 static CPUReadMemoryFunc *apic_mem_read[3] = {
diff --git a/hw/pc.c b/hw/pc.c
index 34a4d25..1675510 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -854,6 +854,7 @@  CPUState *pc_new_cpu(int cpu, const char *cpu_model, int pci_enabled)
         if (pci_enabled) {
             apic_init(env);
         }
+    qemu_init_vcpu(env);
 	return env;
 }
 
diff --git a/qemu-kvm.c b/qemu-kvm.c
index 8c0d463..8fd80c1 100644
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ -435,6 +435,9 @@  static void *ap_main_loop(void *_env)
     kvm_create_vcpu(kvm_context, env->cpu_index);
     kvm_qemu_init_env(env);
 
+    /* APIC state creation takes place before we get here. So despite the fact that
+     * apic_reset() (called by apic_init) will also load the apic state, we have to redo it here
+     */
 #ifdef USE_KVM_DEVICE_ASSIGNMENT
     /* do ioperm for io ports of assigned devices */
     LIST_FOREACH(data, &ioperm_head, entries)
@@ -446,6 +449,8 @@  static void *ap_main_loop(void *_env)
     current_env->kvm_cpu_state.created = 1;
     pthread_cond_signal(&qemu_vcpu_cond);
 
+    qemu_kvm_load_lapic(env);
+
     /* and wait for machine initialization */
     while (!qemu_system_ready)
 	qemu_cond_wait(&qemu_system_cond);
@@ -463,6 +468,11 @@  void kvm_init_vcpu(CPUState *env)
 	qemu_cond_wait(&qemu_vcpu_cond);
 }
 
+int kvm_vcpu_inited(CPUState *env)
+{
+    return env->kvm_cpu_state.created;
+}
+
 int kvm_init_ap(void)
 {
 #ifdef TARGET_I386
diff --git a/qemu-kvm.h b/qemu-kvm.h
index c0549df..6fa9d5a 100644
--- a/qemu-kvm.h
+++ b/qemu-kvm.h
@@ -16,6 +16,7 @@  int kvm_main_loop(void);
 int kvm_qemu_init(void);
 int kvm_qemu_create_context(void);
 int kvm_init_ap(void);
+int kvm_vcpu_inited(CPUState *env);
 void kvm_qemu_destroy(void);
 void kvm_load_registers(CPUState *env);
 void kvm_save_registers(CPUState *env);
@@ -31,6 +32,9 @@  int kvm_update_guest_debug(CPUState *env, unsigned long reinject_trap);
 int kvm_qemu_init_env(CPUState *env);
 int kvm_qemu_check_extension(int ext);
 void kvm_apic_init(CPUState *env);
+/* called from vcpu initialization */
+void qemu_kvm_load_lapic(CPUState *env);
+
 int kvm_set_irq(int irq, int level, int *status);
 
 int kvm_physical_memory_set_dirty_tracking(int enable);
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 719e31e..511b48c 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1696,7 +1696,5 @@  CPUX86State *cpu_x86_init(const char *cpu_model)
     kqemu_init(env);
 #endif
 
-    qemu_init_vcpu(env);
-
     return env;
 }