[v4] ppc: make idle_timer a per-cpu variable
diff mbox series

Message ID 156352619712.50279.1247507600735238783.stgit@lep8c.aus.stglabs.ibm.com
State New
Headers show
Series
  • [v4] ppc: make idle_timer a per-cpu variable
Related show

Commit Message

Shivaprasad G Bhat July 19, 2019, 8:51 a.m. UTC
The current code is broken for more than vcpu as
each thread would overwrite idle_timer and there were
memory leaks.

Make it part of PowerPCCPU so that every thread has a
separate one. Avoid using the timer_new_ns which is
not the preferred way to create timers.

Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
Reviewed-by: Greg Kurz <groug@kaod.org>
---
 v3: https://lists.gnu.org/archive/html/qemu-devel/2019-07/msg04375.html
 Changes from v3:
    - Calling timer_del() before timer_deinit()

 target/ppc/cpu.h |    1 +
 target/ppc/kvm.c |   32 +++++++++++++++++---------------
 2 files changed, 18 insertions(+), 15 deletions(-)

Comments

David Gibson July 22, 2019, 8:44 a.m. UTC | #1
On Fri, Jul 19, 2019 at 03:51:26AM -0500, Shivaprasad G Bhat wrote:
> The current code is broken for more than vcpu as
> each thread would overwrite idle_timer and there were
> memory leaks.
> 
> Make it part of PowerPCCPU so that every thread has a
> separate one. Avoid using the timer_new_ns which is
> not the preferred way to create timers.
> 
> Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
> Reviewed-by: Greg Kurz <groug@kaod.org>

So, this looks correct, but..

..the whole idle_timer logic is used only in the case that we're
running with a KVM that doesn't support KVM_CAP_PPC_IRQ_LEVEL.  That
support appears to have been in since around v2.6.36 some 9 years
ago.  We could probably just drop support for such old kernels in qemu
instead.

> ---
>  v3: https://lists.gnu.org/archive/html/qemu-devel/2019-07/msg04375.html
>  Changes from v3:
>     - Calling timer_del() before timer_deinit()
> 
>  target/ppc/cpu.h |    1 +
>  target/ppc/kvm.c |   32 +++++++++++++++++---------------
>  2 files changed, 18 insertions(+), 15 deletions(-)
> 
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index c9beba2a5c..521086d91a 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -1190,6 +1190,7 @@ struct PowerPCCPU {
>      void *machine_data;
>      int32_t node_id; /* NUMA node this CPU belongs to */
>      PPCHash64Options *hash64_opts;
> +    QEMUTimer idle_timer;
>  
>      /* Fields related to migration compatibility hacks */
>      bool pre_2_8_migration;
> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> index 8a06d3171e..52d3292f45 100644
> --- a/target/ppc/kvm.c
> +++ b/target/ppc/kvm.c
> @@ -87,18 +87,6 @@ static int cap_large_decr;
>  
>  static uint32_t debug_inst_opcode;
>  
> -/*
> - * XXX We have a race condition where we actually have a level triggered
> - *     interrupt, but the infrastructure can't expose that yet, so the guest
> - *     takes but ignores it, goes to sleep and never gets notified that there's
> - *     still an interrupt pending.
> - *
> - *     As a quick workaround, let's just wake up again 20 ms after we injected
> - *     an interrupt. That way we can assure that we're always reinjecting
> - *     interrupts in case the guest swallowed them.
> - */
> -static QEMUTimer *idle_timer;
> -
>  static void kvm_kick_cpu(void *opaque)
>  {
>      PowerPCCPU *cpu = opaque;
> @@ -491,7 +479,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
>          return ret;
>      }
>  
> -    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
> +    timer_init_ns(&cpu->idle_timer, QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
>  
>      switch (cenv->mmu_model) {
>      case POWERPC_MMU_BOOKE206:
> @@ -523,6 +511,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
>  
>  int kvm_arch_destroy_vcpu(CPUState *cs)
>  {
> +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> +
> +    timer_del(&cpu->idle_timer);
> +    timer_deinit(&cpu->idle_timer);
> +
>      return 0;
>  }
>  
> @@ -1379,8 +1372,17 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
>              printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
>          }
>  
> -        /* Always wake up soon in case the interrupt was level based */
> -        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
> +        /*
> +         * XXX We have a race condition where we actually have a level
> +         *     triggered interrupt, but the infrastructure can't expose that
> +         *     yet, so the guest takes but ignores it, goes to sleep and
> +         *     never gets notified that there's still an interrupt pending.
> +         *
> +         *     As a quick workaround, let's just wake up again 20 ms after
> +         *     we injected an interrupt. That way we can assure that we're
> +         *     always reinjecting interrupts in case the guest swallowed them.
> +         */
> +        timer_mod(&cpu->idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
>                         (NANOSECONDS_PER_SECOND / 50));
>      }
>  
>
Greg Kurz July 22, 2019, 2:44 p.m. UTC | #2
On Mon, 22 Jul 2019 18:44:24 +1000
David Gibson <david@gibson.dropbear.id.au> wrote:

> On Fri, Jul 19, 2019 at 03:51:26AM -0500, Shivaprasad G Bhat wrote:
> > The current code is broken for more than vcpu as
> > each thread would overwrite idle_timer and there were
> > memory leaks.
> > 
> > Make it part of PowerPCCPU so that every thread has a
> > separate one. Avoid using the timer_new_ns which is
> > not the preferred way to create timers.
> > 
> > Signed-off-by: Shivaprasad G Bhat <sbhat@linux.ibm.com>
> > Reviewed-by: Greg Kurz <groug@kaod.org>
> 
> So, this looks correct, but..
> 
> ..the whole idle_timer logic is used only in the case that we're
> running with a KVM that doesn't support KVM_CAP_PPC_IRQ_LEVEL.  That
> support appears to have been in since around v2.6.36 some 9 years
> ago.  We could probably just drop support for such old kernels in qemu
> instead.
> 

Ha ! Yes, this is likely the best thing to do :)

> > ---
> >  v3: https://lists.gnu.org/archive/html/qemu-devel/2019-07/msg04375.html
> >  Changes from v3:
> >     - Calling timer_del() before timer_deinit()
> > 
> >  target/ppc/cpu.h |    1 +
> >  target/ppc/kvm.c |   32 +++++++++++++++++---------------
> >  2 files changed, 18 insertions(+), 15 deletions(-)
> > 
> > diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> > index c9beba2a5c..521086d91a 100644
> > --- a/target/ppc/cpu.h
> > +++ b/target/ppc/cpu.h
> > @@ -1190,6 +1190,7 @@ struct PowerPCCPU {
> >      void *machine_data;
> >      int32_t node_id; /* NUMA node this CPU belongs to */
> >      PPCHash64Options *hash64_opts;
> > +    QEMUTimer idle_timer;
> >  
> >      /* Fields related to migration compatibility hacks */
> >      bool pre_2_8_migration;
> > diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> > index 8a06d3171e..52d3292f45 100644
> > --- a/target/ppc/kvm.c
> > +++ b/target/ppc/kvm.c
> > @@ -87,18 +87,6 @@ static int cap_large_decr;
> >  
> >  static uint32_t debug_inst_opcode;
> >  
> > -/*
> > - * XXX We have a race condition where we actually have a level triggered
> > - *     interrupt, but the infrastructure can't expose that yet, so the guest
> > - *     takes but ignores it, goes to sleep and never gets notified that there's
> > - *     still an interrupt pending.
> > - *
> > - *     As a quick workaround, let's just wake up again 20 ms after we injected
> > - *     an interrupt. That way we can assure that we're always reinjecting
> > - *     interrupts in case the guest swallowed them.
> > - */
> > -static QEMUTimer *idle_timer;
> > -
> >  static void kvm_kick_cpu(void *opaque)
> >  {
> >      PowerPCCPU *cpu = opaque;
> > @@ -491,7 +479,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
> >          return ret;
> >      }
> >  
> > -    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
> > +    timer_init_ns(&cpu->idle_timer, QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
> >  
> >      switch (cenv->mmu_model) {
> >      case POWERPC_MMU_BOOKE206:
> > @@ -523,6 +511,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
> >  
> >  int kvm_arch_destroy_vcpu(CPUState *cs)
> >  {
> > +    PowerPCCPU *cpu = POWERPC_CPU(cs);
> > +
> > +    timer_del(&cpu->idle_timer);
> > +    timer_deinit(&cpu->idle_timer);
> > +
> >      return 0;
> >  }
> >  
> > @@ -1379,8 +1372,17 @@ void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
> >              printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
> >          }
> >  
> > -        /* Always wake up soon in case the interrupt was level based */
> > -        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
> > +        /*
> > +         * XXX We have a race condition where we actually have a level
> > +         *     triggered interrupt, but the infrastructure can't expose that
> > +         *     yet, so the guest takes but ignores it, goes to sleep and
> > +         *     never gets notified that there's still an interrupt pending.
> > +         *
> > +         *     As a quick workaround, let's just wake up again 20 ms after
> > +         *     we injected an interrupt. That way we can assure that we're
> > +         *     always reinjecting interrupts in case the guest swallowed them.
> > +         */
> > +        timer_mod(&cpu->idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
> >                         (NANOSECONDS_PER_SECOND / 50));
> >      }
> >  
> > 
>

Patch
diff mbox series

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index c9beba2a5c..521086d91a 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -1190,6 +1190,7 @@  struct PowerPCCPU {
     void *machine_data;
     int32_t node_id; /* NUMA node this CPU belongs to */
     PPCHash64Options *hash64_opts;
+    QEMUTimer idle_timer;
 
     /* Fields related to migration compatibility hacks */
     bool pre_2_8_migration;
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 8a06d3171e..52d3292f45 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -87,18 +87,6 @@  static int cap_large_decr;
 
 static uint32_t debug_inst_opcode;
 
-/*
- * XXX We have a race condition where we actually have a level triggered
- *     interrupt, but the infrastructure can't expose that yet, so the guest
- *     takes but ignores it, goes to sleep and never gets notified that there's
- *     still an interrupt pending.
- *
- *     As a quick workaround, let's just wake up again 20 ms after we injected
- *     an interrupt. That way we can assure that we're always reinjecting
- *     interrupts in case the guest swallowed them.
- */
-static QEMUTimer *idle_timer;
-
 static void kvm_kick_cpu(void *opaque)
 {
     PowerPCCPU *cpu = opaque;
@@ -491,7 +479,7 @@  int kvm_arch_init_vcpu(CPUState *cs)
         return ret;
     }
 
-    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
+    timer_init_ns(&cpu->idle_timer, QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 
     switch (cenv->mmu_model) {
     case POWERPC_MMU_BOOKE206:
@@ -523,6 +511,11 @@  int kvm_arch_init_vcpu(CPUState *cs)
 
 int kvm_arch_destroy_vcpu(CPUState *cs)
 {
+    PowerPCCPU *cpu = POWERPC_CPU(cs);
+
+    timer_del(&cpu->idle_timer);
+    timer_deinit(&cpu->idle_timer);
+
     return 0;
 }
 
@@ -1379,8 +1372,17 @@  void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
         }
 
-        /* Always wake up soon in case the interrupt was level based */
-        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
+        /*
+         * XXX We have a race condition where we actually have a level
+         *     triggered interrupt, but the infrastructure can't expose that
+         *     yet, so the guest takes but ignores it, goes to sleep and
+         *     never gets notified that there's still an interrupt pending.
+         *
+         *     As a quick workaround, let's just wake up again 20 ms after
+         *     we injected an interrupt. That way we can assure that we're
+         *     always reinjecting interrupts in case the guest swallowed them.
+         */
+        timer_mod(&cpu->idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                        (NANOSECONDS_PER_SECOND / 50));
     }