diff mbox series

[RFC,1/4] target/ppc: TCG: Migrate tb_offset and decr

Message ID 20220224185817.2207228-2-farosas@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series ppc: nested TCG migration (KVM-on-TCG) | expand

Commit Message

Fabiano Rosas Feb. 24, 2022, 6:58 p.m. UTC
These two were not migrated so the remote end was starting with the
decrementer expired.

I am seeing less frequent crashes with this patch (tested with -smp 4
and -smp 32). It certainly doesn't fix all issues but it looks like it
helps.

Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>
---
 target/ppc/machine.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

Comments

Richard Henderson Feb. 24, 2022, 8:06 p.m. UTC | #1
On 2/24/22 08:58, Fabiano Rosas wrote:
> These two were not migrated so the remote end was starting with the
> decrementer expired.
> 
> I am seeing less frequent crashes with this patch (tested with -smp 4
> and -smp 32). It certainly doesn't fix all issues but it looks like it
> helps.
> 
> Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>
> ---
>   target/ppc/machine.c | 17 +++++++++++++++++
>   1 file changed, 17 insertions(+)
> 
> diff --git a/target/ppc/machine.c b/target/ppc/machine.c
> index 1b63146ed1..7ee1984500 100644
> --- a/target/ppc/machine.c
> +++ b/target/ppc/machine.c
> @@ -9,6 +9,7 @@
>   #include "qemu/main-loop.h"
>   #include "kvm_ppc.h"
>   #include "power8-pmu.h"
> +#include "hw/ppc/ppc.h"
>   
>   static void post_load_update_msr(CPUPPCState *env)
>   {
> @@ -666,6 +667,18 @@ static const VMStateDescription vmstate_compat = {
>       }
>   };
>   
> +static const VMStateDescription vmstate_tb_env = {
> +    .name = "cpu/env/tb_env",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_INT64(tb_offset, ppc_tb_t),
> +        VMSTATE_UINT64(decr_next, ppc_tb_t),
> +        VMSTATE_TIMER_PTR(decr_timer, ppc_tb_t),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>   const VMStateDescription vmstate_ppc_cpu = {
>       .name = "cpu",
>       .version_id = 5,
> @@ -696,12 +709,16 @@ const VMStateDescription vmstate_ppc_cpu = {
>           /* Backward compatible internal state */
>           VMSTATE_UINTTL(env.hflags_compat_nmsr, PowerPCCPU),
>   
> +        VMSTATE_STRUCT_POINTER_V(env.tb_env, PowerPCCPU, 1,
> +                                 vmstate_tb_env, ppc_tb_t),
> +
>           /* Sanity checking */
>           VMSTATE_UINTTL_TEST(mig_msr_mask, PowerPCCPU, cpu_pre_2_8_migration),
>           VMSTATE_UINT64_TEST(mig_insns_flags, PowerPCCPU, cpu_pre_2_8_migration),
>           VMSTATE_UINT64_TEST(mig_insns_flags2, PowerPCCPU,
>                               cpu_pre_2_8_migration),
>           VMSTATE_UINT32_TEST(mig_nb_BATs, PowerPCCPU, cpu_pre_2_8_migration),
> +
>           VMSTATE_END_OF_LIST()
>       },
>       .subsections = (const VMStateDescription*[]) {

I think the new struct should go into a subsection.

r~
David Gibson Feb. 25, 2022, 3:15 a.m. UTC | #2
On Thu, Feb 24, 2022 at 03:58:14PM -0300, Fabiano Rosas wrote:
> These two were not migrated so the remote end was starting with the
> decrementer expired.
> 
> I am seeing less frequent crashes with this patch (tested with -smp 4
> and -smp 32). It certainly doesn't fix all issues but it looks like it
> helps.
> 
> Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>

Nack.  This completely breaks migration compatibility for all ppc
machines.  In order to maintain compatibility as Richard says new info
has to go into a subsection, with a needed function that allows
migration of existing machine types both to and from a new qemu
version.

There are also some other problems.

> ---
>  target/ppc/machine.c | 17 +++++++++++++++++
>  1 file changed, 17 insertions(+)
> 
> diff --git a/target/ppc/machine.c b/target/ppc/machine.c
> index 1b63146ed1..7ee1984500 100644
> --- a/target/ppc/machine.c
> +++ b/target/ppc/machine.c
> @@ -9,6 +9,7 @@
>  #include "qemu/main-loop.h"
>  #include "kvm_ppc.h"
>  #include "power8-pmu.h"
> +#include "hw/ppc/ppc.h"
>  
>  static void post_load_update_msr(CPUPPCState *env)
>  {
> @@ -666,6 +667,18 @@ static const VMStateDescription vmstate_compat = {
>      }
>  };
>  
> +static const VMStateDescription vmstate_tb_env = {
> +    .name = "cpu/env/tb_env",

Because spapr requires that all cpus have synchronized timebases, we
migrate the timebase state from vmstate_spapr, not from each cpu
individually, to make sure it stays synchronized across migration.  If
that's not working right, that's a bug, but it needs to be fixed
there, not just plastered over with extra information transmitted at
cpu level.

> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_INT64(tb_offset, ppc_tb_t),

tb_offset isn't a good thing to put directly in the migration stream.
tb_offset has kind of non-obvious semantics to begin with: when we're
dealing with TCG (which is your explicit case), there is no host TB,
so what's this an offset from?  That's why vmstate_ppc_timebase uses
an explicit guest timebase value matched with a time of day in real
units.  Again, if there's a bug, that needs fixing there.

> +        VMSTATE_UINT64(decr_next, ppc_tb_t),
> +        VMSTATE_TIMER_PTR(decr_timer, ppc_tb_t),

You're attempting to migrate decr_next and decr_timer, but not the
actual DECR value, which makes me suspect that *is* being migrated.
In which case you should be able to reconstruct the next tick and
timer state in a post_load function on receive.  If that's buggy, it
needs to be fixed there.

> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  const VMStateDescription vmstate_ppc_cpu = {
>      .name = "cpu",
>      .version_id = 5,
> @@ -696,12 +709,16 @@ const VMStateDescription vmstate_ppc_cpu = {
>          /* Backward compatible internal state */
>          VMSTATE_UINTTL(env.hflags_compat_nmsr, PowerPCCPU),
>  
> +        VMSTATE_STRUCT_POINTER_V(env.tb_env, PowerPCCPU, 1,
> +                                 vmstate_tb_env, ppc_tb_t),
> +
>          /* Sanity checking */
>          VMSTATE_UINTTL_TEST(mig_msr_mask, PowerPCCPU, cpu_pre_2_8_migration),
>          VMSTATE_UINT64_TEST(mig_insns_flags, PowerPCCPU, cpu_pre_2_8_migration),
>          VMSTATE_UINT64_TEST(mig_insns_flags2, PowerPCCPU,
>                              cpu_pre_2_8_migration),
>          VMSTATE_UINT32_TEST(mig_nb_BATs, PowerPCCPU, cpu_pre_2_8_migration),
> +
>          VMSTATE_END_OF_LIST()
>      },
>      .subsections = (const VMStateDescription*[]) {
Fabiano Rosas Feb. 25, 2022, 4:08 p.m. UTC | #3
David Gibson <david@gibson.dropbear.id.au> writes:

> On Thu, Feb 24, 2022 at 03:58:14PM -0300, Fabiano Rosas wrote:
>> These two were not migrated so the remote end was starting with the
>> decrementer expired.
>> 
>> I am seeing less frequent crashes with this patch (tested with -smp 4
>> and -smp 32). It certainly doesn't fix all issues but it looks like it
>> helps.
>> 
>> Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>
>
> Nack.  This completely breaks migration compatibility for all ppc
> machines.  In order to maintain compatibility as Richard says new info
> has to go into a subsection, with a needed function that allows
> migration of existing machine types both to and from a new qemu
> version.

Ok, I'm still learning the tenets of migration. I'll give more thought
to that in the next versions.

>
> There are also some other problems.
>
>> ---
>>  target/ppc/machine.c | 17 +++++++++++++++++
>>  1 file changed, 17 insertions(+)
>> 
>> diff --git a/target/ppc/machine.c b/target/ppc/machine.c
>> index 1b63146ed1..7ee1984500 100644
>> --- a/target/ppc/machine.c
>> +++ b/target/ppc/machine.c
>> @@ -9,6 +9,7 @@
>>  #include "qemu/main-loop.h"
>>  #include "kvm_ppc.h"
>>  #include "power8-pmu.h"
>> +#include "hw/ppc/ppc.h"
>>  
>>  static void post_load_update_msr(CPUPPCState *env)
>>  {
>> @@ -666,6 +667,18 @@ static const VMStateDescription vmstate_compat = {
>>      }
>>  };
>>  
>> +static const VMStateDescription vmstate_tb_env = {
>> +    .name = "cpu/env/tb_env",
>
> Because spapr requires that all cpus have synchronized timebases, we
> migrate the timebase state from vmstate_spapr, not from each cpu
> individually, to make sure it stays synchronized across migration.  If
> that's not working right, that's a bug, but it needs to be fixed
> there, not just plastered over with extra information transmitted at
> cpu level.

Ok, so it that what guest_timebase is about? We shouldn't need to
migrate DECR individually then.

>> +    .version_id = 1,
>> +    .minimum_version_id = 1,
>> +    .fields = (VMStateField[]) {
>> +        VMSTATE_INT64(tb_offset, ppc_tb_t),
>
> tb_offset isn't a good thing to put directly in the migration stream.
> tb_offset has kind of non-obvious semantics to begin with: when we're
> dealing with TCG (which is your explicit case), there is no host TB,
> so what's this an offset from?  That's why vmstate_ppc_timebase uses
> an explicit guest timebase value matched with a time of day in real
> units.  Again, if there's a bug, that needs fixing there.

This should be in patch 4, but tb_offset is needed for the nested
case. When we enter the nested guest we increment tb_offset with
nested_tb_offset and decrement it when leaving the nested guest. So
tb_offset needs to carry this value over.

But maybe we could alternatively modify the nested code to just zero
tb_offset when leaving the guest instead? We could then remove
nested_tb_offset altogether.

>> +        VMSTATE_UINT64(decr_next, ppc_tb_t),
>> +        VMSTATE_TIMER_PTR(decr_timer, ppc_tb_t),
>
> You're attempting to migrate decr_next and decr_timer, but not the
> actual DECR value, which makes me suspect that *is* being migrated.
> In which case you should be able to reconstruct the next tick and
> timer state in a post_load function on receive.  If that's buggy, it
> needs to be fixed there.

There isn't any "actual DECR" when running TCG, is there? My
understanding is that it is embedded in the QEMUTimer.

Mark mentioned this years ago:

 "I can't see anything in __cpu_ppc_store_decr() that
 updates the spr[SPR_DECR] value when the decrementer register is
 changed"
 
 https://lists.gnu.org/archive/html/qemu-devel/2016-01/msg01114.html

Your answer was along the lines of:

 "we should be reconstructing the decrementer on
 the destination based on an offset from the timebase"

 https://lists.gnu.org/archive/html/qemu-devel/2016-01/msg01373.html

So if I'm getting this right, in TCG we don't touch SPR_DECR because we
only effectively care about what is in the decr_timer->expire_time.

And in KVM we don't migrate DECR explicitly because we use the tb_offset
and timebase_save/timebase_load to ensure the tb_offset in the
destination has the correct value.

But timebase_save/timebase_load are not used for TCG currently. So there
would be nothing transfering the current decr value to the other side.

>> +        VMSTATE_END_OF_LIST()
>> +    }
>> +};
>> +
>>  const VMStateDescription vmstate_ppc_cpu = {
>>      .name = "cpu",
>>      .version_id = 5,
>> @@ -696,12 +709,16 @@ const VMStateDescription vmstate_ppc_cpu = {
>>          /* Backward compatible internal state */
>>          VMSTATE_UINTTL(env.hflags_compat_nmsr, PowerPCCPU),
>>  
>> +        VMSTATE_STRUCT_POINTER_V(env.tb_env, PowerPCCPU, 1,
>> +                                 vmstate_tb_env, ppc_tb_t),
>> +
>>          /* Sanity checking */
>>          VMSTATE_UINTTL_TEST(mig_msr_mask, PowerPCCPU, cpu_pre_2_8_migration),
>>          VMSTATE_UINT64_TEST(mig_insns_flags, PowerPCCPU, cpu_pre_2_8_migration),
>>          VMSTATE_UINT64_TEST(mig_insns_flags2, PowerPCCPU,
>>                              cpu_pre_2_8_migration),
>>          VMSTATE_UINT32_TEST(mig_nb_BATs, PowerPCCPU, cpu_pre_2_8_migration),
>> +
>>          VMSTATE_END_OF_LIST()
>>      },
>>      .subsections = (const VMStateDescription*[]) {
David Gibson Feb. 28, 2022, 2:04 a.m. UTC | #4
On Fri, Feb 25, 2022 at 01:08:10PM -0300, Fabiano Rosas wrote:
> David Gibson <david@gibson.dropbear.id.au> writes:
> 
> > On Thu, Feb 24, 2022 at 03:58:14PM -0300, Fabiano Rosas wrote:
> >> These two were not migrated so the remote end was starting with the
> >> decrementer expired.
> >> 
> >> I am seeing less frequent crashes with this patch (tested with -smp 4
> >> and -smp 32). It certainly doesn't fix all issues but it looks like it
> >> helps.
> >> 
> >> Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>
> >
> > Nack.  This completely breaks migration compatibility for all ppc
> > machines.  In order to maintain compatibility as Richard says new info
> > has to go into a subsection, with a needed function that allows
> > migration of existing machine types both to and from a new qemu
> > version.
> 
> Ok, I'm still learning the tenets of migration. I'll give more thought
> to that in the next versions.

Fair enough.  I'd had a very frustrating week for entirely unrelated
reasons, so I was probably a bit unfairly critical.

> > There are also some other problems.
> >
> >> ---
> >>  target/ppc/machine.c | 17 +++++++++++++++++
> >>  1 file changed, 17 insertions(+)
> >> 
> >> diff --git a/target/ppc/machine.c b/target/ppc/machine.c
> >> index 1b63146ed1..7ee1984500 100644
> >> --- a/target/ppc/machine.c
> >> +++ b/target/ppc/machine.c
> >> @@ -9,6 +9,7 @@
> >>  #include "qemu/main-loop.h"
> >>  #include "kvm_ppc.h"
> >>  #include "power8-pmu.h"
> >> +#include "hw/ppc/ppc.h"
> >>  
> >>  static void post_load_update_msr(CPUPPCState *env)
> >>  {
> >> @@ -666,6 +667,18 @@ static const VMStateDescription vmstate_compat = {
> >>      }
> >>  };
> >>  
> >> +static const VMStateDescription vmstate_tb_env = {
> >> +    .name = "cpu/env/tb_env",
> >
> > Because spapr requires that all cpus have synchronized timebases, we
> > migrate the timebase state from vmstate_spapr, not from each cpu
> > individually, to make sure it stays synchronized across migration.  If
> > that's not working right, that's a bug, but it needs to be fixed
> > there, not just plastered over with extra information transmitted at
> > cpu level.
> 
> Ok, so it that what guest_timebase is about? We shouldn't need to
> migrate DECR individually then.

Probably not.  Unlike the TB there is obviously per-cpu state that has
to be migrated for DECR, and I'm not immediately sure how that's
handled right now.  I think we would be a lot more broken if we
weren't currently migrating the DECRs in at least most ordinary
circumstances.

> >> +    .version_id = 1,
> >> +    .minimum_version_id = 1,
> >> +    .fields = (VMStateField[]) {
> >> +        VMSTATE_INT64(tb_offset, ppc_tb_t),
> >
> > tb_offset isn't a good thing to put directly in the migration stream.
> > tb_offset has kind of non-obvious semantics to begin with: when we're
> > dealing with TCG (which is your explicit case), there is no host TB,
> > so what's this an offset from?  That's why vmstate_ppc_timebase uses
> > an explicit guest timebase value matched with a time of day in real
> > units.  Again, if there's a bug, that needs fixing there.
> 
> This should be in patch 4, but tb_offset is needed for the nested
> case. When we enter the nested guest we increment tb_offset with
> nested_tb_offset and decrement it when leaving the nested guest. So
> tb_offset needs to carry this value over.

Yeah.. that's not really going to work.  We'll have to instead
reconstruct tb_offset from the real-time based stuff we have, then use
that on the destination where we need it.

> But maybe we could alternatively modify the nested code to just zero
> tb_offset when leaving the guest instead? We could then remove
> nested_tb_offset altogether.

Uh.. maybe?  I don't remember how the nested implementation works well
enough to quickly assess if that will work.

> 
> >> +        VMSTATE_UINT64(decr_next, ppc_tb_t),
> >> +        VMSTATE_TIMER_PTR(decr_timer, ppc_tb_t),
> >
> > You're attempting to migrate decr_next and decr_timer, but not the
> > actual DECR value, which makes me suspect that *is* being migrated.
> > In which case you should be able to reconstruct the next tick and
> > timer state in a post_load function on receive.  If that's buggy, it
> > needs to be fixed there.
> 
> There isn't any "actual DECR" when running TCG, is there? My
> understanding is that it is embedded in the QEMUTimer.
> 
> Mark mentioned this years ago:
> 
>  "I can't see anything in __cpu_ppc_store_decr() that
>  updates the spr[SPR_DECR] value when the decrementer register is
>  changed"
>  
>  https://lists.gnu.org/archive/html/qemu-devel/2016-01/msg01114.html
> 
> Your answer was along the lines of:
> 
>  "we should be reconstructing the decrementer on
>  the destination based on an offset from the timebase"
> 
>  https://lists.gnu.org/archive/html/qemu-devel/2016-01/msg01373.html
> 
> So if I'm getting this right, in TCG we don't touch SPR_DECR because we
> only effectively care about what is in the decr_timer->expire_time.
> 
> And in KVM we don't migrate DECR explicitly because we use the tb_offset
> and timebase_save/timebase_load to ensure the tb_offset in the
> destination has the correct value.
> 
> But timebase_save/timebase_load are not used for TCG currently. So there
> would be nothing transfering the current decr value to the other side.

Ah.. good points.  What we need to make sure of is that all the values
which spr_read_decr / gen_helper_load_decr needs to make it's
calculation are correctly migrated.

> >> +        VMSTATE_END_OF_LIST()
> >> +    }
> >> +};
> >> +
> >>  const VMStateDescription vmstate_ppc_cpu = {
> >>      .name = "cpu",
> >>      .version_id = 5,
> >> @@ -696,12 +709,16 @@ const VMStateDescription vmstate_ppc_cpu = {
> >>          /* Backward compatible internal state */
> >>          VMSTATE_UINTTL(env.hflags_compat_nmsr, PowerPCCPU),
> >>  
> >> +        VMSTATE_STRUCT_POINTER_V(env.tb_env, PowerPCCPU, 1,
> >> +                                 vmstate_tb_env, ppc_tb_t),
> >> +
> >>          /* Sanity checking */
> >>          VMSTATE_UINTTL_TEST(mig_msr_mask, PowerPCCPU, cpu_pre_2_8_migration),
> >>          VMSTATE_UINT64_TEST(mig_insns_flags, PowerPCCPU, cpu_pre_2_8_migration),
> >>          VMSTATE_UINT64_TEST(mig_insns_flags2, PowerPCCPU,
> >>                              cpu_pre_2_8_migration),
> >>          VMSTATE_UINT32_TEST(mig_nb_BATs, PowerPCCPU, cpu_pre_2_8_migration),
> >> +
> >>          VMSTATE_END_OF_LIST()
> >>      },
> >>      .subsections = (const VMStateDescription*[]) {
>
diff mbox series

Patch

diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index 1b63146ed1..7ee1984500 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -9,6 +9,7 @@ 
 #include "qemu/main-loop.h"
 #include "kvm_ppc.h"
 #include "power8-pmu.h"
+#include "hw/ppc/ppc.h"
 
 static void post_load_update_msr(CPUPPCState *env)
 {
@@ -666,6 +667,18 @@  static const VMStateDescription vmstate_compat = {
     }
 };
 
+static const VMStateDescription vmstate_tb_env = {
+    .name = "cpu/env/tb_env",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_INT64(tb_offset, ppc_tb_t),
+        VMSTATE_UINT64(decr_next, ppc_tb_t),
+        VMSTATE_TIMER_PTR(decr_timer, ppc_tb_t),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 const VMStateDescription vmstate_ppc_cpu = {
     .name = "cpu",
     .version_id = 5,
@@ -696,12 +709,16 @@  const VMStateDescription vmstate_ppc_cpu = {
         /* Backward compatible internal state */
         VMSTATE_UINTTL(env.hflags_compat_nmsr, PowerPCCPU),
 
+        VMSTATE_STRUCT_POINTER_V(env.tb_env, PowerPCCPU, 1,
+                                 vmstate_tb_env, ppc_tb_t),
+
         /* Sanity checking */
         VMSTATE_UINTTL_TEST(mig_msr_mask, PowerPCCPU, cpu_pre_2_8_migration),
         VMSTATE_UINT64_TEST(mig_insns_flags, PowerPCCPU, cpu_pre_2_8_migration),
         VMSTATE_UINT64_TEST(mig_insns_flags2, PowerPCCPU,
                             cpu_pre_2_8_migration),
         VMSTATE_UINT32_TEST(mig_nb_BATs, PowerPCCPU, cpu_pre_2_8_migration),
+
         VMSTATE_END_OF_LIST()
     },
     .subsections = (const VMStateDescription*[]) {