diff mbox series

[RFC,4/4] spapr: Add KVM-on-TCG migration support

Message ID 20220224185817.2207228-5-farosas@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series ppc: nested TCG migration (KVM-on-TCG) | expand

Commit Message

Fabiano Rosas Feb. 24, 2022, 6:58 p.m. UTC
This adds migration support for TCG pseries machines running a KVM-HV
guest.

The state that needs to be migrated is:

- the nested PTCR value;
- the in_nested flag;
- the nested_tb_offset;
- the saved host CPUPPCState structure.

Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>

---
(this migrates just fine with L2 running stress and 1 VCPU in L1. With
32 VCPUs in L1 there are crashes which I still don't understand. They might
be related to L1 migration being flaky right now)
---
 hw/ppc/spapr.c          | 19 +++++++++++
 hw/ppc/spapr_cpu_core.c | 76 +++++++++++++++++++++++++++++++++++++++++
 target/ppc/machine.c    | 44 ++++++++++++++++++++++++
 3 files changed, 139 insertions(+)

Comments

Nicholas Piggin Feb. 25, 2022, 12:51 a.m. UTC | #1
Excerpts from Fabiano Rosas's message of February 25, 2022 4:58 am:
> This adds migration support for TCG pseries machines running a KVM-HV
> guest.
> 
> The state that needs to be migrated is:
> 
> - the nested PTCR value;
> - the in_nested flag;
> - the nested_tb_offset.
> - the saved host CPUPPCState structure;
> 
> Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>

The series generally looks good to me, I guess patches 1 and 2 are
fixes that could go ahead.

Main thing about this is I was thinking of cutting down the CPUPPCState
structure for saving the host state when in the L2, and making a
specific structure for that that only contains what is required.

This patch could easily switch to that so it's no big deal AFAIKS.

> diff --git a/target/ppc/machine.c b/target/ppc/machine.c
> index 7ee1984500..ae09b1bcfe 100644
> --- a/target/ppc/machine.c
> +++ b/target/ppc/machine.c
> @@ -10,6 +10,7 @@
>  #include "kvm_ppc.h"
>  #include "power8-pmu.h"
>  #include "hw/ppc/ppc.h"
> +#include "hw/ppc/spapr_cpu_core.h"
>  
>  static void post_load_update_msr(CPUPPCState *env)
>  {
> @@ -679,6 +680,48 @@ static const VMStateDescription vmstate_tb_env = {
>      }
>  };
>  
> +static const VMStateDescription vmstate_hdecr = {
> +    .name = "cpu/hdecr",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT64(hdecr_next, ppc_tb_t),
> +        VMSTATE_TIMER_PTR(hdecr_timer, ppc_tb_t),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +static bool nested_needed(void *opaque)
> +{
> +    PowerPCCPU *cpu = opaque;
> +    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
> +
> +    return spapr_cpu->in_nested;
> +}

I don't know the migration code -- are you assured of having a
spapr CPU here?

Maybe this could call a helper function located near the spapr/nested
code like 'return ppc_cpu_need_hdec_migrate(cpu)' ?

Thanks,
Nick
David Gibson Feb. 25, 2022, 3:42 a.m. UTC | #2
On Thu, Feb 24, 2022 at 03:58:17PM -0300, Fabiano Rosas wrote:
> This adds migration support for TCG pseries machines running a KVM-HV
> guest.
> 
> The state that needs to be migrated is:
> 
> - the nested PTCR value;
> - the in_nested flag;
> - the nested_tb_offset.
> - the saved host CPUPPCState structure;
> 
> Signed-off-by: Fabiano Rosas <farosas@linux.ibm.com>
> 
> ---
> (this migrates just fine with L2 running stress and 1 VCPU in L1. With
> 32 VCPUs in L1 there's crashes which I still don't understand. They might
> be related to L1 migration being flaky right now)
> ---
>  hw/ppc/spapr.c          | 19 +++++++++++
>  hw/ppc/spapr_cpu_core.c | 76 +++++++++++++++++++++++++++++++++++++++++
>  target/ppc/machine.c    | 44 ++++++++++++++++++++++++
>  3 files changed, 139 insertions(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index f0b75b22bb..6e87c515db 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1934,6 +1934,13 @@ static bool spapr_patb_entry_needed(void *opaque)
>      return !!spapr->patb_entry;
>  }
>  
> +static bool spapr_nested_ptcr_needed(void *opaque)
> +{
> +    SpaprMachineState *spapr = opaque;
> +
> +    return !!spapr->nested_ptcr;
> +}
> +
>  static const VMStateDescription vmstate_spapr_patb_entry = {
>      .name = "spapr_patb_entry",
>      .version_id = 1,
> @@ -1945,6 +1952,17 @@ static const VMStateDescription vmstate_spapr_patb_entry = {
>      },
>  };
>  
> +static const VMStateDescription vmstate_spapr_nested_ptcr = {
> +    .name = "spapr_nested_ptcr",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = spapr_nested_ptcr_needed,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT64(nested_ptcr, SpaprMachineState),
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +
>  static bool spapr_irq_map_needed(void *opaque)
>  {
>      SpaprMachineState *spapr = opaque;
> @@ -2069,6 +2087,7 @@ static const VMStateDescription vmstate_spapr = {
>          &vmstate_spapr_cap_fwnmi,
>          &vmstate_spapr_fwnmi,
>          &vmstate_spapr_cap_rpt_invalidate,
> +        &vmstate_spapr_nested_ptcr,

Ok, the nested_ptcr stuff looks good.

>          NULL
>      }
>  };
> diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> index efda7730f1..3ec13c0660 100644
> --- a/hw/ppc/spapr_cpu_core.c
> +++ b/hw/ppc/spapr_cpu_core.c
> @@ -25,6 +25,7 @@
>  #include "sysemu/reset.h"
>  #include "sysemu/hw_accel.h"
>  #include "qemu/error-report.h"
> +#include "migration/cpu.h"
>  
>  static void spapr_reset_vcpu(PowerPCCPU *cpu)
>  {
> @@ -174,6 +175,80 @@ static const VMStateDescription vmstate_spapr_cpu_vpa = {
>      }
>  };
>  
> +static bool nested_needed(void *opaque)
> +{
> +    SpaprCpuState *spapr_cpu = opaque;
> +
> +    return spapr_cpu->in_nested;
> +}
> +
> +static int nested_state_pre_save(void *opaque)
> +{
> +    CPUPPCState *env = opaque;
> +
> +    env->spr[SPR_LR] = env->lr;
> +    env->spr[SPR_CTR] = env->ctr;
> +    env->spr[SPR_XER] = cpu_read_xer(env);
> +    env->spr[SPR_CFAR] = env->cfar;
> +    return 0;
> +}
> +
> +static int nested_state_post_load(void *opaque, int version_id)
> +{
> +    CPUPPCState *env = opaque;
> +
> +    env->lr = env->spr[SPR_LR];
> +    env->ctr = env->spr[SPR_CTR];
> +    cpu_write_xer(env, env->spr[SPR_XER]);
> +    env->cfar = env->spr[SPR_CFAR];
> +
> +    return 0;
> +}
> +
> +static const VMStateDescription vmstate_nested_host_state = {
> +    .name = "spapr_nested_host_state",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .pre_save = nested_state_pre_save,
> +    .post_load = nested_state_post_load,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINTTL_ARRAY(gpr, CPUPPCState, 32),
> +        VMSTATE_UINTTL_ARRAY(spr, CPUPPCState, 1024),
> +        VMSTATE_UINT32_ARRAY(crf, CPUPPCState, 8),
> +        VMSTATE_UINTTL(nip, CPUPPCState),
> +        VMSTATE_UINTTL(msr, CPUPPCState),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +static int nested_cpu_pre_load(void *opaque)
> +{
> +    SpaprCpuState *spapr_cpu = opaque;
> +
> +    spapr_cpu->nested_host_state = g_try_malloc(sizeof(CPUPPCState));
> +    if (!spapr_cpu->nested_host_state) {
> +        return -1;
> +    }
> +
> +    return 0;
> +}
> +
> +static const VMStateDescription vmstate_spapr_cpu_nested = {
> +    .name = "spapr_cpu/nested",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = nested_needed,
> +    .pre_load = nested_cpu_pre_load,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_BOOL(in_nested, SpaprCpuState),
> +        VMSTATE_INT64(nested_tb_offset, SpaprCpuState),
> +        VMSTATE_STRUCT_POINTER_V(nested_host_state, SpaprCpuState, 1,
> +                                 vmstate_nested_host_state, CPUPPCState),
> +        VMSTATE_END_OF_LIST()
> +    },
> +};
> +
>  static const VMStateDescription vmstate_spapr_cpu_state = {
>      .name = "spapr_cpu",
>      .version_id = 1,
> @@ -184,6 +259,7 @@ static const VMStateDescription vmstate_spapr_cpu_state = {
>      },
>      .subsections = (const VMStateDescription * []) {
>          &vmstate_spapr_cpu_vpa,
> +        &vmstate_spapr_cpu_nested,
>          NULL
>      }

The vmstate_spapr_cpu_nested stuff looks good too, this is real
information that we weren't migrating and can't be recovered from elsewhere.

>  };
> diff --git a/target/ppc/machine.c b/target/ppc/machine.c
> index 7ee1984500..ae09b1bcfe 100644
> --- a/target/ppc/machine.c
> +++ b/target/ppc/machine.c
> @@ -10,6 +10,7 @@
>  #include "kvm_ppc.h"
>  #include "power8-pmu.h"
>  #include "hw/ppc/ppc.h"
> +#include "hw/ppc/spapr_cpu_core.h"
>  
>  static void post_load_update_msr(CPUPPCState *env)
>  {
> @@ -679,6 +680,48 @@ static const VMStateDescription vmstate_tb_env = {
>      }
>  };
>  
> +static const VMStateDescription vmstate_hdecr = {
> +    .name = "cpu/hdecr",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_UINT64(hdecr_next, ppc_tb_t),
> +        VMSTATE_TIMER_PTR(hdecr_timer, ppc_tb_t),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
> +static bool nested_needed(void *opaque)
> +{
> +    PowerPCCPU *cpu = opaque;
> +    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
> +
> +    return spapr_cpu->in_nested;
> +}
> +
> +static int nested_pre_load(void *opaque)
> +{
> +    PowerPCCPU *cpu = opaque;
> +    CPUPPCState *env = &cpu->env;
> +
> +    cpu_ppc_hdecr_init(env);
> +
> +    return 0;
> +}
> +
> +static const VMStateDescription vmstate_nested = {
> +    .name = "cpu/nested-guest",
> +    .version_id = 1,
> +    .minimum_version_id = 1,
> +    .needed = nested_needed,
> +    .pre_load = nested_pre_load,
> +    .fields = (VMStateField[]) {
> +        VMSTATE_STRUCT_POINTER_V(env.tb_env, PowerPCCPU, 1,
> +                                 vmstate_hdecr, ppc_tb_t),
> +        VMSTATE_END_OF_LIST()
> +    }
> +};
> +
>  const VMStateDescription vmstate_ppc_cpu = {
>      .name = "cpu",
>      .version_id = 5,
> @@ -734,6 +777,7 @@ const VMStateDescription vmstate_ppc_cpu = {
>          &vmstate_tlbemb,
>          &vmstate_tlbmas,
>          &vmstate_compat,
> +        &vmstate_nested,

The hdecr stuff doesn't seem quite right.  Notionally the L1 cpu,
since it is in PAPR mode, doesn't *have* an HDECR.  It's only the L0
nested-KVM extensions that allow it to kind of fake access to an
HDECR.  We're kind of abusing the HDECR fields in the cpu structure
for this.  At the very least I think the fake-HDECR migration stuff
needs to go in the spapr_cpu_state not the general cpu state, since it
would make no sense if the L1 were a powernv system.
Nicholas Piggin Feb. 25, 2022, 10:57 a.m. UTC | #3
Excerpts from David Gibson's message of February 25, 2022 1:42 pm:
> On Thu, Feb 24, 2022 at 03:58:17PM -0300, Fabiano Rosas wrote:
>> This adds migration support for TCG pseries machines running a KVM-HV
>> guest.
>> @@ -734,6 +777,7 @@ const VMStateDescription vmstate_ppc_cpu = {
>>          &vmstate_tlbemb,
>>          &vmstate_tlbmas,
>>          &vmstate_compat,
>> +        &vmstate_nested,
> 
> The hdecr stuff doesn't seem quite right.  Notionally the L1 cpu,
> since it is in PAPR mode, doesn't *have* an HDECR.  It's only the L0
> nested-KVM extensions that allow it to kind of fake access to an
> HDECR.  We're kind of abusing the HDECR fields in the cpu structure
> for this.  At the very least I think the fake-HDECR migration stuff
> needs to go in the spapr_cpu_state not the general cpu state, since it
> would make no sense if the L1 were a powernv system.

We could possibly just make a new timer for this, btw. It's not really
buying a lot and may be slightly confusing to share hdecr. It could
still cause a HDEC exception though.

(If we really wanted to divorce that confusion about the HV architecture
from the vhyp nested L0 implementation we could come up with an entirely
new set of "exit nested" kind of exceptions that don't look like HV
interrupts, but reusing the HV interrupts was simple and kind of neat
in a way).

Thanks,
Nick
diff mbox series

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index f0b75b22bb..6e87c515db 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1934,6 +1934,13 @@  static bool spapr_patb_entry_needed(void *opaque)
     return !!spapr->patb_entry;
 }
 
+static bool spapr_nested_ptcr_needed(void *opaque)
+{
+    SpaprMachineState *spapr = opaque;
+
+    return !!spapr->nested_ptcr;
+}
+
 static const VMStateDescription vmstate_spapr_patb_entry = {
     .name = "spapr_patb_entry",
     .version_id = 1,
@@ -1945,6 +1952,17 @@  static const VMStateDescription vmstate_spapr_patb_entry = {
     },
 };
 
+static const VMStateDescription vmstate_spapr_nested_ptcr = {
+    .name = "spapr_nested_ptcr",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = spapr_nested_ptcr_needed,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(nested_ptcr, SpaprMachineState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 static bool spapr_irq_map_needed(void *opaque)
 {
     SpaprMachineState *spapr = opaque;
@@ -2069,6 +2087,7 @@  static const VMStateDescription vmstate_spapr = {
         &vmstate_spapr_cap_fwnmi,
         &vmstate_spapr_fwnmi,
         &vmstate_spapr_cap_rpt_invalidate,
+        &vmstate_spapr_nested_ptcr,
         NULL
     }
 };
diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
index efda7730f1..3ec13c0660 100644
--- a/hw/ppc/spapr_cpu_core.c
+++ b/hw/ppc/spapr_cpu_core.c
@@ -25,6 +25,7 @@ 
 #include "sysemu/reset.h"
 #include "sysemu/hw_accel.h"
 #include "qemu/error-report.h"
+#include "migration/cpu.h"
 
 static void spapr_reset_vcpu(PowerPCCPU *cpu)
 {
@@ -174,6 +175,80 @@  static const VMStateDescription vmstate_spapr_cpu_vpa = {
     }
 };
 
+static bool nested_needed(void *opaque)
+{
+    SpaprCpuState *spapr_cpu = opaque;
+
+    return spapr_cpu->in_nested;
+}
+
+static int nested_state_pre_save(void *opaque)
+{
+    CPUPPCState *env = opaque;
+
+    env->spr[SPR_LR] = env->lr;
+    env->spr[SPR_CTR] = env->ctr;
+    env->spr[SPR_XER] = cpu_read_xer(env);
+    env->spr[SPR_CFAR] = env->cfar;
+
+    return 0;
+}
+
+static int nested_state_post_load(void *opaque, int version_id)
+{
+    CPUPPCState *env = opaque;
+
+    env->lr = env->spr[SPR_LR];
+    env->ctr = env->spr[SPR_CTR];
+    cpu_write_xer(env, env->spr[SPR_XER]);
+    env->cfar = env->spr[SPR_CFAR];
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_nested_host_state = {
+    .name = "spapr_nested_host_state",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .pre_save = nested_state_pre_save,
+    .post_load = nested_state_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINTTL_ARRAY(gpr, CPUPPCState, 32),
+        VMSTATE_UINTTL_ARRAY(spr, CPUPPCState, 1024),
+        VMSTATE_UINT32_ARRAY(crf, CPUPPCState, 8),
+        VMSTATE_UINTTL(nip, CPUPPCState),
+        VMSTATE_UINTTL(msr, CPUPPCState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static int nested_cpu_pre_load(void *opaque)
+{
+    SpaprCpuState *spapr_cpu = opaque;
+
+    spapr_cpu->nested_host_state = g_try_malloc(sizeof(CPUPPCState));
+    if (!spapr_cpu->nested_host_state) {
+        return -1;
+    }
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_spapr_cpu_nested = {
+    .name = "spapr_cpu/nested",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = nested_needed,
+    .pre_load = nested_cpu_pre_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_BOOL(in_nested, SpaprCpuState),
+        VMSTATE_INT64(nested_tb_offset, SpaprCpuState),
+        VMSTATE_STRUCT_POINTER_V(nested_host_state, SpaprCpuState, 1,
+                                 vmstate_nested_host_state, CPUPPCState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
 static const VMStateDescription vmstate_spapr_cpu_state = {
     .name = "spapr_cpu",
     .version_id = 1,
@@ -184,6 +259,7 @@  static const VMStateDescription vmstate_spapr_cpu_state = {
     },
     .subsections = (const VMStateDescription * []) {
         &vmstate_spapr_cpu_vpa,
+        &vmstate_spapr_cpu_nested,
         NULL
     }
 };
diff --git a/target/ppc/machine.c b/target/ppc/machine.c
index 7ee1984500..ae09b1bcfe 100644
--- a/target/ppc/machine.c
+++ b/target/ppc/machine.c
@@ -10,6 +10,7 @@ 
 #include "kvm_ppc.h"
 #include "power8-pmu.h"
 #include "hw/ppc/ppc.h"
+#include "hw/ppc/spapr_cpu_core.h"
 
 static void post_load_update_msr(CPUPPCState *env)
 {
@@ -679,6 +680,48 @@  static const VMStateDescription vmstate_tb_env = {
     }
 };
 
+static const VMStateDescription vmstate_hdecr = {
+    .name = "cpu/hdecr",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(hdecr_next, ppc_tb_t),
+        VMSTATE_TIMER_PTR(hdecr_timer, ppc_tb_t),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static bool nested_needed(void *opaque)
+{
+    PowerPCCPU *cpu = opaque;
+    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
+
+    return spapr_cpu->in_nested;
+}
+
+static int nested_pre_load(void *opaque)
+{
+    PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
+
+    cpu_ppc_hdecr_init(env);
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_nested = {
+    .name = "cpu/nested-guest",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .needed = nested_needed,
+    .pre_load = nested_pre_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_STRUCT_POINTER_V(env.tb_env, PowerPCCPU, 1,
+                                 vmstate_hdecr, ppc_tb_t),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 const VMStateDescription vmstate_ppc_cpu = {
     .name = "cpu",
     .version_id = 5,
@@ -734,6 +777,7 @@  const VMStateDescription vmstate_ppc_cpu = {
         &vmstate_tlbemb,
         &vmstate_tlbmas,
         &vmstate_compat,
+        &vmstate_nested,
         NULL
     }
 };