diff mbox series

[v4,5/5] spapr: Implement ibm,suspend-me

Message ID 20190716024726.17864-6-npiggin@gmail.com (mailing list archive)
State New, archived
Headers show
Series spapr: implement dispatch and suspend calls | expand

Commit Message

Nicholas Piggin July 16, 2019, 2:47 a.m. UTC
This has been useful to modify and test the Linux pseries suspend
code but it requires modification to the guest to call it (due to
being gated by other unimplemented features). It is not otherwise
used by Linux yet, but work is slowly progressing there.

This allows a (lightly modified) guest kernel to suspend with
`echo mem > /sys/power/state` and be resumed with system_wakeup
monitor command.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 hw/ppc/spapr.c         | 26 ++++++++++++++++++++++++++
 hw/ppc/spapr_rtas.c    | 32 ++++++++++++++++++++++++++++++++
 include/hw/ppc/spapr.h |  7 ++++++-
 3 files changed, 64 insertions(+), 1 deletion(-)

Comments

David Gibson July 16, 2019, 8:30 a.m. UTC | #1
On Tue, Jul 16, 2019 at 12:47:26PM +1000, Nicholas Piggin wrote:
> This has been useful to modify and test the Linux pseries suspend
> code but it requires modification to the guest to call it (due to
> being gated by other unimplemented features). It is not otherwise
> used by Linux yet, but work is slowly progressing there.
> 
> This allows a (lightly modified) guest kernel to suspend with
> `echo mem > /sys/power/state` and be resumed with system_wakeup
> monitor command.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  hw/ppc/spapr.c         | 26 ++++++++++++++++++++++++++
>  hw/ppc/spapr_rtas.c    | 32 ++++++++++++++++++++++++++++++++
>  include/hw/ppc/spapr.h |  7 ++++++-
>  3 files changed, 64 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 5c54e1cb9a..b85d41bb1e 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1710,6 +1710,11 @@ static void spapr_machine_reset(MachineState *machine)
>      void *fdt;
>      int rc;
>  
> +    if (spapr->suspend_reset) {
> +        spapr->suspend_reset = false;

Do we need to migrate this value?

> +        return;
> +    }
> +
>      spapr_caps_apply(spapr);
>  
>      first_ppc_cpu = POWERPC_CPU(first_cpu);
> @@ -2721,6 +2726,23 @@ static PCIHostState *spapr_create_default_phb(void)
>      return PCI_HOST_BRIDGE(dev);
>  }
>  
> +static Notifier wakeup;

I think this should be in sPAPRMachineState rather than global.

> +static void spapr_notify_wakeup(Notifier *notifier, void *data)
> +{
> +    WakeupReason *reason = data;
> +
> +    switch (*reason) {
> +    case QEMU_WAKEUP_REASON_RTC:
> +        break;
> +    case QEMU_WAKEUP_REASON_PMTIMER:
> +        break;
> +    case QEMU_WAKEUP_REASON_OTHER:
> +        break;
> +    default:
> +        break;
> +    }

So.. you have a bunch of switch cases, all of which ignore the input..

> +}
> +
>  /* pSeries LPAR / sPAPR hardware init */
>  static void spapr_machine_init(MachineState *machine)
>  {
> @@ -3078,6 +3100,10 @@ static void spapr_machine_init(MachineState *machine)
>  
>      qemu_register_boot_set(spapr_boot_set, spapr);
>  
> +    wakeup.notify = spapr_notify_wakeup;
> +    qemu_register_wakeup_notifier(&wakeup);
> +    qemu_register_wakeup_support();
> +
>      if (kvm_enabled()) {
>          /* to stop and start vmclock */
>          qemu_add_vm_change_state_handler(cpu_ppc_clock_vm_state_change,
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index a618a2ac0f..60a007ec38 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -216,6 +216,36 @@ static void rtas_stop_self(PowerPCCPU *cpu, SpaprMachineState *spapr,
>      qemu_cpu_kick(cs);
>  }
>  
> +static void rtas_ibm_suspend_me(PowerPCCPU *cpu, SpaprMachineState *spapr,
> +                           uint32_t token, uint32_t nargs,
> +                           target_ulong args,
> +                           uint32_t nret, target_ulong rets)
> +{
> +    CPUState *cs;
> +
> +    if (nargs != 0 || nret != 1) {
> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> +        return;
> +    }
> +
> +    CPU_FOREACH(cs) {
> +        PowerPCCPU *c = POWERPC_CPU(cs);
> +        CPUPPCState *e = &c->env;
> +        if (c == cpu)
> +            continue;
> +
> +	/* See h_join */
> +        if (!cs->halted || (e->msr & (1ULL << MSR_EE))) {
> +            rtas_st(rets, 0, H_MULTI_THREADS_ACTIVE);
> +            return;
> +        }
> +    }
> +
> +    spapr->suspend_reset = true;
> +    qemu_system_suspend_request();
> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> +}
> +
>  static inline int sysparm_st(target_ulong addr, target_ulong len,
>                               const void *val, uint16_t vallen)
>  {
> @@ -483,6 +513,8 @@ static void core_rtas_register_types(void)
>                          rtas_query_cpu_stopped_state);
>      spapr_rtas_register(RTAS_START_CPU, "start-cpu", rtas_start_cpu);
>      spapr_rtas_register(RTAS_STOP_SELF, "stop-self", rtas_stop_self);
> +    spapr_rtas_register(RTAS_IBM_SUSPEND_ME, "ibm,suspend-me",
> +                        rtas_ibm_suspend_me);
>      spapr_rtas_register(RTAS_IBM_GET_SYSTEM_PARAMETER,
>                          "ibm,get-system-parameter",
>                          rtas_ibm_get_system_parameter);
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 5d36eec9d0..df0b0c15da 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -171,6 +171,10 @@ struct SpaprMachineState {
>      bool use_hotplug_event_source;
>      SpaprEventSource *event_sources;
>  
> +    /* Machine has been suspended, so the next machine_reset should not
> +     * reset state, but just return and allow execution to resume. */
> +    bool suspend_reset;

Hrm, this seems odd, but maybe it's part of the existing suspend
design.  Why would system_reset resume a suspend, rather than having a
specific operation for that.

> +
>      /* ibm,client-architecture-support option negotiation */
>      bool cas_reboot;
>      bool cas_legacy_guest_workaround;
> @@ -631,8 +635,9 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
>  #define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
>  #define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
>  #define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
> +#define RTAS_IBM_SUSPEND_ME                     (RTAS_TOKEN_BASE + 0x2A)
>  
> -#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)
> +#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2B)
>  
>  /* RTAS ibm,get-system-parameter token values */
>  #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
Nicholas Piggin July 16, 2019, 11:15 a.m. UTC | #2
David Gibson's on July 16, 2019 6:30 pm:
> On Tue, Jul 16, 2019 at 12:47:26PM +1000, Nicholas Piggin wrote:
>> This has been useful to modify and test the Linux pseries suspend
>> code but it requires modification to the guest to call it (due to
>> being gated by other unimplemented features). It is not otherwise
>> used by Linux yet, but work is slowly progressing there.
>> 
>> This allows a (lightly modified) guest kernel to suspend with
>> `echo mem > /sys/power/state` and be resumed with system_wakeup
>> monitor command.
>> 
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>>  hw/ppc/spapr.c         | 26 ++++++++++++++++++++++++++
>>  hw/ppc/spapr_rtas.c    | 32 ++++++++++++++++++++++++++++++++
>>  include/hw/ppc/spapr.h |  7 ++++++-
>>  3 files changed, 64 insertions(+), 1 deletion(-)
>> 
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 5c54e1cb9a..b85d41bb1e 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -1710,6 +1710,11 @@ static void spapr_machine_reset(MachineState *machine)
>>      void *fdt;
>>      int rc;
>>  
>> +    if (spapr->suspend_reset) {
>> +        spapr->suspend_reset = false;
> 
> Do we need to migrate this value?

I suppose we do if we can migrate a suspended machine?

> 
>> +        return;
>> +    }
>> +
>>      spapr_caps_apply(spapr);
>>  
>>      first_ppc_cpu = POWERPC_CPU(first_cpu);
>> @@ -2721,6 +2726,23 @@ static PCIHostState *spapr_create_default_phb(void)
>>      return PCI_HOST_BRIDGE(dev);
>>  }
>>  
>> +static Notifier wakeup;
> 
> I think this should be in sPAPRMachineState rather than global.

Sure.

> 
>> +static void spapr_notify_wakeup(Notifier *notifier, void *data)
>> +{
>> +    WakeupReason *reason = data;
>> +
>> +    switch (*reason) {
>> +    case QEMU_WAKEUP_REASON_RTC:
>> +        break;
>> +    case QEMU_WAKEUP_REASON_PMTIMER:
>> +        break;
>> +    case QEMU_WAKEUP_REASON_OTHER:
>> +        break;
>> +    default:
>> +        break;
>> +    }
> 
> So.. you have a bunch of switch cases, all of which ignore the input..

Yeah I kind of just copy and pasted I think. This part of the patch
may not have been quite as cooked as I remembered :\

>> +}
>> +
>>  /* pSeries LPAR / sPAPR hardware init */
>>  static void spapr_machine_init(MachineState *machine)
>>  {
>> @@ -3078,6 +3100,10 @@ static void spapr_machine_init(MachineState *machine)
>>  
>>      qemu_register_boot_set(spapr_boot_set, spapr);
>>  
>> +    wakeup.notify = spapr_notify_wakeup;
>> +    qemu_register_wakeup_notifier(&wakeup);
>> +    qemu_register_wakeup_support();
>> +
>>      if (kvm_enabled()) {
>>          /* to stop and start vmclock */
>>          qemu_add_vm_change_state_handler(cpu_ppc_clock_vm_state_change,
>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>> index a618a2ac0f..60a007ec38 100644
>> --- a/hw/ppc/spapr_rtas.c
>> +++ b/hw/ppc/spapr_rtas.c
>> @@ -216,6 +216,36 @@ static void rtas_stop_self(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>      qemu_cpu_kick(cs);
>>  }
>>  
>> +static void rtas_ibm_suspend_me(PowerPCCPU *cpu, SpaprMachineState *spapr,
>> +                           uint32_t token, uint32_t nargs,
>> +                           target_ulong args,
>> +                           uint32_t nret, target_ulong rets)
>> +{
>> +    CPUState *cs;
>> +
>> +    if (nargs != 0 || nret != 1) {
>> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>> +        return;
>> +    }
>> +
>> +    CPU_FOREACH(cs) {
>> +        PowerPCCPU *c = POWERPC_CPU(cs);
>> +        CPUPPCState *e = &c->env;
>> +        if (c == cpu)
>> +            continue;
>> +
>> +	/* See h_join */
>> +        if (!cs->halted || (e->msr & (1ULL << MSR_EE))) {
>> +            rtas_st(rets, 0, H_MULTI_THREADS_ACTIVE);
>> +            return;
>> +        }
>> +    }
>> +
>> +    spapr->suspend_reset = true;
>> +    qemu_system_suspend_request();
>> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>> +}
>> +
>>  static inline int sysparm_st(target_ulong addr, target_ulong len,
>>                               const void *val, uint16_t vallen)
>>  {
>> @@ -483,6 +513,8 @@ static void core_rtas_register_types(void)
>>                          rtas_query_cpu_stopped_state);
>>      spapr_rtas_register(RTAS_START_CPU, "start-cpu", rtas_start_cpu);
>>      spapr_rtas_register(RTAS_STOP_SELF, "stop-self", rtas_stop_self);
>> +    spapr_rtas_register(RTAS_IBM_SUSPEND_ME, "ibm,suspend-me",
>> +                        rtas_ibm_suspend_me);
>>      spapr_rtas_register(RTAS_IBM_GET_SYSTEM_PARAMETER,
>>                          "ibm,get-system-parameter",
>>                          rtas_ibm_get_system_parameter);
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 5d36eec9d0..df0b0c15da 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -171,6 +171,10 @@ struct SpaprMachineState {
>>      bool use_hotplug_event_source;
>>      SpaprEventSource *event_sources;
>>  
>> +    /* Machine has been suspended, so the next machine_reset should not
>> +     * reset state, but just return and allow execution to resume. */
>> +    bool suspend_reset;
> 
> Hrm, this seems odd, but maybe it's part of the existing suspend
> design.  Why would system_reset resume a suspend, rather than having a
> specific operation for that.

It is where `system_wakeup` cmd pops out, via qemu_system_reset,
main_loop_should_exit. I'm not sure if we have any existing state
we can use. runstate_is_running() doesn't seem to work because of
CAS I guess (maybe CAS is what makes spapr so much different from
x86 in terms of resetting the world here?)

Thanks,
Nick
David Gibson July 17, 2019, 1:54 a.m. UTC | #3
On Tue, Jul 16, 2019 at 09:15:23PM +1000, Nicholas Piggin wrote:
> David Gibson's on July 16, 2019 6:30 pm:
> > On Tue, Jul 16, 2019 at 12:47:26PM +1000, Nicholas Piggin wrote:
> >> This has been useful to modify and test the Linux pseries suspend
> >> code but it requires modification to the guest to call it (due to
> >> being gated by other unimplemented features). It is not otherwise
> >> used by Linux yet, but work is slowly progressing there.
> >> 
> >> This allows a (lightly modified) guest kernel to suspend with
> >> `echo mem > /sys/power/state` and be resumed with system_wakeup
> >> monitor command.
> >> 
> >> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> >> ---
> >>  hw/ppc/spapr.c         | 26 ++++++++++++++++++++++++++
> >>  hw/ppc/spapr_rtas.c    | 32 ++++++++++++++++++++++++++++++++
> >>  include/hw/ppc/spapr.h |  7 ++++++-
> >>  3 files changed, 64 insertions(+), 1 deletion(-)
> >> 
> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >> index 5c54e1cb9a..b85d41bb1e 100644
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -1710,6 +1710,11 @@ static void spapr_machine_reset(MachineState *machine)
> >>      void *fdt;
> >>      int rc;
> >>  
> >> +    if (spapr->suspend_reset) {
> >> +        spapr->suspend_reset = false;
> > 
> > Do we need to migrate this value?
> 
> I suppose we do if we can migrate a suspended machine?

I don't see why we couldn't.  And it might not happen because of
explicit user choice in a managed environment like RHV or openstack.

> >> +        return;
> >> +    }
> >> +
> >>      spapr_caps_apply(spapr);
> >>  
> >>      first_ppc_cpu = POWERPC_CPU(first_cpu);
> >> @@ -2721,6 +2726,23 @@ static PCIHostState *spapr_create_default_phb(void)
> >>      return PCI_HOST_BRIDGE(dev);
> >>  }
> >>  
> >> +static Notifier wakeup;
> > 
> > I think this should be in sPAPRMachineState rather than global.
> 
> Sure.
> 
> > 
> >> +static void spapr_notify_wakeup(Notifier *notifier, void *data)
> >> +{
> >> +    WakeupReason *reason = data;
> >> +
> >> +    switch (*reason) {
> >> +    case QEMU_WAKEUP_REASON_RTC:
> >> +        break;
> >> +    case QEMU_WAKEUP_REASON_PMTIMER:
> >> +        break;
> >> +    case QEMU_WAKEUP_REASON_OTHER:
> >> +        break;
> >> +    default:
> >> +        break;
> >> +    }
> > 
> > So.. you have a bunch of switch cases, all of which ignore the input..
> 
> Yeah I kind of just copy and pasted I think. This part of the patch
> may not have been quite as cooked as I remembered :\

Heh :).

> >> +}
> >> +
> >>  /* pSeries LPAR / sPAPR hardware init */
> >>  static void spapr_machine_init(MachineState *machine)
> >>  {
> >> @@ -3078,6 +3100,10 @@ static void spapr_machine_init(MachineState *machine)
> >>  
> >>      qemu_register_boot_set(spapr_boot_set, spapr);
> >>  
> >> +    wakeup.notify = spapr_notify_wakeup;
> >> +    qemu_register_wakeup_notifier(&wakeup);
> >> +    qemu_register_wakeup_support();
> >> +
> >>      if (kvm_enabled()) {
> >>          /* to stop and start vmclock */
> >>          qemu_add_vm_change_state_handler(cpu_ppc_clock_vm_state_change,
> >> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> >> index a618a2ac0f..60a007ec38 100644
> >> --- a/hw/ppc/spapr_rtas.c
> >> +++ b/hw/ppc/spapr_rtas.c
> >> @@ -216,6 +216,36 @@ static void rtas_stop_self(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >>      qemu_cpu_kick(cs);
> >>  }
> >>  
> >> +static void rtas_ibm_suspend_me(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >> +                           uint32_t token, uint32_t nargs,
> >> +                           target_ulong args,
> >> +                           uint32_t nret, target_ulong rets)
> >> +{
> >> +    CPUState *cs;
> >> +
> >> +    if (nargs != 0 || nret != 1) {
> >> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> >> +        return;
> >> +    }
> >> +
> >> +    CPU_FOREACH(cs) {
> >> +        PowerPCCPU *c = POWERPC_CPU(cs);
> >> +        CPUPPCState *e = &c->env;
> >> +        if (c == cpu)
> >> +            continue;
> >> +
> >> +	/* See h_join */
> >> +        if (!cs->halted || (e->msr & (1ULL << MSR_EE))) {
> >> +            rtas_st(rets, 0, H_MULTI_THREADS_ACTIVE);
> >> +            return;
> >> +        }
> >> +    }
> >> +
> >> +    spapr->suspend_reset = true;
> >> +    qemu_system_suspend_request();
> >> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> >> +}
> >> +
> >>  static inline int sysparm_st(target_ulong addr, target_ulong len,
> >>                               const void *val, uint16_t vallen)
> >>  {
> >> @@ -483,6 +513,8 @@ static void core_rtas_register_types(void)
> >>                          rtas_query_cpu_stopped_state);
> >>      spapr_rtas_register(RTAS_START_CPU, "start-cpu", rtas_start_cpu);
> >>      spapr_rtas_register(RTAS_STOP_SELF, "stop-self", rtas_stop_self);
> >> +    spapr_rtas_register(RTAS_IBM_SUSPEND_ME, "ibm,suspend-me",
> >> +                        rtas_ibm_suspend_me);
> >>      spapr_rtas_register(RTAS_IBM_GET_SYSTEM_PARAMETER,
> >>                          "ibm,get-system-parameter",
> >>                          rtas_ibm_get_system_parameter);
> >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> >> index 5d36eec9d0..df0b0c15da 100644
> >> --- a/include/hw/ppc/spapr.h
> >> +++ b/include/hw/ppc/spapr.h
> >> @@ -171,6 +171,10 @@ struct SpaprMachineState {
> >>      bool use_hotplug_event_source;
> >>      SpaprEventSource *event_sources;
> >>  
> >> +    /* Machine has been suspended, so the next machine_reset should not
> >> +     * reset state, but just return and allow execution to resume. */
> >> +    bool suspend_reset;
> > 
> > Hrm, this seems odd, but maybe it's part of the existing suspend
> > design.  Why would system_reset resume a suspend, rather than having a
> > specific operation for that.
> 
> It is where `system_wakeup` cmd pops out, via qemu_system_reset,
> main_loop_should_exit. I'm not sure if we have any existing state
> we can use. runstate_is_running() doesn't seem to work because of
> CAS I guess (maybe CAS is what makes spapr so much different from
> x86 in terms of resetting the world here?)

CAS certainly complicates things.
diff mbox series

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 5c54e1cb9a..b85d41bb1e 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1710,6 +1710,11 @@  static void spapr_machine_reset(MachineState *machine)
     void *fdt;
     int rc;
 
+    if (spapr->suspend_reset) {
+        spapr->suspend_reset = false;
+        return;
+    }
+
     spapr_caps_apply(spapr);
 
     first_ppc_cpu = POWERPC_CPU(first_cpu);
@@ -2721,6 +2726,23 @@  static PCIHostState *spapr_create_default_phb(void)
     return PCI_HOST_BRIDGE(dev);
 }
 
+static Notifier wakeup;
+static void spapr_notify_wakeup(Notifier *notifier, void *data)
+{
+    WakeupReason *reason = data;
+
+    switch (*reason) {
+    case QEMU_WAKEUP_REASON_RTC:
+        break;
+    case QEMU_WAKEUP_REASON_PMTIMER:
+        break;
+    case QEMU_WAKEUP_REASON_OTHER:
+        break;
+    default:
+        break;
+    }
+}
+
 /* pSeries LPAR / sPAPR hardware init */
 static void spapr_machine_init(MachineState *machine)
 {
@@ -3078,6 +3100,10 @@  static void spapr_machine_init(MachineState *machine)
 
     qemu_register_boot_set(spapr_boot_set, spapr);
 
+    wakeup.notify = spapr_notify_wakeup;
+    qemu_register_wakeup_notifier(&wakeup);
+    qemu_register_wakeup_support();
+
     if (kvm_enabled()) {
         /* to stop and start vmclock */
         qemu_add_vm_change_state_handler(cpu_ppc_clock_vm_state_change,
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index a618a2ac0f..60a007ec38 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -216,6 +216,36 @@  static void rtas_stop_self(PowerPCCPU *cpu, SpaprMachineState *spapr,
     qemu_cpu_kick(cs);
 }
 
+static void rtas_ibm_suspend_me(PowerPCCPU *cpu, SpaprMachineState *spapr,
+                           uint32_t token, uint32_t nargs,
+                           target_ulong args,
+                           uint32_t nret, target_ulong rets)
+{
+    CPUState *cs;
+
+    if (nargs != 0 || nret != 1) {
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+        return;
+    }
+
+    CPU_FOREACH(cs) {
+        PowerPCCPU *c = POWERPC_CPU(cs);
+        CPUPPCState *e = &c->env;
+        if (c == cpu)
+            continue;
+
+	/* See h_join */
+        if (!cs->halted || (e->msr & (1ULL << MSR_EE))) {
+            rtas_st(rets, 0, H_MULTI_THREADS_ACTIVE);
+            return;
+        }
+    }
+
+    spapr->suspend_reset = true;
+    qemu_system_suspend_request();
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
 static inline int sysparm_st(target_ulong addr, target_ulong len,
                              const void *val, uint16_t vallen)
 {
@@ -483,6 +513,8 @@  static void core_rtas_register_types(void)
                         rtas_query_cpu_stopped_state);
     spapr_rtas_register(RTAS_START_CPU, "start-cpu", rtas_start_cpu);
     spapr_rtas_register(RTAS_STOP_SELF, "stop-self", rtas_stop_self);
+    spapr_rtas_register(RTAS_IBM_SUSPEND_ME, "ibm,suspend-me",
+                        rtas_ibm_suspend_me);
     spapr_rtas_register(RTAS_IBM_GET_SYSTEM_PARAMETER,
                         "ibm,get-system-parameter",
                         rtas_ibm_get_system_parameter);
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 5d36eec9d0..df0b0c15da 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -171,6 +171,10 @@  struct SpaprMachineState {
     bool use_hotplug_event_source;
     SpaprEventSource *event_sources;
 
+    /* Machine has been suspended, so the next machine_reset should not
+     * reset state, but just return and allow execution to resume. */
+    bool suspend_reset;
+
     /* ibm,client-architecture-support option negotiation */
     bool cas_reboot;
     bool cas_legacy_guest_workaround;
@@ -631,8 +635,9 @@  target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
 #define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
 #define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
 #define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
+#define RTAS_IBM_SUSPEND_ME                     (RTAS_TOKEN_BASE + 0x2A)
 
-#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)
+#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2B)
 
 /* RTAS ibm,get-system-parameter token values */
 #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20