diff mbox series

[v2,7/8] ppc/spapr: Implement FWNMI System Reset delivery

Message ID 20200316142613.121089-8-npiggin@gmail.com (mailing list archive)
State New, archived
Headers show
Series FWNMI fixes / changes | expand

Commit Message

Nicholas Piggin March 16, 2020, 2:26 p.m. UTC
PAPR requires that if "ibm,nmi-register" succeeds, then the hypervisor
delivers all system reset and machine check exceptions to the registered
addresses.

System Resets are delivered with registers set to the architected state,
and with no interlock.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
 hw/ppc/spapr.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)

Comments

Mahesh J Salgaonkar March 16, 2020, 5:35 p.m. UTC | #1
On 2020-03-17 00:26:12 Tue, Nicholas Piggin wrote:
> PAPR requires that if "ibm,nmi-register" succeeds, then the hypervisor
> delivers all system reset and machine check exceptions to the registered
> addresses.
> 
> System Resets are delivered with registers set to the architected state,
> and with no interlock.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
>  hw/ppc/spapr.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 44 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 25221d843c..78e649f47d 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -967,7 +967,29 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
>      _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
>                       maxdomains, sizeof(maxdomains)));
> 
> -    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_SIZE));
> +    /*
> +     * FWNMI reserves RTAS_ERROR_LOG_MAX for the machine check error log,
> +     * and 16 bytes per CPU for system reset error log plus an extra 8 bytes.
> +     *
> +     * The system reset requirements are driven by existing Linux and PowerVM
> +     * implementation which (contrary to PAPR) saves r3 in the error log
> +     * structure like machine check, so Linux expects to find the saved r3
> +     * value at the address in r3 upon FWNMI-enabled sreset interrupt (and
> +     * does not look at the error value).
> +     *
> +     * System reset interrupts are not subject to interlock like machine
> +     * check, so this memory area could be corrupted if the sreset is
> +     * interrupted by a machine check (or vice versa) if it was shared. To
> +     * prevent this, system reset uses per-CPU areas for the sreset save
> +     * area. A system reset that interrupts a system reset handler could
> +     * still overwrite this area, but Linux doesn't try to recover in that
> +     * case anyway.
> +     *
> +     * The extra 8 bytes is required because Linux's FWNMI error log check
> +     * is off-by-one.
> +     */
> +    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_ERROR_LOG_MAX +
> +			  ms->smp.max_cpus * sizeof(uint64_t)*2 + sizeof(uint64_t)));

Currently the rtas region is only of size 2048 (i.e RTAS_ERROR_LOG_MAX).
Do we need SLOF change to increase rtas area as well ? Otherwise QEMU
may corrupt guest memory area OR Am I wrong ?

Thanks,
-Mahesh/

>      _FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max",
>                            RTAS_ERROR_LOG_MAX));
>      _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate",
> @@ -3399,8 +3421,28 @@ static void spapr_machine_finalizefn(Object *obj)
> 
>  void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg)
>  {
> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> +
>      cpu_synchronize_state(cs);
> -    ppc_cpu_do_system_reset(cs, -1);
> +    /* If FWNMI is inactive, addr will be -1, which will deliver to 0x100 */
> +    if (spapr->fwnmi_system_reset_addr != -1) {
> +        uint64_t rtas_addr, addr;
> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> +        CPUPPCState *env = &cpu->env;
> +
> +        /* get rtas addr from fdt */
> +        rtas_addr = spapr_get_rtas_addr();
> +        if (!rtas_addr) {
> +            qemu_system_guest_panicked(NULL);
> +            return;
> +        }
> +
> +        addr = rtas_addr + RTAS_ERROR_LOG_MAX + cs->cpu_index * sizeof(uint64_t)*2;
> +        stq_be_phys(&address_space_memory, addr, env->gpr[3]);
> +        stq_be_phys(&address_space_memory, addr + sizeof(uint64_t), 0);
> +        env->gpr[3] = addr;
> +    }
> +    ppc_cpu_do_system_reset(cs, spapr->fwnmi_system_reset_addr);
>  }
> 
>  static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
> -- 
> 2.23.0
> 
>
Greg Kurz March 16, 2020, 5:52 p.m. UTC | #2
On Mon, 16 Mar 2020 23:05:00 +0530
Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:

> On 2020-03-17 00:26:12 Tue, Nicholas Piggin wrote:
> > PAPR requires that if "ibm,nmi-register" succeeds, then the hypervisor
> > delivers all system reset and machine check exceptions to the registered
> > addresses.
> > 
> > System Resets are delivered with registers set to the architected state,
> > and with no interlock.
> > 
> > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > ---
> >  hw/ppc/spapr.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
> >  1 file changed, 44 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 25221d843c..78e649f47d 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -967,7 +967,29 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
> >      _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
> >                       maxdomains, sizeof(maxdomains)));
> > 
> > -    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_SIZE));
> > +    /*
> > +     * FWNMI reserves RTAS_ERROR_LOG_MAX for the machine check error log,
> > +     * and 16 bytes per CPU for system reset error log plus an extra 8 bytes.
> > +     *
> > +     * The system reset requirements are driven by existing Linux and PowerVM
> > +     * implementation which (contrary to PAPR) saves r3 in the error log
> > +     * structure like machine check, so Linux expects to find the saved r3
> > +     * value at the address in r3 upon FWNMI-enabled sreset interrupt (and
> > +     * does not look at the error value).
> > +     *
> > +     * System reset interrupts are not subject to interlock like machine
> > +     * check, so this memory area could be corrupted if the sreset is
> > +     * interrupted by a machine check (or vice versa) if it was shared. To
> > +     * prevent this, system reset uses per-CPU areas for the sreset save
> > +     * area. A system reset that interrupts a system reset handler could
> > +     * still overwrite this area, but Linux doesn't try to recover in that
> > +     * case anyway.
> > +     *
> > +     * The extra 8 bytes is required because Linux's FWNMI error log check
> > +     * is off-by-one.
> > +     */
> > +    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_ERROR_LOG_MAX +
> > +			  ms->smp.max_cpus * sizeof(uint64_t)*2 + sizeof(uint64_t)));
> 
> Currently the rtas region is only of size 2048 (i.e RTAS_ERROR_LOG_MAX).
> Do we need SLOF change to increase rtas area as well ? Otherwise QEMU
> may corrupt guest memory area OR Am I wrong ?
> 

A change is pending for SLOF to use the "rtas-size" property
provided by QEMU:

https://patchwork.ozlabs.org/patch/1255264/

> Thanks,
> -Mahesh/
> 
> >      _FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max",
> >                            RTAS_ERROR_LOG_MAX));
> >      _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate",
> > @@ -3399,8 +3421,28 @@ static void spapr_machine_finalizefn(Object *obj)
> > 
> >  void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg)
> >  {
> > +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> > +
> >      cpu_synchronize_state(cs);
> > -    ppc_cpu_do_system_reset(cs, -1);
> > +    /* If FWNMI is inactive, addr will be -1, which will deliver to 0x100 */
> > +    if (spapr->fwnmi_system_reset_addr != -1) {
> > +        uint64_t rtas_addr, addr;
> > +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> > +        CPUPPCState *env = &cpu->env;
> > +
> > +        /* get rtas addr from fdt */
> > +        rtas_addr = spapr_get_rtas_addr();
> > +        if (!rtas_addr) {
> > +            qemu_system_guest_panicked(NULL);
> > +            return;
> > +        }
> > +
> > +        addr = rtas_addr + RTAS_ERROR_LOG_MAX + cs->cpu_index * sizeof(uint64_t)*2;
> > +        stq_be_phys(&address_space_memory, addr, env->gpr[3]);
> > +        stq_be_phys(&address_space_memory, addr + sizeof(uint64_t), 0);
> > +        env->gpr[3] = addr;
> > +    }
> > +    ppc_cpu_do_system_reset(cs, spapr->fwnmi_system_reset_addr);
> >  }
> > 
> >  static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
> > -- 
> > 2.23.0
> > 
> > 
>
David Gibson March 16, 2020, 11:30 p.m. UTC | #3
On Tue, Mar 17, 2020 at 12:26:12AM +1000, Nicholas Piggin wrote:
> PAPR requires that if "ibm,nmi-register" succeeds, then the hypervisor
> delivers all system reset and machine check exceptions to the registered
> addresses.
> 
> System Resets are delivered with registers set to the architected state,
> and with no interlock.
> 
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>

Applied to ppc-for-5.0.

> ---
>  hw/ppc/spapr.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 44 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 25221d843c..78e649f47d 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -967,7 +967,29 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
>      _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
>                       maxdomains, sizeof(maxdomains)));
>  
> -    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_SIZE));
> +    /*
> +     * FWNMI reserves RTAS_ERROR_LOG_MAX for the machine check error log,
> +     * and 16 bytes per CPU for system reset error log plus an extra 8 bytes.
> +     *
> +     * The system reset requirements are driven by existing Linux and PowerVM
> +     * implementation which (contrary to PAPR) saves r3 in the error log
> +     * structure like machine check, so Linux expects to find the saved r3
> +     * value at the address in r3 upon FWNMI-enabled sreset interrupt (and
> +     * does not look at the error value).
> +     *
> +     * System reset interrupts are not subject to interlock like machine
> +     * check, so this memory area could be corrupted if the sreset is
> +     * interrupted by a machine check (or vice versa) if it was shared. To
> +     * prevent this, system reset uses per-CPU areas for the sreset save
> +     * area. A system reset that interrupts a system reset handler could
> +     * still overwrite this area, but Linux doesn't try to recover in that
> +     * case anyway.
> +     *
> +     * The extra 8 bytes is required because Linux's FWNMI error log check
> +     * is off-by-one.
> +     */
> +    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_ERROR_LOG_MAX +
> +			  ms->smp.max_cpus * sizeof(uint64_t)*2 + sizeof(uint64_t)));
>      _FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max",
>                            RTAS_ERROR_LOG_MAX));
>      _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate",
> @@ -3399,8 +3421,28 @@ static void spapr_machine_finalizefn(Object *obj)
>  
>  void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg)
>  {
> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> +
>      cpu_synchronize_state(cs);
> -    ppc_cpu_do_system_reset(cs, -1);
> +    /* If FWNMI is inactive, addr will be -1, which will deliver to 0x100 */
> +    if (spapr->fwnmi_system_reset_addr != -1) {
> +        uint64_t rtas_addr, addr;
> +        PowerPCCPU *cpu = POWERPC_CPU(cs);
> +        CPUPPCState *env = &cpu->env;
> +
> +        /* get rtas addr from fdt */
> +        rtas_addr = spapr_get_rtas_addr();
> +        if (!rtas_addr) {
> +            qemu_system_guest_panicked(NULL);
> +            return;
> +        }
> +
> +        addr = rtas_addr + RTAS_ERROR_LOG_MAX + cs->cpu_index * sizeof(uint64_t)*2;
> +        stq_be_phys(&address_space_memory, addr, env->gpr[3]);
> +        stq_be_phys(&address_space_memory, addr + sizeof(uint64_t), 0);
> +        env->gpr[3] = addr;
> +    }
> +    ppc_cpu_do_system_reset(cs, spapr->fwnmi_system_reset_addr);
>  }
>  
>  static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)
David Gibson March 16, 2020, 11:31 p.m. UTC | #4
On Mon, Mar 16, 2020 at 06:52:54PM +0100, Greg Kurz wrote:
> On Mon, 16 Mar 2020 23:05:00 +0530
> Mahesh J Salgaonkar <mahesh@linux.vnet.ibm.com> wrote:
> 
> > On 2020-03-17 00:26:12 Tue, Nicholas Piggin wrote:
> > > PAPR requires that if "ibm,nmi-register" succeeds, then the hypervisor
> > > delivers all system reset and machine check exceptions to the registered
> > > addresses.
> > > 
> > > System Resets are delivered with registers set to the architected state,
> > > and with no interlock.
> > > 
> > > Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> > > ---
> > >  hw/ppc/spapr.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
> > >  1 file changed, 44 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > index 25221d843c..78e649f47d 100644
> > > --- a/hw/ppc/spapr.c
> > > +++ b/hw/ppc/spapr.c
> > > @@ -967,7 +967,29 @@ static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
> > >      _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
> > >                       maxdomains, sizeof(maxdomains)));
> > > 
> > > -    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_SIZE));
> > > +    /*
> > > +     * FWNMI reserves RTAS_ERROR_LOG_MAX for the machine check error log,
> > > +     * and 16 bytes per CPU for system reset error log plus an extra 8 bytes.
> > > +     *
> > > +     * The system reset requirements are driven by existing Linux and PowerVM
> > > +     * implementation which (contrary to PAPR) saves r3 in the error log
> > > +     * structure like machine check, so Linux expects to find the saved r3
> > > +     * value at the address in r3 upon FWNMI-enabled sreset interrupt (and
> > > +     * does not look at the error value).
> > > +     *
> > > +     * System reset interrupts are not subject to interlock like machine
> > > +     * check, so this memory area could be corrupted if the sreset is
> > > +     * interrupted by a machine check (or vice versa) if it was shared. To
> > > +     * prevent this, system reset uses per-CPU areas for the sreset save
> > > +     * area. A system reset that interrupts a system reset handler could
> > > +     * still overwrite this area, but Linux doesn't try to recover in that
> > > +     * case anyway.
> > > +     *
> > > +     * The extra 8 bytes is required because Linux's FWNMI error log check
> > > +     * is off-by-one.
> > > +     */
> > > +    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_ERROR_LOG_MAX +
> > > +			  ms->smp.max_cpus * sizeof(uint64_t)*2 + sizeof(uint64_t)));
> > 
> > Currently the rtas region is only of size 2048 (i.e RTAS_ERROR_LOG_MAX).
> > Do we need SLOF change to increase rtas area as well ? Otherwise QEMU
> > may corrupt guest memory area OR Am I wrong ?
> > 
> 
> A change is pending for SLOF to use the "rtas-size" property
> provided by QEMU:
> 
> https://patchwork.ozlabs.org/patch/1255264/

In the meantime, this is still correct.  Because we rebuild the device
tree at CAS time, the qemu supplied value will be the one the guest
sees in the end.  We obviously want that qemu update to avoid
confusion, but we don't need it for things to work.
diff mbox series

Patch

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 25221d843c..78e649f47d 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -967,7 +967,29 @@  static void spapr_dt_rtas(SpaprMachineState *spapr, void *fdt)
     _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains",
                      maxdomains, sizeof(maxdomains)));
 
-    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_SIZE));
+    /*
+     * FWNMI reserves RTAS_ERROR_LOG_MAX for the machine check error log,
+     * and 16 bytes per CPU for system reset error log plus an extra 8 bytes.
+     *
+     * The system reset requirements are driven by existing Linux and PowerVM
+     * implementation which (contrary to PAPR) saves r3 in the error log
+     * structure like machine check, so Linux expects to find the saved r3
+     * value at the address in r3 upon FWNMI-enabled sreset interrupt (and
+     * does not look at the error value).
+     *
+     * System reset interrupts are not subject to interlock like machine
+     * check, so this memory area could be corrupted if the sreset is
+     * interrupted by a machine check (or vice versa) if it was shared. To
+     * prevent this, system reset uses per-CPU areas for the sreset save
+     * area. A system reset that interrupts a system reset handler could
+     * still overwrite this area, but Linux doesn't try to recover in that
+     * case anyway.
+     *
+     * The extra 8 bytes is required because Linux's FWNMI error log check
+     * is off-by-one.
+     */
+    _FDT(fdt_setprop_cell(fdt, rtas, "rtas-size", RTAS_ERROR_LOG_MAX +
+			  ms->smp.max_cpus * sizeof(uint64_t)*2 + sizeof(uint64_t)));
     _FDT(fdt_setprop_cell(fdt, rtas, "rtas-error-log-max",
                           RTAS_ERROR_LOG_MAX));
     _FDT(fdt_setprop_cell(fdt, rtas, "rtas-event-scan-rate",
@@ -3399,8 +3421,28 @@  static void spapr_machine_finalizefn(Object *obj)
 
 void spapr_do_system_reset_on_cpu(CPUState *cs, run_on_cpu_data arg)
 {
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+
     cpu_synchronize_state(cs);
-    ppc_cpu_do_system_reset(cs, -1);
+    /* If FWNMI is inactive, addr will be -1, which will deliver to 0x100 */
+    if (spapr->fwnmi_system_reset_addr != -1) {
+        uint64_t rtas_addr, addr;
+        PowerPCCPU *cpu = POWERPC_CPU(cs);
+        CPUPPCState *env = &cpu->env;
+
+        /* get rtas addr from fdt */
+        rtas_addr = spapr_get_rtas_addr();
+        if (!rtas_addr) {
+            qemu_system_guest_panicked(NULL);
+            return;
+        }
+
+        addr = rtas_addr + RTAS_ERROR_LOG_MAX + cs->cpu_index * sizeof(uint64_t)*2;
+        stq_be_phys(&address_space_memory, addr, env->gpr[3]);
+        stq_be_phys(&address_space_memory, addr + sizeof(uint64_t), 0);
+        env->gpr[3] = addr;
+    }
+    ppc_cpu_do_system_reset(cs, spapr->fwnmi_system_reset_addr);
 }
 
 static void spapr_nmi(NMIState *n, int cpu_index, Error **errp)