diff mbox series

[v8,06/13] acpi/ghes: add support for generic error injection via QAPI

Message ID 2c8970b5d54d17b601dc65d778cc8b5fb288984b.1723793768.git.mchehab+huawei@kernel.org (mailing list archive)
State New, archived
Headers show
Series Add ACPI CPER firmware first error injection on ARM emulation | expand

Commit Message

Mauro Carvalho Chehab Aug. 16, 2024, 7:37 a.m. UTC
Provide a generic interface for error injection via GHESv2.

This patch is co-authored:
    - original ghes logic to inject a simple ARM record by Shiju Jose;
    - generic logic to handle block addresses by Jonathan Cameron;
    - generic GHESv2 error inject by Mauro Carvalho Chehab;

Co-authored-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Co-authored-by: Shiju Jose <shiju.jose@huawei.com>
Co-authored-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 hw/acpi/ghes.c      | 57 +++++++++++++++++++++++++++++++++++++++++++++
 hw/acpi/ghes_cper.c |  2 +-
 2 files changed, 58 insertions(+), 1 deletion(-)

Comments

Igor Mammedov Aug. 19, 2024, 12:51 p.m. UTC | #1
On Fri, 16 Aug 2024 09:37:38 +0200
Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:

> Provide a generic interface for error injection via GHESv2.
> 
> This patch is co-authored:
>     - original ghes logic to inject a simple ARM record by Shiju Jose;
>     - generic logic to handle block addresses by Jonathan Cameron;
>     - generic GHESv2 error inject by Mauro Carvalho Chehab;
> 
> Co-authored-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Co-authored-by: Shiju Jose <shiju.jose@huawei.com>
> Co-authored-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
> ---
>  hw/acpi/ghes.c      | 57 +++++++++++++++++++++++++++++++++++++++++++++
>  hw/acpi/ghes_cper.c |  2 +-
>  2 files changed, 58 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
> index 7870f51e2a9e..a3ae710dcf81 100644
> --- a/hw/acpi/ghes.c
> +++ b/hw/acpi/ghes.c
> @@ -500,6 +500,63 @@ int acpi_ghes_record_errors(enum AcpiGhesNotifyType notify,
>  NotifierList acpi_generic_error_notifiers =
>      NOTIFIER_LIST_INITIALIZER(error_device_notifiers);
>  
> +void ghes_record_cper_errors(uint8_t *cper, size_t len,
> +                             enum AcpiGhesNotifyType notify, Error **errp)
> +{
> +    uint64_t cper_addr, read_ack_start_addr;
> +    enum AcpiHestSourceId source;
> +    AcpiGedState *acpi_ged_state;
> +    AcpiGhesState *ags;
> +    uint64_t read_ack;
> +
> +    if (ghes_notify_to_source_id(notify, &source)) {
> +        error_setg(errp,
> +                   "GHES: Invalid error block/ack address(es) for notify %d",
> +                   notify);
> +        return;
> +    }
> +
> +    acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
> +                                                       NULL));
> +    g_assert(acpi_ged_state);
> +    ags = &acpi_ged_state->ghes_state;
> +
> +    cper_addr = le64_to_cpu(ags->ghes_addr_le);
       ^^^ suggest to rename to error_block_address
       that way reader can easily match it with spec.

> +    cper_addr += ACPI_HEST_SRC_ID_COUNT * sizeof(uint64_t);
and it would be better to merge this with previous line to be more clear
 + to avoid shifting meaning of variable between lines.

> +    read_ack_start_addr = cper_addr + source * sizeof(uint64_t);

> +    cper_addr += ACPI_HEST_SRC_ID_COUNT * sizeof(uint64_t);
> +    cper_addr += source * ACPI_GHES_MAX_RAW_DATA_LENGTH;
I'd avoid changing meaning of variable, it adds up to confusion.
Anyway, what is the point of the above math?

> +
> +    cpu_physical_memory_read(read_ack_start_addr,
> +                             &read_ack, sizeof(uint64_t));
s/sizeof(uint64_t)/sizeof(read_ack)/
ditto elsewhere

> +
> +    /* zero means OSPM does not acknowledge the error */
> +    if (!read_ack) {
> +        error_setg(errp,
> +                   "Last CPER record was not acknowledged yet");

> +        read_ack = 1;
> +        cpu_physical_memory_write(read_ack_start_addr,
> +                                  &read_ack, sizeof(uint64_t));
we don't do this for SEV so, why are you setting it to 1 here?


> +        return;
> +    }
> +
> +    read_ack = cpu_to_le64(0);
> +    cpu_physical_memory_write(read_ack_start_addr,
> +                              &read_ack, sizeof(uint64_t));
> +
> +    /* Build CPER record */
> +
> +    if (len > ACPI_GHES_MAX_RAW_DATA_LENGTH) {
> +        error_setg(errp, "GHES CPER record is too big: %ld", len);
> +    }
move check at start of function?

> +
> +    /* Write the generic error data entry into guest memory */
> +    cpu_physical_memory_write(cper_addr, cper, len);
> +
> +    notifier_list_notify(&acpi_generic_error_notifiers, NULL);
> +}
> +
>  bool acpi_ghes_present(void)
>  {
>      AcpiGedState *acpi_ged_state;
> diff --git a/hw/acpi/ghes_cper.c b/hw/acpi/ghes_cper.c
> index 92ca84d738de..2328dbff7012 100644
> --- a/hw/acpi/ghes_cper.c
> +++ b/hw/acpi/ghes_cper.c
> @@ -29,5 +29,5 @@ void qmp_ghes_cper(const char *qmp_cper,
>          return;
>      }
>  
> -    /* TODO: call a function at ghes */
> +    ghes_record_cper_errors(cper, len, ACPI_GHES_NOTIFY_GPIO, errp);
>  }
Mauro Carvalho Chehab Aug. 25, 2024, 3:29 a.m. UTC | #2
Em Mon, 19 Aug 2024 14:51:36 +0200
Igor Mammedov <imammedo@redhat.com> escreveu:

> > +        read_ack = 1;
> > +        cpu_physical_memory_write(read_ack_start_addr,
> > +                                  &read_ack, sizeof(uint64_t));
> we don't do this for SEV so, why are you setting it to 1 here?

According to:
https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-hardware-error-source-version-2-ghesv2-type-10

   "These are the steps the OS must take once detecting an error from a particular GHESv2 error source:

    OSPM detects error (via interrupt/exception or polling the block status)

    OSPM copies the error status block

    OSPM clears the block status field of the error status block

    OSPM acknowledges the error via Read Ack register. For example:

        OSPM reads the Read Ack register –> X

        OSPM writes –> (( X & ReadAckPreserve) | ReadAckWrite)"


So, basically the guest OS takes some time to detect that an error
is raised. When it detects, it needs to mark that the error was
handled.

IMO, this is needed, independently of the notification mechanism.

Regards,
Mauro
Igor Mammedov Sept. 11, 2024, 1:21 p.m. UTC | #3
On Sun, 25 Aug 2024 05:29:23 +0200
Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:

> Em Mon, 19 Aug 2024 14:51:36 +0200
> Igor Mammedov <imammedo@redhat.com> escreveu:
> 
> > > +        read_ack = 1;
> > > +        cpu_physical_memory_write(read_ack_start_addr,
> > > +                                  &read_ack, sizeof(uint64_t));
> > we don't do this for SEV so, why are you setting it to 1 here?  
> 
> According with:
> https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-hardware-error-source-version-2-ghesv2-type-10
> 
>    "These are the steps the OS must take once detecting an error from a particular GHESv2 error source:
> 
>     OSPM detects error (via interrupt/exception or polling the block status)
> 
>     OSPM copies the error status block
> 
>     OSPM clears the block status field of the error status block
> 
>     OSPM acknowledges the error via Read Ack register. For example:
> 
>         OSPM reads the Read Ack register –> X
> 
>         OSPM writes –> (( X & ReadAckPreserve) | ReadAckWrite)"
> 
> 
> So, basically the guest OS takes some time to detect that an error
> is raised. When it detects, it needs to mark that the error was
> handled.

What you are doing here by setting read_ack = 1
is acknowledging on behalf of OSPM when OSPM hasn't handled the existing error yet.

Essentially making HW/FW do the job of OSPM. That looks wrong to me.
From HW/FW side read_ack register should be thought as read-only.

> 
> IMO, this is needed, independently of the notification mechanism.
> 
> Regards,
> Mauro
>
Jonathan Cameron Sept. 11, 2024, 3:34 p.m. UTC | #4
On Wed, 11 Sep 2024 15:21:32 +0200
Igor Mammedov <imammedo@redhat.com> wrote:

> On Sun, 25 Aug 2024 05:29:23 +0200
> Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
> 
> > Em Mon, 19 Aug 2024 14:51:36 +0200
> > Igor Mammedov <imammedo@redhat.com> escreveu:
> >   
> > > > +        read_ack = 1;
> > > > +        cpu_physical_memory_write(read_ack_start_addr,
> > > > +                                  &read_ack, sizeof(uint64_t));
> > > we don't do this for SEV so, why are you setting it to 1 here?    
> > 
> > According with:
> > https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-hardware-error-source-version-2-ghesv2-type-10
> > 
> >    "These are the steps the OS must take once detecting an error from a particular GHESv2 error source:
> > 
> >     OSPM detects error (via interrupt/exception or polling the block status)
> > 
> >     OSPM copies the error status block
> > 
> >     OSPM clears the block status field of the error status block
> > 
> >     OSPM acknowledges the error via Read Ack register. For example:
> > 
> >         OSPM reads the Read Ack register –> X
> > 
> >         OSPM writes –> (( X & ReadAckPreserve) | ReadAckWrite)"
> > 
> > 
> > So, basically the guest OS takes some time to detect that an error
> > is raised. When it detects, it needs to mark that the error was
> > handled.  
> 
> what you are doing here by setting read_ack = 1,
> is making ack on behalf of OSPM when OSPM haven't handled existing error yet.
> 
> Essentially making HW/FW do the job of OSPM. That looks wrong to me.
> From HW/FW side read_ack register should be thought as read-only.

It's not read-only because HW/FW has to clear it so that HW/FW can detect
when the OSPM next writes it.

Agreed this write to 1 looks wrong, but the one a few lines further down (to zero
it) is correct.

My bug a long time back I think.

Jonathan

> 
> > 
> > IMO, this is needed, independently of the notification mechanism.
> > 
> > Regards,
> > Mauro
> >   
> 
>
Igor Mammedov Sept. 12, 2024, 12:42 p.m. UTC | #5
On Wed, 11 Sep 2024 16:34:36 +0100
Jonathan Cameron <Jonathan.Cameron@Huawei.com> wrote:

> On Wed, 11 Sep 2024 15:21:32 +0200
> Igor Mammedov <imammedo@redhat.com> wrote:
> 
> > On Sun, 25 Aug 2024 05:29:23 +0200
> > Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
> >   
> > > Em Mon, 19 Aug 2024 14:51:36 +0200
> > > Igor Mammedov <imammedo@redhat.com> escreveu:
> > >     
> > > > > +        read_ack = 1;
> > > > > +        cpu_physical_memory_write(read_ack_start_addr,
> > > > > +                                  &read_ack, sizeof(uint64_t));
> > > > we don't do this for SEV so, why are you setting it to 1 here?      
> > > 
> > > According with:
> > > https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-hardware-error-source-version-2-ghesv2-type-10
> > > 
> > >    "These are the steps the OS must take once detecting an error from a particular GHESv2 error source:
> > > 
> > >     OSPM detects error (via interrupt/exception or polling the block status)
> > > 
> > >     OSPM copies the error status block
> > > 
> > >     OSPM clears the block status field of the error status block
> > > 
> > >     OSPM acknowledges the error via Read Ack register. For example:
> > > 
> > >         OSPM reads the Read Ack register –> X
> > > 
> > >         OSPM writes –> (( X & ReadAckPreserve) | ReadAckWrite)"
> > > 
> > > 
> > > So, basically the guest OS takes some time to detect that an error
> > > is raised. When it detects, it needs to mark that the error was
> > > handled.    
> > 
> > what you are doing here by setting read_ack = 1,
> > is making ack on behalf of OSPM when OSPM haven't handled existing error yet.
> > 
> > Essentially making HW/FW do the job of OSPM. That looks wrong to me.
> > From HW/FW side read_ack register should be thought as read-only.  
> 
> It's not read-only because HW/FW has to clear it so that HW/FW can detect
> when the OSPM next writes it.

By readonly, I've meant that hw shall not do above mentioned write
(bad phrasing on my side).

> 
> Agreed this write to 1 looks wrong, but the one a few lines further down (to zero
> it) is correct.

yep, hw should clear register.
It would be better to do so on OSPM ACK, but alas we can't intercept that,
so the next option would be to do that at the time when we add a new error block

> 
> My bug a long time back I think.
> 
> Jonathan
> 
> >   
> > > 
> > > IMO, this is needed, independently of the notification mechanism.
> > > 
> > > Regards,
> > > Mauro
> > >     
> > 
> >   
>
Mauro Carvalho Chehab Sept. 13, 2024, 5:20 a.m. UTC | #6
Em Thu, 12 Sep 2024 14:42:33 +0200
Igor Mammedov <imammedo@redhat.com> escreveu:

> On Wed, 11 Sep 2024 16:34:36 +0100
> Jonathan Cameron <Jonathan.Cameron@Huawei.com> wrote:
> 
> > On Wed, 11 Sep 2024 15:21:32 +0200
> > Igor Mammedov <imammedo@redhat.com> wrote:
> > 
> > > On Sun, 25 Aug 2024 05:29:23 +0200
> > > Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
> > >   
> > > > Em Mon, 19 Aug 2024 14:51:36 +0200
> > > > Igor Mammedov <imammedo@redhat.com> escreveu:
> > > >     
> > > > > > +        read_ack = 1;
> > > > > > +        cpu_physical_memory_write(read_ack_start_addr,
> > > > > > +                                  &read_ack, sizeof(uint64_t));
> > > > > we don't do this for SEV so, why are you setting it to 1 here?    

The diffstat doesn't really help here. The full code is:

    /* zero means OSPM does not acknowledge the error */
    if (!read_ack) {
        error_setg(errp,
                   "Last CPER record was not acknowledged yet");
        read_ack = 1;
        cpu_physical_memory_write(read_ack_start_addr,
                                  &read_ack, sizeof(read_ack));
        return;
    }

> > > what you are doing here by setting read_ack = 1,
> > > is making ack on behalf of OSPM when OSPM haven't handled existing error yet.
> > > 
> > > Essentially making HW/FW do the job of OSPM. That looks wrong to me.
> > > From HW/FW side read_ack register should be thought as read-only.  
> > 
> > It's not read-only because HW/FW has to clear it so that HW/FW can detect
> > when the OSPM next writes it.
> 
> By readonly, I've meant that hw shall not do above mentioned write
> (bad phrasing on my side).

The above code is actually an error handling condition: if for some
reason errors are triggered too fast, there's a bug on QEMU or there is
a bug at the OSPM, an error message is raised and the logic resets the 
record to a sane state. So, on a next error, OSPM will get it.

As described at https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html?highlight=asynchronous#generic-hardware-error-source:

   "Some platforms may describe multiple Generic Hardware Error Source
    structures with different notification types, as defined in 
    Table 18.10. For example, a platform may describe one error source
    for the handling of synchronous errors (e.g. MCE or SEA), and a 
    second source for handling asynchronous errors (e.g. SCI or
    External Interrupt)."

Basically, the error logic there seems to fit for the asynchronous
case, detecting if another error happened before OSPM handles the
first one.

IMO, there are a couple of alternatives to handle such case:

1. Keep the code as-is: if this ever happens, an error message will
   be issued. If SEA/MCE gets implemented synchronously on HW/FW/OSPM,
   the above code will never be called;
2. Change the logic to do that only for asynchronous sources
   (currently, only if source ID is QMP);
3. Add a special QMP message to reset the notification ack. Probably
   would use Notification type as an input parameter;
4. Have a much more complex code to implement asynchronous notifications,
   with a queue to receive HEST errors and a separate thread to deliver
   errors to OSPM asynchronously. If we go this way, QMP would be
   returning the number of error messages queued, allowing error injection
   code to know if OSPM has troubles delivering errors;
5. Just return an error code without doing any resets. To me, this is 
   the worst scenario.

I don't like (5), as if something bad happens, there's nothing to be
done.

For QMP error injection, (4) seems overkill. It may be needed in the
future if we end up implementing logic where the host OS informs the guest
about hardware problems, and such errors use asynchronous notifications.

I would also avoid implementing (3) at least for now, as reporting
such error via QMP seems enough for the QMP usecase.

So, if ok for you, I'll change the code to (2).


> > Agreed this write to 1 looks wrong, but the one a few lines further down (to zero
> > it) is correct.
> 
> yep, hw should clear register.
> It would be better to so on OSPM ACK, but alas we can't intercept that,
> so the next option would be to do that at the time when we add a new error block
> 
> > 
> > My bug a long time back I think.
> > 
> > Jonathan
> > 
> > >   
> > > > 
> > > > IMO, this is needed, independently of the notification mechanism.
> > > > 
> > > > Regards,
> > > > Mauro
> > > >     
> > > 
> > >   
> > 
> 



Thanks,
Mauro
Jonathan Cameron Sept. 13, 2024, 10:13 a.m. UTC | #7
On Fri, 13 Sep 2024 07:20:25 +0200
Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:

> Em Thu, 12 Sep 2024 14:42:33 +0200
> Igor Mammedov <imammedo@redhat.com> escreveu:
> 
> > On Wed, 11 Sep 2024 16:34:36 +0100
> > Jonathan Cameron <Jonathan.Cameron@Huawei.com> wrote:
> >   
> > > On Wed, 11 Sep 2024 15:21:32 +0200
> > > Igor Mammedov <imammedo@redhat.com> wrote:
> > >   
> > > > On Sun, 25 Aug 2024 05:29:23 +0200
> > > > Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
> > > >     
> > > > > Em Mon, 19 Aug 2024 14:51:36 +0200
> > > > > Igor Mammedov <imammedo@redhat.com> escreveu:
> > > > >       
> > > > > > > +        read_ack = 1;
> > > > > > > +        cpu_physical_memory_write(read_ack_start_addr,
> > > > > > > +                                  &read_ack, sizeof(uint64_t));
> > > > > > we don't do this for SEV so, why are you setting it to 1 here?      
> 
> The diffstat doesn't really help here. The full code is:
> 
>     /* zero means OSPM does not acknowledge the error */
>     if (!read_ack) {
>         error_setg(errp,
>                    "Last CPER record was not acknowledged yet");
>         read_ack = 1;
>         cpu_physical_memory_write(read_ack_start_addr,
>                                   &read_ack, sizeof(read_ack));
>         return;
>     }
> 
> > > > what you are doing here by setting read_ack = 1,
> > > > is making ack on behalf of OSPM when OSPM haven't handled existing error yet.
> > > > 
> > > > Essentially making HW/FW do the job of OSPM. That looks wrong to me.
> > > > From HW/FW side read_ack register should be thought as read-only.    
> > > 
> > > It's not read-only because HW/FW has to clear it so that HW/FW can detect
> > > when the OSPM next writes it.  
> > 
> > By readonly, I've meant that hw shall not do above mentioned write
> > (bad phrasing on my side).  
> 
> The above code is actually an error handling condition: if for some
> reason errors are triggered too fast, there's a bug on QEMU or there is
> a bug at the OSPM, an error message is raised and the logic resets the 
> record to a sane state. So, on a next error, OSPM will get it.
> 
> As described at https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html?highlight=asynchronous#generic-hardware-error-source:
> 
>    "Some platforms may describe multiple Generic Hardware Error Source
>     structures with different notification types, as defined in 
>     Table 18.10. For example, a platform may describe one error source
>     for the handling of synchronous errors (e.g. MCE or SEA), and a 
>     second source for handling asynchronous errors (e.g. SCI or
>     External Interrupt)."
> 
> Basically, the error logic there seems to fit for the asynchronous
> case, detecting if another error happened before OSPM handles the
> first one.

Agreed - the error logic to act as backpressure for the tool injecting
the error makes sense - it's just hardware acknowledging to paper
over slow software that is an issue.

> 
> IMO, there are a couple of alternatives to handle such case:
> 
> 1. Keep the code as-is: if this ever happens, an error message will
>    be issued. If SEA/MCE gets implemented synchronously on HW/FW/OSPM,
>    the above code will never be called;
> 2. Change the logic to do that only for asynchronous sources
>    (currently, only if source ID is QMP);
> 3. Add a special QMP message to reset the notification ack. Probably
>    would use Notification type as an input parameter;
> 4. Have a much more complex code to implement asynchronous notifications,
>    with a queue to receive HEST errors and a separate thread to deliver
>    errors to OSPM asynchronously. If we go this way, QMP would be
>    returning the number of error messages queued, allowing error injection
>    code to know if OSPM has troubles delivering errors;

Is this not better done in the injection code outside of qemu?
So detect the error in that and if it happens back off and try again
later?  Basically EBUSY done in an inelegant way.

> 5. Just return an error code without doing any resets. To me, this is 
>    the worse scenario.
> 
> I don't like (5), as if something bad happens, there's nothing to be
> done.

If it happens on a real system nothing is done either. So I'm not sure
we need to handle that.  Or maybe real hardware reinjects the interrupt
if the OSPM hasn't done anything about it for a while.

> 
> For QMP error injection (4) seems is overkill. It may be needed in the
> future if we end implementing a logic where host OS informs guest about
> hardware problems, and such errors use asynchronous notifications.
> 
> I would also avoid implementing (3) at least for now, as reporting
> such error via QMP seems enough for the QMP usecase.
> 
> So, if ok for you, I'll change the code to (2).

Whilst I don't feel strongly about it, I think 5 is unfortunately the
correct option if we aren't going to queue errors in qemu (so make it
an injection tool problem).

> 
> 
> > > Agreed this write to 1 looks wrong, but the one a few lines further down (to zero
> > > it) is correct.  
> > 
> > yep, hw should clear register.
> > It would be better to so on OSPM ACK, but alas we can't intercept that,
> > so the next option would be to do that at the time when we add a new error block
> >   
> > > 
> > > My bug a long time back I think.
> > > 
> > > Jonathan
> > >   
> > > >     
> > > > > 
> > > > > IMO, this is needed, independently of the notification mechanism.
> > > > > 
> > > > > Regards,
> > > > > Mauro
> > > > >       
> > > > 
> > > >     
> > >   
> >   
> 
> 
> 
> Thanks,
> Mauro
Igor Mammedov Sept. 13, 2024, 12:28 p.m. UTC | #8
On Fri, 13 Sep 2024 11:13:00 +0100
Jonathan Cameron <Jonathan.Cameron@Huawei.com> wrote:

> On Fri, 13 Sep 2024 07:20:25 +0200
> Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
> 
> > Em Thu, 12 Sep 2024 14:42:33 +0200
> > Igor Mammedov <imammedo@redhat.com> escreveu:
> >   
> > > On Wed, 11 Sep 2024 16:34:36 +0100
> > > Jonathan Cameron <Jonathan.Cameron@Huawei.com> wrote:
> > >     
> > > > On Wed, 11 Sep 2024 15:21:32 +0200
> > > > Igor Mammedov <imammedo@redhat.com> wrote:
> > > >     
> > > > > On Sun, 25 Aug 2024 05:29:23 +0200
> > > > > Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:
> > > > >       
> > > > > > Em Mon, 19 Aug 2024 14:51:36 +0200
> > > > > > Igor Mammedov <imammedo@redhat.com> escreveu:
> > > > > >         
> > > > > > > > +        read_ack = 1;
> > > > > > > > +        cpu_physical_memory_write(read_ack_start_addr,
> > > > > > > > +                                  &read_ack, sizeof(uint64_t));
> > > > > > > we don't do this for SEV so, why are you setting it to 1 here?        
> > 
> > The diffstat doesn't really help here. The full code is:
> > 
> >     /* zero means OSPM does not acknowledge the error */
> >     if (!read_ack) {
> >         error_setg(errp,
> >                    "Last CPER record was not acknowledged yet");
> >         read_ack = 1;
> >         cpu_physical_memory_write(read_ack_start_addr,
> >                                   &read_ack, sizeof(read_ack));
> >         return;
> >     }
> >   
> > > > > what you are doing here by setting read_ack = 1,
> > > > > is making ack on behalf of OSPM when OSPM haven't handled existing error yet.
> > > > > 
> > > > > Essentially making HW/FW do the job of OSPM. That looks wrong to me.
> > > > > From HW/FW side read_ack register should be thought as read-only.      
> > > > 
> > > > It's not read-only because HW/FW has to clear it so that HW/FW can detect
> > > > when the OSPM next writes it.    
> > > 
> > > By readonly, I've meant that hw shall not do above mentioned write
> > > (bad phrasing on my side).    
> > 
> > The above code is actually an error handling condition: if for some
> > reason errors are triggered too fast, there's a bug on QEMU or there is
> > a bug at the OSPM, an error message is raised and the logic resets the 
> > record to a sane state. So, on a next error, OSPM will get it.
> > 
> > As described at https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html?highlight=asynchronous#generic-hardware-error-source:
> > 
> >    "Some platforms may describe multiple Generic Hardware Error Source
> >     structures with different notification types, as defined in 
> >     Table 18.10. For example, a platform may describe one error source
> >     for the handling of synchronous errors (e.g. MCE or SEA), and a 
> >     second source for handling asynchronous errors (e.g. SCI or
> >     External Interrupt)."
> > 
> > Basically, the error logic there seems to fit for the asynchronous
> > case, detecting if another error happened before OSPM handles the
> > first one.  
> 
> Agreed - the error logic to act as backpressure for the tool injecting
> the error makes sense - it's just hardware acknowledging to paper
> over slow software that is an issue.

On top of that, read_ack is serving as a sync primitive.
If one disregards it and starts overwriting the error block regardless of
the ack value, one will be inducing a race condition, where OSPM might be
accessing error_block while HW is in the process of overwriting it.

> 
> > 
> > IMO, there are a couple of alternatives to handle such case:
> > 
> > 1. Keep the code as-is: if this ever happens, an error message will
> >    be issued. If SEA/MCE gets implemented synchronously on HW/FW/OSPM,
> >    the above code will never be called;
> > 2. Change the logic to do that only for asynchronous sources
> >    (currently, only if source ID is QMP);
> > 3. Add a special QMP message to reset the notification ack. Probably
> >    would use Notification type as an input parameter;
> > 4. Have a much more complex code to implement asynchronous notifications,
> >    with a queue to receive HEST errors and a separate thread to deliver
> >    errors to OSPM asynchronously. If we go this way, QMP would be
> >    returning the number of error messages queued, allowing error injection
> >    code to know if OSPM has troubles delivering errors;  
> 
> Is this not better done in the injection code outside of qemu?
> So detect the error in that and if it happens back off and try again
> later?  Basically EBUSY done in an inelegant way.
> 
> > 5. Just return an error code without doing any resets. To me, this is 
> >    the worse scenario.
> > 
> > I don't like (5), as if something bad happens, there's nothing to be
> > done.  
> 
> If it happens on a real system nothing is done either. So I'm not sure
> we need to handle that.  Or maybe real hardware reinjects the interrupt
> if the OSPM hasn't done anything about it for a while.
> 
> > 
> > For QMP error injection (4) seems is overkill. It may be needed in the
> > future if we end implementing a logic where host OS informs guest about
> > hardware problems, and such errors use asynchronous notifications.
> > 
> > I would also avoid implementing (3) at least for now, as reporting
> > such error via QMP seems enough for the QMP usecase.
> > 
> > So, if ok for you, I'll change the code to (2).  
> 
> Whilst I don't feel strongly about it, I think 5 is unfortunately the
> correct option if we aren't going to queue errors in qemu (so make it
> an injection tool problem).

+1 to option (5)

> > 
> >   
> > > > Agreed this write to 1 looks wrong, but the one a few lines further down (to zero
> > > > it) is correct.    
> > > 
> > > yep, hw should clear register.
> > > It would be better to so on OSPM ACK, but alas we can't intercept that,
> > > so the next option would be to do that at the time when we add a new error block
> > >     
> > > > 
> > > > My bug a long time back I think.
> > > > 
> > > > Jonathan
> > > >     
> > > > >       
> > > > > > 
> > > > > > IMO, this is needed, independently of the notification mechanism.
> > > > > > 
> > > > > > Regards,
> > > > > > Mauro
> > > > > >         
> > > > > 
> > > > >       
> > > >     
> > >     
> > 
> > 
> > 
> > Thanks,
> > Mauro  
>
Mauro Carvalho Chehab Sept. 14, 2024, 5:38 a.m. UTC | #9
Em Fri, 13 Sep 2024 14:28:02 +0200
Igor Mammedov <imammedo@redhat.com> escreveu:

> > > 5. Just return an error code without doing any resets. To me, this is 
> > >    the worse scenario.
> > > 
> > > I don't like (5), as if something bad happens, there's nothing to be
> > > done.    
> > 
> > If it happens on a real system nothing is done either. So I'm not sure
> > we need to handle that.  Or maybe real hardware reinjects the interrupt
> > if the OSPM hasn't done anything about it for a while.
> >   
> > > 
> > > For QMP error injection (4) seems is overkill. It may be needed in the
> > > future if we end implementing a logic where host OS informs guest about
> > > hardware problems, and such errors use asynchronous notifications.
> > > 
> > > I would also avoid implementing (3) at least for now, as reporting
> > > such error via QMP seems enough for the QMP usecase.
> > > 
> > > So, if ok for you, I'll change the code to (2).    
> > 
> > Whilst I don't feel strongly about it, I think 5 is unfortunately the
> > correct option if we aren't going to queue errors in qemu (so make it
> > an injection tool problem).  
> 
> +1 to option (5)

Ok, will do (5) then.

Thanks,
Mauro
diff mbox series

Patch

diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c
index 7870f51e2a9e..a3ae710dcf81 100644
--- a/hw/acpi/ghes.c
+++ b/hw/acpi/ghes.c
@@ -500,6 +500,63 @@  int acpi_ghes_record_errors(enum AcpiGhesNotifyType notify,
 NotifierList acpi_generic_error_notifiers =
     NOTIFIER_LIST_INITIALIZER(error_device_notifiers);
 
+/*
+ * Write a raw CPER record into the error block of the source mapped from
+ * @notify, then notify listeners. On failure @errp is set and neither the
+ * Read Ack register nor the error block is modified.
+ */
+void ghes_record_cper_errors(uint8_t *cper, size_t len,
+                             enum AcpiGhesNotifyType notify, Error **errp)
+{
+    uint64_t cper_addr, read_ack_start_addr;
+    enum AcpiHestSourceId source;
+    AcpiGedState *acpi_ged_state;
+    AcpiGhesState *ags;
+    uint64_t read_ack;
+
+    if (ghes_notify_to_source_id(notify, &source)) {
+        error_setg(errp,
+                   "GHES: Invalid error block/ack address(es) for notify %d",
+                   notify);
+        return;
+    }
+
+    /* Reject oversized records before touching any guest memory */
+    if (len > ACPI_GHES_MAX_RAW_DATA_LENGTH) {
+        error_setg(errp, "GHES CPER record is too big: %zu", len);
+        return;
+    }
+
+    acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED,
+                                                       NULL));
+    g_assert(acpi_ged_state);
+    ags = &acpi_ged_state->ghes_state;
+
+    cper_addr = le64_to_cpu(ags->ghes_addr_le);
+    cper_addr += ACPI_HEST_SRC_ID_COUNT * sizeof(uint64_t);
+    read_ack_start_addr = cper_addr + source * sizeof(uint64_t);
+
+    cper_addr += ACPI_HEST_SRC_ID_COUNT * sizeof(uint64_t);
+    cper_addr += source * ACPI_GHES_MAX_RAW_DATA_LENGTH;
+
+    cpu_physical_memory_read(read_ack_start_addr, &read_ack, sizeof(read_ack));
+
+    /* Zero: OSPM has not acked the last record; never ack on its behalf */
+    if (!read_ack) {
+        error_setg(errp, "Last CPER record was not acknowledged yet");
+        return;
+    }
+
+    /* Clear Read Ack so the next OSPM acknowledgment can be detected */
+    read_ack = cpu_to_le64(0);
+    cpu_physical_memory_write(read_ack_start_addr, &read_ack, sizeof(read_ack));
+
+    /* Write the generic error data entry into guest memory */
+    cpu_physical_memory_write(cper_addr, cper, len);
+
+    notifier_list_notify(&acpi_generic_error_notifiers, NULL);
+}
+
 bool acpi_ghes_present(void)
 {
     AcpiGedState *acpi_ged_state;
diff --git a/hw/acpi/ghes_cper.c b/hw/acpi/ghes_cper.c
index 92ca84d738de..2328dbff7012 100644
--- a/hw/acpi/ghes_cper.c
+++ b/hw/acpi/ghes_cper.c
@@ -29,5 +29,5 @@  void qmp_ghes_cper(const char *qmp_cper,
         return;
     }
 
-    /* TODO: call a function at ghes */
+    ghes_record_cper_errors(cper, len, ACPI_GHES_NOTIFY_GPIO, errp);
 }