Message ID | bed4b2da51e0c894cc255f712b67e2e57295d826.1726293808.git.mchehab+huawei@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Add ACPI CPER firmware first error injection on ARM emulation | expand |
On Sat, 14 Sep 2024 08:13:23 +0200 Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote: > The GHES migration logic at GED should now support HEST table > location too. > > Increase migration version and change needed to check for both > ghes_addr_le and hest_addr_le But I don't think it will work like this (but I might be easily wrong) However I don't know enough to properly review this patch, CCing Peter & Fabiano > Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> > --- > hw/acpi/generic_event_device.c | 11 ++++++----- > 1 file changed, 6 insertions(+), 5 deletions(-) > > diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c > index 15b4c3ebbf24..4e5e387ee2df 100644 > --- a/hw/acpi/generic_event_device.c > +++ b/hw/acpi/generic_event_device.c > @@ -343,10 +343,11 @@ static const VMStateDescription vmstate_ged_state = { > > static const VMStateDescription vmstate_ghes = { > .name = "acpi-ghes", > - .version_id = 1, > - .minimum_version_id = 1, > + .version_id = 2, > + .minimum_version_id = 2, > .fields = (const VMStateField[]) { > VMSTATE_UINT64(ghes_addr_le, AcpiGhesState), > + VMSTATE_UINT64(hest_addr_le, AcpiGhesState), > VMSTATE_END_OF_LIST() > }, > }; > @@ -354,13 +355,13 @@ static const VMStateDescription vmstate_ghes = { > static bool ghes_needed(void *opaque) > { > AcpiGedState *s = opaque; > - return s->ghes_state.ghes_addr_le; > + return s->ghes_state.ghes_addr_le && s->ghes_state.hest_addr_le; > } what I would do: add ghes_needed_v2(): return s->ghes_state.hest_addr_le; and then instead of reusing vmstate_ghes_state would add new vmstate_ghes_v2_state subsection that migrates only VMSTATE_UINT64(hest_addr_le, AcpiGhesState) field. btw: we probably don't need ghes_addr_le for new code that uses HEST to lookup relevant error status block. but we should still keep it for 9.1 and older machine types as they expect/use it. Separate subsections would work with this req just fine. > static const VMStateDescription vmstate_ghes_state = { > .name = "acpi-ged/ghes", > - .version_id = 1, > - .minimum_version_id = 1, > + .version_id = 2, > + .minimum_version_id = 2, > .needed = ghes_needed, > .fields = (const VMStateField[]) { > VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
On Sat, 14 Sep 2024 08:13:23 +0200 Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote: > The GHES migration logic at GED should now support HEST table > location too. > > Increase migration version and change needed to check for both > ghes_addr_le and hest_addr_le. > > Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> > --- > hw/acpi/generic_event_device.c | 11 ++++++----- > 1 file changed, 6 insertions(+), 5 deletions(-) > > diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c > index 15b4c3ebbf24..4e5e387ee2df 100644 > --- a/hw/acpi/generic_event_device.c > +++ b/hw/acpi/generic_event_device.c > @@ -343,10 +343,11 @@ static const VMStateDescription vmstate_ged_state = { > > static const VMStateDescription vmstate_ghes = { > .name = "acpi-ghes", > - .version_id = 1, > - .minimum_version_id = 1, > + .version_id = 2, > + .minimum_version_id = 2, > .fields = (const VMStateField[]) { > VMSTATE_UINT64(ghes_addr_le, AcpiGhesState), > + VMSTATE_UINT64(hest_addr_le, AcpiGhesState), > VMSTATE_END_OF_LIST() > }, > }; > @@ -354,13 +355,13 @@ static const VMStateDescription vmstate_ghes = { > static bool ghes_needed(void *opaque) > { > AcpiGedState *s = opaque; > - return s->ghes_state.ghes_addr_le; ^^^^^^^^^^^^ another thing, perhaps we should rename it to 'hardware_errors_addr' to make it less confusing > + return s->ghes_state.ghes_addr_le && s->ghes_state.hest_addr_le; > } > > static const VMStateDescription vmstate_ghes_state = { > .name = "acpi-ged/ghes", > - .version_id = 1, > - .minimum_version_id = 1, > + .version_id = 2, > + .minimum_version_id = 2, > .needed = ghes_needed, > .fields = (const VMStateField[]) { > VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
On Tue, Sep 17, 2024 at 11:19:21AM +0200, Igor Mammedov wrote: > On Sat, 14 Sep 2024 08:13:23 +0200 > Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote: > > > The GHES migration logic at GED should now support HEST table > > location too. > > > > Increase migration version and change needed to check for both > > ghes_addr_le and hest_addr_le > But I don't think it will work like this (but I might be easily wrong) > However I don't know enough to properly review this patch, CCing Peter & Fabiano > > > Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> > > --- > > hw/acpi/generic_event_device.c | 11 ++++++----- > > 1 file changed, 6 insertions(+), 5 deletions(-) > > > > diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c > > index 15b4c3ebbf24..4e5e387ee2df 100644 > > --- a/hw/acpi/generic_event_device.c > > +++ b/hw/acpi/generic_event_device.c > > @@ -343,10 +343,11 @@ static const VMStateDescription vmstate_ged_state = { > > > > static const VMStateDescription vmstate_ghes = { > > .name = "acpi-ghes", > > - .version_id = 1, > > - .minimum_version_id = 1, > > + .version_id = 2, > > + .minimum_version_id = 2, > > .fields = (const VMStateField[]) { > > VMSTATE_UINT64(ghes_addr_le, AcpiGhesState), > > + VMSTATE_UINT64(hest_addr_le, AcpiGhesState), > > VMSTATE_END_OF_LIST() > > }, > > }; > > @@ -354,13 +355,13 @@ static const VMStateDescription vmstate_ghes = { > > static bool ghes_needed(void *opaque) > > { > > AcpiGedState *s = opaque; > > - return s->ghes_state.ghes_addr_le; > > + return s->ghes_state.ghes_addr_le && s->ghes_state.hest_addr_le; > > } > > what I would do: > add ghes_needed_v2(): return s->ghes_state.hest_addr_le; > > and then instead of reusing vmstate_ghes_state would add new > vmstate_ghes_v2_state subsection that migrates only > VMSTATE_UINT64(hest_addr_le, AcpiGhesState) > field. > > btw: we probably don't need ghes_addr_le for new code that > uses HEST to lookup relevant error status block. > but we should still keep it for 9.1 and older machine types > as they expect/use it. Separate subsections would work with > this req just fine. Right, if we need bi-directional migration we need above and a compat property (or VMSTATE_UINT64_TEST() would work too, iiuc). OTOH VMSD versioning only works for forward migration, not backward. > > > static const VMStateDescription vmstate_ghes_state = { > > .name = "acpi-ged/ghes", > > - .version_id = 1, > > - .minimum_version_id = 1, > > + .version_id = 2, > > + .minimum_version_id = 2, (and IIUC if we set min ver=2, even forward migration should fail.. better test it with an old binary, migrating back and forth) > > .needed = ghes_needed, > > .fields = (const VMStateField[]) { > > VMSTATE_STRUCT(ghes_state, AcpiGedState, 1, > Thanks,
Em Tue, 17 Sep 2024 14:01:46 +0200 Igor Mammedov <imammedo@redhat.com> escreveu: > > @@ -354,13 +355,13 @@ static const VMStateDescription vmstate_ghes = { > > static bool ghes_needed(void *opaque) > > { > > AcpiGedState *s = opaque; > > - return s->ghes_state.ghes_addr_le; > ^^^^^^^^^^^^ > another thing, perhaps we should rename it to 'hardware_errors_addr' > to make it less confusing At the cleanups, I added a rename patch. I opted to a shorter name: hw_error_le. Thanks, Mauro
Em Tue, 17 Sep 2024 11:22:31 -0400 Peter Xu <peterx@redhat.com> escreveu: > On Tue, Sep 17, 2024 at 11:19:21AM +0200, Igor Mammedov wrote: > > On Sat, 14 Sep 2024 08:13:23 +0200 > > Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote: > > > > what I would do: > > add ghes_needed_v2(): return s->ghes_state.hest_addr_le; > > > > and then instead of reusing vmstate_ghes_state would add new > > vmstate_ghes_v2_state subsection that migrates only > > VMSTATE_UINT64(hest_addr_le, AcpiGhesState) > > field. > > > > btw: we probably don't need ghes_addr_le for new code that > > uses HEST to lookup relevant error status block. > > but we should still keep it for 9.1 and older machine types > > as they expect/use it. Separate subsections would work with > > this req just fine. Ok, so, if I understood it right, the enclosed patch should do the job, right? > Right, if we need bi-directional migration we need above and a compat > property (or VMSTATE_UINT64_TEST() would work too, iiuc). > > OTOH VMSD versioning only works for forward migration, not backward. I don't think bi-directional migration would work. See, with old version, we have: - Just one Error source block structure, only for ARMv8 using synchronous notification (SEA). - The offsets to access the error block structure and the memory position where the OSPM will acknowledge the error assumes a single error source; - such offsets come from ghes_addr_le bios pointer (we will rename it to hw_addr_le at the cleanup patch series). With the new versions, we'll have: - at least two notification sources on ARMv8 (SEA and GPIO). We may end adding more with time; - other architectures may also have support for bios-first hardware errors; - the number of error block structures is now read from HEST table (so it needs that hest_addr_le will be stored at AcpiGedState; - the offsets to retrieve the addresses are now relative to the offset at hest_addr_le. The new error injection code, which uses either hest_addr_le or ghes_addr_le (now hw_addr_le) should be able to properly generate errors from a VM created on 9.1, as it will check if hest_addr_le is not zero. If it is zero, it will call a backward-compatible code: acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED, NULL)); if (!acpi_ged_state) { error_setg(errp, "Can't find ACPI_GED object"); return; } ags = &acpi_ged_state->ghes_state; if (!ags->hest_addr_le) { /* Assumes just a single source_id */ get_ghes_offsets(le64_to_cpu(ags->hw_error_le), &cper_addr, &read_ack_register_addr); } else { /* do a for at the HEST table seeking for an specific source_id */ get_hest_offsets(source_id, le64_to_cpu(ags->hest_addr_le), &cper_addr, &read_ack_register_addr, errp); } Now, a VM created with 9.2 will have multiple sources. The location of the read_ack_register_addr depends on the number of sources, which can't be retrieved without a VIOS pointer to the location of the HEST table (e. g. ags->hest_addr_le). So, a 9.1 QEMU with a VM created on 9.2 won't be doing the right thing with regards to the vaule of the ack offset, thus with RAS errors not working. I can't see any way to make it work. > > > > > static const VMStateDescription vmstate_ghes_state = { > > > .name = "acpi-ged/ghes", > > > - .version_id = 1, > > > - .minimum_version_id = 1, > > > + .version_id = 2, > > > + .minimum_version_id = 2, > > (and IIUC if we set min ver=2, even forward migration should fail.. better > test it with an old binary, migrating back and forth) > > > > .needed = ghes_needed, > > > .fields = (const VMStateField[]) { > > > VMSTATE_STRUCT(ghes_state, AcpiGedState, 1, > > > > Thanks, > Thanks, Mauro --- [PATCH] acpi/generic_event_device: Update GHES migration to cover hest addr The GHES migration logic at GED should now support HEST table location too. Increase migration version and change needed to check for both ghes_addr_le and hest_addr_le. Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c index c5acfb204e5f..bd996d01390c 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c @@ -377,6 +377,34 @@ static const VMStateDescription vmstate_ghes_state = { } }; +static const VMStateDescription vmstate_hest = { + .name = "acpi-ghes", + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_UINT64(hest_addr_le, AcpiGhesState), + VMSTATE_END_OF_LIST() + }, +}; + +static bool hest_needed(void *opaque) +{ + AcpiGedState *s = opaque; + return s->ghes_state.hest_addr_le; +} + +static const VMStateDescription vmstate_hest_state = { + .name = "acpi-ged/ghes", + .version_id = 1, + .minimum_version_id = 1, + .needed = hest_needed, + .fields = (const VMStateField[]) { + VMSTATE_STRUCT(ghes_state, AcpiGedState, 1, + vmstate_hest, AcpiGhesState), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_acpi_ged = { .name = "acpi-ged", .version_id = 1, @@ -388,6 +416,7 @@ static const VMStateDescription vmstate_acpi_ged = { .subsections = (const VMStateDescription * const []) { &vmstate_memhp_state, &vmstate_ghes_state, + &vmstate_hest_state, NULL } };
diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c index 15b4c3ebbf24..4e5e387ee2df 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c @@ -343,10 +343,11 @@ static const VMStateDescription vmstate_ged_state = { static const VMStateDescription vmstate_ghes = { .name = "acpi-ghes", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, .fields = (const VMStateField[]) { VMSTATE_UINT64(ghes_addr_le, AcpiGhesState), + VMSTATE_UINT64(hest_addr_le, AcpiGhesState), VMSTATE_END_OF_LIST() }, }; @@ -354,13 +355,13 @@ static const VMStateDescription vmstate_ghes = { static bool ghes_needed(void *opaque) { AcpiGedState *s = opaque; - return s->ghes_state.ghes_addr_le; + return s->ghes_state.ghes_addr_le && s->ghes_state.hest_addr_le; } static const VMStateDescription vmstate_ghes_state = { .name = "acpi-ged/ghes", - .version_id = 1, - .minimum_version_id = 1, + .version_id = 2, + .minimum_version_id = 2, .needed = ghes_needed, .fields = (const VMStateField[]) { VMSTATE_STRUCT(ghes_state, AcpiGedState, 1,
The GHES migration logic at GED should now support HEST table location too. Increase migration version and change needed to check for both ghes_addr_le and hest_addr_le. Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org> --- hw/acpi/generic_event_device.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-)