diff mbox series

[RFC,1/1] ACPI / APEI: Fix for overwriting aer info when error status data have multiple sections

Message ID 20230915174435.779-1-shiju.jose@huawei.com (mailing list archive)
State Superseded
Headers show
Series [RFC,1/1] ACPI / APEI: Fix for overwriting aer info when error status data have multiple sections | expand

Commit Message

Shiju Jose Sept. 15, 2023, 5:44 p.m. UTC
From: Shiju Jose <shiju.jose@huawei.com>

ghes_handle_aer() lacks synchronization with aer_recover_work_func(),
so when error status data have multiple sections, aer_recover_work_func()
may use estatus data for aer_capability_regs after it has been overwritten.

The problem statement is here,
https://lore.kernel.org/all/20230901225755.GA90053@bhelgaas/

In ghes_handle_aer() allocates memory for aer_capability_regs from the
ghes_estatus_pool and copy data for aer_capability_regs from the estatus
buffer. Free the memory in aer_recover_work_func() after processing the
data using the ghes_estatus_pool_region_free() added.

Reported-by: Bjorn Helgaas <helgaas@kernel.org>
Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
---
 drivers/acpi/apei/ghes.c | 23 ++++++++++++++++++++++-
 drivers/pci/pcie/aer.c   | 10 ++++++++++
 include/acpi/ghes.h      |  1 +
 3 files changed, 33 insertions(+), 1 deletion(-)

Comments

Bjorn Helgaas Sept. 15, 2023, 10:02 p.m. UTC | #1
[+cc Dave, since CXL is also fiddling with aer.c]

On Sat, Sep 16, 2023 at 01:44:35AM +0800, shiju.jose@huawei.com wrote:
> From: Shiju Jose <shiju.jose@huawei.com>
> 
> ghes_handle_aer() lacks synchronization with aer_recover_work_func(),
> so when error status data have multiple sections, aer_recover_work_func()
> may use estatus data for aer_capability_regs after it has been overwritten.
> 
> The problem statement is here,
> https://lore.kernel.org/all/20230901225755.GA90053@bhelgaas/
> 
> In ghes_handle_aer() allocates memory for aer_capability_regs from the
> ghes_estatus_pool and copy data for aer_capability_regs from the estatus
> buffer. Free the memory in aer_recover_work_func() after processing the
> data using the ghes_estatus_pool_region_free() added.

Thanks for working this up!  I had it on my to-do list, but obviously
had not gotten to it yet.

> Reported-by: Bjorn Helgaas <helgaas@kernel.org>
> Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
> ---
>  drivers/acpi/apei/ghes.c | 23 ++++++++++++++++++++++-
>  drivers/pci/pcie/aer.c   | 10 ++++++++++
>  include/acpi/ghes.h      |  1 +
>  3 files changed, 33 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index ef59d6ea16da..63ad0541db38 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -209,6 +209,20 @@ int ghes_estatus_pool_init(unsigned int num_ghes)
>  	return -ENOMEM;
>  }
>  
> +/**
> + * ghes_estatus_pool_region_free - free previously allocated memory
> + *				   from the ghes_estatus_pool.
> + * @addr: address of memory to free.
> + * @size: size of memory to free.
> + *
> + * Returns none.
> + */
> +void ghes_estatus_pool_region_free(unsigned long addr, u32 size)
> +{
> +	gen_pool_free(ghes_estatus_pool, addr, size);
> +}
> +EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free);
> +
>  static int map_gen_v2(struct ghes *ghes)
>  {
>  	return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
> @@ -564,6 +578,7 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
>  	    pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
>  		unsigned int devfn;
>  		int aer_severity;
> +		u8 *aer_info;
>  
>  		devfn = PCI_DEVFN(pcie_err->device_id.device,
>  				  pcie_err->device_id.function);
> @@ -577,11 +592,17 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
>  		if (gdata->flags & CPER_SEC_RESET)
>  			aer_severity = AER_FATAL;
>  
> +		aer_info = (void *)gen_pool_alloc(ghes_estatus_pool,
> +						  sizeof(struct aer_capability_regs));
> +		if (!aer_info)
> +			return;
> +		memcpy(aer_info, pcie_err->aer_info, sizeof(struct aer_capability_regs));

This is a very straightforward approach to fixing this, and it looks
pretty reasonable, although I'd rather not have to pull more GHES
stuff into aer.c (ghes.h and ghes_estatus_pool_region_free()).

What I had in mind was to put a queue of aer_capability_regs on the
PCI side that could be used by both the APEI path and the native path.

In the APEI path, platform firmware reads the error information from
the hardware, and it feeds into PCI AER via aer_recover_queue().

In the native path, Linux should be reading reads the same error
information from the hardware, but it feeds into PCI AER quite
differently, via aer_process_err_devices() and handle_error_source().

These paths are fundamentally doing the exact same thing, but the data
handling and dmesg logging are needlessly different.  I'd like to see
them get a little more unified, so the native path could someday also
feed into aer_recover_queue().

Does that make any sense?

>  		aer_recover_queue(pcie_err->device_id.segment,
>  				  pcie_err->device_id.bus,
>  				  devfn, aer_severity,
>  				  (struct aer_capability_regs *)
> -				  pcie_err->aer_info);
> +				  aer_info);
>  	}
>  #endif
>  }
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index e85ff946e8c8..388b614c11fd 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -29,6 +29,7 @@
>  #include <linux/kfifo.h>
>  #include <linux/slab.h>
>  #include <acpi/apei.h>
> +#include <acpi/ghes.h>
>  #include <ras/ras_event.h>
>  
>  #include "../pci.h"
> @@ -996,6 +997,15 @@ static void aer_recover_work_func(struct work_struct *work)
>  			continue;
>  		}
>  		cper_print_aer(pdev, entry.severity, entry.regs);
> +		/*
> +		 * Memory for aer_capability_regs(entry.regs) is being allocated from the
> +		 * ghes_estatus_pool to protect it from overwriting when multiple sections
> +		 * are present in the error status. Thus free the same after processing
> +		 * the data.
> +		 */
> +		ghes_estatus_pool_region_free((unsigned long)entry.regs,
> +					      sizeof(struct aer_capability_regs));
> +
>  		if (entry.severity == AER_NONFATAL)
>  			pcie_do_recovery(pdev, pci_channel_io_normal,
>  					 aer_root_reset);
> diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
> index 3c8bba9f1114..40d89e161076 100644
> --- a/include/acpi/ghes.h
> +++ b/include/acpi/ghes.h
> @@ -78,6 +78,7 @@ static inline struct list_head *ghes_get_devices(void) { return NULL; }
>  #endif
>  
>  int ghes_estatus_pool_init(unsigned int num_ghes);
> +void ghes_estatus_pool_region_free(unsigned long addr, u32 size);
>  
>  static inline int acpi_hest_get_version(struct acpi_hest_generic_data *gdata)
>  {
> -- 
> 2.34.1
>
Shiju Jose Sept. 18, 2023, 1:07 p.m. UTC | #2
Hi Bjorn,

>-----Original Message-----
>From: Bjorn Helgaas <helgaas@kernel.org>
>Sent: 15 September 2023 23:02
>To: Shiju Jose <shiju.jose@huawei.com>
>Cc: rafael@kernel.org; lenb@kernel.org; tony.luck@intel.com;
>james.morse@arm.com; bp@alien8.de; ying.huang@intel.com; linux-
>acpi@vger.kernel.org; linux-pci@vger.kernel.org; linux-kernel@vger.kernel.org;
>Linuxarm <linuxarm@huawei.com>; Jonathan Cameron
><jonathan.cameron@huawei.com>; tanxiaofei <tanxiaofei@huawei.com>;
>Zengtao (B) <prime.zeng@hisilicon.com>; Dave Jiang <dave.jiang@intel.com>
>Subject: Re: [RFC PATCH 1/1] ACPI / APEI: Fix for overwriting aer info when error
>status data have multiple sections
>
>[+cc Dave, since CXL is also fiddling with aer.c]
>
>On Sat, Sep 16, 2023 at 01:44:35AM +0800, shiju.jose@huawei.com wrote:
>> From: Shiju Jose <shiju.jose@huawei.com>
>>
>> ghes_handle_aer() lacks synchronization with aer_recover_work_func(),
>> so when error status data have multiple sections,
>> aer_recover_work_func() may use estatus data for aer_capability_regs after it
>has been overwritten.
>>
>> The problem statement is here,
>> https://lore.kernel.org/all/20230901225755.GA90053@bhelgaas/
>>
>> In ghes_handle_aer() allocates memory for aer_capability_regs from the
>> ghes_estatus_pool and copy data for aer_capability_regs from the
>> estatus buffer. Free the memory in aer_recover_work_func() after
>> processing the data using the ghes_estatus_pool_region_free() added.
>
>Thanks for working this up!  I had it on my to-do list, but obviously had not gotten
>to it yet.
>
>> Reported-by: Bjorn Helgaas <helgaas@kernel.org>
>> Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
>> ---
>>  drivers/acpi/apei/ghes.c | 23 ++++++++++++++++++++++-
>>  drivers/pci/pcie/aer.c   | 10 ++++++++++
>>  include/acpi/ghes.h      |  1 +
>>  3 files changed, 33 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index
>> ef59d6ea16da..63ad0541db38 100644
>> --- a/drivers/acpi/apei/ghes.c
>> +++ b/drivers/acpi/apei/ghes.c
>> @@ -209,6 +209,20 @@ int ghes_estatus_pool_init(unsigned int num_ghes)
>>  	return -ENOMEM;
>>  }
>>
>> +/**
>> + * ghes_estatus_pool_region_free - free previously allocated memory
>> + *				   from the ghes_estatus_pool.
>> + * @addr: address of memory to free.
>> + * @size: size of memory to free.
>> + *
>> + * Returns none.
>> + */
>> +void ghes_estatus_pool_region_free(unsigned long addr, u32 size) {
>> +	gen_pool_free(ghes_estatus_pool, addr, size); }
>> +EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free);
>> +
>>  static int map_gen_v2(struct ghes *ghes)  {
>>  	return
>> apei_map_generic_address(&ghes->generic_v2->read_ack_register);
>> @@ -564,6 +578,7 @@ static void ghes_handle_aer(struct
>acpi_hest_generic_data *gdata)
>>  	    pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
>>  		unsigned int devfn;
>>  		int aer_severity;
>> +		u8 *aer_info;
>>
>>  		devfn = PCI_DEVFN(pcie_err->device_id.device,
>>  				  pcie_err->device_id.function);
>> @@ -577,11 +592,17 @@ static void ghes_handle_aer(struct
>acpi_hest_generic_data *gdata)
>>  		if (gdata->flags & CPER_SEC_RESET)
>>  			aer_severity = AER_FATAL;
>>
>> +		aer_info = (void *)gen_pool_alloc(ghes_estatus_pool,
>> +						  sizeof(struct
>aer_capability_regs));
>> +		if (!aer_info)
>> +			return;
>> +		memcpy(aer_info, pcie_err->aer_info, sizeof(struct
>> +aer_capability_regs));
>
>This is a very straightforward approach to fixing this, and it looks pretty
>reasonable, although I'd rather not have to pull more GHES stuff into aer.c
>(ghes.h and ghes_estatus_pool_region_free()).
>
>What I had in mind was to put a queue of aer_capability_regs on the PCI side
>that could be used by both the APEI path and the native path.
>
>In the APEI path, platform firmware reads the error information from the
>hardware, and it feeds into PCI AER via aer_recover_queue().
>
>In the native path, Linux should be reading reads the same error information
>from the hardware, but it feeds into PCI AER quite differently, via
>aer_process_err_devices() and handle_error_source().
>
>These paths are fundamentally doing the exact same thing, but the data
>handling and dmesg logging are needlessly different.  I'd like to see them get a
>little more unified, so the native path could someday also feed into
>aer_recover_queue().
>
>Does that make any sense?

Thanks for letting us know.
Make sense, solution with in the AER looks better. 

>
>>  		aer_recover_queue(pcie_err->device_id.segment,
>>  				  pcie_err->device_id.bus,
>>  				  devfn, aer_severity,
>>  				  (struct aer_capability_regs *)
>> -				  pcie_err->aer_info);
>> +				  aer_info);
>>  	}
>>  #endif
>>  }
>> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index
>> e85ff946e8c8..388b614c11fd 100644
>> --- a/drivers/pci/pcie/aer.c
>> +++ b/drivers/pci/pcie/aer.c
>> @@ -29,6 +29,7 @@
>>  #include <linux/kfifo.h>
>>  #include <linux/slab.h>
>>  #include <acpi/apei.h>
>> +#include <acpi/ghes.h>
>>  #include <ras/ras_event.h>
>>
>>  #include "../pci.h"
>> @@ -996,6 +997,15 @@ static void aer_recover_work_func(struct work_struct
>*work)
>>  			continue;
>>  		}
>>  		cper_print_aer(pdev, entry.severity, entry.regs);
>> +		/*
>> +		 * Memory for aer_capability_regs(entry.regs) is being
>allocated from the
>> +		 * ghes_estatus_pool to protect it from overwriting when
>multiple sections
>> +		 * are present in the error status. Thus free the same after
>processing
>> +		 * the data.
>> +		 */
>> +		ghes_estatus_pool_region_free((unsigned long)entry.regs,
>> +					      sizeof(struct aer_capability_regs));
>> +
>>  		if (entry.severity == AER_NONFATAL)
>>  			pcie_do_recovery(pdev, pci_channel_io_normal,
>>  					 aer_root_reset);
>> diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index
>> 3c8bba9f1114..40d89e161076 100644
>> --- a/include/acpi/ghes.h
>> +++ b/include/acpi/ghes.h
>> @@ -78,6 +78,7 @@ static inline struct list_head
>> *ghes_get_devices(void) { return NULL; }  #endif
>>
>>  int ghes_estatus_pool_init(unsigned int num_ghes);
>> +void ghes_estatus_pool_region_free(unsigned long addr, u32 size);
>>
>>  static inline int acpi_hest_get_version(struct acpi_hest_generic_data
>> *gdata)  {
>> --
>> 2.34.1
>>

Thanks,
Shiju
Bjorn Helgaas Sept. 18, 2023, 6:16 p.m. UTC | #3
[+to Rafael, probably the logical place to apply this]

On Mon, Sep 18, 2023 at 01:07:45PM +0000, Shiju Jose wrote:
> >-----Original Message-----
> >From: Bjorn Helgaas <helgaas@kernel.org>
> > ...

> >On Sat, Sep 16, 2023 at 01:44:35AM +0800, shiju.jose@huawei.com wrote:
> >> From: Shiju Jose <shiju.jose@huawei.com>
> >>
> >> ghes_handle_aer() lacks synchronization with
> >> aer_recover_work_func(), so when error status data have multiple
> >> sections, aer_recover_work_func() may use estatus data for
> >> aer_capability_regs after it has been overwritten.
> >>
> >> The problem statement is here,
> >> https://lore.kernel.org/all/20230901225755.GA90053@bhelgaas/
> >>
> >> In ghes_handle_aer() allocates memory for aer_capability_regs from the
> >> ghes_estatus_pool and copy data for aer_capability_regs from the
> >> estatus buffer. Free the memory in aer_recover_work_func() after
> >> processing the data using the ghes_estatus_pool_region_free() added.
> >
> >Thanks for working this up!  I had it on my to-do list, but
> >obviously had not gotten to it yet.
> >
> >> Reported-by: Bjorn Helgaas <helgaas@kernel.org>
> >> Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
> >> ---
> >>  drivers/acpi/apei/ghes.c | 23 ++++++++++++++++++++++-
> >>  drivers/pci/pcie/aer.c   | 10 ++++++++++
> >>  include/acpi/ghes.h      |  1 +
> >>  3 files changed, 33 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index
> >> ef59d6ea16da..63ad0541db38 100644
> >> --- a/drivers/acpi/apei/ghes.c
> >> +++ b/drivers/acpi/apei/ghes.c
> >> @@ -209,6 +209,20 @@ int ghes_estatus_pool_init(unsigned int num_ghes)
> >>  	return -ENOMEM;
> >>  }
> >>
> >> +/**
> >> + * ghes_estatus_pool_region_free - free previously allocated memory
> >> + *				   from the ghes_estatus_pool.
> >> + * @addr: address of memory to free.
> >> + * @size: size of memory to free.
> >> + *
> >> + * Returns none.
> >> + */
> >> +void ghes_estatus_pool_region_free(unsigned long addr, u32 size) {
> >> +	gen_pool_free(ghes_estatus_pool, addr, size); }
> >> +EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free);
> >> +
> >>  static int map_gen_v2(struct ghes *ghes)  {
> >>  	return
> >> apei_map_generic_address(&ghes->generic_v2->read_ack_register);
> >> @@ -564,6 +578,7 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
> >>  	    pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
> >>  		unsigned int devfn;
> >>  		int aer_severity;
> >> +		u8 *aer_info;
> >>
> >>  		devfn = PCI_DEVFN(pcie_err->device_id.device,
> >>  				  pcie_err->device_id.function);
> >> @@ -577,11 +592,17 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
> >>  		if (gdata->flags & CPER_SEC_RESET)
> >>  			aer_severity = AER_FATAL;
> >>
> >> +		aer_info = (void *)gen_pool_alloc(ghes_estatus_pool,
> >> +						  sizeof(struct aer_capability_regs));
> >> +		if (!aer_info)
> >> +			return;
> >> +		memcpy(aer_info, pcie_err->aer_info, sizeof(struct
> >> +aer_capability_regs));
> >
> >This is a very straightforward approach to fixing this, and it looks pretty
> >reasonable, although I'd rather not have to pull more GHES stuff into aer.c
> >(ghes.h and ghes_estatus_pool_region_free()).
> >
> >What I had in mind was to put a queue of aer_capability_regs on the PCI side
> >that could be used by both the APEI path and the native path.
> >
> >In the APEI path, platform firmware reads the error information from the
> >hardware, and it feeds into PCI AER via aer_recover_queue().
> >
> >In the native path, Linux should be reading reads the same error information
> >from the hardware, but it feeds into PCI AER quite differently, via
> >aer_process_err_devices() and handle_error_source().
> >
> >These paths are fundamentally doing the exact same thing, but the data
> >handling and dmesg logging are needlessly different.  I'd like to see them get a
> >little more unified, so the native path could someday also feed into
> >aer_recover_queue().
> >
> >Does that make any sense?
> 
> Thanks for letting us know.
> Make sense, solution with in the AER looks better. 

Thanks for considering this.  The AER rework I have in mind is a
longer-term project, so unless you have time and interest in doing
that, I think we should apply your patch since it's all ready to go.

You pointed to the problem statement, but I think it would be good to
include a brief outline directly in the commit log, e.g., something
like this:

  ghes_handle_aer() passes AER data to the PCI core for logging and
  recovery by calling aer_recover_queue() with a pointer to struct
  aer_capability_regs.

  The problem was that aer_recover_queue() queues the pointer directly
  without copying the aer_capability_regs data.  The pointer was to
  the ghes->estatus buffer, which could be reused before
  aer_recover_work_func() reads the data.

  To avoid this problem, allocate a new aer_capability_regs structure
  from the ghes_estatus_pool, copy the AER data from the ghes->estatus
  buffer into it, pass a pointer to the new struct to
  aer_recover_queue(), and free it after aer_recover_work_func() has
  processed it.

What's your take on this, Rafael?  The biggest change is in
drivers/acpi, so if you want to take it, feel free to add my:

  Acked-by: Bjorn Helgaas <bhelgaas@google.com>

> >>  		aer_recover_queue(pcie_err->device_id.segment,
> >>  				  pcie_err->device_id.bus,
> >>  				  devfn, aer_severity,
> >>  				  (struct aer_capability_regs *)
> >> -				  pcie_err->aer_info);
> >> +				  aer_info);
> >>  	}
> >>  #endif
> >>  }
> >> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index
> >> e85ff946e8c8..388b614c11fd 100644
> >> --- a/drivers/pci/pcie/aer.c
> >> +++ b/drivers/pci/pcie/aer.c
> >> @@ -29,6 +29,7 @@
> >>  #include <linux/kfifo.h>
> >>  #include <linux/slab.h>
> >>  #include <acpi/apei.h>
> >> +#include <acpi/ghes.h>
> >>  #include <ras/ras_event.h>
> >>
> >>  #include "../pci.h"
> >> @@ -996,6 +997,15 @@ static void aer_recover_work_func(struct work_struct
> >*work)
> >>  			continue;
> >>  		}
> >>  		cper_print_aer(pdev, entry.severity, entry.regs);
> >> +		/*
> >> +		 * Memory for aer_capability_regs(entry.regs) is being
> >allocated from the
> >> +		 * ghes_estatus_pool to protect it from overwriting when
> >multiple sections
> >> +		 * are present in the error status. Thus free the same after
> >processing
> >> +		 * the data.
> >> +		 */
> >> +		ghes_estatus_pool_region_free((unsigned long)entry.regs,
> >> +					      sizeof(struct aer_capability_regs));
> >> +
> >>  		if (entry.severity == AER_NONFATAL)
> >>  			pcie_do_recovery(pdev, pci_channel_io_normal,
> >>  					 aer_root_reset);
> >> diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index
> >> 3c8bba9f1114..40d89e161076 100644
> >> --- a/include/acpi/ghes.h
> >> +++ b/include/acpi/ghes.h
> >> @@ -78,6 +78,7 @@ static inline struct list_head
> >> *ghes_get_devices(void) { return NULL; }  #endif
> >>
> >>  int ghes_estatus_pool_init(unsigned int num_ghes);
> >> +void ghes_estatus_pool_region_free(unsigned long addr, u32 size);
> >>
> >>  static inline int acpi_hest_get_version(struct acpi_hest_generic_data
> >> *gdata)  {
> >> --
> >> 2.34.1
> >>
> 
> Thanks,
> Shiju
Rafael J. Wysocki Sept. 18, 2023, 6:22 p.m. UTC | #4
On Mon, Sep 18, 2023 at 8:16 PM Bjorn Helgaas <helgaas@kernel.org> wrote:
>
> [+to Rafael, probably the logical place to apply this]
>
> On Mon, Sep 18, 2023 at 01:07:45PM +0000, Shiju Jose wrote:
> > >-----Original Message-----
> > >From: Bjorn Helgaas <helgaas@kernel.org>
> > > ...
>
> > >On Sat, Sep 16, 2023 at 01:44:35AM +0800, shiju.jose@huawei.com wrote:
> > >> From: Shiju Jose <shiju.jose@huawei.com>
> > >>
> > >> ghes_handle_aer() lacks synchronization with
> > >> aer_recover_work_func(), so when error status data have multiple
> > >> sections, aer_recover_work_func() may use estatus data for
> > >> aer_capability_regs after it has been overwritten.
> > >>
> > >> The problem statement is here,
> > >> https://lore.kernel.org/all/20230901225755.GA90053@bhelgaas/
> > >>
> > >> In ghes_handle_aer() allocates memory for aer_capability_regs from the
> > >> ghes_estatus_pool and copy data for aer_capability_regs from the
> > >> estatus buffer. Free the memory in aer_recover_work_func() after
> > >> processing the data using the ghes_estatus_pool_region_free() added.
> > >
> > >Thanks for working this up!  I had it on my to-do list, but
> > >obviously had not gotten to it yet.
> > >
> > >> Reported-by: Bjorn Helgaas <helgaas@kernel.org>
> > >> Signed-off-by: Shiju Jose <shiju.jose@huawei.com>
> > >> ---
> > >>  drivers/acpi/apei/ghes.c | 23 ++++++++++++++++++++++-
> > >>  drivers/pci/pcie/aer.c   | 10 ++++++++++
> > >>  include/acpi/ghes.h      |  1 +
> > >>  3 files changed, 33 insertions(+), 1 deletion(-)
> > >>
> > >> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index
> > >> ef59d6ea16da..63ad0541db38 100644
> > >> --- a/drivers/acpi/apei/ghes.c
> > >> +++ b/drivers/acpi/apei/ghes.c
> > >> @@ -209,6 +209,20 @@ int ghes_estatus_pool_init(unsigned int num_ghes)
> > >>    return -ENOMEM;
> > >>  }
> > >>
> > >> +/**
> > >> + * ghes_estatus_pool_region_free - free previously allocated memory
> > >> + *                                   from the ghes_estatus_pool.
> > >> + * @addr: address of memory to free.
> > >> + * @size: size of memory to free.
> > >> + *
> > >> + * Returns none.
> > >> + */
> > >> +void ghes_estatus_pool_region_free(unsigned long addr, u32 size) {
> > >> +  gen_pool_free(ghes_estatus_pool, addr, size); }
> > >> +EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free);
> > >> +
> > >>  static int map_gen_v2(struct ghes *ghes)  {
> > >>    return
> > >> apei_map_generic_address(&ghes->generic_v2->read_ack_register);
> > >> @@ -564,6 +578,7 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
> > >>        pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
> > >>            unsigned int devfn;
> > >>            int aer_severity;
> > >> +          u8 *aer_info;
> > >>
> > >>            devfn = PCI_DEVFN(pcie_err->device_id.device,
> > >>                              pcie_err->device_id.function);
> > >> @@ -577,11 +592,17 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
> > >>            if (gdata->flags & CPER_SEC_RESET)
> > >>                    aer_severity = AER_FATAL;
> > >>
> > >> +          aer_info = (void *)gen_pool_alloc(ghes_estatus_pool,
> > >> +                                            sizeof(struct aer_capability_regs));
> > >> +          if (!aer_info)
> > >> +                  return;
> > >> +          memcpy(aer_info, pcie_err->aer_info, sizeof(struct
> > >> +aer_capability_regs));
> > >
> > >This is a very straightforward approach to fixing this, and it looks pretty
> > >reasonable, although I'd rather not have to pull more GHES stuff into aer.c
> > >(ghes.h and ghes_estatus_pool_region_free()).
> > >
> > >What I had in mind was to put a queue of aer_capability_regs on the PCI side
> > >that could be used by both the APEI path and the native path.
> > >
> > >In the APEI path, platform firmware reads the error information from the
> > >hardware, and it feeds into PCI AER via aer_recover_queue().
> > >
> > >In the native path, Linux should be reading reads the same error information
> > >from the hardware, but it feeds into PCI AER quite differently, via
> > >aer_process_err_devices() and handle_error_source().
> > >
> > >These paths are fundamentally doing the exact same thing, but the data
> > >handling and dmesg logging are needlessly different.  I'd like to see them get a
> > >little more unified, so the native path could someday also feed into
> > >aer_recover_queue().
> > >
> > >Does that make any sense?
> >
> > Thanks for letting us know.
> > Make sense, solution with in the AER looks better.
>
> Thanks for considering this.  The AER rework I have in mind is a
> longer-term project, so unless you have time and interest in doing
> that, I think we should apply your patch since it's all ready to go.
>
> You pointed to the problem statement, but I think it would be good to
> include a brief outline directly in the commit log, e.g., something
> like this:

Agreed.

>   ghes_handle_aer() passes AER data to the PCI core for logging and
>   recovery by calling aer_recover_queue() with a pointer to struct
>   aer_capability_regs.
>
>   The problem was that aer_recover_queue() queues the pointer directly
>   without copying the aer_capability_regs data.  The pointer was to
>   the ghes->estatus buffer, which could be reused before
>   aer_recover_work_func() reads the data.
>
>   To avoid this problem, allocate a new aer_capability_regs structure
>   from the ghes_estatus_pool, copy the AER data from the ghes->estatus
>   buffer into it, pass a pointer to the new struct to
>   aer_recover_queue(), and free it after aer_recover_work_func() has
>   processed it.
>
> What's your take on this, Rafael?

I can apply it if the changelog is updated along the lines of the above.

>  The biggest change is in
> drivers/acpi, so if you want to take it, feel free to add my:
>
>   Acked-by: Bjorn Helgaas <bhelgaas@google.com>

Thanks!
diff mbox series

Patch

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index ef59d6ea16da..63ad0541db38 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -209,6 +209,20 @@  int ghes_estatus_pool_init(unsigned int num_ghes)
 	return -ENOMEM;
 }
 
+/**
+ * ghes_estatus_pool_region_free - free previously allocated memory
+ *				   from the ghes_estatus_pool.
+ * @addr: address of memory to free.
+ * @size: size of memory to free.
+ *
+ * Returns none.
+ */
+void ghes_estatus_pool_region_free(unsigned long addr, u32 size)
+{
+	gen_pool_free(ghes_estatus_pool, addr, size);
+}
+EXPORT_SYMBOL_GPL(ghes_estatus_pool_region_free);
+
 static int map_gen_v2(struct ghes *ghes)
 {
 	return apei_map_generic_address(&ghes->generic_v2->read_ack_register);
@@ -564,6 +578,7 @@  static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
 	    pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO) {
 		unsigned int devfn;
 		int aer_severity;
+		u8 *aer_info;
 
 		devfn = PCI_DEVFN(pcie_err->device_id.device,
 				  pcie_err->device_id.function);
@@ -577,11 +592,17 @@  static void ghes_handle_aer(struct acpi_hest_generic_data *gdata)
 		if (gdata->flags & CPER_SEC_RESET)
 			aer_severity = AER_FATAL;
 
+		aer_info = (void *)gen_pool_alloc(ghes_estatus_pool,
+						  sizeof(struct aer_capability_regs));
+		if (!aer_info)
+			return;
+		memcpy(aer_info, pcie_err->aer_info, sizeof(struct aer_capability_regs));
+
 		aer_recover_queue(pcie_err->device_id.segment,
 				  pcie_err->device_id.bus,
 				  devfn, aer_severity,
 				  (struct aer_capability_regs *)
-				  pcie_err->aer_info);
+				  aer_info);
 	}
 #endif
 }
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index e85ff946e8c8..388b614c11fd 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -29,6 +29,7 @@ 
 #include <linux/kfifo.h>
 #include <linux/slab.h>
 #include <acpi/apei.h>
+#include <acpi/ghes.h>
 #include <ras/ras_event.h>
 
 #include "../pci.h"
@@ -996,6 +997,15 @@  static void aer_recover_work_func(struct work_struct *work)
 			continue;
 		}
 		cper_print_aer(pdev, entry.severity, entry.regs);
+		/*
+		 * Memory for aer_capability_regs(entry.regs) is being allocated from the
+		 * ghes_estatus_pool to protect it from overwriting when multiple sections
+		 * are present in the error status. Thus free the same after processing
+		 * the data.
+		 */
+		ghes_estatus_pool_region_free((unsigned long)entry.regs,
+					      sizeof(struct aer_capability_regs));
+
 		if (entry.severity == AER_NONFATAL)
 			pcie_do_recovery(pdev, pci_channel_io_normal,
 					 aer_root_reset);
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 3c8bba9f1114..40d89e161076 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -78,6 +78,7 @@  static inline struct list_head *ghes_get_devices(void) { return NULL; }
 #endif
 
 int ghes_estatus_pool_init(unsigned int num_ghes);
+void ghes_estatus_pool_region_free(unsigned long addr, u32 size);
 
 static inline int acpi_hest_get_version(struct acpi_hest_generic_data *gdata)
 {