@@ -26,6 +26,7 @@
#include <linux/init.h>
#include <linux/acpi.h>
#include <linux/io.h>
+#include <acpi/ghes.h>
#include "apei-internal.h"
@@ -34,33 +35,36 @@
static int bert_disable;
-static void __init bert_print_all(struct acpi_bert_region *region,
- unsigned int region_len)
+static int bert_process_region(struct acpi_bert_region *region,
+ unsigned int region_len,
+ int (*process)(struct acpi_hest_generic_status *estatus,
+ void *data), void *data)
{
struct acpi_hest_generic_status *estatus =
(struct acpi_hest_generic_status *)region;
int remain = region_len;
u32 estatus_len;
+ int rc;
if (!estatus->block_status)
- return;
+ return -ENXIO;
while (remain > sizeof(struct acpi_bert_region)) {
if (cper_estatus_check(estatus)) {
pr_err(FW_BUG "Invalid error record.\n");
- return;
+ return -ENXIO;
}
estatus_len = cper_estatus_len(estatus);
if (remain < estatus_len) {
- pr_err(FW_BUG "Truncated status block (length: %u).\n",
- estatus_len);
- return;
+ pr_err(FW_BUG "Truncated status block (len: %u).\n",
+ estatus_len);
+ return -ENXIO;
}
- pr_info_once("Error records from previous boot:\n");
-
- cper_estatus_print(KERN_INFO HW_ERR, estatus);
+ rc = process(estatus, data);
+ if (rc < 0)
+ return rc;
/*
* Because the boot error source is "one-time polled" type,
@@ -72,10 +76,22 @@ static void __init bert_print_all(struct acpi_bert_region *region,
estatus = (void *)estatus + estatus_len;
/* No more error records. */
if (!estatus->block_status)
- return;
+ return -ENXIO;
remain -= estatus_len;
}
+
+ return 0;
+}
+
+/*
+ * Record callback that bert_init() hands to bert_process_region():
+ * print one error status block to the kernel log.  @data is not used.
+ * Always returns 0.
+ */
+static int __init bert_print(struct acpi_hest_generic_status *estatus,
+			     void *data)
+{
+	pr_info_once("Error records from previous boot:\n");
+	cper_estatus_print(KERN_INFO HW_ERR, estatus);
+
+	return 0;
+}
static int __init setup_bert_disable(char *str)
@@ -86,7 +102,7 @@ static int __init setup_bert_disable(char *str)
}
__setup("bert_disable", setup_bert_disable);
-static int __init bert_check_table(struct acpi_table_bert *bert_tab)
+static int bert_check_table(struct acpi_table_bert *bert_tab)
{
if (bert_tab->header.length < sizeof(struct acpi_table_bert) ||
bert_tab->region_length < sizeof(struct acpi_bert_region))
@@ -138,7 +154,8 @@ static int __init bert_init(void)
goto out_fini;
boot_error_region = ioremap_cache(bert_tab->address, region_len);
if (boot_error_region) {
- bert_print_all(boot_error_region, region_len);
+ bert_process_region(boot_error_region, region_len,
+ bert_print, NULL);
iounmap(boot_error_region);
} else {
rc = -ENOMEM;
@@ -152,3 +169,97 @@ static int __init bert_init(void)
}
late_initcall(bert_init);
+
+/**
+ * struct mem_err_cb_ctx - context handed to bert_process_mem_err()
+ * @cb:   callback invoked for every matching memory error section
+ * @data: opaque pointer passed to @cb as its first argument
+ * @addr: first physical address of the range being searched
+ * @len:  length of the range being searched
+ */
+struct mem_err_cb_ctx {
+	void (*cb)(void *data, u64 addr, u64 len);
+	void *data;
+	u64 addr;
+	u64 len;
+};
+
+/*
+ * Record callback for bert_process_region(): report memory errors that
+ * fall inside the range described by @data.
+ *
+ * @data points to a struct mem_err_cb_ctx.  Records whose severity is
+ * CPER_SEV_CORRECTED are ignored.  For any other record, every section
+ * that is a platform memory error section, carries a valid physical
+ * address, and has that address within [addr, addr + len - 1] is passed
+ * to the context's callback with a length of L1_CACHE_BYTES.
+ *
+ * Returns the number of sections handed to the callback.
+ */
+static int bert_process_mem_err(struct acpi_hest_generic_status *estatus,
+				void *data)
+{
+	struct mem_err_cb_ctx *range = data;
+	u64 last = range->addr + range->len - 1;
+	struct acpi_hest_generic_data *gdata;
+	u16 severity = estatus->error_severity;
+	int nr_reported = 0;
+
+	/* Corrected errors are never reported. */
+	if (severity == CPER_SEV_CORRECTED)
+		return 0;
+
+	apei_estatus_for_each_section(estatus, gdata) {
+		guid_t *sec_type = (guid_t *)gdata->section_type;
+		struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
+
+		/* Only platform memory error sections are of interest. */
+		if (!guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM))
+			continue;
+
+		/* Skip sections that do not carry a physical address. */
+		if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
+			continue;
+
+		/* Skip addresses outside the requested range. */
+		if (mem_err->physical_addr < range->addr ||
+		    mem_err->physical_addr > last)
+			continue;
+
+		range->cb(range->data, mem_err->physical_addr, L1_CACHE_BYTES);
+		nr_reported++;
+	}
+
+	return nr_reported;
+}
+
+/**
+ * bert_find_mem_error_record - report BERT memory errors within a range
+ * @cb:   callback invoked for every matching memory error section
+ * @data: opaque pointer passed to @cb as its first argument
+ * @addr: first physical address of the range to search
+ * @len:  length of the range to search
+ *
+ * Looks up the BERT table, maps the boot error region it describes and
+ * walks the error records with bert_process_mem_err().
+ *
+ * Return: 0 if ACPI is disabled or there is no BERT table, -EINVAL if the
+ * table lookup fails for another reason, -ENOMEM if the region cannot be
+ * mapped, otherwise the result of bert_check_table() or
+ * bert_process_region().
+ *
+ * NOTE(review): bert_process_region() returns -ENXIO when it reaches a
+ * record with no block_status and does not propagate the positive match
+ * count from the callback, so a caller can see -ENXIO even though @cb
+ * was invoked - confirm this is the intended contract.
+ */
+int bert_find_mem_error_record(void (*cb)(void *data, u64 addr, u64 len),
+			       void *data, u64 addr, u64 len)
+{
+	acpi_status status;
+	int rc;
+	unsigned int region_len;
+	struct acpi_bert_region *bert_region;
+	struct acpi_table_bert *bert_tab;
+	struct mem_err_cb_ctx ctx = {
+		.cb = cb,
+		.data = data,
+		.addr = addr,
+		.len = len,
+	};
+
+	if (acpi_disabled)
+		return 0;
+
+	status = acpi_get_table(ACPI_SIG_BERT, 0,
+				(struct acpi_table_header **)&bert_tab);
+	if (status == AE_NOT_FOUND)
+		return 0;
+
+	if (ACPI_FAILURE(status))
+		return -EINVAL;
+
+	/*
+	 * From here on a table reference is held; every exit path must go
+	 * through put_table so that the reference is dropped again.
+	 */
+	rc = bert_check_table(bert_tab);
+	if (rc)
+		goto put_table;
+
+	region_len = bert_tab->region_length;
+	bert_region = acpi_os_map_memory(bert_tab->address, region_len);
+	if (!bert_region) {
+		rc = -ENOMEM;
+		goto put_table;
+	}
+
+	rc = bert_process_region(bert_region, region_len,
+				 bert_process_mem_err, &ctx);
+
+	acpi_os_unmap_memory(bert_region, region_len);
+put_table:
+	acpi_put_table((struct acpi_table_header *)bert_tab);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(bert_find_mem_error_record);
@@ -1146,6 +1146,10 @@ int __acpi_probe_device_table(struct acpi_probe_entry *start, int nr);
(&ACPI_PROBE_TABLE_END(t) - \
&ACPI_PROBE_TABLE(t))); \
})
+
+int bert_find_mem_error_record(
+ void (*cb)(void *data, u64 addr, u64 len),
+ void *data, u64 addr, u64 len);
#else
static inline int acpi_dev_get_property(struct acpi_device *adev,
const char *name, acpi_object_type type,
@@ -1247,6 +1251,12 @@ acpi_graph_get_remote_endpoint(const struct fwnode_handle *fwnode,
(void *) data }
#define acpi_probe_device_table(t) ({ int __r = 0; __r;})
+/*
+ * Stub for the #else branch: there is no BERT to search, so report that
+ * the operation is not supported.  It has to be static inline because
+ * this is a header; a plain definition would be emitted by every file
+ * that includes it and fail at link time with duplicate symbols.
+ */
+static inline int bert_find_mem_error_record(
+	void (*cb)(void *data, u64 addr, u64 len),
+	void *data, u64 addr, u64 len)
+{
+	return -EOPNOTSUPP;
+}
#endif
#ifdef CONFIG_ACPI_TABLE_UPGRADE
Add a helper function that searches the BERT records for memory errors that fall within a given resource range. The caller passes in a callback function to process the matching memory error records. This is in preparation for adding bad memory ranges for nvdimm from the BERT. Signed-off-by: Dave Jiang <dave.jiang@intel.com> Cc: Ying Huang <ying.huang@intel.com> --- drivers/acpi/apei/bert.c | 137 ++++++++++++++++++++++++++++++++++++++++++---- include/linux/acpi.h | 10 +++ 2 files changed, 134 insertions(+), 13 deletions(-)