diff mbox series

[v2] efi/cper: align ARM CPER type with UEFI 2.9A/2.10 specs

Message ID 60da74c80a0b05ea4a5b4b7f2eda1b58d555edce.1718794335.git.mchehab+huawei@kernel.org (mailing list archive)
State Handled Elsewhere, archived
Headers show
Series [v2] efi/cper: align ARM CPER type with UEFI 2.9A/2.10 specs | expand

Commit Message

Mauro Carvalho Chehab June 19, 2024, 10:52 a.m. UTC
Up to UEFI spec, the type byte of CPER struct was defined simply
as:

Type at byte offset 4:

	- Cache error
	- TLB Error
	- Bus Error
	- Micro-architectural Error
	All other values are reserved

Yet, there was no information about how this would be encoded.

Spec 2.9A corrected it by defining:

	- Bit 1 - Cache Error
	- Bit 2 - TLB Error
	- Bit 3 - Bus Error
	- Bit 4 - Micro-architectural Error
	All other values are reserved

Spec 2.10 also preserve the same encoding as 2.9A

See: https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#arm-processor-error-information

Adjust CPER handling code for ARM to properly handle UEFI 2.9A and
2.10 encoding.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
---
 drivers/acpi/apei/ghes.c        | 19 +++++++++++---
 drivers/firmware/efi/cper-arm.c | 44 ++++++++++++++-------------------
 include/linux/cper.h            |  9 +++----
 3 files changed, 37 insertions(+), 35 deletions(-)

Comments

Jonathan Cameron June 19, 2024, 1:31 p.m. UTC | #1
On Wed, 19 Jun 2024 12:52:38 +0200
Mauro Carvalho Chehab <mchehab+huawei@kernel.org> wrote:

> Up to UEFI spec, the type byte of CPER struct was defined simply
> as:
> 
> Type at byte offset 4:
> 
> 	- Cache error
> 	- TLB Error
> 	- Bus Error
> 	- Micro-architectural Error
> 	All other values are reserved
> 
> Yet, there was no information about how this would be encoded.
> 
> Spec 2.9A corrected it by defining:
> 
> 	- Bit 1 - Cache Error
> 	- Bit 2 - TLB Error
> 	- Bit 3 - Bus Error
> 	- Bit 4 - Micro-architectural Error
> 	All other values are reserved
> 
> Spec 2.10 also preserve the same encoding as 2.9A
> 
> See: https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#arm-processor-error-information
> 
> Adjust CPER handling code for ARM to properly handle UEFI 2.9A and
> 2.10 encoding.

Hi Mauro,

I'd be tempted to use "ARM Processor" throughout this patch description
as could in theory be something else and currently the link
is the only way to tell!

A few comments inline.

Good catch on the spec change btw.

> 
> Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
> ---
>  drivers/acpi/apei/ghes.c        | 19 +++++++++++---
>  drivers/firmware/efi/cper-arm.c | 44 ++++++++++++++-------------------
>  include/linux/cper.h            |  9 +++----
>  3 files changed, 37 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index ed32bbecb4a3..365de4115508 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -546,9 +546,12 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
>  	p = (char *)(err + 1);
>  	for (i = 0; i < err->err_info_num; i++) {
>  		struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
> -		bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR);
> +		bool is_cache = (err_info->type & CPER_ARM_CACHE_ERROR);

Matches local style I guess but the () are unnecessary.

>  		bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
> -		const char *error_type = "unknown error";
> +		char error_type[120] = "";

> +		char *s = error_type;
> +		int len = 0;
> +		int i;

Shadowing i which is bad for readability.

>  
>  		/*
>  		 * The field (err_info->error_info & BIT(26)) is fixed to set to
> @@ -562,8 +565,16 @@ static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
>  			continue;
>  		}
>  
> -		if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs))
> -			error_type = cper_proc_error_type_strs[err_info->type];
> +		for (i = 0; i < ARRAY_SIZE(cper_proc_error_type_strs); i++) {
> +			if (!(err_info->type & (1U << i)))
> +				continue;
> +
> +			len += snprintf(s, sizeof(err_info->type) - len, "%s ", cper_proc_error_type_strs[i]);

Size of the index into the type string array?  I'm confused.
sizeof(error_type) maybe?

Also, maybe break that long line of code before cper_*


> +			s += len;
> +		}
> +
> +		if (!*error_type)
> +			strscpy(error_type, "unknown error", sizeof(error_type));

Perhaps should handle multiple bits where only one is unknown?
So maybe compare with a mask of known bits and print this on the end (perhaps
including which bit)?

>  
>  		pr_warn_ratelimited(FW_WARN GHES_PFX
>  				    "Unhandled processor error type: %s\n",
> diff --git a/drivers/firmware/efi/cper-arm.c b/drivers/firmware/efi/cper-arm.c
> index fa9c1c3bf168..f57641eb548a 100644
> --- a/drivers/firmware/efi/cper-arm.c
> +++ b/drivers/firmware/efi/cper-arm.c
> @@ -93,15 +93,11 @@ static void cper_print_arm_err_info(const char *pfx, u32 type,
>  	bool proc_context_corrupt, corrected, precise_pc, restartable_pc;
>  	bool time_out, access_mode;
>  
> -	/* If the type is unknown, bail. */
> -	if (type > CPER_ARM_MAX_TYPE)
> -		return;
> -
>  	/*
>  	 * Vendor type errors have error information values that are vendor
>  	 * specific.
>  	 */
> -	if (type == CPER_ARM_VENDOR_ERROR)
> +	if (type & CPER_ARM_VENDOR_ERROR)
>  		return;
>  
>  	if (error_info & CPER_ARM_ERR_VALID_TRANSACTION_TYPE) {
> @@ -116,43 +112,38 @@ static void cper_print_arm_err_info(const char *pfx, u32 type,
>  	if (error_info & CPER_ARM_ERR_VALID_OPERATION_TYPE) {
>  		op_type = ((error_info >> CPER_ARM_ERR_OPERATION_SHIFT)
>  			   & CPER_ARM_ERR_OPERATION_MASK);
> -		switch (type) {
> -		case CPER_ARM_CACHE_ERROR:
> +		if (type & CPER_ARM_CACHE_ERROR) {
>  			if (op_type < ARRAY_SIZE(arm_cache_err_op_strs)) {
> -				printk("%soperation type: %s\n", pfx,
> +				printk("%scache error: %s\n", pfx,
>  				       arm_cache_err_op_strs[op_type]);
Can we keep that this is an operation type in print?
"%scache error, operation type: %s\n" perhaps?

>  			}
> -			break;
> -		case CPER_ARM_TLB_ERROR:
> +		}
> +		if (type & CPER_ARM_TLB_ERROR) {
>  			if (op_type < ARRAY_SIZE(arm_tlb_err_op_strs)) {
> -				printk("%soperation type: %s\n", pfx,
> +				printk("%sTLB error: %s\n", pfx,
>  				       arm_tlb_err_op_strs[op_type]);
>  			}
> -			break;
> -		case CPER_ARM_BUS_ERROR:
> +		}
> +		if (type & CPER_ARM_BUS_ERROR) {
>  			if (op_type < ARRAY_SIZE(arm_bus_err_op_strs)) {
> -				printk("%soperation type: %s\n", pfx,
> +				printk("%sbus error: %s\n", pfx,
>  				       arm_bus_err_op_strs[op_type]);
>  			}
> -			break;
>  		}
>  	}
>  
>  	if (error_info & CPER_ARM_ERR_VALID_LEVEL) {
>  		level = ((error_info >> CPER_ARM_ERR_LEVEL_SHIFT)
>  			 & CPER_ARM_ERR_LEVEL_MASK);

Not a today thing, but would be lovely to use FIELD_GET()
for all these with appropriately fixed up mask definitions.
Right now it is inconsistent as the valid entries are handled
as shifted values, and we have GENMASK(X,0) plus a shift for these.

> -		switch (type) {
> -		case CPER_ARM_CACHE_ERROR:
> +		if (type & CPER_ARM_CACHE_ERROR)
>  			printk("%scache level: %d\n", pfx, level);
> -			break;
> -		case CPER_ARM_TLB_ERROR:
> +
> +		if (type & CPER_ARM_TLB_ERROR)
>  			printk("%sTLB level: %d\n", pfx, level);
> -			break;
> -		case CPER_ARM_BUS_ERROR:
> +
> +		if (type & CPER_ARM_BUS_ERROR)
>  			printk("%saffinity level at which the bus error occurred: %d\n",
>  			       pfx, level);
> -			break;
> -		}
>  	}
diff mbox series

Patch

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index ed32bbecb4a3..365de4115508 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -546,9 +546,12 @@  static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
 	p = (char *)(err + 1);
 	for (i = 0; i < err->err_info_num; i++) {
 		struct cper_arm_err_info *err_info = (struct cper_arm_err_info *)p;
-		bool is_cache = (err_info->type == CPER_ARM_CACHE_ERROR);
+		bool is_cache = (err_info->type & CPER_ARM_CACHE_ERROR);
 		bool has_pa = (err_info->validation_bits & CPER_ARM_INFO_VALID_PHYSICAL_ADDR);
-		const char *error_type = "unknown error";
+		char error_type[120] = "";
+		char *s = error_type;
+		int len = 0;
+		int i;
 
 		/*
 		 * The field (err_info->error_info & BIT(26)) is fixed to set to
@@ -562,8 +565,16 @@  static bool ghes_handle_arm_hw_error(struct acpi_hest_generic_data *gdata,
 			continue;
 		}
 
-		if (err_info->type < ARRAY_SIZE(cper_proc_error_type_strs))
-			error_type = cper_proc_error_type_strs[err_info->type];
+		for (i = 0; i < ARRAY_SIZE(cper_proc_error_type_strs); i++) {
+			if (!(err_info->type & (1U << i)))
+				continue;
+
+			len += snprintf(s, sizeof(err_info->type) - len, "%s ", cper_proc_error_type_strs[i]);
+			s += len;
+		}
+
+		if (!*error_type)
+			strscpy(error_type, "unknown error", sizeof(error_type));
 
 		pr_warn_ratelimited(FW_WARN GHES_PFX
 				    "Unhandled processor error type: %s\n",
diff --git a/drivers/firmware/efi/cper-arm.c b/drivers/firmware/efi/cper-arm.c
index fa9c1c3bf168..f57641eb548a 100644
--- a/drivers/firmware/efi/cper-arm.c
+++ b/drivers/firmware/efi/cper-arm.c
@@ -93,15 +93,11 @@  static void cper_print_arm_err_info(const char *pfx, u32 type,
 	bool proc_context_corrupt, corrected, precise_pc, restartable_pc;
 	bool time_out, access_mode;
 
-	/* If the type is unknown, bail. */
-	if (type > CPER_ARM_MAX_TYPE)
-		return;
-
 	/*
 	 * Vendor type errors have error information values that are vendor
 	 * specific.
 	 */
-	if (type == CPER_ARM_VENDOR_ERROR)
+	if (type & CPER_ARM_VENDOR_ERROR)
 		return;
 
 	if (error_info & CPER_ARM_ERR_VALID_TRANSACTION_TYPE) {
@@ -116,43 +112,38 @@  static void cper_print_arm_err_info(const char *pfx, u32 type,
 	if (error_info & CPER_ARM_ERR_VALID_OPERATION_TYPE) {
 		op_type = ((error_info >> CPER_ARM_ERR_OPERATION_SHIFT)
 			   & CPER_ARM_ERR_OPERATION_MASK);
-		switch (type) {
-		case CPER_ARM_CACHE_ERROR:
+		if (type & CPER_ARM_CACHE_ERROR) {
 			if (op_type < ARRAY_SIZE(arm_cache_err_op_strs)) {
-				printk("%soperation type: %s\n", pfx,
+				printk("%scache error: %s\n", pfx,
 				       arm_cache_err_op_strs[op_type]);
 			}
-			break;
-		case CPER_ARM_TLB_ERROR:
+		}
+		if (type & CPER_ARM_TLB_ERROR) {
 			if (op_type < ARRAY_SIZE(arm_tlb_err_op_strs)) {
-				printk("%soperation type: %s\n", pfx,
+				printk("%sTLB error: %s\n", pfx,
 				       arm_tlb_err_op_strs[op_type]);
 			}
-			break;
-		case CPER_ARM_BUS_ERROR:
+		}
+		if (type & CPER_ARM_BUS_ERROR) {
 			if (op_type < ARRAY_SIZE(arm_bus_err_op_strs)) {
-				printk("%soperation type: %s\n", pfx,
+				printk("%sbus error: %s\n", pfx,
 				       arm_bus_err_op_strs[op_type]);
 			}
-			break;
 		}
 	}
 
 	if (error_info & CPER_ARM_ERR_VALID_LEVEL) {
 		level = ((error_info >> CPER_ARM_ERR_LEVEL_SHIFT)
 			 & CPER_ARM_ERR_LEVEL_MASK);
-		switch (type) {
-		case CPER_ARM_CACHE_ERROR:
+		if (type & CPER_ARM_CACHE_ERROR)
 			printk("%scache level: %d\n", pfx, level);
-			break;
-		case CPER_ARM_TLB_ERROR:
+
+		if (type & CPER_ARM_TLB_ERROR)
 			printk("%sTLB level: %d\n", pfx, level);
-			break;
-		case CPER_ARM_BUS_ERROR:
+
+		if (type & CPER_ARM_BUS_ERROR)
 			printk("%saffinity level at which the bus error occurred: %d\n",
 			       pfx, level);
-			break;
-		}
 	}
 
 	if (error_info & CPER_ARM_ERR_VALID_PROC_CONTEXT_CORRUPT) {
@@ -289,9 +280,10 @@  void cper_print_proc_arm(const char *pfx,
 				       newpfx);
 		}
 
-		printk("%serror_type: %d, %s\n", newpfx, err_info->type,
-			err_info->type < ARRAY_SIZE(cper_proc_error_type_strs) ?
-			cper_proc_error_type_strs[err_info->type] : "unknown");
+		printk("%s""error_type: 0x%02x\n", pfx, err_info->type);
+		cper_print_bits(pfx, err_info->type,
+				cper_proc_error_type_strs,
+				ARRAY_SIZE(cper_proc_error_type_strs));
 		if (err_info->validation_bits & CPER_ARM_INFO_VALID_ERR_INFO) {
 			printk("%serror_info: 0x%016llx\n", newpfx,
 			       err_info->error_info);
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 265b0f8fc0b3..afc6d41b4e67 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -293,11 +293,10 @@  enum {
 #define CPER_ARM_INFO_FLAGS_PROPAGATED		BIT(2)
 #define CPER_ARM_INFO_FLAGS_OVERFLOW		BIT(3)
 
-#define CPER_ARM_CACHE_ERROR			0
-#define CPER_ARM_TLB_ERROR			1
-#define CPER_ARM_BUS_ERROR			2
-#define CPER_ARM_VENDOR_ERROR			3
-#define CPER_ARM_MAX_TYPE			CPER_ARM_VENDOR_ERROR
+#define CPER_ARM_CACHE_ERROR			BIT(1)
+#define CPER_ARM_TLB_ERROR			BIT(2)
+#define CPER_ARM_BUS_ERROR			BIT(3)
+#define CPER_ARM_VENDOR_ERROR			BIT(4)
 
 #define CPER_ARM_ERR_VALID_TRANSACTION_TYPE	BIT(0)
 #define CPER_ARM_ERR_VALID_OPERATION_TYPE	BIT(1)