diff mbox series

[2/2] efi/cper, cxl: Decode CXL Error Log

Message ID 20221007211714.71129-3-Smita.KoralahalliChannabasappa@amd.com
State Superseded
Headers show
Series efi/cper, cxl: Decode CXL Protocol Errors CPER | expand

Commit Message

Smita Koralahalli Oct. 7, 2022, 9:17 p.m. UTC
Print the CXL Error Log field as found in CXL Protocol Error Section.

The CXL RAS Capability structure will be reused by OS First Handling
and the duplication/appropriate placement will be addressed eventually.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
---
 drivers/firmware/efi/cper_cxl.c | 21 +++++++++++++++++++++
 include/linux/cxl_err.h         | 21 +++++++++++++++++++++
 2 files changed, 42 insertions(+)
 create mode 100644 include/linux/cxl_err.h

Comments

Jonathan Cameron Oct. 10, 2022, 2:34 p.m. UTC | #1
On Fri, 7 Oct 2022 21:17:14 +0000
Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> wrote:

> Print the CXL Error Log field as found in CXL Protocol Error Section.
> 
> The CXL RAS Capability structure will be reused by OS First Handling
> and the duplication/appropriate placement will be addressed eventually.
> 
> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>

Ah. This clearly answers at least a few comments from my patch one review.
I should have read on!

> ---
>  drivers/firmware/efi/cper_cxl.c | 21 +++++++++++++++++++++
>  include/linux/cxl_err.h         | 21 +++++++++++++++++++++
>  2 files changed, 42 insertions(+)
>  create mode 100644 include/linux/cxl_err.h
> 
> diff --git a/drivers/firmware/efi/cper_cxl.c b/drivers/firmware/efi/cper_cxl.c
> index e5f48f0de1a4..c3d1d0770aef 100644
> --- a/drivers/firmware/efi/cper_cxl.c
> +++ b/drivers/firmware/efi/cper_cxl.c
> @@ -8,6 +8,7 @@
>   */
>  
>  #include <linux/cper.h>
> +#include <linux/cxl_err.h>
>  #include "cper_cxl.h"
>  
>  #define PROT_ERR_VALID_AGENT_TYPE		BIT_ULL(0)
> @@ -16,6 +17,7 @@
>  #define PROT_ERR_VALID_SERIAL_NUMBER		BIT_ULL(3)
>  #define PROT_ERR_VALID_CAPABILITY		BIT_ULL(4)
>  #define PROT_ERR_VALID_DVSEC			BIT_ULL(5)
> +#define PROT_ERR_VALID_ERROR_LOG		BIT_ULL(6)
>  
>  static const char * const prot_err_agent_type_strs[] = {
>  	"Restricted CXL Device",
> @@ -84,4 +86,23 @@ void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_e
>  			break;
>  		}
>  	}
> +
> +	if (prot_err->valid_bits & PROT_ERR_VALID_ERROR_LOG) {
> +		size_t size = sizeof(*prot_err) + prot_err->dvsec_len;
> +		struct ras_capability_regs *cxl_ras;
> +
> +		pr_info("%s Error log length: 0x%04x\n", pfx, prot_err->err_len);
> +
> +		pr_info("%s CXL Error Log:\n", pfx);
> +		cxl_ras = (struct ras_capability_regs *)((long)prot_err + size);
> +		pr_info("%s cxl_ras_uncor_status: 0x%08x, cxl_ras_uncor_mask: 0x%08x\n",
> +			pfx, cxl_ras->uncor_status, cxl_ras->uncor_mask);
Is it worth splitting these up, so that we get a human readable line with the
individual fields broken out?

> +		pr_info("%s cxl_ras_uncor_severity: 0x%08x\n", pfx,
> +			cxl_ras->uncor_severity);
> +		pr_info("%s cxl_ras_cor_status: 0x%08x, cxl_ras_cor_mask: 0x%08x\n",
> +			pfx, cxl_ras->cor_status, cxl_ras->cor_mask);

Not outputting the cap_control register?  Some of that might be useful.

> +		pr_info("%s Header Log Registers:\n", pfx);
> +		print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4, cxl_ras->header_log,
> +			       sizeof(cxl_ras->header_log), 0);
> +	}
>  }
> diff --git a/include/linux/cxl_err.h b/include/linux/cxl_err.h
> new file mode 100644
> index 000000000000..c89dbb6c286f
> --- /dev/null
> +++ b/include/linux/cxl_err.h
> @@ -0,0 +1,21 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2022 Advanced Micro Devices, Inc.
> + *
> + * Author: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> + */
> +
> +#ifndef LINUX_CXL_ERR_H
> +#define LINUX_CXL_ERR_H
> +
> +struct ras_capability_regs {

CXL r3.0 Spec reference plus prefix it with cxl_ 

Agreed with your comment at the top. Some discussion needed on where to
put this - or whether to delay figuring that out until a later stage.

> +	u32 uncor_status;
> +	u32 uncor_mask;
> +	u32 uncor_severity;
> +	u32 cor_status;
> +	u32 cor_mask;
> +	u32 cap_control;
> +	u32 header_log[16];
> +};
> +
> +#endif //__CXL_ERR_
Smita Koralahalli Oct. 11, 2022, 11:17 p.m. UTC | #2
On 10/10/2022 7:34 AM, Jonathan Cameron wrote:
> On Fri, 7 Oct 2022 21:17:14 +0000
> Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com> wrote:
>
>
>> +
>> +	if (prot_err->valid_bits & PROT_ERR_VALID_ERROR_LOG) {
>> +		size_t size = sizeof(*prot_err) + prot_err->dvsec_len;
>> +		struct ras_capability_regs *cxl_ras;
>> +
>> +		pr_info("%s Error log length: 0x%04x\n", pfx, prot_err->err_len);
>> +
>> +		pr_info("%s CXL Error Log:\n", pfx);
>> +		cxl_ras = (struct ras_capability_regs *)((long)prot_err + size);
>> +		pr_info("%s cxl_ras_uncor_status: 0x%08x, cxl_ras_uncor_mask: 0x%08x\n",
>> +			pfx, cxl_ras->uncor_status, cxl_ras->uncor_mask);
> Is it worth splitting these up, so that we get a human readable line with the
> individual fields broken out?

Will do.

>
>> +		pr_info("%s cxl_ras_uncor_severity: 0x%08x\n", pfx,
>> +			cxl_ras->uncor_severity);
>> +		pr_info("%s cxl_ras_cor_status: 0x%08x, cxl_ras_cor_mask: 0x%08x\n",
>> +			pfx, cxl_ras->cor_status, cxl_ras->cor_mask);
> Not outputting the cap_control register?  Some of that might be useful.

Will add.

>
>> +		pr_info("%s Header Log Registers:\n", pfx);
>> +		print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4, cxl_ras->header_log,
>> +			       sizeof(cxl_ras->header_log), 0);
>> +	}
>>   }
>> diff --git a/include/linux/cxl_err.h b/include/linux/cxl_err.h
>> new file mode 100644
>> index 000000000000..c89dbb6c286f
>> --- /dev/null
>> +++ b/include/linux/cxl_err.h
>> @@ -0,0 +1,21 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Copyright (C) 2022 Advanced Micro Devices, Inc.
>> + *
>> + * Author: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
>> + */
>> +
>> +#ifndef LINUX_CXL_ERR_H
>> +#define LINUX_CXL_ERR_H
>> +
>> +struct ras_capability_regs {
> CXL r3.0 Spec reference plus prefix it with cxl_

Okay.

>
> Agreed with your comment at the top. Some discussion needed on where to
> put this - or whether to delay figuring that out until a later stage.

Yes, more discussion needed here though!

Thanks,
Smita

>
>> +	u32 uncor_status;
>> +	u32 uncor_mask;
>> +	u32 uncor_severity;
>> +	u32 cor_status;
>> +	u32 cor_mask;
>> +	u32 cap_control;
>> +	u32 header_log[16];
>> +};
>> +
>> +#endif //__CXL_ERR_
diff mbox series

Patch

diff --git a/drivers/firmware/efi/cper_cxl.c b/drivers/firmware/efi/cper_cxl.c
index e5f48f0de1a4..c3d1d0770aef 100644
--- a/drivers/firmware/efi/cper_cxl.c
+++ b/drivers/firmware/efi/cper_cxl.c
@@ -8,6 +8,7 @@ 
  */
 
 #include <linux/cper.h>
+#include <linux/cxl_err.h>
 #include "cper_cxl.h"
 
 #define PROT_ERR_VALID_AGENT_TYPE		BIT_ULL(0)
@@ -16,6 +17,7 @@ 
 #define PROT_ERR_VALID_SERIAL_NUMBER		BIT_ULL(3)
 #define PROT_ERR_VALID_CAPABILITY		BIT_ULL(4)
 #define PROT_ERR_VALID_DVSEC			BIT_ULL(5)
+#define PROT_ERR_VALID_ERROR_LOG		BIT_ULL(6)
 
 static const char * const prot_err_agent_type_strs[] = {
 	"Restricted CXL Device",
@@ -84,4 +86,23 @@  void cper_print_prot_err(const char *pfx, const struct cper_sec_prot_err *prot_e
 			break;
 		}
 	}
+
+	if (prot_err->valid_bits & PROT_ERR_VALID_ERROR_LOG) {
+		size_t size = sizeof(*prot_err) + prot_err->dvsec_len;
+		struct ras_capability_regs *cxl_ras;
+
+		pr_info("%s Error log length: 0x%04x\n", pfx, prot_err->err_len);
+
+		pr_info("%s CXL Error Log:\n", pfx);
+		cxl_ras = (struct ras_capability_regs *)((long)prot_err + size);
+		pr_info("%s cxl_ras_uncor_status: 0x%08x, cxl_ras_uncor_mask: 0x%08x\n",
+			pfx, cxl_ras->uncor_status, cxl_ras->uncor_mask);
+		pr_info("%s cxl_ras_uncor_severity: 0x%08x\n", pfx,
+			cxl_ras->uncor_severity);
+		pr_info("%s cxl_ras_cor_status: 0x%08x, cxl_ras_cor_mask: 0x%08x\n",
+			pfx, cxl_ras->cor_status, cxl_ras->cor_mask);
+		pr_info("%s Header Log Registers:\n", pfx);
+		print_hex_dump(pfx, "", DUMP_PREFIX_OFFSET, 16, 4, cxl_ras->header_log,
+			       sizeof(cxl_ras->header_log), 0);
+	}
 }
diff --git a/include/linux/cxl_err.h b/include/linux/cxl_err.h
new file mode 100644
index 000000000000..c89dbb6c286f
--- /dev/null
+++ b/include/linux/cxl_err.h
@@ -0,0 +1,21 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ *
+ * Author: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
+ */
+
+#ifndef LINUX_CXL_ERR_H
+#define LINUX_CXL_ERR_H
+
+struct ras_capability_regs {
+	u32 uncor_status;
+	u32 uncor_mask;
+	u32 uncor_severity;
+	u32 cor_status;
+	u32 cor_mask;
+	u32 cap_control;
+	u32 header_log[16];
+};
+
+#endif //__CXL_ERR_