diff mbox series

[RESEND,v3,1/2] efi/cper: add cper_mem_err_status_str to decode error description

Message ID 20220124024759.19176-2-xueshuai@linux.alibaba.com (mailing list archive)
State New, archived
Headers show
Series [RESEND,v3,1/2] efi/cper: add cper_mem_err_status_str to decode error description | expand

Commit Message

Shuai Xue Jan. 24, 2022, 2:47 a.m. UTC
Introduce a new helper function cper_mem_err_status_str() which is used to
decode the description of error status, and the cper_print_mem() will call
it and report the details of error status.

Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com>
---
 drivers/firmware/efi/cper.c | 46 ++++++++++++++++++++++++++++++++++++-
 include/linux/cper.h        |  1 +
 2 files changed, 46 insertions(+), 1 deletion(-)

Comments

Borislav Petkov Jan. 24, 2022, 9:16 p.m. UTC | #1
On Mon, Jan 24, 2022 at 10:47:58AM +0800, Shuai Xue wrote:
> Introduce a new helper function cper_mem_err_status_str() which is used to
> decode the description of error status, and the cper_print_mem() will call
> it and report the details of error status.
> 
> Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com>
> ---
>  drivers/firmware/efi/cper.c | 46 ++++++++++++++++++++++++++++++++++++-
>  include/linux/cper.h        |  1 +
>  2 files changed, 46 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
> index 6ec8edec6329..addafccecd84 100644
> --- a/drivers/firmware/efi/cper.c
> +++ b/drivers/firmware/efi/cper.c
> @@ -211,6 +211,49 @@ const char *cper_mem_err_type_str(unsigned int etype)
>  }
>  EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
>  
> +const char *cper_mem_err_status_str(u64 status)
> +{
> +	switch ((status >> 8) & 0xff) {
> +	case 1:
> +		return "Error detected internal to the component";

You can make that table a lot more compact:

        switch ((status >> 8) & 0xff) {
        case  1:        return "Error detected internal to the component";
        case  4:        return "Storage error in DRAM memory";
        case  5:        return "Storage error in TLB";
        case  6:        return "Storage error in cache";
        case  7:        return "Error in one or more functional units";
        case  8:        return "component failed self test";
        case  9:        return "Overflow or undervalue of internal queue";
        case 16:        return "Error detected in the bus";
	...

> +	case 16:
> +		return "Error detected in the bus";

And yes, that 16 needs to come before 17, ofc.

> @@ -334,7 +377,8 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
>  		return;
>  	}
>  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
> -		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
> +		printk("%s""error_status: 0x%016llx, %s\n", pfx, mem->error_status,
> +				cper_mem_err_status_str(mem->error_status));

Arguments need to be aligned at opening brace, i.e.:

                printk("%s""error_status: 0x%016llx, %s\n",
                        pfx, mem->error_status, cper_mem_err_status_str(mem->error_status));


Also, the naked error status number is not as user-friendly when we have
the decoded string. So the format should be:

                printk("%s error_status: %s (0x%016llx)\n",
                        pfx, cper_mem_err_status_str(mem->error_status), mem->error_status);
Shuai Xue Jan. 25, 2022, 2:45 a.m. UTC | #2
Hi, Borislav,

Thank you for your valuable comments.

在 2022/1/25 AM5:16, Borislav Petkov 写道:
> On Mon, Jan 24, 2022 at 10:47:58AM +0800, Shuai Xue wrote:
>> Introduce a new helper function cper_mem_err_status_str() which is used to
>> decode the description of error status, and the cper_print_mem() will call
>> it and report the details of error status.
>>
>> Signed-off-by: Shuai Xue <xueshuai@linux.alibaba.com>
>> ---
>>  drivers/firmware/efi/cper.c | 46 ++++++++++++++++++++++++++++++++++++-
>>  include/linux/cper.h        |  1 +
>>  2 files changed, 46 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
>> index 6ec8edec6329..addafccecd84 100644
>> --- a/drivers/firmware/efi/cper.c
>> +++ b/drivers/firmware/efi/cper.c
>> @@ -211,6 +211,49 @@ const char *cper_mem_err_type_str(unsigned int etype)
>>  }
>>  EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
>>  
>> +const char *cper_mem_err_status_str(u64 status)
>> +{
>> +	switch ((status >> 8) & 0xff) {
>> +	case 1:
>> +		return "Error detected internal to the component";
> 
> You can make that table a lot more compact:
> 
>         switch ((status >> 8) & 0xff) {
>         case  1:        return "Error detected internal to the component";
>         case  4:        return "Storage error in DRAM memory";
>         case  5:        return "Storage error in TLB";
>         case  6:        return "Storage error in cache";
>         case  7:        return "Error in one or more functional units";
>         case  8:        return "component failed self test";
>         case  9:        return "Overflow or undervalue of internal queue";
>         case 16:        return "Error detected in the bus";
> 	...
> 
>> +	case 16:
>> +		return "Error detected in the bus";
> 
> And yes, that 16 needs to come before 17, ofc.

I will fix it in next version.

>> @@ -334,7 +377,8 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
>>  		return;
>>  	}
>>  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
>> -		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
>> +		printk("%s""error_status: 0x%016llx, %s\n", pfx, mem->error_status,
>> +				cper_mem_err_status_str(mem->error_status));
> 
> Arguments need to be aligned at opening brace, i.e.:
> 
>                 printk("%s""error_status: 0x%016llx, %s\n",
>                         pfx, mem->error_status, cper_mem_err_status_str(mem->error_status));
> 
> 
> Also, the naked error status number is not as user-friendly when we have
> the decoded string. So the format should be:
> 
>                 printk("%s error_status: %s (0x%016llx)\n",
>                         pfx, cper_mem_err_status_str(mem->error_status), mem->error_status);
> 

Good point. Will fix it.

Best Regard,
Shuai
Borislav Petkov Jan. 25, 2022, 10:21 a.m. UTC | #3
On Tue, Jan 25, 2022 at 10:45:31AM +0800, Shuai Xue wrote:
> I will fix it in next version.

Yes, thanks.

However, you don't have to resend immediately but wait instead until
people have had time to review the whole thing. And while you're
waiting, you can read through Documentation/process/...

There are passages like the following one, for example:

"Don't get discouraged - or impatient
------------------------------------

After you have submitted your change, be patient and wait.  Reviewers are
busy people and may not get to your patch right away.

Once upon a time, patches used to disappear into the void without comment,
but the development process works more smoothly than that now.  You should
receive comments within a week or so; if that does not happen, make sure
that you have sent your patches to the right place.  Wait for a minimum of
one week before resubmitting or pinging reviewers - possibly longer during
busy times like merge windows.

It's also ok to resend the patch or the patch series after a couple of
weeks with the word "RESEND" added to the subject line::

   [PATCH Vx RESEND] sub/sys: Condensed patch summary

Don't add "RESEND" when you are submitting a modified version of your
patch or patch series - "RESEND" only applies to resubmission of a
patch or patch series which have not been modified in any way from the
previous submission."
Shuai Xue Jan. 25, 2022, 11:49 a.m. UTC | #4
Hi, Borislav,

Thank you for your reply.

I am sorry if you feel the RESEND tag is pushing you.

Take your time, I will be more patient :)

Best Regards,
Shuai

在 2022/1/25 PM6:21, Borislav Petkov 写道:
> On Tue, Jan 25, 2022 at 10:45:31AM +0800, Shuai Xue wrote:
>> I will fix it in next version.
> 
> Yes, thanks.
> 
> However, you don't have to resend immediately but wait instead until
> people have had time to review the whole thing. And while you're
> waiting, you can read through Documentation/process/...
> 
> There are passages like the following one, for example:
> 
> "Don't get discouraged - or impatient
> ------------------------------------
> 
> After you have submitted your change, be patient and wait.  Reviewers are
> busy people and may not get to your patch right away.
> 
> Once upon a time, patches used to disappear into the void without comment,
> but the development process works more smoothly than that now.  You should
> receive comments within a week or so; if that does not happen, make sure
> that you have sent your patches to the right place.  Wait for a minimum of
> one week before resubmitting or pinging reviewers - possibly longer during
> busy times like merge windows.
> 
> It's also ok to resend the patch or the patch series after a couple of
> weeks with the word "RESEND" added to the subject line::
> 
>    [PATCH Vx RESEND] sub/sys: Condensed patch summary
> 
> Don't add "RESEND" when you are submitting a modified version of your
> patch or patch series - "RESEND" only applies to resubmission of a
> patch or patch series which have not been modified in any way from the
> previous submission."
>
Borislav Petkov Jan. 25, 2022, 12:37 p.m. UTC | #5
On Tue, Jan 25, 2022 at 07:49:41PM +0800, Shuai Xue wrote:
> I am sorry if you feel the RESEND tag is pushing you.

It is not pushing me - there are rules, simply. Rules you should read
first before sending patches.

How about I start flooding you a patchset every day?

Also, please do not top-post. That's also explained in that
documentation directory I mentioned. Read about it too pls.
Shuai Xue Jan. 25, 2022, 3:02 p.m. UTC | #6
Hi Borislav,

在 2022/1/25 PM8:37, Borislav Petkov 写道:
> On Tue, Jan 25, 2022 at 07:49:41PM +0800, Shuai Xue wrote:
>> I am sorry if you feel the RESEND tag is pushing you.
> 
> It is not pushing me - there are rules, simply. Rules you should read
> first before sending patches.

Got it. I will learn rules first.

> How about I start flooding you a patchset every day?

Haha, I see. I am sorry to bother you and thank you very much for your patient
and valuable comments, just take your time. By the way, after sending patchset
v3 8 days, I resend it yesterday, and the patchset v4 sent today is to address
your comments, not a resend patchset. Anyway, I will be more patient.

> Also, please do not top-post. That's also explained in that
> documentation directory I mentioned. Read about it too pls.

I will, thank you.

Best Regards,
Shuai
diff mbox series

Patch

diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 6ec8edec6329..addafccecd84 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -211,6 +211,49 @@  const char *cper_mem_err_type_str(unsigned int etype)
 }
 EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
 
+const char *cper_mem_err_status_str(u64 status)
+{
+	switch ((status >> 8) & 0xff) {
+	case 1:
+		return "Error detected internal to the component";
+	case 16:
+		return "Error detected in the bus";
+	case 4:
+		return "Storage error in DRAM memory";
+	case 5:
+		return "Storage error in TLB";
+	case 6:
+		return "Storage error in cache";
+	case 7:
+		return "Error in one or more functional units";
+	case 8:
+		return "component failed self test";
+	case 9:
+		return "Overflow or undervalue of internal queue";
+	case 17:
+		return "Virtual address not found on IO-TLB or IO-PDIR";
+	case 18:
+		return "Improper access error";
+	case 19:
+		return "Access to a memory address which is not mapped to any component";
+	case 20:
+		return "Loss of Lockstep";
+	case 21:
+		return "Response not associated with a request";
+	case 22:
+		return "Bus parity error - must also set the A, C, or D Bits";
+	case 23:
+		return "Detection of a PATH_ERROR ";
+	case 25:
+		return "Bus operation timeout";
+	case 26:
+		return "A read was issued to data that has been poisoned";
+	default:
+		return "reserved";
+	}
+}
+EXPORT_SYMBOL_GPL(cper_mem_err_status_str);
+
 static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg)
 {
 	u32 len, n;
@@ -334,7 +377,8 @@  static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem,
 		return;
 	}
 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
-		printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
+		printk("%s""error_status: 0x%016llx, %s\n", pfx, mem->error_status,
+				cper_mem_err_status_str(mem->error_status));
 	if (mem->validation_bits & CPER_MEM_VALID_PA)
 		printk("%s""physical_address: 0x%016llx\n",
 		       pfx, mem->physical_addr);
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 6a511a1078ca..5b1dd27b317d 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -558,6 +558,7 @@  extern const char *const cper_proc_error_type_strs[4];
 u64 cper_next_record_id(void);
 const char *cper_severity_str(unsigned int);
 const char *cper_mem_err_type_str(unsigned int);
+const char *cper_mem_err_status_str(u64 status);
 void cper_print_bits(const char *prefix, unsigned int bits,
 		     const char * const strs[], unsigned int strs_size);
 void cper_mem_err_pack(const struct cper_sec_mem_err *,