diff mbox

[V3,02/10] ras: acpi/apei: cper: generic error data entry v3 per ACPI 6.1

Message ID 1475875882-2604-3-git-send-email-tbaicar@codeaurora.org (mailing list archive)
State Not Applicable, archived
Headers show

Commit Message

Tyler Baicar Oct. 7, 2016, 9:31 p.m. UTC
Currently when a RAS error is reported it is not timestamped.
The ACPI 6.1 spec adds the timestamp field to the generic error
data entry v3 structure. The timestamp of when the firmware
generated the error is now being reported.

Signed-off-by: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org>
Signed-off-by: Richard Ruigrok <rruigrok@codeaurora.org>
Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
Signed-off-by: Naveen Kaje <nkaje@codeaurora.org>
---
 drivers/acpi/apei/ghes.c    | 25 ++++++++++--
 drivers/firmware/efi/cper.c | 97 +++++++++++++++++++++++++++++++++++++++------
 2 files changed, 105 insertions(+), 17 deletions(-)

Comments

Suzuki K Poulose Oct. 11, 2016, 5:28 p.m. UTC | #1
On 07/10/16 22:31, Tyler Baicar wrote:
> Currently when a RAS error is reported it is not timestamped.
> The ACPI 6.1 spec adds the timestamp field to the generic error
> data entry v3 structure. The timestamp of when the firmware
> generated the error is now being reported.
>
> Signed-off-by: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org>
> Signed-off-by: Richard Ruigrok <rruigrok@codeaurora.org>
> Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
> Signed-off-by: Naveen Kaje <nkaje@codeaurora.org>

Please could you keep the people who reviewed/commented on your series in the past,
whenever you post a new version ?

> ---
>  drivers/acpi/apei/ghes.c    | 25 ++++++++++--
>  drivers/firmware/efi/cper.c | 97 +++++++++++++++++++++++++++++++++++++++------
>  2 files changed, 105 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index 3021f0e..c8488f1 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -80,6 +80,10 @@
>  	((struct acpi_hest_generic_status *)				\
>  	 ((struct ghes_estatus_node *)(estatus_node) + 1))
>
> +#define acpi_hest_generic_data_version(gdata)			\
> +	(gdata->revision >> 8)

...

> +inline void *acpi_hest_generic_data_payload(struct acpi_hest_generic_data *gdata)
> +{
> +	return acpi_hest_generic_data_version(gdata) >= 3 ?
> +		(void *)(((struct acpi_hest_generic_data_v300 *)(gdata)) + 1) :
> +		gdata + 1;
> +}
> +



> diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
> index d425374..9fa1317 100644
> --- a/drivers/firmware/efi/cper.c
> +++ b/drivers/firmware/efi/cper.c

   
> +#define acpi_hest_generic_data_version(gdata)		\
> +	(gdata->revision >> 8)
> +

...

> +static inline void *acpi_hest_generic_data_payload(struct acpi_hest_generic_data *gdata)
> +{
> +	return acpi_hest_generic_data_version(gdata) >= 3 ?
> +		(void *)(((struct acpi_hest_generic_data_v300 *)(gdata)) + 1) :
> +		gdata + 1;
> +}

Could these go to a header file, so that we don't need duplicate definitions of these helpers in
different files ?

> +
> +static void cper_estatus_print_section_v300(const char *pfx,
> +	const struct acpi_hest_generic_data_v300 *gdata)
> +{
> +	__u8 hour, min, sec, day, mon, year, century, *timestamp;
> +
> +	if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
> +		timestamp = (__u8 *)&(gdata->time_stamp);
> +		memcpy(&sec, timestamp, 1);
> +		memcpy(&min, timestamp + 1, 1);
> +		memcpy(&hour, timestamp + 2, 1);
> +		memcpy(&day, timestamp + 4, 1);
> +		memcpy(&mon, timestamp + 5, 1);
> +		memcpy(&year, timestamp + 6, 1);
> +		memcpy(&century, timestamp + 7, 1);
> +		printk("%stime: ", pfx);
> +		printk("%7s", 0x01 & *(timestamp + 3) ? "precise" : "");

What format is the (timestamp + 3) stored in ? Does it need conversion ?

> +		printk(" %02d:%02d:%02d %02d%02d-%02d-%02d\n",
> +			bcd2bin(hour), bcd2bin(min), bcd2bin(sec),
> +			bcd2bin(century), bcd2bin(year), bcd2bin(mon),
> +			bcd2bin(day));
> +	}

minor nit: Would it be easier to order/parse the error messages if the date
is printed first followed by time ?

i.e,
	17:20:14 2016-09-15 Mon
		vs
	2016-09-15 Mon 17:20:14

e.g, people looking at a huge log, looking for logs from a specific date might
find the latter more useful to skip the messages.

> +}
> +
>  static void cper_estatus_print_section(
> -	const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
> +	const char *pfx, struct acpi_hest_generic_data *gdata, int sec_no)
>  {
>  	uuid_le *sec_type = (uuid_le *)gdata->section_type;
>  	__u16 severity;
>  	char newpfx[64];
>
> +	if ((gdata->revision >> 8) >= 0x03)

Could we use the helper defined above ?

> @@ -451,12 +497,22 @@ void cper_estatus_print(const char *pfx,
>  	printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
>  	data_len = estatus->data_length;
>  	gdata = (struct acpi_hest_generic_data *)(estatus + 1);
> +	if ((gdata->revision >> 8) >= 0x03)

Same as above, use the macro ?

> +		gdata_v3 = (struct acpi_hest_generic_data_v300 *)gdata;
> +
>  	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
> +
>  	while (data_len >= sizeof(*gdata)) {
>  		gedata_len = gdata->error_data_length;
>  		cper_estatus_print_section(newpfx, gdata, sec_no);
> -		data_len -= gedata_len + sizeof(*gdata);
> -		gdata = (void *)(gdata + 1) + gedata_len;
> +		if(gdata_v3) {
> +			data_len -= gedata_len + sizeof(*gdata_v3);
> +			gdata_v3 = (void *)(gdata_v3 + 1) + gedata_len;
> +			gdata = (struct acpi_hest_generic_data *)gdata_v3;
> +		} else {
> +			data_len -= gedata_len + sizeof(*gdata);
> +			gdata = (void *)(gdata + 1) + gedata_len;
> +		}
>  		sec_no++;
>  	}

...

>
> @@ -486,15 +543,29 @@ int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
>  		return rc;
>  	data_len = estatus->data_length;
>  	gdata = (struct acpi_hest_generic_data *)(estatus + 1);
> -	while (data_len >= sizeof(*gdata)) {
> -		gedata_len = gdata->error_data_length;
> -		if (gedata_len > data_len - sizeof(*gdata))
> +
> +	if ((gdata->revision >> 8) >= 0x03) {
> +		gdata_v3 = (struct acpi_hest_generic_data_v300 *)gdata;
> +		while (data_len >= sizeof(*gdata_v3)) {
> +			gedata_len = gdata_v3->error_data_length;
> +			if (gedata_len > data_len - sizeof(*gdata_v3))
> +				return -EINVAL;
> +			data_len -= gedata_len + sizeof(*gdata_v3);
> +			gdata_v3 = (void *)(gdata_v3 + 1) + gedata_len;
> +		}
> +		if (data_len)
> +			return -EINVAL;
> +	} else {
> +		while (data_len >= sizeof(*gdata)) {
> +			gedata_len = gdata->error_data_length;
> +			if (gedata_len > data_len - sizeof(*gdata))
> +				return -EINVAL;
> +			data_len -= gedata_len + sizeof(*gdata);
> +			gdata = (void *)(gdata + 1) + gedata_len;
> +		}
> +		if (data_len)

As mentioned in the previous version, would it make sense to add some more
helpers to deal with record versions ? We seem to be doing the version switch and
code duplication at different places.

Does the following help ? Thoughts ?

#define acpi_hest_generic_data_error_length(gdata) (((struct acpi_hest_generic_data *)(gdata))->error_data_length)
#define acpi_hest_generic_data_size(gdata) \
	((acpi_hest_generic_data_version(gdata) >= 3) ? \
	 sizeof(struct acpi_hest_generic_data_v300) :	\
	 sizeof(struct acpi_hest_generic_data))
#define acpi_hest_generic_data_record_size(gdata)
	(acpi_hest_generic_data_size(gdata) + \
	 acpi_hest_generic_data_error_length(gdata))
#define acpi_hest_generic_data_next(gdata) \
	((void *)(gdata) + acpi_hest_generic_data_record_size(gdata))


Suzuki
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Russell King (Oracle) Oct. 11, 2016, 6:52 p.m. UTC | #2
On Fri, Oct 07, 2016 at 03:31:14PM -0600, Tyler Baicar wrote:
> +static void cper_estatus_print_section_v300(const char *pfx,
> +	const struct acpi_hest_generic_data_v300 *gdata)
> +{
> +	__u8 hour, min, sec, day, mon, year, century, *timestamp;
> +
> +	if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
> +		timestamp = (__u8 *)&(gdata->time_stamp);
> +		memcpy(&sec, timestamp, 1);
> +		memcpy(&min, timestamp + 1, 1);
> +		memcpy(&hour, timestamp + 2, 1);
> +		memcpy(&day, timestamp + 4, 1);
> +		memcpy(&mon, timestamp + 5, 1);
> +		memcpy(&year, timestamp + 6, 1);
> +		memcpy(&century, timestamp + 7, 1);

This is utterly silly.  Why are you using memcpy() to access individual
bytes of a u8 pointer?  What's wrong with:

		sec = timestamp[0];
		min = timestamp[1];
		hour = timestamp[2];
		day = timestamp[4];
		mon = timestamp[5];
		year = timestamp[6];
		century = timestamp[7];

or even do the conversion here:

		sec = bcd2bin(timestamp[0]);
... etc ...

> +		printk("%stime: ", pfx);
> +		printk("%7s", 0x01 & *(timestamp + 3) ? "precise" : "");
> +		printk(" %02d:%02d:%02d %02d%02d-%02d-%02d\n",
> +			bcd2bin(hour), bcd2bin(min), bcd2bin(sec),
> +			bcd2bin(century), bcd2bin(year), bcd2bin(mon),
> +			bcd2bin(day));
> +	}

It's also a good idea to (as much as possible) keep to single printk()
statements - which makes the emission of the string more atomic wrt
other CPUs and contexts.  So, this should probably become (with the
conversion being done at the assignment of sec etc):

		printk("%stime: %7s %02d:%02d:%02d %02d%02d-%02d-%02d\n",
			pfx, 0x01 & timestamp[3] ? "precise" : "",
			hour, min, sec, century, year, mon, day);

which, IMHO, looks a lot nicer and doesn't risk some other printk()
getting between each individual part of the line.

> +}
> +
>  static void cper_estatus_print_section(
> -	const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
> +	const char *pfx, struct acpi_hest_generic_data *gdata, int sec_no)
>  {
>  	uuid_le *sec_type = (uuid_le *)gdata->section_type;
>  	__u16 severity;
>  	char newpfx[64];
>  
> +	if ((gdata->revision >> 8) >= 0x03)
> +		cper_estatus_print_section_v300(pfx,
> +			(const struct acpi_hest_generic_data_v300 *)gdata);
> +
>  	severity = gdata->error_severity;
>  	printk("%s""Error %d, type: %s\n", pfx, sec_no,
>  	       cper_severity_str(severity));

Not sure why you have the "" here - %sError works just as well and the
"" is just obfuscation - the compiler will eliminate the double-double
quote and merge the strings anyway.
Tyler Baicar Oct. 12, 2016, 10:10 p.m. UTC | #3
Hello Suzuki,

Thank you for the feedback! Responses below.


On 10/11/2016 11:28 AM, Suzuki K Poulose wrote:
> On 07/10/16 22:31, Tyler Baicar wrote:
>> Currently when a RAS error is reported it is not timestamped.
>> The ACPI 6.1 spec adds the timestamp field to the generic error
>> data entry v3 structure. The timestamp of when the firmware
>> generated the error is now being reported.
>>
>> Signed-off-by: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org>
>> Signed-off-by: Richard Ruigrok <rruigrok@codeaurora.org>
>> Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
>> Signed-off-by: Naveen Kaje <nkaje@codeaurora.org>
>
> Please could you keep the people who reviewed/commented on your series 
> in the past,
> whenever you post a new version ?
Do you mean to just send the new version to their e-mail directly in 
addition to the lists? If so, I will do that next time.

I know you provided good feedback on the previous patchset, but I did 
not have anyone specifically respond to add "reviewed-by:...". I don't 
think I should add reviewed-by for someone unless they specifically add 
it in a response :)
>
>> ---
>>  drivers/acpi/apei/ghes.c    | 25 ++++++++++--
>>  drivers/firmware/efi/cper.c | 97 
>> +++++++++++++++++++++++++++++++++++++++------
>>  2 files changed, 105 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
>> index 3021f0e..c8488f1 100644
>> --- a/drivers/acpi/apei/ghes.c
>> +++ b/drivers/acpi/apei/ghes.c
>> @@ -80,6 +80,10 @@
>>      ((struct acpi_hest_generic_status *)                \
>>       ((struct ghes_estatus_node *)(estatus_node) + 1))
>>
>> +#define acpi_hest_generic_data_version(gdata)            \
>> +    (gdata->revision >> 8)
>
> ...
>
>> +inline void *acpi_hest_generic_data_payload(struct 
>> acpi_hest_generic_data *gdata)
>> +{
>> +    return acpi_hest_generic_data_version(gdata) >= 3 ?
>> +        (void *)(((struct acpi_hest_generic_data_v300 *)(gdata)) + 1) :
>> +        gdata + 1;
>> +}
>> +
>
>
>
>> diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
>> index d425374..9fa1317 100644
>> --- a/drivers/firmware/efi/cper.c
>> +++ b/drivers/firmware/efi/cper.c
>
>> +#define acpi_hest_generic_data_version(gdata)        \
>> +    (gdata->revision >> 8)
>> +
>
> ...
>
>> +static inline void *acpi_hest_generic_data_payload(struct 
>> acpi_hest_generic_data *gdata)
>> +{
>> +    return acpi_hest_generic_data_version(gdata) >= 3 ?
>> +        (void *)(((struct acpi_hest_generic_data_v300 *)(gdata)) + 1) :
>> +        gdata + 1;
>> +}
>
> Could these go to a header file, so that we don't need duplicate 
> definitions of these helpers in
> different files ?
>
I think that should work to avoid duplication. I will move them to a 
header file in the next patchset.
>> +
>> +static void cper_estatus_print_section_v300(const char *pfx,
>> +    const struct acpi_hest_generic_data_v300 *gdata)
>> +{
>> +    __u8 hour, min, sec, day, mon, year, century, *timestamp;
>> +
>> +    if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
>> +        timestamp = (__u8 *)&(gdata->time_stamp);
>> +        memcpy(&sec, timestamp, 1);
>> +        memcpy(&min, timestamp + 1, 1);
>> +        memcpy(&hour, timestamp + 2, 1);
>> +        memcpy(&day, timestamp + 4, 1);
>> +        memcpy(&mon, timestamp + 5, 1);
>> +        memcpy(&year, timestamp + 6, 1);
>> +        memcpy(&century, timestamp + 7, 1);
>> +        printk("%stime: ", pfx);
>> +        printk("%7s", 0x01 & *(timestamp + 3) ? "precise" : "");
>
> What format is the (timestamp + 3) stored in ? Does it need conversion ?
The third byte of the timestamp is currently only used to determine if 
the time is precise or not. Bit 0 is used to specify that and the other 
bits in this byte are marked as reserved. This is shown in table 247 of 
the UEFI spec 2.6:

Byte 3:
   Bit 0 – Timestamp is precise if this bit is set and correlates to the 
time of the error event.
   Bit 7:1 – Reserved
>
>> +        printk(" %02d:%02d:%02d %02d%02d-%02d-%02d\n",
>> +            bcd2bin(hour), bcd2bin(min), bcd2bin(sec),
>> +            bcd2bin(century), bcd2bin(year), bcd2bin(mon),
>> +            bcd2bin(day));
>> +    }
>
> minor nit: Would it be easier to order/parse the error messages if the 
> date
> is printed first followed by time ?
>
> i.e,
>     17:20:14 2016-09-15 Mon
>         vs
>     2016-09-15 Mon 17:20:14
>
> e.g, people looking at a huge log, looking for logs from a specific 
> date might
> find the latter more useful to skip the messages.
>
The latter does seem like it would be better for parsing large logs. I 
can rearrange the order in the next patchset.
>> +}
>> +
>>  static void cper_estatus_print_section(
>> -    const char *pfx, const struct acpi_hest_generic_data *gdata, int 
>> sec_no)
>> +    const char *pfx, struct acpi_hest_generic_data *gdata, int sec_no)
>>  {
>>      uuid_le *sec_type = (uuid_le *)gdata->section_type;
>>      __u16 severity;
>>      char newpfx[64];
>>
>> +    if ((gdata->revision >> 8) >= 0x03)
>
> Could we use the helper defined above ?
Yes, I'll change this to use acpi_hest_generic_data_version(gdata) instead.
>
>> @@ -451,12 +497,22 @@ void cper_estatus_print(const char *pfx,
>>      printk("%s""event severity: %s\n", pfx, 
>> cper_severity_str(severity));
>>      data_len = estatus->data_length;
>>      gdata = (struct acpi_hest_generic_data *)(estatus + 1);
>> +    if ((gdata->revision >> 8) >= 0x03)
>
> Same as above, use the macro ?
Yes, I'll change this to use acpi_hest_generic_data_version(gdata) instead.
>
>> +        gdata_v3 = (struct acpi_hest_generic_data_v300 *)gdata;
>> +
>>      snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
>> +
>>      while (data_len >= sizeof(*gdata)) {
>>          gedata_len = gdata->error_data_length;
>>          cper_estatus_print_section(newpfx, gdata, sec_no);
>> -        data_len -= gedata_len + sizeof(*gdata);
>> -        gdata = (void *)(gdata + 1) + gedata_len;
>> +        if(gdata_v3) {
>> +            data_len -= gedata_len + sizeof(*gdata_v3);
>> +            gdata_v3 = (void *)(gdata_v3 + 1) + gedata_len;
>> +            gdata = (struct acpi_hest_generic_data *)gdata_v3;
>> +        } else {
>> +            data_len -= gedata_len + sizeof(*gdata);
>> +            gdata = (void *)(gdata + 1) + gedata_len;
>> +        }
>>          sec_no++;
>>      }
>
> ...
>
>>
>> @@ -486,15 +543,29 @@ int cper_estatus_check(const struct 
>> acpi_hest_generic_status *estatus)
>>          return rc;
>>      data_len = estatus->data_length;
>>      gdata = (struct acpi_hest_generic_data *)(estatus + 1);
>> -    while (data_len >= sizeof(*gdata)) {
>> -        gedata_len = gdata->error_data_length;
>> -        if (gedata_len > data_len - sizeof(*gdata))
>> +
>> +    if ((gdata->revision >> 8) >= 0x03) {
>> +        gdata_v3 = (struct acpi_hest_generic_data_v300 *)gdata;
>> +        while (data_len >= sizeof(*gdata_v3)) {
>> +            gedata_len = gdata_v3->error_data_length;
>> +            if (gedata_len > data_len - sizeof(*gdata_v3))
>> +                return -EINVAL;
>> +            data_len -= gedata_len + sizeof(*gdata_v3);
>> +            gdata_v3 = (void *)(gdata_v3 + 1) + gedata_len;
>> +        }
>> +        if (data_len)
>> +            return -EINVAL;
>> +    } else {
>> +        while (data_len >= sizeof(*gdata)) {
>> +            gedata_len = gdata->error_data_length;
>> +            if (gedata_len > data_len - sizeof(*gdata))
>> +                return -EINVAL;
>> +            data_len -= gedata_len + sizeof(*gdata);
>> +            gdata = (void *)(gdata + 1) + gedata_len;
>> +        }
>> +        if (data_len)
>
> As mentioned in the previous version, would it make sense to add some 
> more
> helpers to deal with record versions ? We seem to be doing the version 
> switch and
> code duplication at different places.
>
> Does the following help ? Thoughts ?
>
> #define acpi_hest_generic_data_error_length(gdata) (((struct 
> acpi_hest_generic_data *)(gdata))->error_data_length)
> #define acpi_hest_generic_data_size(gdata) \
>     ((acpi_hest_generic_data_version(gdata) >= 3) ? \
>      sizeof(struct acpi_hest_generic_data_v300) :    \
>      sizeof(struct acpi_hest_generic_data))
> #define acpi_hest_generic_data_record_size(gdata)
>     (acpi_hest_generic_data_size(gdata) + \
>      acpi_hest_generic_data_error_length(gdata))
> #define acpi_hest_generic_data_next(gdata) \
>     ((void *)(gdata) + acpi_hest_generic_data_record_size(gdata))
>
>
> Suzuki
These helpers will definitely help consolidate this code. I will use 
these in the next version to remove the code duplication here.

Thanks,
Tyler
Tyler Baicar Oct. 12, 2016, 10:18 p.m. UTC | #4
Hello Russell,

Thank you for the feedback! Responses below


On 10/11/2016 12:52 PM, Russell King - ARM Linux wrote:
> On Fri, Oct 07, 2016 at 03:31:14PM -0600, Tyler Baicar wrote:
>> +static void cper_estatus_print_section_v300(const char *pfx,
>> +	const struct acpi_hest_generic_data_v300 *gdata)
>> +{
>> +	__u8 hour, min, sec, day, mon, year, century, *timestamp;
>> +
>> +	if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
>> +		timestamp = (__u8 *)&(gdata->time_stamp);
>> +		memcpy(&sec, timestamp, 1);
>> +		memcpy(&min, timestamp + 1, 1);
>> +		memcpy(&hour, timestamp + 2, 1);
>> +		memcpy(&day, timestamp + 4, 1);
>> +		memcpy(&mon, timestamp + 5, 1);
>> +		memcpy(&year, timestamp + 6, 1);
>> +		memcpy(&century, timestamp + 7, 1);
> This is utterly silly.  Why are you using memcpy() to access individual
> bytes of a u8 pointer?  What's wrong with:
>
> 		sec = timestamp[0];
> 		min = timestamp[1];
> 		hour = timestamp[2];
> 		day = timestamp[4];
> 		mon = timestamp[5];
> 		year = timestamp[6];
> 		century = timestamp[7];
>
> or even do the conversion here:
>
> 		sec = bcd2bin(timestamp[0]);
> ... etc ...
Yes, that will be a lot cleaner especially with moving the conversion here.
>
>> +		printk("%stime: ", pfx);
>> +		printk("%7s", 0x01 & *(timestamp + 3) ? "precise" : "");
>> +		printk(" %02d:%02d:%02d %02d%02d-%02d-%02d\n",
>> +			bcd2bin(hour), bcd2bin(min), bcd2bin(sec),
>> +			bcd2bin(century), bcd2bin(year), bcd2bin(mon),
>> +			bcd2bin(day));
>> +	}
> It's also a good idea to (as much as possible) keep to single printk()
> statements - which makes the emission of the string more atomic wrt
> other CPUs and contexts.  So, this should probably become (with the
> conversion being done at the assignment of sec etc):
>
> 		printk("%stime: %7s %02d:%02d:%02d %02d%02d-%02d-%02d\n",
> 			pfx, 0x01 & timestamp[3] ? "precise" : "",
> 			hour, min, sec, century, year, mon, day);
>
> which, IMHO, looks a lot nicer and doesn't risk some other printk()
> getting between each individual part of the line.
I will make this change in the next version. This printk does look a lot 
nicer and avoids other prints from getting in the middle (I actually 
just saw that happen in testing a couple days ago)
>> +}
>> +
>>   static void cper_estatus_print_section(
>> -	const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
>> +	const char *pfx, struct acpi_hest_generic_data *gdata, int sec_no)
>>   {
>>   	uuid_le *sec_type = (uuid_le *)gdata->section_type;
>>   	__u16 severity;
>>   	char newpfx[64];
>>   
>> +	if ((gdata->revision >> 8) >= 0x03)
>> +		cper_estatus_print_section_v300(pfx,
>> +			(const struct acpi_hest_generic_data_v300 *)gdata);
>> +
>>   	severity = gdata->error_severity;
>>   	printk("%s""Error %d, type: %s\n", pfx, sec_no,
>>   	       cper_severity_str(severity));
> Not sure why you have the "" here - %sError works just as well and the
> "" is just obfuscation - the compiler will eliminate the double-double
> quote and merge the strings anyway.
>
I will remove the "" in the next version.

Thanks,
Tyler
Suzuki K Poulose Oct. 13, 2016, 8:50 a.m. UTC | #5
On 12/10/16 23:10, Baicar, Tyler wrote:
> Hello Suzuki,
>
> Thank you for the feedback! Responses below.
>
>
> On 10/11/2016 11:28 AM, Suzuki K Poulose wrote:
>> On 07/10/16 22:31, Tyler Baicar wrote:
>>> Currently when a RAS error is reported it is not timestamped.
>>> The ACPI 6.1 spec adds the timestamp field to the generic error
>>> data entry v3 structure. The timestamp of when the firmware
>>> generated the error is now being reported.
>>>
>>> Signed-off-by: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org>
>>> Signed-off-by: Richard Ruigrok <rruigrok@codeaurora.org>
>>> Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
>>> Signed-off-by: Naveen Kaje <nkaje@codeaurora.org>
>>
>> Please could you keep the people who reviewed/commented on your series in the past,
>> whenever you post a new version ?
> Do you mean to just send the new version to their e-mail directly in addition to the lists? If so, I will do that next time.

If you send a new version of a series to the list, it is a good idea to keep
the people who commented (significantly) on your previous version in Cc, especially
when you have addressed their feedback. That will help them to keep track of the
series. People can always see the new version in the list, but then it is so easy
to miss something in the 100s of emails you get each day. I am sure people have
special filters for the emails based on if they are in Cc/To etc.

>
> I know you provided good feedback on the previous patchset, but I did not have anyone specifically respond to add "reviewed-by:...". I don't think I should add reviewed-by for someone unless they specifically add it in a response :)

No, I haven't yet "Reviewed-by" your patches. I had some comments on it, which means
I expected it to be addressed as you committed in your response.

>>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
>>> index 3021f0e..c8488f1 100644
>>> --- a/drivers/acpi/apei/ghes.c
>>> +++ b/drivers/acpi/apei/ghes.c
>>> @@ -80,6 +80,10 @@

> I think that should work to avoid duplication. I will move them to a header file in the next patchset.
>>> +
>>> +static void cper_estatus_print_section_v300(const char *pfx,
>>> +    const struct acpi_hest_generic_data_v300 *gdata)
>>> +{
>>> +    __u8 hour, min, sec, day, mon, year, century, *timestamp;
>>> +
>>> +    if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
>>> +        timestamp = (__u8 *)&(gdata->time_stamp);
>>> +        memcpy(&sec, timestamp, 1);
>>> +        memcpy(&min, timestamp + 1, 1);
>>> +        memcpy(&hour, timestamp + 2, 1);
>>> +        memcpy(&day, timestamp + 4, 1);
>>> +        memcpy(&mon, timestamp + 5, 1);
>>> +        memcpy(&year, timestamp + 6, 1);
>>> +        memcpy(&century, timestamp + 7, 1);
>>> +        printk("%stime: ", pfx);
>>> +        printk("%7s", 0x01 & *(timestamp + 3) ? "precise" : "");
>>
>> What format is the (timestamp + 3) stored in ? Does it need conversion ?
> The third byte of the timestamp is currently only used to determine if the time is precise or not. Bit 0 is used to specify that and the other bits in this byte are marked as reserved. This is shown in table 247 of the UEFI spec 2.6:
>
> Byte 3:
>   Bit 0 – Timestamp is precise if this bit is set and correlates to the time of the error event.
>   Bit 7:1 – Reserved

Is it always the same endianness as that of the CPU ?

Cheers
Suzuki
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tyler Baicar Oct. 13, 2016, 7:37 p.m. UTC | #6
Hello Suzuki,

On 10/13/2016 2:50 AM, Suzuki K Poulose wrote:
> On 12/10/16 23:10, Baicar, Tyler wrote:
>> Hello Suzuki,
>>
>> Thank you for the feedback! Responses below.
>>
>> On 10/11/2016 11:28 AM, Suzuki K Poulose wrote:
>>> On 07/10/16 22:31, Tyler Baicar wrote:
>>>> Currently when a RAS error is reported it is not timestamped.
>>>> The ACPI 6.1 spec adds the timestamp field to the generic error
>>>> data entry v3 structure. The timestamp of when the firmware
>>>> generated the error is now being reported.
>>>>
>>>> Signed-off-by: Jonathan (Zhixiong) Zhang <zjzhang@codeaurora.org>
>>>> Signed-off-by: Richard Ruigrok <rruigrok@codeaurora.org>
>>>> Signed-off-by: Tyler Baicar <tbaicar@codeaurora.org>
>>>> Signed-off-by: Naveen Kaje <nkaje@codeaurora.org>
>>>
>>> Please could you keep the people who reviewed/commented on your 
>>> series in the past,
>>> whenever you post a new version ?
>> Do you mean to just send the new version to their e-mail directly in 
>> addition to the lists? If so, I will do that next time.
>
> If you send a new version of a series to the list, it is a good idea 
> to keep
> the people who commented (significantly) on your previous version in 
> Cc, especially
> when you have addressed their feedback. That will help them to keep 
> track of the
> series. People can always see the new version in the list, but then it 
> is so easy
> to miss something in the 100s of emails you get each day. I am sure 
> people have
> special filters for the emails based on if they are in Cc/To etc.
>
Okay, understood. I'll make sure to add those who have commented in the 
cc/to list to avoid the e-mail filters.
>>
>> I know you provided good feedback on the previous patchset, but I did 
>> not have anyone specifically respond to add "reviewed-by:...". I 
>> don't think I should add reviewed-by for someone unless they 
>> specifically add it in a response :)
>
> No, I haven't yet "Reviewed-by" your patches. I had some comments on 
> it, which means
> I expected it to be addressed as you committed in your response.
>
>>>> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
>>>> index 3021f0e..c8488f1 100644
>>>> --- a/drivers/acpi/apei/ghes.c
>>>> +++ b/drivers/acpi/apei/ghes.c
>>>> @@ -80,6 +80,10 @@
>
>> I think that should work to avoid duplication. I will move them to a 
>> header file in the next patchset.
>>>> +
>>>> +static void cper_estatus_print_section_v300(const char *pfx,
>>>> +    const struct acpi_hest_generic_data_v300 *gdata)
>>>> +{
>>>> +    __u8 hour, min, sec, day, mon, year, century, *timestamp;
>>>> +
>>>> +    if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
>>>> +        timestamp = (__u8 *)&(gdata->time_stamp);
>>>> +        memcpy(&sec, timestamp, 1);
>>>> +        memcpy(&min, timestamp + 1, 1);
>>>> +        memcpy(&hour, timestamp + 2, 1);
>>>> +        memcpy(&day, timestamp + 4, 1);
>>>> +        memcpy(&mon, timestamp + 5, 1);
>>>> +        memcpy(&year, timestamp + 6, 1);
>>>> +        memcpy(&century, timestamp + 7, 1);
>>>> +        printk("%stime: ", pfx);
>>>> +        printk("%7s", 0x01 & *(timestamp + 3) ? "precise" : "");
>>>
>>> What format is the (timestamp + 3) stored in ? Does it need 
>>> conversion ?
>> The third byte of the timestamp is currently only used to determine 
>> if the time is precise or not. Bit 0 is used to specify that and the 
>> other bits in this byte are marked as reserved. This is shown in 
>> table 247 of the UEFI spec 2.6:
>>
>> Byte 3:
>>   Bit 0 – Timestamp is precise if this bit is set and correlates to 
>> the time of the error event.
>>   Bit 7:1 – Reserved
>
> Is it always the same endianness as that of the CPU ?
It is a fair assumption that the firmware populating this record will 
use a CPU of the same endianness. There is no mechanism in the spec to 
indicate otherwise.

Thanks,
Tyler
>
> Cheers
> Suzuki
>
Suzuki K Poulose Oct. 14, 2016, 4:28 p.m. UTC | #7
On 13/10/16 20:37, Baicar, Tyler wrote:
> Hello Suzuki,
>
> On 10/13/2016 2:50 AM, Suzuki K Poulose wrote:
>> On 12/10/16 23:10, Baicar, Tyler wrote:

>>>> Please could you keep the people who reviewed/commented on your series in the past,
>>>> whenever you post a new version ?
>>> Do you mean to just send the new version to their e-mail directly in addition to the lists? If so, I will do that next time.
>>
>> If you send a new version of a series to the list, it is a good idea to keep
>> the people who commented (significantly) on your previous version in Cc, especially
>> when you have addressed their feedback. That will help them to keep track of the
>> series. People can always see the new version in the list, but then it is so easy
>> to miss something in the 100s of emails you get each day. I am sure people have
>> special filters for the emails based on if they are in Cc/To etc.
>>
> Okay, understood. I'll make sure to add those who have commented in the cc/to list to avoid the e-mail filters.

Thanks !

>> Is it always the same endianness as that of the CPU ?
> It is a fair assumption that the firmware populating this record will use a CPU of the same endianness. There is no mechanism in the spec to indicate otherwise.

Yep, you are right. The EFI expects the EL2/EL1 to be of the same endianness

Cheers
Suzuki

--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Mark Rutland Oct. 14, 2016, 4:39 p.m. UTC | #8
On Fri, Oct 14, 2016 at 05:28:58PM +0100, Suzuki K Poulose wrote:
> On 13/10/16 20:37, Baicar, Tyler wrote:
> >On 10/13/2016 2:50 AM, Suzuki K Poulose wrote:
> >>Is it always the same endianness as that of the CPU ?
> >
> >It is a fair assumption that the firmware populating this record will
> >use a CPU of the same endianness. There is no mechanism in the spec
> >to indicate otherwise.
> 
> Yep, you are right. The EFI expects the EL2/EL1 to be of the same endianness

To be clear, it is specifically required in the ACPI spec that elements
are in little-endian. Per the ACPI 6.1 spec, page 109:

	All numeric values in ACPI-defined tables, blocks, and
	structures are always encoded in little endian
	format.

Given that CPER, HEST, etc are defined within the ACPI specification,
they are covered by this requirement.

Elements outside of the ACPI spec are not necessarily covered by this
requirement, but their specifications should state their endianness.

Thanks,
Mark.
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 3021f0e..c8488f1 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -80,6 +80,10 @@ 
 	((struct acpi_hest_generic_status *)				\
 	 ((struct ghes_estatus_node *)(estatus_node) + 1))
 
+#define acpi_hest_generic_data_version(gdata)			\
+	(gdata->revision >> 8)
+
+
 /*
  * This driver isn't really modular, however for the time being,
  * continuing to use module_param is the easiest way to remain
@@ -412,6 +416,13 @@  static void ghes_clear_estatus(struct ghes *ghes)
 	ghes->flags &= ~GHES_TO_CLEAR;
 }
 
+inline void *acpi_hest_generic_data_payload(struct acpi_hest_generic_data *gdata)
+{
+	return acpi_hest_generic_data_version(gdata) >= 3 ?
+		(void *)(((struct acpi_hest_generic_data_v300 *)(gdata)) + 1) :
+		gdata + 1;
+}
+
 static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int sev)
 {
 #ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
@@ -419,7 +430,8 @@  static void ghes_handle_memory_failure(struct acpi_hest_generic_data *gdata, int
 	int flags = -1;
 	int sec_sev = ghes_severity(gdata->error_severity);
 	struct cper_sec_mem_err *mem_err;
-	mem_err = (struct cper_sec_mem_err *)(gdata + 1);
+
+	mem_err = acpi_hest_generic_data_payload(gdata);
 
 	if (!(mem_err->validation_bits & CPER_MEM_VALID_PA))
 		return;
@@ -449,14 +461,18 @@  static void ghes_do_proc(struct ghes *ghes,
 {
 	int sev, sec_sev;
 	struct acpi_hest_generic_data *gdata;
+	uuid_le sec_type;
 
 	sev = ghes_severity(estatus->error_severity);
 	apei_estatus_for_each_section(estatus, gdata) {
 		sec_sev = ghes_severity(gdata->error_severity);
-		if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
+		sec_type = *(uuid_le *)gdata->section_type;
+
+		if (!uuid_le_cmp(sec_type,
 				 CPER_SEC_PLATFORM_MEM)) {
 			struct cper_sec_mem_err *mem_err;
-			mem_err = (struct cper_sec_mem_err *)(gdata+1);
+
+			mem_err = acpi_hest_generic_data_payload(gdata);
 			ghes_edac_report_mem_error(ghes, sev, mem_err);
 
 			arch_apei_report_mem_error(sev, mem_err);
@@ -466,7 +482,8 @@  static void ghes_do_proc(struct ghes *ghes,
 		else if (!uuid_le_cmp(*(uuid_le *)gdata->section_type,
 				      CPER_SEC_PCIE)) {
 			struct cper_sec_pcie *pcie_err;
-			pcie_err = (struct cper_sec_pcie *)(gdata+1);
+
+			pcie_err = acpi_hest_generic_data_payload(gdata);
 			if (sev == GHES_SEV_RECOVERABLE &&
 			    sec_sev == GHES_SEV_RECOVERABLE &&
 			    pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID &&
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index d425374..9fa1317 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -32,9 +32,14 @@ 
 #include <linux/acpi.h>
 #include <linux/pci.h>
 #include <linux/aer.h>
+#include <linux/printk.h>
+#include <linux/bcd.h>
 
 #define INDENT_SP	" "
 
+#define acpi_hest_generic_data_version(gdata)		\
+	(gdata->revision >> 8)
+
 static char rcd_decode_str[CPER_REC_LEN];
 
 /*
@@ -386,13 +391,47 @@  static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
 	pfx, pcie->bridge.secondary_status, pcie->bridge.control);
 }
 
+static inline void *acpi_hest_generic_data_payload(struct acpi_hest_generic_data *gdata)
+{
+	return acpi_hest_generic_data_version(gdata) >= 3 ?
+		(void *)(((struct acpi_hest_generic_data_v300 *)(gdata)) + 1) :
+		gdata + 1;
+}
+
+static void cper_estatus_print_section_v300(const char *pfx,
+	const struct acpi_hest_generic_data_v300 *gdata)
+{
+	__u8 hour, min, sec, day, mon, year, century, *timestamp;
+
+	if (gdata->validation_bits & ACPI_HEST_GEN_VALID_TIMESTAMP) {
+		timestamp = (__u8 *)&(gdata->time_stamp);
+		memcpy(&sec, timestamp, 1);
+		memcpy(&min, timestamp + 1, 1);
+		memcpy(&hour, timestamp + 2, 1);
+		memcpy(&day, timestamp + 4, 1);
+		memcpy(&mon, timestamp + 5, 1);
+		memcpy(&year, timestamp + 6, 1);
+		memcpy(&century, timestamp + 7, 1);
+		printk("%stime: ", pfx);
+		printk("%7s", 0x01 & *(timestamp + 3) ? "precise" : "");
+		printk(" %02d:%02d:%02d %02d%02d-%02d-%02d\n",
+			bcd2bin(hour), bcd2bin(min), bcd2bin(sec),
+			bcd2bin(century), bcd2bin(year), bcd2bin(mon),
+			bcd2bin(day));
+	}
+}
+
 static void cper_estatus_print_section(
-	const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
+	const char *pfx, struct acpi_hest_generic_data *gdata, int sec_no)
 {
 	uuid_le *sec_type = (uuid_le *)gdata->section_type;
 	__u16 severity;
 	char newpfx[64];
 
+	if ((gdata->revision >> 8) >= 0x03)
+		cper_estatus_print_section_v300(pfx,
+			(const struct acpi_hest_generic_data_v300 *)gdata);
+
 	severity = gdata->error_severity;
 	printk("%s""Error %d, type: %s\n", pfx, sec_no,
 	       cper_severity_str(severity));
@@ -403,14 +442,18 @@  static void cper_estatus_print_section(
 
 	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
 	if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
-		struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
+		struct cper_sec_proc_generic *proc_err;
+
+		proc_err = acpi_hest_generic_data_payload(gdata);
 		printk("%s""section_type: general processor error\n", newpfx);
 		if (gdata->error_data_length >= sizeof(*proc_err))
 			cper_print_proc_generic(newpfx, proc_err);
 		else
 			goto err_section_too_small;
 	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
-		struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
+		struct cper_sec_mem_err *mem_err;
+
+		mem_err = acpi_hest_generic_data_payload(gdata);
 		printk("%s""section_type: memory error\n", newpfx);
 		if (gdata->error_data_length >=
 		    sizeof(struct cper_sec_mem_err_old))
@@ -419,7 +462,9 @@  static void cper_estatus_print_section(
 		else
 			goto err_section_too_small;
 	} else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
-		struct cper_sec_pcie *pcie = (void *)(gdata + 1);
+		struct cper_sec_pcie *pcie;
+
+		pcie = acpi_hest_generic_data_payload(gdata);
 		printk("%s""section_type: PCIe error\n", newpfx);
 		if (gdata->error_data_length >= sizeof(*pcie))
 			cper_print_pcie(newpfx, pcie, gdata);
@@ -438,6 +483,7 @@  void cper_estatus_print(const char *pfx,
 			const struct acpi_hest_generic_status *estatus)
 {
 	struct acpi_hest_generic_data *gdata;
+	struct acpi_hest_generic_data_v300 *gdata_v3 = NULL;
 	unsigned int data_len, gedata_len;
 	int sec_no = 0;
 	char newpfx[64];
@@ -451,12 +497,22 @@  void cper_estatus_print(const char *pfx,
 	printk("%s""event severity: %s\n", pfx, cper_severity_str(severity));
 	data_len = estatus->data_length;
 	gdata = (struct acpi_hest_generic_data *)(estatus + 1);
+	if ((gdata->revision >> 8) >= 0x03)
+		gdata_v3 = (struct acpi_hest_generic_data_v300 *)gdata;
+
 	snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP);
+
 	while (data_len >= sizeof(*gdata)) {
 		gedata_len = gdata->error_data_length;
 		cper_estatus_print_section(newpfx, gdata, sec_no);
-		data_len -= gedata_len + sizeof(*gdata);
-		gdata = (void *)(gdata + 1) + gedata_len;
+		if(gdata_v3) {
+			data_len -= gedata_len + sizeof(*gdata_v3);
+			gdata_v3 = (void *)(gdata_v3 + 1) + gedata_len;
+			gdata = (struct acpi_hest_generic_data *)gdata_v3;
+		} else {
+			data_len -= gedata_len + sizeof(*gdata);
+			gdata = (void *)(gdata + 1) + gedata_len;
+		}
 		sec_no++;
 	}
 }
@@ -478,6 +534,7 @@  EXPORT_SYMBOL_GPL(cper_estatus_check_header);
 int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
 {
 	struct acpi_hest_generic_data *gdata;
+	struct acpi_hest_generic_data_v300 *gdata_v3 = NULL;
 	unsigned int data_len, gedata_len;
 	int rc;
 
@@ -486,15 +543,29 @@  int cper_estatus_check(const struct acpi_hest_generic_status *estatus)
 		return rc;
 	data_len = estatus->data_length;
 	gdata = (struct acpi_hest_generic_data *)(estatus + 1);
-	while (data_len >= sizeof(*gdata)) {
-		gedata_len = gdata->error_data_length;
-		if (gedata_len > data_len - sizeof(*gdata))
+
+	if ((gdata->revision >> 8) >= 0x03) {
+		gdata_v3 = (struct acpi_hest_generic_data_v300 *)gdata;
+		while (data_len >= sizeof(*gdata_v3)) {
+			gedata_len = gdata_v3->error_data_length;
+			if (gedata_len > data_len - sizeof(*gdata_v3))
+				return -EINVAL;
+			data_len -= gedata_len + sizeof(*gdata_v3);
+			gdata_v3 = (void *)(gdata_v3 + 1) + gedata_len;
+		}
+		if (data_len)
+			return -EINVAL;
+	} else {
+		while (data_len >= sizeof(*gdata)) {
+			gedata_len = gdata->error_data_length;
+			if (gedata_len > data_len - sizeof(*gdata))
+				return -EINVAL;
+			data_len -= gedata_len + sizeof(*gdata);
+			gdata = (void *)(gdata + 1) + gedata_len;
+		}
+		if (data_len)
 			return -EINVAL;
-		data_len -= gedata_len + sizeof(*gdata);
-		gdata = (void *)(gdata + 1) + gedata_len;
 	}
-	if (data_len)
-		return -EINVAL;
 
 	return 0;
 }