diff mbox series

[V3] ACPI: APEI: Use ERST timeout for slow devices

Message ID 20231027223309.22883-1-jeshuas@nvidia.com (mailing list archive)
State Superseded, archived
Headers show
Series [V3] ACPI: APEI: Use ERST timeout for slow devices | expand

Commit Message

Jeshua Smith Oct. 27, 2023, 10:33 p.m. UTC
Slow devices such as flash may not meet the default 1ms timeout value,
so use the ERST max execution time value as the timeout if it is larger
and if the ERST has the "slow" attribute set.

Example:
A NOR flash spec lists "Page program time (256 bytes)" as 120us typical,
and 1800us max. A 32KB error log would be (32K/256) = 128 NOR flash
pages. Writing those 128 NOR flash pages would then take
120us * 128 = 15.36ms typical, or 1800us * 128 = 230.4ms max.

Without this change, when pstore calls APEI's ERST code to write a 32KB
error log to the NOR flash in the example, the code will flag a timeout
after 1ms and return an error to pstore. With this change, unless the
max time value from the ERST table is exceeded, the code will not flag a
timeout and will return success to pstore after the operation completes.

Signed-off-by: Jeshua Smith <jeshuas@nvidia.com>
---
v3:
* Extended commit message based on feedback from Borislav Petkov
* Use bitfield.h based on feedback from Tony Luck

v2:
* no longer add copyright.
* no longer add unused ERST_EXEC_TIMING_TYPICAL defines.
* set timings to 0 if the ACPI_ERST_EXECUTE_TIMINGS operation isn't supported,
  which will result in the default timeout being used.

 drivers/acpi/apei/erst.c | 41 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 4 deletions(-)

Comments

Tony Luck Oct. 27, 2023, 10:40 p.m. UTC | #1
> Slow devices such as flash may not meet the default 1ms timeout value,
> so use the ERST max execution time value as the timeout if it is larger
> and if the ERST has the "slow" attribute set.
>
> Example:
> A NOR flash spec lists "Page program time (256 bytes)" as 120us typical,
> and 1800us max. A 32KB error log would be (32K/256) = 128 nor-flash
> pages. Writing those 128 nor-flash pages would then take
> 120us * 128 = 15ms typical, or 1800us * 128 = 230.4ms max.
>
> Without this change, when pstore calls APEI's ERST code to write a 32KB
> error log to the NOR flash in the example, the code will flag a timeout
> after 1ms and return an error to pstore. With this change, unless the
> max time value from the ERST table is exceeded, the code will not flag a
> timeout and will return success to pstore after the operation completes.
>
> Signed-off-by: Jeshua Smith <jeshuas@nvidia.com>

Reviewed-by: Tony Luck <tony.luck@intel.com>
Borislav Petkov Oct. 28, 2023, 11:06 a.m. UTC | #2
On Fri, Oct 27, 2023 at 10:33:09PM +0000, Jeshua Smith wrote:
> Slow devices such as flash may not meet the default 1ms timeout value,
> so use the ERST max execution time value as the timeout if it is larger
> and if the ERST has the "slow" attribute set.
> 
> Example:
> A NOR flash spec lists "Page program time (256 bytes)" as 120us typical,
> and 1800us max. A 32KB error log would be (32K/256) = 128 nor-flash
> pages. Writing those 128 nor-flash pages would then take
> 120us * 128 = 15ms typical, or 1800us * 128 = 230.4ms max.
> 
> Without this change, when pstore calls APEI's ERST code to write a 32KB
> error log to the NOR flash in the example, the code will flag a timeout
> after 1ms and return an error to pstore. With this change, unless the
> max time value from the ERST table is exceeded, the code will not flag a
> timeout and will return success to pstore after the operation completes.
> 
> Signed-off-by: Jeshua Smith <jeshuas@nvidia.com>
> ---

Reviewed-by: Borislav Petkov (AMD) <bp@alien8.de>
diff mbox series

Patch

diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c
index 247989060e29..90dc0fc68dad 100644
--- a/drivers/acpi/apei/erst.c
+++ b/drivers/acpi/apei/erst.c
@@ -25,6 +25,7 @@ 
 #include <linux/pstore.h>
 #include <linux/vmalloc.h>
 #include <linux/mm.h> /* kvfree() */
+#include <linux/bitfield.h>
 #include <acpi/apei.h>
 
 #include "apei-internal.h"
@@ -59,6 +60,9 @@  static struct acpi_table_erst *erst_tab;
 #define ERST_RANGE_NVRAM	0x0002
 #define ERST_RANGE_SLOW		0x0004
 
+/* Bits 63:32 of the ERST execute-timings value: max execution time */
+#define ERST_EXEC_TIMING_MAX	GENMASK_ULL(63, 32)
+
 /*
  * ERST Error Log Address Range, used as buffer for reading/writing
  * error records.
@@ -68,6 +72,7 @@  static struct erst_erange {
 	u64 size;
 	void __iomem *vaddr;
 	u32 attr;
+	u64 timings;
 } erst_erange;
 
 /*
@@ -97,6 +102,19 @@  static inline int erst_errno(int command_status)
 	}
 }
 
+/* Slow ranges use the ERST max execution time, floored at FIRMWARE_TIMEOUT. */
+static inline u64 erst_get_timeout(void)
+{
+	u64 max_ns;
+
+	if (!(erst_erange.attr & ERST_RANGE_SLOW))
+		return FIRMWARE_TIMEOUT;
+	/* The multiply by NSEC_PER_MSEC converts the max-time field from ms. */
+	max_ns = FIELD_GET(ERST_EXEC_TIMING_MAX, erst_erange.timings) *
+		 NSEC_PER_MSEC;
+	return max_ns > FIRMWARE_TIMEOUT ? max_ns : FIRMWARE_TIMEOUT;
+}
+
 static int erst_timedout(u64 *t, u64 spin_unit)
 {
 	if ((s64)*t < spin_unit) {
@@ -191,9 +209,11 @@  static int erst_exec_stall_while_true(struct apei_exec_context *ctx,
 {
 	int rc;
 	u64 val;
-	u64 timeout = FIRMWARE_TIMEOUT;
+	u64 timeout;
 	u64 stall_time;
 
+	timeout = erst_get_timeout();
+
 	if (ctx->var1 > FIRMWARE_MAX_STALL) {
 		if (!in_nmi())
 			pr_warn(FW_WARN
@@ -389,6 +409,13 @@  static int erst_get_erange(struct erst_erange *range)
 	if (rc)
 		return rc;
 	range->attr = apei_exec_ctx_get_output(&ctx);
+	rc = apei_exec_run(&ctx, ACPI_ERST_EXECUTE_TIMINGS);
+	if (rc == 0)
+		range->timings = apei_exec_ctx_get_output(&ctx);
+	else if (rc == -ENOENT)
+		range->timings = 0;
+	else
+		return rc;
 
 	return 0;
 }
@@ -621,10 +648,12 @@  EXPORT_SYMBOL_GPL(erst_get_record_id_end);
 static int __erst_write_to_storage(u64 offset)
 {
 	struct apei_exec_context ctx;
-	u64 timeout = FIRMWARE_TIMEOUT;
+	u64 timeout;
 	u64 val;
 	int rc;
 
+	timeout = erst_get_timeout();
+
 	erst_exec_ctx_init(&ctx);
 	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_WRITE);
 	if (rc)
@@ -660,10 +689,12 @@  static int __erst_write_to_storage(u64 offset)
 static int __erst_read_from_storage(u64 record_id, u64 offset)
 {
 	struct apei_exec_context ctx;
-	u64 timeout = FIRMWARE_TIMEOUT;
+	u64 timeout;
 	u64 val;
 	int rc;
 
+	timeout = erst_get_timeout();
+
 	erst_exec_ctx_init(&ctx);
 	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_READ);
 	if (rc)
@@ -703,10 +734,12 @@  static int __erst_read_from_storage(u64 record_id, u64 offset)
 static int __erst_clear_from_storage(u64 record_id)
 {
 	struct apei_exec_context ctx;
-	u64 timeout = FIRMWARE_TIMEOUT;
+	u64 timeout;
 	u64 val;
 	int rc;
 
+	timeout = erst_get_timeout();
+
 	erst_exec_ctx_init(&ctx);
 	rc = apei_exec_run_optional(&ctx, ACPI_ERST_BEGIN_CLEAR);
 	if (rc)