mbox series

[v4,0/3] AMD Address Translation Library

Message ID 20231218190406.27479-1-yazen.ghannam@amd.com (mailing list archive)
Headers show
Series AMD Address Translation Library | expand

Message

Yazen Ghannam Dec. 18, 2023, 7:04 p.m. UTC
Hi all,

This revision addresses comments from Boris for v3. The most substantial
change is the removal of the library "stub".

Thanks,
Yazen

Yazen Ghannam (3):
  RAS: Introduce AMD Address Translation Library
  EDAC/amd64: Use new AMD Address Translation Library
  Documentation: RAS: Add index and address translation section

 Documentation/RAS/address-translation.rst     |  24 +
 .../RAS/{ras.rst => error-decoding.rst}       |  11 +-
 Documentation/RAS/index.rst                   |  14 +
 Documentation/index.rst                       |   2 +-
 MAINTAINERS                                   |   7 +
 drivers/edac/Kconfig                          |   1 +
 drivers/edac/amd64_edac.c                     | 282 +-------
 drivers/ras/Kconfig                           |   1 +
 drivers/ras/Makefile                          |   2 +
 drivers/ras/amd/atl/Kconfig                   |  20 +
 drivers/ras/amd/atl/Makefile                  |  18 +
 drivers/ras/amd/atl/access.c                  | 106 +++
 drivers/ras/amd/atl/core.c                    | 225 ++++++
 drivers/ras/amd/atl/dehash.c                  | 416 +++++++++++
 drivers/ras/amd/atl/denormalize.c             | 616 ++++++++++++++++
 drivers/ras/amd/atl/internal.h                | 297 ++++++++
 drivers/ras/amd/atl/map.c                     | 667 ++++++++++++++++++
 drivers/ras/amd/atl/reg_fields.h              | 603 ++++++++++++++++
 drivers/ras/amd/atl/system.c                  | 283 ++++++++
 drivers/ras/amd/atl/umc.c                     |  41 ++
 drivers/ras/ras.c                             |  31 +
 include/linux/ras.h                           |  11 +
 22 files changed, 3392 insertions(+), 286 deletions(-)
 create mode 100644 Documentation/RAS/address-translation.rst
 rename Documentation/RAS/{ras.rst => error-decoding.rst} (73%)
 create mode 100644 Documentation/RAS/index.rst
 create mode 100644 drivers/ras/amd/atl/Kconfig
 create mode 100644 drivers/ras/amd/atl/Makefile
 create mode 100644 drivers/ras/amd/atl/access.c
 create mode 100644 drivers/ras/amd/atl/core.c
 create mode 100644 drivers/ras/amd/atl/dehash.c
 create mode 100644 drivers/ras/amd/atl/denormalize.c
 create mode 100644 drivers/ras/amd/atl/internal.h
 create mode 100644 drivers/ras/amd/atl/map.c
 create mode 100644 drivers/ras/amd/atl/reg_fields.h
 create mode 100644 drivers/ras/amd/atl/system.c
 create mode 100644 drivers/ras/amd/atl/umc.c


base-commit: ba7d5744cf6fac619fd0bf1165c90ee930956ebc

Comments

Borislav Petkov Jan. 3, 2024, 10:59 a.m. UTC | #1
On Mon, Dec 18, 2023 at 01:04:03PM -0600, Yazen Ghannam wrote:
> Hi all,
> 
> This revision addresses comments from Boris for v3. The most substantial
> change is the removal of the library "stub".
> 
> Thanks,
> Yazen
> 
> Yazen Ghannam (3):
>   RAS: Introduce AMD Address Translation Library
>   EDAC/amd64: Use new AMD Address Translation Library
>   Documentation: RAS: Add index and address translation section

Ok, a combo diff of my fixes on top, below. Lemme queue it - further
fixes can go on top from now on.

Thx.

---
diff --git a/drivers/ras/amd/atl/access.c b/drivers/ras/amd/atl/access.c
index 1de0460f5e03..f6dd87bb2c35 100644
--- a/drivers/ras/amd/atl/access.c
+++ b/drivers/ras/amd/atl/access.c
@@ -18,12 +18,12 @@ static DEFINE_MUTEX(df_indirect_mutex);
 /*
  * Data Fabric Indirect Access uses FICAA/FICAD.
  *
- * Fabric Indirect Configuration Access Address (FICAA): Constructed based
+ * Fabric Indirect Configuration Access Address (FICAA): constructed based
  * on the device's Instance Id and the PCI function and register offset of
  * the desired register.
  *
- * Fabric Indirect Configuration Access Data (FICAD): There are FICAD LO
- * and FICAD HI registers but so far we only need the LO register.
+ * Fabric Indirect Configuration Access Data (FICAD): there are FICAD
+ * low and high registers but so far only the low register is needed.
  *
  * Use Instance Id 0xFF to indicate a broadcast read.
  */
diff --git a/drivers/ras/amd/atl/core.c b/drivers/ras/amd/atl/core.c
index 9cc31c052427..6dc4e06305f7 100644
--- a/drivers/ras/amd/atl/core.c
+++ b/drivers/ras/amd/atl/core.c
@@ -31,7 +31,7 @@ static int addr_over_limit(struct addr_ctx *ctx)
 
 	/* Is calculated system address above DRAM limit address? */
 	if (ctx->ret_addr > dram_limit_addr) {
-		atl_debug("Calculated address (0x%016llx) > DRAM limit (0x%016llx)",
+		atl_debug(ctx, "Calculated address (0x%016llx) > DRAM limit (0x%016llx)",
 			  ctx->ret_addr, dram_limit_addr);
 		return -EINVAL;
 	}
@@ -179,7 +179,7 @@ static void check_for_legacy_df_access(void)
  * are technically independent things.
  *
  * It's possible to match on the PCI IDs of the Data Fabric devices, but this will be
- * an every expanding list. Instead match on the SMCA and Zen features to cover all
+ * an ever expanding list. Instead, match on the SMCA and Zen features to cover all
  * relevant systems.
  */
 static const struct x86_cpu_id amd_atl_cpuids[] = {
diff --git a/drivers/ras/amd/atl/dehash.c b/drivers/ras/amd/atl/dehash.c
index 51721094dd06..6f414926e6fe 100644
--- a/drivers/ras/amd/atl/dehash.c
+++ b/drivers/ras/amd/atl/dehash.c
@@ -12,7 +12,14 @@
 
 #include "internal.h"
 
-static inline bool valid_map_bits(struct addr_ctx *ctx, u8 bit1, u8 bit2,
+/*
+ * Verify the interleave bits are correct in the different interleaving
+ * settings.
+ *
+ * If @num_intlv_dies and/or @num_intlv_sockets are 1, it means the
+ * respective interleaving is disabled.
+ */
+static inline bool map_bits_valid(struct addr_ctx *ctx, u8 bit1, u8 bit2,
 				  u8 num_intlv_dies, u8 num_intlv_sockets)
 {
 	if (!(ctx->map.intlv_bit_pos == bit1 || ctx->map.intlv_bit_pos == bit2)) {
@@ -37,11 +44,7 @@ static int df2_dehash_addr(struct addr_ctx *ctx)
 {
 	u8 hashed_bit, intlv_bit, intlv_bit_pos;
 
-	/*
-	 * Assert that interleave bit is 8 or 9 and that die and socket
-	 * interleaving are disabled.
-	 */
-	if (!valid_map_bits(ctx, 8, 9, 1, 1))
+	if (!map_bits_valid(ctx, 8, 9, 1, 1))
 		return -EINVAL;
 
 	intlv_bit_pos = ctx->map.intlv_bit_pos;
@@ -64,11 +67,7 @@ static int df3_dehash_addr(struct addr_ctx *ctx)
 	bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
 	u8 hashed_bit, intlv_bit, intlv_bit_pos;
 
-	/*
-	 * Assert that interleave bit is 8 or 9 and that die and socket
-	 * interleaving are disabled.
-	 */
-	if (!valid_map_bits(ctx, 8, 9, 1, 1))
+	if (!map_bits_valid(ctx, 8, 9, 1, 1))
 		return -EINVAL;
 
 	hash_ctl_64k = FIELD_GET(DF3_HASH_CTL_64K, ctx->map.ctl);
@@ -172,11 +171,7 @@ static int df4_dehash_addr(struct addr_ctx *ctx)
 	bool hash_ctl_64k, hash_ctl_2M, hash_ctl_1G;
 	u8 hashed_bit, intlv_bit;
 
-	/*
-	 * Assert that interleave bit is 8, die interleaving is disabled,
-	 * and no more than 2 sockets are interleaved.
-	 */
-	if (!valid_map_bits(ctx, 8, 8, 1, 2))
+	if (!map_bits_valid(ctx, 8, 8, 1, 2))
 		return -EINVAL;
 
 	hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
@@ -252,11 +247,7 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx)
 	u8 hashed_bit, intlv_bit;
 	u64 rehash_vector;
 
-	/*
-	 * Assert that interleave bit is 8, die interleaving is disabled,
-	 * and no more than 2 sockets are interleaved.
-	 */
-	if (!valid_map_bits(ctx, 8, 8, 1, 2))
+	if (!map_bits_valid(ctx, 8, 8, 1, 2))
 		return -EINVAL;
 
 	hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
diff --git a/drivers/ras/amd/atl/denormalize.c b/drivers/ras/amd/atl/denormalize.c
index fb182dd7cca6..01f1d0fb6799 100644
--- a/drivers/ras/amd/atl/denormalize.c
+++ b/drivers/ras/amd/atl/denormalize.c
@@ -339,7 +339,8 @@ static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx)
 	}
 
 	if (log_fabric_id == MAX_COH_ST_CHANNELS)
-		atl_debug("COH_ST remap entry not found for 0x%x", log_fabric_id);
+		atl_debug(ctx, "COH_ST remap entry not found for 0x%x",
+			  log_fabric_id);
 
 	/* Get the Node ID bits from the physical and apply to the logical. */
 	return (phys_fabric_id & df_cfg.node_id_mask) | log_fabric_id;
diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h
index a1996811aa34..f17c5f5c9950 100644
--- a/drivers/ras/amd/atl/internal.h
+++ b/drivers/ras/amd/atl/internal.h
@@ -279,10 +279,10 @@ static inline u64 remove_bits(u8 low_bit, u8 high_bit, u64 data)
 	return temp1 | temp2;
 }
 
-#define atl_debug(fmt, arg...) \
+#define atl_debug(ctx, fmt, arg...) \
 	pr_debug("socket_id=%u die_id=%u coh_st_inst_id=%u norm_addr=0x%016llx: " fmt,\
-		 ctx->inputs.socket_id, ctx->inputs.die_id,\
-		 ctx->inputs.coh_st_inst_id, ctx->inputs.norm_addr, ##arg)
+		 (ctx)->inputs.socket_id, (ctx)->inputs.die_id,\
+		 (ctx)->inputs.coh_st_inst_id, (ctx)->inputs.norm_addr, ##arg)
 
 static inline void atl_debug_on_bad_df_rev(void)
 {
@@ -291,7 +291,7 @@ static inline void atl_debug_on_bad_df_rev(void)
 
 static inline void atl_debug_on_bad_intlv_mode(struct addr_ctx *ctx)
 {
-	atl_debug("Unrecognized interleave mode: %u", ctx->map.intlv_mode);
+	atl_debug(ctx, "Unrecognized interleave mode: %u", ctx->map.intlv_mode);
 }
 
 #endif /* __AMD_ATL_INTERNAL_H__ */
diff --git a/drivers/ras/amd/atl/map.c b/drivers/ras/amd/atl/map.c
index 8145b7bb2b40..64e8b1eda1ae 100644
--- a/drivers/ras/amd/atl/map.c
+++ b/drivers/ras/amd/atl/map.c
@@ -140,7 +140,7 @@ static int get_dram_offset(struct addr_ctx *ctx, u64 *norm_offset)
 
 	/* Should not be called for map 0. */
 	if (!ctx->map.num) {
-		atl_debug("Trying to find DRAM offset for map 0");
+		atl_debug(ctx, "Trying to find DRAM offset for map 0");
 		return -EINVAL;
 	}
 
@@ -388,7 +388,6 @@ static int find_normalized_offset(struct addr_ctx *ctx, u64 *norm_offset)
 
 	for (ctx->map.num = 1; ctx->map.num < df_cfg.num_coh_st_maps; ctx->map.num++) {
 		ret = get_dram_offset(ctx, norm_offset);
-
 		if (ret < 0)
 			return ret;
 
@@ -398,13 +397,13 @@ static int find_normalized_offset(struct addr_ctx *ctx, u64 *norm_offset)
 
 		/* Enabled offsets should never be 0. */
 		if (*norm_offset == 0) {
-			atl_debug("Enabled map %u offset is 0", ctx->map.num);
+			atl_debug(ctx, "Enabled map %u offset is 0", ctx->map.num);
 			return -EINVAL;
 		}
 
 		/* Offsets should always increase from one map to the next. */
 		if (*norm_offset <= last_offset) {
-			atl_debug("Map %u offset (0x%016llx) <= previous (0x%016llx)",
+			atl_debug(ctx, "Map %u offset (0x%016llx) <= previous (0x%016llx)",
 				  ctx->map.num, *norm_offset, last_offset);
 			return -EINVAL;
 		}
@@ -650,18 +649,17 @@ static void dump_address_map(struct dram_addr_map *map)
 
 int get_address_map(struct addr_ctx *ctx)
 {
-	int ret = 0;
+	int ret;
 
 	ret = get_address_map_common(ctx);
 	if (ret)
-		goto out;
+		return ret;
 
 	ret = get_global_map_data(ctx);
 	if (ret)
-		goto out;
+		return ret;
 
 	dump_address_map(&ctx->map);
 
-out:
 	return ret;
 }
diff --git a/drivers/ras/amd/atl/system.c b/drivers/ras/amd/atl/system.c
index 37ad203bb93e..af61f2f1d6de 100644
--- a/drivers/ras/amd/atl/system.c
+++ b/drivers/ras/amd/atl/system.c
@@ -17,7 +17,7 @@ int determine_node_id(struct addr_ctx *ctx, u8 socket_id, u8 die_id)
 	u16 socket_id_bits, die_id_bits;
 
 	if (socket_id > 0 && df_cfg.socket_id_mask == 0) {
-		atl_debug("Invalid socket inputs: socket_id=%u socket_id_mask=0x%x",
+		atl_debug(ctx, "Invalid socket inputs: socket_id=%u socket_id_mask=0x%x",
 			  socket_id, df_cfg.socket_id_mask);
 		return -EINVAL;
 	}
@@ -28,7 +28,7 @@ int determine_node_id(struct addr_ctx *ctx, u8 socket_id, u8 die_id)
 	socket_id_bits &=	df_cfg.socket_id_mask;
 
 	if (die_id > 0 && df_cfg.die_id_mask == 0) {
-		atl_debug("Invalid die inputs: die_id=%u die_id_mask=0x%x",
+		atl_debug(ctx, "Invalid die inputs: die_id=%u die_id_mask=0x%x",
 			  die_id, df_cfg.die_id_mask);
 		return -EINVAL;
 	}
@@ -225,8 +225,6 @@ static void get_num_maps(void)
 		df_cfg.num_coh_st_maps	= 2;
 		break;
 	case DF4:
-		df_cfg.num_coh_st_maps	= 4;
-		break;
 	case DF4p5:
 		df_cfg.num_coh_st_maps	= 4;
 		break;