@@ -36,6 +36,30 @@ static DEFINE_MUTEX(df_indirect_mutex);
#define DF_FICAA_REG_NUM_LEGACY GENMASK(10, 2)
+/*
+ * Translate an AMD Node ID into the Node ID to use for register accesses.
+ *
+ * On heterogeneous systems, not every AMD Node can be reached through
+ * software-visible registers, so the ID is adjusted here. The caller's
+ * own Node ID is left alone because the translation methods still need
+ * the unmodified value.
+ */
+static u16 get_accessible_node(u16 node)
+{
+	/* Nothing to adjust on non-heterogeneous systems. */
+	if (!df_cfg.flags.heterogeneous)
+		return node;
+
+	switch (df_cfg.rev) {
+	case DF3p5:
+		/* Only Node 0 is accessible on DF3.5 systems. */
+		return 0;
+
+	case DF4p5:
+		/*
+		 * Only the first Node in each Socket is accessible on DF4.5
+		 * systems, and this is visible to software as one Fabric per
+		 * Socket. The Socket ID can be derived from the Node ID and
+		 * global shift values.
+		 */
+		return node >> (df_cfg.socket_id_shift - df_cfg.node_id_shift);
+
+	default:
+		return node;
+	}
+}
+
static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *lo)
{
u32 ficaa_addr = 0x8C, ficad_addr = 0xB8;
@@ -43,6 +67,8 @@ static int __df_indirect_read(u16 node, u8 func, u16 reg, u8 instance_id, u32 *l
int err = -ENODEV;
u32 ficaa = 0;
+ node = get_accessible_node(node);
+
if (node >= amd_nb_num())
goto out;
@@ -253,7 +253,7 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx)
hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
- hash_ctl_1T = FIELD_GET(DF4_HASH_CTL_1T, ctx->map.ctl);
+ hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
/*
* Generate a unique address to determine which bits
@@ -343,6 +343,94 @@ static int df4p5_dehash_addr(struct addr_ctx *ctx)
return 0;
}
+/*
+ * Undo the MI300 interleave hash on ctx->ret_addr.
+ *
+ * Each interleave (base) bit is XORed with a fixed set of higher address
+ * bits. In the table below the first column is the base bit itself; every
+ * other column is only included when the hash control flag of the same
+ * name is set in ctx->map.ctl. A base bit is flipped in ctx->ret_addr
+ * whenever the XOR result differs from its current value.
+ *
+ * MI300 hash bits
+ *                                    4K  64K 2M  1G  1T  1T
+ * COH_ST_Select[0] = XOR of addr{8,  12, 15, 22, 29, 36, 43}
+ * COH_ST_Select[1] = XOR of addr{9,  13, 16, 23, 30, 37, 44}
+ * COH_ST_Select[2] = XOR of addr{10, 14, 17, 24, 31, 38, 45}
+ * COH_ST_Select[3] = XOR of addr{11,     18, 25, 32, 39, 46}
+ * COH_ST_Select[4] = XOR of addr{14,     19, 26, 33, 40, 47} aka Stack
+ * DieID[0]         = XOR of addr{12,     20, 27, 34, 41    }
+ * DieID[1]         = XOR of addr{13,     21, 28, 35, 42    }
+ *
+ * Return: 0 on success, -EINVAL if map_bits_valid() rejects the map.
+ */
+static int mi300_dehash_addr(struct addr_ctx *ctx)
+{
+ bool hash_ctl_4k, hash_ctl_64k, hash_ctl_2M, hash_ctl_1G, hash_ctl_1T;
+ bool hashed_bit, intlv_bit, test_bit;
+ u8 num_intlv_bits, base_bit, i;
+
+ if (!map_bits_valid(ctx, 8, 8, 4, 1))
+ return -EINVAL;
+
+ hash_ctl_4k = FIELD_GET(DF4p5_HASH_CTL_4K, ctx->map.ctl);
+ hash_ctl_64k = FIELD_GET(DF4_HASH_CTL_64K, ctx->map.ctl);
+ hash_ctl_2M = FIELD_GET(DF4_HASH_CTL_2M, ctx->map.ctl);
+ hash_ctl_1G = FIELD_GET(DF4_HASH_CTL_1G, ctx->map.ctl);
+ hash_ctl_1T = FIELD_GET(DF4p5_HASH_CTL_1T, ctx->map.ctl);
+
+ /*
+  * Channel bits: one COH_ST_Select bit is processed per interleave
+  * bit, i.e. ilog2(num_intlv_chan) of them.
+  */
+ num_intlv_bits = ilog2(ctx->map.num_intlv_chan);
+
+ for (i = 0; i < num_intlv_bits; i++) {
+ base_bit = 8 + i;
+
+ /* COH_ST_Select[4] jumps to a base bit of 14. */
+ if (i == 4)
+ base_bit = 14;
+
+ intlv_bit = BIT_ULL(base_bit) & ctx->ret_addr;
+
+ hashed_bit = intlv_bit;
+
+ /* 4k hash bit only applies to the first 3 bits. */
+ if (i <= 2) {
+ test_bit = BIT_ULL(12 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_4k;
+ }
+
+ /* Use temporary 'test_bit' value to avoid Sparse warnings. */
+ test_bit = BIT_ULL(15 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_64k;
+ test_bit = BIT_ULL(22 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_2M;
+ test_bit = BIT_ULL(29 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_1G;
+ test_bit = BIT_ULL(36 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_1T;
+ test_bit = BIT_ULL(43 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_1T;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(base_bit);
+ }
+
+ /*
+  * Die bits: base bits start at 12 and there are
+  * ilog2(num_intlv_dies) of them. This loop runs after the channel
+  * loop, which reads bits 12-14 as 4K hash inputs and may have
+  * flipped bit 14 as the COH_ST_Select[4] base bit.
+  */
+ num_intlv_bits = ilog2(ctx->map.num_intlv_dies);
+
+ for (i = 0; i < num_intlv_bits; i++) {
+ base_bit = 12 + i;
+
+ intlv_bit = BIT_ULL(base_bit) & ctx->ret_addr;
+
+ hashed_bit = intlv_bit;
+
+ test_bit = BIT_ULL(20 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_64k;
+ test_bit = BIT_ULL(27 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_2M;
+ test_bit = BIT_ULL(34 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_1G;
+ test_bit = BIT_ULL(41 + i) & ctx->ret_addr;
+ hashed_bit ^= test_bit & hash_ctl_1T;
+
+ if (hashed_bit != intlv_bit)
+ ctx->ret_addr ^= BIT_ULL(base_bit);
+ }
+
+ return 0;
+}
+
int dehash_address(struct addr_ctx *ctx)
{
switch (ctx->map.intlv_mode) {
@@ -400,6 +488,11 @@ int dehash_address(struct addr_ctx *ctx)
case DF4p5_NPS1_16CHAN_2K_HASH:
return df4p5_dehash_addr(ctx);
+ case MI3_HASH_8CHAN:
+ case MI3_HASH_16CHAN:
+ case MI3_HASH_32CHAN:
+ return mi300_dehash_addr(ctx);
+
default:
atl_debug_on_bad_intlv_mode(ctx);
return -EINVAL;
@@ -80,6 +80,40 @@ static u64 make_space_for_coh_st_id_split_2_1(struct addr_ctx *ctx)
return expand_bits(12, ctx->map.total_intlv_bits - 1, denorm_addr);
}
+/*
+ * Make space for CS ID at bits [14:8] as follows:
+ *
+ * 8 channels	-> bits [10:8]
+ * 16 channels	-> bits [11:8]
+ * 32 channels	-> bits [14,11:8]
+ *
+ * 1 die	-> N/A
+ * 2 dies	-> bit  [12]
+ * 4 dies	-> bits [13:12]
+ *
+ * The gaps are opened in ctx->ret_addr from the lowest bit position
+ * upwards: channel bits at [11:8], then die bits at [12], and finally the
+ * extra channel bit at [14] that only the 32 channel mode needs.
+ *
+ * Return: the address with the gaps inserted, or ~0ULL if the interleave
+ * bit position of the map is not 8.
+ */
+static u64 make_space_for_coh_st_id_mi300(struct addr_ctx *ctx)
+{
+	u8 num_intlv_bits = ilog2(ctx->map.num_intlv_chan);
+	u64 denorm_addr;
+
+	if (ctx->map.intlv_bit_pos != 8) {
+		/* Terminate the message so it cannot be merged with the next one. */
+		pr_debug("Invalid interleave bit: %u\n", ctx->map.intlv_bit_pos);
+		return ~0ULL;
+	}
+
+	/* Channel bits. Covers up to 4 bits at [11:8]. */
+	denorm_addr = expand_bits(8, min(num_intlv_bits, 4), ctx->ret_addr);
+
+	/* Die bits. Always starts at [12]. */
+	denorm_addr = expand_bits(12, ilog2(ctx->map.num_intlv_dies), denorm_addr);
+
+	/* Additional channel bit at [14]. */
+	if (num_intlv_bits > 4)
+		denorm_addr = expand_bits(14, 1, denorm_addr);
+
+	return denorm_addr;
+}
+
/*
* Take the current calculated address and shift enough bits in the middle
* to make a gap where the interleave bits will be inserted.
@@ -107,6 +141,12 @@ static u64 make_space_for_coh_st_id(struct addr_ctx *ctx)
case DF4p5_NPS1_8CHAN_2K_HASH:
case DF4p5_NPS1_16CHAN_2K_HASH:
return make_space_for_coh_st_id_split_2_1(ctx);
+
+ case MI3_HASH_8CHAN:
+ case MI3_HASH_16CHAN:
+ case MI3_HASH_32CHAN:
+ return make_space_for_coh_st_id_mi300(ctx);
+
default:
atl_debug_on_bad_intlv_mode(ctx);
return ~0ULL;
@@ -204,6 +244,32 @@ static u16 get_coh_st_id_df4(struct addr_ctx *ctx)
return coh_st_id;
}
+/*
+ * Build the MI300 Coherent Station ID from the COH_ST Fabric ID.
+ *
+ * The Fabric ID is first made relative to the "base" Destination Fabric ID
+ * (ctx->coh_st_fabric_id is updated in place). Three fields are then taken
+ * from it:
+ *	(C)hannel[3:0]	= Fabric ID bits [3:0]
+ *	(S)tack[0]	= Fabric ID bit  [4]
+ *	(D)ie		= Die ID field, selected by df_cfg.die_id_mask/shift
+ *
+ * The fields are swizzled so that the Stack bit ends up on top:
+ *	coh_st_id = SDDCCCC
+ */
+static u16 get_coh_st_id_mi300(struct addr_ctx *ctx)
+{
+	u8 chan, die, stack;
+	u16 die_id;
+
+	/* Subtract the "base" Destination Fabric ID. */
+	ctx->coh_st_fabric_id -= get_dst_fabric_id(ctx);
+
+	/* Channel stays at [3:0]; the Stack bit moves from [4] up to [6]. */
+	chan  = FIELD_GET(GENMASK(3, 0), ctx->coh_st_fabric_id);
+	stack = FIELD_GET(BIT(4), ctx->coh_st_fabric_id) << 6;
+
+	/* The Die ID is placed directly above the channel bits. */
+	die_id = (ctx->coh_st_fabric_id & df_cfg.die_id_mask) >> df_cfg.die_id_shift;
+	die    = die_id << 4;
+
+	return stack | die | chan;
+}
+
/*
* Derive the correct Coherent Station ID that represents the interleave bits
* used within the system physical address. This accounts for the
@@ -237,6 +303,11 @@ static u16 calculate_coh_st_id(struct addr_ctx *ctx)
case DF4p5_NPS1_16CHAN_2K_HASH:
return get_coh_st_id_df4(ctx);
+ case MI3_HASH_8CHAN:
+ case MI3_HASH_16CHAN:
+ case MI3_HASH_32CHAN:
+ return get_coh_st_id_mi300(ctx);
+
/* COH_ST ID is simply the COH_ST Fabric ID adjusted by the Destination Fabric ID. */
case DF4p5_NPS2_4CHAN_1K_HASH:
case DF4p5_NPS1_8CHAN_1K_HASH:
@@ -287,6 +358,9 @@ static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id
case NOHASH_8CHAN:
case NOHASH_16CHAN:
case NOHASH_32CHAN:
+ case MI3_HASH_8CHAN:
+ case MI3_HASH_16CHAN:
+ case MI3_HASH_32CHAN:
case DF2_2CHAN_HASH:
return insert_coh_st_id_at_intlv_bit(ctx, denorm_addr, coh_st_id);
@@ -314,6 +388,31 @@ static u64 insert_coh_st_id(struct addr_ctx *ctx, u64 denorm_addr, u16 coh_st_id
}
}
+/*
+ * MI300 systems have a fixed, hardware-defined physical to logical
+ * Coherent Station mapping. The Remap registers are not used.
+ *
+ * The table holds 32 entries and is indexed by the instance ID
+ * (ctx->inst_id); the value found there is the logical Coherent Station
+ * ID that gets combined with the Node ID to form the logical Fabric ID.
+ */
+static const u16 phy_to_log_coh_st_map_mi300[] = {
+ 12, 13, 14, 15,
+ 8, 9, 10, 11,
+ 4, 5, 6, 7,
+ 0, 1, 2, 3,
+ 28, 29, 30, 31,
+ 24, 25, 26, 27,
+ 20, 21, 22, 23,
+ 16, 17, 18, 19,
+};
+
+/*
+ * Get the logical COH_ST Fabric ID on MI300 systems.
+ *
+ * The logical Coherent Station ID is looked up in the fixed
+ * phy_to_log_coh_st_map_mi300[] table using ctx->inst_id, and the Node ID
+ * is OR'ed in at df_cfg.node_id_shift.
+ *
+ * Return: the logical Fabric ID, or ~0 (all bits set) if ctx->inst_id does
+ * not index a valid table entry.
+ */
+static u16 get_logical_coh_st_fabric_id_mi300(struct addr_ctx *ctx)
+{
+	/*
+	 * Compare against the number of elements, not the size in bytes:
+	 * the table holds u16 values, so sizeof() would be twice as large
+	 * and let out-of-range instance IDs read past the end of the table.
+	 */
+	if (ctx->inst_id >= ARRAY_SIZE(phy_to_log_coh_st_map_mi300)) {
+		atl_debug(ctx, "Instance ID out of range");
+		return ~0;
+	}
+
+	return phy_to_log_coh_st_map_mi300[ctx->inst_id] | (ctx->node_id << df_cfg.node_id_shift);
+}
+
static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx)
{
u16 component_id, log_fabric_id;
@@ -321,6 +420,9 @@ static u16 get_logical_coh_st_fabric_id(struct addr_ctx *ctx)
/* Start with the physical COH_ST Fabric ID. */
u16 phys_fabric_id = ctx->coh_st_fabric_id;
+ if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
+ return get_logical_coh_st_fabric_id_mi300(ctx);
+
/* Skip logical ID lookup if remapping is disabled. */
if (!FIELD_GET(DF4_REMAP_EN, ctx->map.ctl) &&
ctx->map.intlv_mode != DF3_6CHAN)
@@ -27,8 +27,12 @@
/* PCI ID for Zen4 Server DF Function 0. */
#define DF_FUNC0_ID_ZEN4_SERVER 0x14AD1022
+/* PCI IDs for MI300 DF Function 0. */
+#define DF_FUNC0_ID_MI300 0x15281022
+
/* Shift needed for adjusting register values to true values. */
#define DF_DRAM_BASE_LIMIT_LSB 28
+#define MI300_DRAM_LIMIT_LSB 20
enum df_revisions {
UNKNOWN,
@@ -59,6 +63,9 @@ enum intlv_modes {
DF4_NPS1_12CHAN_HASH = 0x15,
DF4_NPS2_5CHAN_HASH = 0x16,
DF4_NPS1_10CHAN_HASH = 0x17,
+ MI3_HASH_8CHAN = 0x18,
+ MI3_HASH_16CHAN = 0x19,
+ MI3_HASH_32CHAN = 0x1A,
DF2_2CHAN_HASH = 0x21,
/* DF4.5 modes are all IntLvNumChan + 0x20 */
DF4p5_NPS1_16CHAN_1K_HASH = 0x2C,
@@ -86,7 +93,8 @@ enum intlv_modes {
struct df_flags {
__u8 legacy_ficaa : 1,
socket_id_shift_quirk : 1,
- __reserved_0 : 6;
+ heterogeneous : 1,
+ __reserved_0 : 5;
};
struct df_config {
@@ -63,6 +63,10 @@ static int df4p5_get_intlv_mode(struct addr_ctx *ctx)
if (ctx->map.intlv_mode <= NOHASH_32CHAN)
return 0;
+ if (ctx->map.intlv_mode >= MI3_HASH_8CHAN &&
+ ctx->map.intlv_mode <= MI3_HASH_32CHAN)
+ return 0;
+
/*
* Modes matching the ranges above are returned as-is.
*
@@ -125,6 +129,9 @@ static u64 get_hi_addr_offset(u32 reg_dram_offset)
atl_debug_on_bad_df_rev();
}
+ if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
+ shift = MI300_DRAM_LIMIT_LSB;
+
return hi_addr_offset << shift;
}
@@ -369,6 +376,13 @@ static int get_coh_st_fabric_id(struct addr_ctx *ctx)
{
u32 reg;
+ /*
+ * On MI300 systems, the Coherent Station Fabric ID is derived
+ * later. And it does not depend on the register value.
+ */
+ if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
+ return 0;
+
/* Read D18F0x50 (FabricBlockInstanceInformation3). */
if (df_indirect_read_instance(ctx->node_id, 0, 0x50, ctx->inst_id, ®))
return -EINVAL;
@@ -490,6 +504,7 @@ static u8 get_num_intlv_chan(struct addr_ctx *ctx)
case NOHASH_8CHAN:
case DF3_COD1_8CHAN_HASH:
case DF4_NPS1_8CHAN_HASH:
+ case MI3_HASH_8CHAN:
case DF4p5_NPS1_8CHAN_1K_HASH:
case DF4p5_NPS1_8CHAN_2K_HASH:
return 8;
@@ -502,6 +517,7 @@ static u8 get_num_intlv_chan(struct addr_ctx *ctx)
case DF4p5_NPS1_12CHAN_2K_HASH:
return 12;
case NOHASH_16CHAN:
+ case MI3_HASH_16CHAN:
case DF4p5_NPS1_16CHAN_1K_HASH:
case DF4p5_NPS1_16CHAN_2K_HASH:
return 16;
@@ -509,6 +525,7 @@ static u8 get_num_intlv_chan(struct addr_ctx *ctx)
case DF4p5_NPS0_24CHAN_2K_HASH:
return 24;
case NOHASH_32CHAN:
+ case MI3_HASH_32CHAN:
return 32;
default:
atl_debug_on_bad_intlv_mode(ctx);
@@ -246,11 +246,11 @@
#define DF3_HASH_CTL_64K BIT(20)
#define DF3_HASH_CTL_2M BIT(21)
#define DF3_HASH_CTL_1G BIT(22)
-#define DF4_HASH_CTL_4K BIT(7)
#define DF4_HASH_CTL_64K BIT(8)
#define DF4_HASH_CTL_2M BIT(9)
#define DF4_HASH_CTL_1G BIT(10)
-#define DF4_HASH_CTL_1T BIT(15)
+#define DF4p5_HASH_CTL_4K BIT(7)
+#define DF4p5_HASH_CTL_1T BIT(15)
/*
* High Address Offset
@@ -268,10 +268,13 @@
* D18F7x140 [DRAM Offset]
* DF4 HiAddrOffset [24:1]
* DF4p5 HiAddrOffset [24:1]
+ * MI300 HiAddrOffset [31:1]
*/
#define DF2_HI_ADDR_OFFSET GENMASK(31, 20)
#define DF3_HI_ADDR_OFFSET GENMASK(31, 12)
-#define DF4_HI_ADDR_OFFSET GENMASK(24, 1)
+
+/* Follow reference code by including reserved bits for simplicity. */
+#define DF4_HI_ADDR_OFFSET GENMASK(31, 1)
/*
* High Address Offset Enable
@@ -124,6 +124,9 @@ static int df4_determine_df_rev(u32 reg)
if (reg == DF_FUNC0_ID_ZEN4_SERVER)
df_cfg.flags.socket_id_shift_quirk = 1;
+ if (reg == DF_FUNC0_ID_MI300)
+ df_cfg.flags.heterogeneous = 1;
+
return df4_get_fabric_id_mask_registers();
}
@@ -12,8 +12,56 @@
#include "internal.h"
+/*
+ * MI300 has a fixed, model-specific mapping between a UMC instance and
+ * its related Data Fabric Coherent Station instance.
+ *
+ * The MCA_IPID_UMC[InstanceId] field holds a unique identifier for the
+ * UMC instance within a Node. Use this to find the appropriate Coherent
+ * Station ID.
+ *
+ * Redundant bits were removed from the map below.
+ *
+ * Each entry is matched against bits [23:12] of the IPID value
+ * (UMC_ID_MI300); the index of the matching entry is what gets returned
+ * as the Coherent Station instance ID.
+ */
+static const u16 umc_coh_st_map[32] = {
+ 0x393, 0x293, 0x193, 0x093,
+ 0x392, 0x292, 0x192, 0x092,
+ 0x391, 0x291, 0x191, 0x091,
+ 0x390, 0x290, 0x190, 0x090,
+ 0x793, 0x693, 0x593, 0x493,
+ 0x792, 0x692, 0x592, 0x492,
+ 0x791, 0x691, 0x591, 0x491,
+ 0x790, 0x690, 0x590, 0x490,
+};
+
+#define UMC_ID_MI300 GENMASK(23, 12)
+/*
+ * Find the Coherent Station instance ID for the UMC that reported an error.
+ *
+ * The UMC identifier is taken from bits [23:12] of err->ipid and searched
+ * for in umc_coh_st_map[]. The position of the match is the instance ID.
+ *
+ * Return: index of the matching entry. If nothing matches, a one-time
+ * warning is raised and ARRAY_SIZE(umc_coh_st_map) is returned.
+ */
+static u8 get_coh_st_inst_id_mi300(struct atl_err *err)
+{
+	const u16 umc_id = FIELD_GET(UMC_ID_MI300, err->ipid);
+	u8 i = 0;
+
+	/* Advance until the identifier is found or the table is exhausted. */
+	while (i < ARRAY_SIZE(umc_coh_st_map) && umc_coh_st_map[i] != umc_id)
+		i++;
+
+	WARN_ON_ONCE(i >= ARRAY_SIZE(umc_coh_st_map));
+
+	return i;
+}
+
+#define MCA_IPID_INST_ID_HI GENMASK_ULL(47, 44)
static u8 get_die_id(struct atl_err *err)
{
+ /*
+ * AMD Node ID is provided in MCA_IPID[InstanceIdHi], and this
+ * needs to be divided by 4 to get the internal Die ID.
+ */
+ if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) {
+ u8 node_id = FIELD_GET(MCA_IPID_INST_ID_HI, err->ipid);
+
+ return node_id >> 2;
+ }
+
/*
* For CPUs, this is the AMD Node ID modulo the number
* of AMD Nodes per socket.
@@ -24,6 +72,9 @@ static u8 get_die_id(struct atl_err *err)
#define UMC_CHANNEL_NUM GENMASK(31, 20)
static u8 get_coh_st_inst_id(struct atl_err *err)
{
+ if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous)
+ return get_coh_st_inst_id_mi300(err);
+
return FIELD_GET(UMC_CHANNEL_NUM, err->ipid);
}