diff mbox series

[2/2] x86/MCE/AMD Support new memory interleaving schemes during address translation

Message ID 20200814191449.183998-3-Yazen.Ghannam@amd.com (mailing list archive)
State New, archived
Headers show
Series AMD MCA Address Translation Updates | expand

Commit Message

Yazen Ghannam Aug. 14, 2020, 7:14 p.m. UTC
From: Muralidhara M K <muralidhara.mk@amd.com>

Add support for new memory interleaving schemes used in current AMD
systems.

Check if the system is using a current Data Fabric version or a legacy
version as some bit and register definitions have changed.

Tested on AMD reference platforms with the following memory interleaving
options.

Naples
- None
- Channel
- Die
- Socket

Rome (NPS = Nodes per Socket)
- None
- NPS0
- NPS1
- NPS2
- NPS4

The fixes tag refers to the commit that allows amd64_edac_mod to load on
Rome systems. The module may report an incorrect system address on Rome
systems depending on the interleaving option used.

Fixes: 6e846239e548 ("EDAC/amd64: Add Family 17h Model 30h PCI IDs")
Signed-off-by: Muralidhara M K <muralidhara.mk@amd.com>
Co-developed-by: Naveen Krishna Chtradhi <naveenkrishna.chatradhi@amd.com>
Signed-off-by: Naveen Krishna Chtradhi <naveenkrishna.chatradhi@amd.com>
Co-developed-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 arch/x86/kernel/cpu/mce/amd.c | 237 +++++++++++++++++++++++++++-------
 1 file changed, 188 insertions(+), 49 deletions(-)

Comments

Ingo Molnar Aug. 15, 2020, 9:13 a.m. UTC | #1
* Yazen Ghannam <Yazen.Ghannam@amd.com> wrote:

> +     /* Read D18F1x208 (System Fabric ID Mask 0). */
> +     if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp))
> +             goto out_err;
> +
> +     /* Determine if system is a legacy Data Fabric type. */
> +     legacy_df = !(tmp & 0xFF);

1)

I see this pattern in a lot of places in the code, first the magic 
constant 0x208 is explained a comment, then it is *repeated* and used 
it in the code...

How about introducing an obviously named enum for it instead, which 
would then be self-documenting, saving the comment and removing magic 
numbers:

	if (amd_df_indirect_read(nid, 1, AMD_REG_FAB_ID, umc, &reg_fab_id))
		goto out_err;

(The symbolic name should be something better, I just guessed 
something quickly.)

Please clean this up in a separate patch, not part of the already 
large patch that introduces a new feature.

2)

'tmp & 0xFF' is some sort of fabric version ID value, with a value of 
0 denoting legacy (pre-Rome) systems, right?

How about making that explicit:

	df_version = reg_fab_id & 0xFF;

I'm pretty sure such a version ID might come handy later on, should 
there be quirks or new capabilities with the newer systems ...


>  			ret_addr -= hi_addr_offset;
> @@ -728,23 +740,31 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
>  	}
>  
>  	lgcy_mmio_hole_en = tmp & BIT(1);
> -	intlv_num_chan	  = (tmp >> 4) & 0xF;
> -	intlv_addr_sel	  = (tmp >> 8) & 0x7;
> -	dram_base_addr	  = (tmp & GENMASK_ULL(31, 12)) << 16;
>  
> -	/* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */
> -	if (intlv_addr_sel > 3) {
> -		pr_err("%s: Invalid interleave address select %d.\n",
> -			__func__, intlv_addr_sel);
> -		goto out_err;
> +	if (legacy_df) {
> +		intlv_num_chan	  = (tmp >> 4) & 0xF;
> +		intlv_addr_sel	  = (tmp >> 8) & 0x7;
> +	} else {
> +		intlv_num_chan    = (tmp >> 2) & 0xF;
> +		intlv_num_dies	  = (tmp >> 6) & 0x3;
> +		intlv_num_sockets = (tmp >> 8) & 0x1;
> +		intlv_addr_sel	  = (tmp >> 9) & 0x7;
>  	}
>  
> +	dram_base_addr	  = (tmp & GENMASK_ULL(31, 12)) << 16;
> +
>  	/* Read D18F0x114 (DramLimitAddress). */
>  	if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp))
>  		goto out_err;
>  
> -	intlv_num_sockets = (tmp >> 8) & 0x1;
> -	intlv_num_dies	  = (tmp >> 10) & 0x3;
> +	if (legacy_df) {
> +		intlv_num_sockets = (tmp >> 8) & 0x1;
> +		intlv_num_dies	  = (tmp >> 10) & 0x3;
> +		dst_fabric_id	  = tmp & 0xFF;
> +	} else {
> +		dst_fabric_id	  = tmp & 0x3FF;
> +	}
> +
>  	dram_limit_addr	  = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0);

Could we please structure this code in a bit more readable fashion?

1)

Such as not using the meaningless 'tmp' variable name to first read 
out DramOffset, then DramLimitAddress?

How about naming them a bit more obviously, and retrieving them in a 
single step:

        if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &reg_dram_off))
                goto out_err;

        /* Remove HiAddrOffset from normalized address, if enabled: */
        if (reg_dram_off & BIT(0)) {
                u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8;

                if (norm_addr >= hi_addr_offset) {
                        ret_addr -= hi_addr_offset;
                        base = 1;
                }
        }

        if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &reg_dram_lim))
                goto out_err;

('reg' stands for register value - but 'val' would work too.)

Side note: why is the above code using BIT() and GENMASK_UUL() when 
all the other and new code is using fixed masks? Use one of these 
versions instead of a weird mix ...

2)

Then all the fabric version dependent logic could be consolidated 
instead of being spread out:

	if (df_version) {
		intlv_num_chan    = (reg_dram_off >>  2) & 0xF;
		intlv_num_dies    = (reg_dram_off >>  6) & 0x3;
		intlv_num_sockets = (reg_dram_off >>  8) & 0x1;
		intlv_addr_sel    = (reg_dram_off >>  9) & 0x7;

		dst_fabric_id     = (reg_dram_lim >>  0) & 0x3FF;
	} else {
		intlv_num_chan    = (reg_dram_off >>  4) & 0xF;
		intlv_num_dies    = (reg_dram_lim >> 10) & 0x3;
		intlv_num_sockets = (reg_dram_lim >>  8) & 0x1;
		intlv_addr_sel    = (reg_dram_off >>  8) & 0x7;

		dst_fabric_id     = (reg_dram_lim >>  0) & 0xFF;
	}

Also note a couple of more formatting & ordering edits I did to the 
code, to improve the structure. My copy & paste job is untested 
though.

3)

Notably, note how the new code on current systems is the first branch 
- that's the most interesting code most of the time anyaway, legacy 
systems being legacy.

BTW., please do any such suggested code flow and structure clean-up 
patches first in the series, then introduce the new logic, to make it 
easier to verify.

Thanks,

	Ingo
Yazen Ghannam Aug. 18, 2020, 2:02 p.m. UTC | #2
On Sat, Aug 15, 2020 at 11:13:36AM +0200, Ingo Molnar wrote:
> 
> * Yazen Ghannam <Yazen.Ghannam@amd.com> wrote:
> 
> > +     /* Read D18F1x208 (System Fabric ID Mask 0). */
> > +     if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp))
> > +             goto out_err;
> > +
> > +     /* Determine if system is a legacy Data Fabric type. */
> > +     legacy_df = !(tmp & 0xFF);
> 
> 1)
> 
> I see this pattern in a lot of places in the code, first the magic 
> constant 0x208 is explained a comment, then it is *repeated* and used 
> it in the code...
> 
> How about introducing an obviously named enum for it instead, which 
> would then be self-documenting, saving the comment and removing magic 
> numbers:
> 
> 	if (amd_df_indirect_read(nid, 1, AMD_REG_FAB_ID, umc, &reg_fab_id))
> 		goto out_err;
> 
> (The symbolic name should be something better, I just guessed 
> something quickly.)
> 
> Please clean this up in a separate patch, not part of the already 
> large patch that introduces a new feature.
>

Okay, will do.

> 2)
> 
> 'tmp & 0xFF' is some sort of fabric version ID value, with a value of 
> 0 denoting legacy (pre-Rome) systems, right?
> 
> How about making that explicit:
> 
> 	df_version = reg_fab_id & 0xFF;
> 
> I'm pretty sure such a version ID might come handy later on, should 
> there be quirks or new capabilities with the newer systems ...
> 

Not exactly. The register field is Read-as-Zero on legacy systems. The
versions are 2 and 3 where 2 is the "legacy" version. But I can make
this change.

For example:

	df_version = reg_fab_id & 0xFF ? 3 : 2;

> 
> >  			ret_addr -= hi_addr_offset;
> > @@ -728,23 +740,31 @@ int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
> >  	}
> >  
> >  	lgcy_mmio_hole_en = tmp & BIT(1);
> > -	intlv_num_chan	  = (tmp >> 4) & 0xF;
> > -	intlv_addr_sel	  = (tmp >> 8) & 0x7;
> > -	dram_base_addr	  = (tmp & GENMASK_ULL(31, 12)) << 16;
> >  
> > -	/* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */
> > -	if (intlv_addr_sel > 3) {
> > -		pr_err("%s: Invalid interleave address select %d.\n",
> > -			__func__, intlv_addr_sel);
> > -		goto out_err;
> > +	if (legacy_df) {
> > +		intlv_num_chan	  = (tmp >> 4) & 0xF;
> > +		intlv_addr_sel	  = (tmp >> 8) & 0x7;
> > +	} else {
> > +		intlv_num_chan    = (tmp >> 2) & 0xF;
> > +		intlv_num_dies	  = (tmp >> 6) & 0x3;
> > +		intlv_num_sockets = (tmp >> 8) & 0x1;
> > +		intlv_addr_sel	  = (tmp >> 9) & 0x7;
> >  	}
> >  
> > +	dram_base_addr	  = (tmp & GENMASK_ULL(31, 12)) << 16;
> > +
> >  	/* Read D18F0x114 (DramLimitAddress). */
> >  	if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp))
> >  		goto out_err;
> >  
> > -	intlv_num_sockets = (tmp >> 8) & 0x1;
> > -	intlv_num_dies	  = (tmp >> 10) & 0x3;
> > +	if (legacy_df) {
> > +		intlv_num_sockets = (tmp >> 8) & 0x1;
> > +		intlv_num_dies	  = (tmp >> 10) & 0x3;
> > +		dst_fabric_id	  = tmp & 0xFF;
> > +	} else {
> > +		dst_fabric_id	  = tmp & 0x3FF;
> > +	}
> > +
> >  	dram_limit_addr	  = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0);
> 
> Could we please structure this code in a bit more readable fashion?
> 
> 1)
> 
> Such as not using the meaningless 'tmp' variable name to first read 
> out DramOffset, then DramLimitAddress?
> 

IIRC, the "tmp" variable come to be in the review for the patch which
added this function. There are a few places where the register name and
the value needed have the same or similar name. For example,
DramLimitAddress is the register name and also a field within the
register. So we'd have a reg_dram_limit_addr and val_dram_limit_addr.
The "tmp" variable removes the need for the "reg_" variable.

But I think this can be reworked so that the final variable name is
reused. The register value can read into the variable, extra fields can
be extracted from it, and the final value can be adjusted as needed.

> How about naming them a bit more obviously, and retrieving them in a 
> single step:
> 
>         if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &reg_dram_off))
>                 goto out_err;
> 
>         /* Remove HiAddrOffset from normalized address, if enabled: */
>         if (reg_dram_off & BIT(0)) {
>                 u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8;
> 
>                 if (norm_addr >= hi_addr_offset) {
>                         ret_addr -= hi_addr_offset;
>                         base = 1;
>                 }
>         }
> 
>         if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &reg_dram_lim))
>                 goto out_err;
> 
> ('reg' stands for register value - but 'val' would work too.)
> 
> Side note: why is the above code using BIT() and GENMASK_UUL() when 
> all the other and new code is using fixed masks? Use one of these 
> versions instead of a weird mix ...
> 

I'll clean this up. Also, there are a lot of places where bit fields are
extracted. I think this can be made into a macro.

> 2)
> 
> Then all the fabric version dependent logic could be consolidated 
> instead of being spread out:
> 
> 	if (df_version) {
> 		intlv_num_chan    = (reg_dram_off >>  2) & 0xF;
> 		intlv_num_dies    = (reg_dram_off >>  6) & 0x3;
> 		intlv_num_sockets = (reg_dram_off >>  8) & 0x1;
> 		intlv_addr_sel    = (reg_dram_off >>  9) & 0x7;
> 
> 		dst_fabric_id     = (reg_dram_lim >>  0) & 0x3FF;
> 	} else {
> 		intlv_num_chan    = (reg_dram_off >>  4) & 0xF;
> 		intlv_num_dies    = (reg_dram_lim >> 10) & 0x3;
> 		intlv_num_sockets = (reg_dram_lim >>  8) & 0x1;
> 		intlv_addr_sel    = (reg_dram_off >>  8) & 0x7;
> 
> 		dst_fabric_id     = (reg_dram_lim >>  0) & 0xFF;
> 	}
> 
> Also note a couple of more formatting & ordering edits I did to the 
> code, to improve the structure. My copy & paste job is untested 
> though.
> 

Okay.

> 3)
> 
> Notably, note how the new code on current systems is the first branch 
> - that's the most interesting code most of the time anyaway, legacy 
> systems being legacy.
> 

Understood.

> BTW., please do any such suggested code flow and structure clean-up 
> patches first in the series, then introduce the new logic, to make it 
> easier to verify.
>

Will do.

Thanks,
Yazen
diff mbox series

Patch

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 524edf81e287..a687aa898fef 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -689,18 +689,25 @@  void mce_amd_feature_init(struct cpuinfo_x86 *c)
 int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
 {
 	u64 dram_base_addr, dram_limit_addr, dram_hole_base;
+
 	/* We start from the normalized address */
 	u64 ret_addr = norm_addr;
 
 	u32 tmp;
 
-	u8 die_id_shift, die_id_mask, socket_id_shift, socket_id_mask;
+	bool hash_enabled = false, split_normalized = false, legacy_df = false;
+
 	u8 intlv_num_dies, intlv_num_chan, intlv_num_sockets;
-	u8 intlv_addr_sel, intlv_addr_bit;
-	u8 num_intlv_bits, hashed_bit;
+	u8 intlv_addr_sel, intlv_addr_bit, num_intlv_bits;
+	u8 cs_mask, cs_id = 0, dst_fabric_id = 0;
 	u8 lgcy_mmio_hole_en, base = 0;
-	u8 cs_mask, cs_id = 0;
-	bool hash_enabled = false;
+
+	/* Read D18F1x208 (System Fabric ID Mask 0). */
+	if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp))
+		goto out_err;
+
+	/* Determine if system is a legacy Data Fabric type. */
+	legacy_df = !(tmp & 0xFF);
 
 	/* Read D18F0x1B4 (DramOffset), check if base 1 is used. */
 	if (amd_df_indirect_read(nid, 0, 0x1B4, umc, &tmp))
@@ -708,7 +715,12 @@  int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
 
 	/* Remove HiAddrOffset from normalized address, if enabled: */
 	if (tmp & BIT(0)) {
-		u64 hi_addr_offset = (tmp & GENMASK_ULL(31, 20)) << 8;
+		u8 hi_addr_offset_lsb = legacy_df ? 20 : 12;
+		u64 hi_addr_offset = tmp & GENMASK_ULL(31, hi_addr_offset_lsb);
+
+		/* Align to bit 28 regardless of the LSB used. */
+		hi_addr_offset >>= hi_addr_offset_lsb;
+		hi_addr_offset <<= 28;
 
 		if (norm_addr >= hi_addr_offset) {
 			ret_addr -= hi_addr_offset;
@@ -728,23 +740,31 @@  int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
 	}
 
 	lgcy_mmio_hole_en = tmp & BIT(1);
-	intlv_num_chan	  = (tmp >> 4) & 0xF;
-	intlv_addr_sel	  = (tmp >> 8) & 0x7;
-	dram_base_addr	  = (tmp & GENMASK_ULL(31, 12)) << 16;
 
-	/* {0, 1, 2, 3} map to address bits {8, 9, 10, 11} respectively */
-	if (intlv_addr_sel > 3) {
-		pr_err("%s: Invalid interleave address select %d.\n",
-			__func__, intlv_addr_sel);
-		goto out_err;
+	if (legacy_df) {
+		intlv_num_chan	  = (tmp >> 4) & 0xF;
+		intlv_addr_sel	  = (tmp >> 8) & 0x7;
+	} else {
+		intlv_num_chan    = (tmp >> 2) & 0xF;
+		intlv_num_dies	  = (tmp >> 6) & 0x3;
+		intlv_num_sockets = (tmp >> 8) & 0x1;
+		intlv_addr_sel	  = (tmp >> 9) & 0x7;
 	}
 
+	dram_base_addr	  = (tmp & GENMASK_ULL(31, 12)) << 16;
+
 	/* Read D18F0x114 (DramLimitAddress). */
 	if (amd_df_indirect_read(nid, 0, 0x114 + (8 * base), umc, &tmp))
 		goto out_err;
 
-	intlv_num_sockets = (tmp >> 8) & 0x1;
-	intlv_num_dies	  = (tmp >> 10) & 0x3;
+	if (legacy_df) {
+		intlv_num_sockets = (tmp >> 8) & 0x1;
+		intlv_num_dies	  = (tmp >> 10) & 0x3;
+		dst_fabric_id	  = tmp & 0xFF;
+	} else {
+		dst_fabric_id	  = tmp & 0x3FF;
+	}
+
 	dram_limit_addr	  = ((tmp & GENMASK_ULL(31, 12)) << 16) | GENMASK_ULL(27, 0);
 
 	intlv_addr_bit = intlv_addr_sel + 8;
@@ -757,8 +777,27 @@  int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
 	case 5:	intlv_num_chan = 3; break;
 	case 7:	intlv_num_chan = 4; break;
 
-	case 8: intlv_num_chan = 1;
+	case 8:
+		if (legacy_df) {
+			intlv_num_chan = 1;
+			hash_enabled = true;
+		} else {
+			intlv_num_chan = 5;
+		}
+		break;
+	case 12:
+		intlv_num_chan = 1;
+		hash_enabled = true;
+		break;
+	case 13:
+		intlv_num_chan = 2;
+		hash_enabled = true;
+		split_normalized = true;
+		break;
+	case 14:
+		intlv_num_chan = 3;
 		hash_enabled = true;
+		split_normalized = true;
 		break;
 	default:
 		pr_err("%s: Invalid number of interleaved channels %d.\n",
@@ -766,18 +805,14 @@  int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
 		goto out_err;
 	}
 
-	num_intlv_bits = intlv_num_chan;
-
-	if (intlv_num_dies > 2) {
-		pr_err("%s: Invalid number of interleaved nodes/dies %d.\n",
-			__func__, intlv_num_dies);
+	/* Assert interleave address bit is 8 or 9 for hashing cases. */
+	if (hash_enabled && intlv_addr_bit != 8 && intlv_addr_bit != 9) {
+		pr_err("%s: Invalid interleave address bit for hashing %d.\n",
+			__func__, intlv_addr_bit);
 		goto out_err;
 	}
 
-	num_intlv_bits += intlv_num_dies;
-
-	/* Add a bit if sockets are interleaved. */
-	num_intlv_bits += intlv_num_sockets;
+	num_intlv_bits = intlv_num_chan + intlv_num_dies + intlv_num_sockets;
 
 	/* Assert num_intlv_bits <= 4 */
 	if (num_intlv_bits > 4) {
@@ -787,8 +822,10 @@  int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
 	}
 
 	if (num_intlv_bits > 0) {
-		u64 temp_addr_x, temp_addr_i, temp_addr_y;
+		u8 cs_fabric_id_mask = legacy_df ? 0xFF : 0x3F;
 		u8 die_id_bit, sock_id_bit, cs_fabric_id;
+		u64 addr_x, addr_y, addr_z;
+		u8 node_id_shift = 0;
 
 		/*
 		 * Read FabricBlockInstanceInformation3_CS[BlockFabricID].
@@ -799,7 +836,7 @@  int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
 		if (amd_df_indirect_read(nid, 0, 0x50, umc, &tmp))
 			goto out_err;
 
-		cs_fabric_id = (tmp >> 8) & 0xFF;
+		cs_fabric_id = (tmp >> 8) & cs_fabric_id_mask;
 		die_id_bit   = 0;
 
 		/* If interleaved over more than 1 channel: */
@@ -807,44 +844,94 @@  int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
 			die_id_bit = intlv_num_chan;
 			cs_mask	   = (1 << die_id_bit) - 1;
 			cs_id	   = cs_fabric_id & cs_mask;
+			cs_id	  -= dst_fabric_id & cs_mask;
 		}
 
 		sock_id_bit = die_id_bit;
 
-		/* Read D18F1x208 (SystemFabricIdMask). */
-		if (intlv_num_dies || intlv_num_sockets)
-			if (amd_df_indirect_read(nid, 1, 0x208, umc, &tmp))
+		if (intlv_num_dies || intlv_num_sockets) {
+			u16 offset = 0;
+
+			if (legacy_df) {
+				/* Read D18F1x208 (SystemFabricIdMask). */
+				offset = 0x208;
+			} else {
+				/* Read D18F1x20C (SystemFabricIdMask1). */
+				offset = 0x20C;
+			}
+
+			if (amd_df_indirect_read(nid, 1, offset, umc, &tmp))
 				goto out_err;
 
+			if (!legacy_df)
+				node_id_shift = tmp & 0xF;
+		}
+
 		/* If interleaved over more than 1 die. */
 		if (intlv_num_dies) {
+			u8 die_id_shift, die_id_mask;
+
 			sock_id_bit  = die_id_bit + intlv_num_dies;
-			die_id_shift = (tmp >> 24) & 0xF;
-			die_id_mask  = (tmp >> 8) & 0xFF;
+
+			if (legacy_df) {
+				die_id_shift = (tmp >> 24) & 0xF;
+				die_id_mask  = (tmp >> 8) & 0xFF;
+			} else {
+				die_id_shift = (tmp & 0xF) + node_id_shift;
+
+				die_id_mask  = (tmp >> 16) & 0x7;
+				die_id_mask <<= node_id_shift;
+			}
 
 			cs_id |= ((cs_fabric_id & die_id_mask) >> die_id_shift) << die_id_bit;
 		}
 
 		/* If interleaved over more than 1 socket. */
 		if (intlv_num_sockets) {
-			socket_id_shift	= (tmp >> 28) & 0xF;
-			socket_id_mask	= (tmp >> 16) & 0xFF;
+			u8 socket_id_shift, socket_id_mask;
+
+			if (legacy_df) {
+				socket_id_shift	= (tmp >> 28) & 0xF;
+				socket_id_mask	= (tmp >> 16) & 0xFF;
+			} else {
+				socket_id_shift	= (tmp >> 8) & 0x3;
+				socket_id_shift += node_id_shift;
+
+				socket_id_mask	= (tmp >> 24) & 0x7;
+				socket_id_mask <<= node_id_shift;
+			}
 
 			cs_id |= ((cs_fabric_id & socket_id_mask) >> socket_id_shift) << sock_id_bit;
 		}
 
 		/*
 		 * The pre-interleaved address consists of XXXXXXIIIYYYYY
-		 * where III is the ID for this CS, and XXXXXXYYYYY are the
-		 * address bits from the post-interleaved address.
-		 * "num_intlv_bits" has been calculated to tell us how many "I"
-		 * bits there are. "intlv_addr_bit" tells us how many "Y" bits
-		 * there are (where "I" starts).
+		 * or XXXXXXIIZZZIYYY where III is the ID for this CS, and
+		 * XXXXXXZZZYYYYY are the address bits from the post-interleaved
+		 * address. "num_intlv_bits" has been calculated to tell us how
+		 * many "I" bits there are. "intlv_addr_bit" tells us how many
+		 * "Y" bits there are (where "I" starts).
+		 *
+		 * The "split" III is only used in the COD modes, where there
+		 * is one bit I at "intlv_addr_bit", and the remaining CS bits
+		 * are higher up starting at bit 12.
 		 */
-		temp_addr_y = ret_addr & GENMASK_ULL(intlv_addr_bit-1, 0);
-		temp_addr_i = (cs_id << intlv_addr_bit);
-		temp_addr_x = (ret_addr & GENMASK_ULL(63, intlv_addr_bit)) << num_intlv_bits;
-		ret_addr    = temp_addr_x | temp_addr_i | temp_addr_y;
+		addr_y = ret_addr & GENMASK_ULL(intlv_addr_bit - 1, 0);
+
+		if (split_normalized) {
+			addr_x = ret_addr & GENMASK_ULL(63, 11);
+			addr_x <<= num_intlv_bits;
+
+			addr_z = ret_addr & GENMASK_ULL(10, intlv_addr_bit);
+			addr_z <<= 1;
+		} else {
+			addr_x = ret_addr & GENMASK_ULL(63, intlv_addr_bit);
+			addr_x <<= num_intlv_bits;
+
+			addr_z = 0;
+		}
+
+		ret_addr = addr_x | addr_z | addr_y;
 	}
 
 	/* Add dram base address */
@@ -860,18 +947,70 @@  int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr)
 			ret_addr += (BIT_ULL(32) - dram_hole_base);
 	}
 
-	if (hash_enabled) {
-		/* Save some parentheses and grab ls-bit at the end. */
-		hashed_bit =	(ret_addr >> 12) ^
+	/*
+	 * There are three cases for hashing:
+	 * 1) No Hashing
+	 * 2) Legacy Hashing
+	 * 3) Cluster-on-Die (COD) Hashing
+	 */
+	if (!hash_enabled) {
+		/* Fill in the interleave bit. */
+		if (intlv_num_chan)
+			ret_addr |= (cs_id << intlv_addr_bit);
+	} else if (legacy_df) {
+		/* Legacy 2ch hash. */
+		u8 hashed_bit =	(ret_addr >> 12) ^
 				(ret_addr >> 18) ^
 				(ret_addr >> 21) ^
 				(ret_addr >> 30) ^
 				cs_id;
 
 		hashed_bit &= BIT(0);
+		ret_addr ^= hashed_bit << intlv_addr_bit;
+	} else {
+		u8 hashed_bit, hash_ctl_64K, hash_ctl_2M, hash_ctl_1G;
+
+		/* Read D18F0x3F8 (DfGlobalCtrl)). */
+		if (amd_df_indirect_read(nid, 0, 0x3F8, umc, &tmp))
+			goto out_err;
+
+		hash_ctl_64K = !!(tmp & BIT(20));
+		hash_ctl_2M  = !!(tmp & BIT(21));
+		hash_ctl_1G  = !!(tmp & BIT(22));
+
+		/* COD with 2ch, 4ch, or 8ch hash. */
+		hashed_bit =	(ret_addr >> 14) ^
+				((ret_addr >> 18) & hash_ctl_64K) ^
+				((ret_addr >> 23) & hash_ctl_2M) ^
+				((ret_addr >> 32) & hash_ctl_1G) ^
+				cs_id;
+
+		hashed_bit &= BIT(0);
+		ret_addr ^= hashed_bit << intlv_addr_bit;
+
+		/* COD with 4ch or 8ch hash. */
+		if ((intlv_num_chan == 2) || (intlv_num_chan == 3)) {
+			hashed_bit =	(ret_addr >> 12) ^
+					((ret_addr >> 16) & hash_ctl_64K) ^
+					((ret_addr >> 21) & hash_ctl_2M) ^
+					((ret_addr >> 30) & hash_ctl_1G) ^
+					(cs_id >> 1);
+
+			hashed_bit &= BIT(0);
+			ret_addr ^= hashed_bit << 12;
+		}
+
+		/* COD with 8ch hash. */
+		if (intlv_num_chan == 3) {
+			hashed_bit =	(ret_addr >> 13) ^
+					((ret_addr >> 17) & hash_ctl_64K) ^
+					((ret_addr >> 22) & hash_ctl_2M) ^
+					((ret_addr >> 31) & hash_ctl_1G) ^
+					(cs_id >> 2);
 
-		if (hashed_bit != ((ret_addr >> intlv_addr_bit) & BIT(0)))
-			ret_addr ^= BIT(intlv_addr_bit);
+			hashed_bit &= BIT(0);
+			ret_addr ^= hashed_bit << 13;
+		}
 	}
 
 	/* Is calculated system address is above DRAM limit address? */