diff mbox series

[05/20] x86/mce/amd: Use helper for UMC bank type check

Message ID 20231118193248.1296798-6-yazen.ghannam@amd.com (mailing list archive)
State Handled Elsewhere
Headers show
Series MCA Updates | expand

Commit Message

Yazen Ghannam Nov. 18, 2023, 7:32 p.m. UTC
Scalable MCA systems use values in the MCA_IPID register to describe the
type of hardware for an MCA bank. This information is used when
bank-specific actions or decoding are needed. Otherwise,
microarchitectural information, like MCA_STATUS bits, should be used.

Currently, the bank type information is cached at boot time for all CPUs
and all banks. This uses more memory as the number of CPUs and MCA banks
increases. Furthermore, this causes bank-specific actions to rely on the
OS "CPU number" to look up cached values. And this can break if the CPU
number processing an error is not the same as the CPU that reported the
error.

The bank type should be determined solely from the MCA_IPID values. And
the cached information should be removed.

Define a helper function to check for a UMC bank type. This simplifies
the common case where software needs to determine if an MCA error is for
memory, and where the exact bank type is not needed.

Use the bitfield helpers (GENMASK_ULL()/FIELD_GET()) for the new HWID mask, and rename the old mask to MCI_IPID_HWID_OLD until it is removed.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
 arch/x86/include/asm/mce.h    |  3 ++-
 arch/x86/kernel/cpu/mce/amd.c | 15 +++++++++------
 2 files changed, 11 insertions(+), 7 deletions(-)

Comments

Borislav Petkov Nov. 27, 2023, 11:43 a.m. UTC | #1
On Sat, Nov 18, 2023 at 01:32:33PM -0600, Yazen Ghannam wrote:
> @@ -714,14 +721,10 @@ static bool legacy_mce_is_memory_error(struct mce *m)
>   */
>  static bool smca_mce_is_memory_error(struct mce *m)
>  {
> -	enum smca_bank_types bank_type;
> -
>  	if (XEC(m->status, 0x3f))
>  		return false;
>  
> -	bank_type = smca_get_bank_type(m->extcpu, m->bank);
> -
> -	return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2;
> +	return smca_umc_bank_type(m->ipid);

	return FIELD_GET(MCI_IPID_HWID, ipid) == IPID_TYPE_UMC;

after having done:

#define IPID_TYPE_UMC	0x96

and you don't need that silly helper.

And then you can do more cleanups on top by doing

        /* Unified Memory Controller MCA type */
        { SMCA_UMC,      HWID_MCATYPE(IPID_TYPE_UMC, 0x0)        },
        { SMCA_UMC_V2,   HWID_MCATYPE(IPID_TYPE_UMC, 0x1)        },

and have all the numbering properly defined and abstracted away.

Thx.
Yazen Ghannam Nov. 27, 2023, 3 p.m. UTC | #2
On 11/27/2023 6:43 AM, Borislav Petkov wrote:
> On Sat, Nov 18, 2023 at 01:32:33PM -0600, Yazen Ghannam wrote:
>> @@ -714,14 +721,10 @@ static bool legacy_mce_is_memory_error(struct mce *m)
>>    */
>>   static bool smca_mce_is_memory_error(struct mce *m)
>>   {
>> -	enum smca_bank_types bank_type;
>> -
>>   	if (XEC(m->status, 0x3f))
>>   		return false;
>>   
>> -	bank_type = smca_get_bank_type(m->extcpu, m->bank);
>> -
>> -	return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2;
>> +	return smca_umc_bank_type(m->ipid);
> 
> 	return FIELD_GET(MCI_IPID_HWID, ipid) == IPID_TYPE_UMC;
> 
> after having done:
> 
> #define IPID_TYPE_UMC	0x96
> 
> and you don't need that silly helper.

The helper is also used in the following patch. But in any case, it may 
be overkill. So I'll drop it.

> 
> And then you can do more cleanups on top by doing
> 
>          /* Unified Memory Controller MCA type */
>          { SMCA_UMC,      HWID_MCATYPE(IPID_TYPE_UMC, 0x0)        },
>          { SMCA_UMC_V2,   HWID_MCATYPE(IPID_TYPE_UMC, 0x1)        },
> 
> and have all the numbering properly defined and abstracted away.
>

Yep, agreed. Thanks for the suggestion.

Thanks,
Yazen
diff mbox series

Patch

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 4ad49afca2db..c43b41677a3e 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -60,7 +60,8 @@ 
  */
 #define MCI_CONFIG_MCAX		0x1
 #define MCI_IPID_MCATYPE	0xFFFF0000
-#define MCI_IPID_HWID		0xFFF
+#define MCI_IPID_HWID_OLD	0xFFF
+#define MCI_IPID_HWID		GENMASK_ULL(43, 32)
 
 /*
  * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 6cf8ed9c79be..c8fb6c24170f 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -7,6 +7,7 @@ 
  *
  *  All MC4_MISCi registers are shared between cores on a node.
  */
+#include <linux/bitfield.h>
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
 #include <linux/kobject.h>
@@ -143,6 +144,12 @@  enum smca_bank_types smca_get_bank_type(unsigned int cpu, unsigned int bank)
 }
 EXPORT_SYMBOL_GPL(smca_get_bank_type);
 
+/* UMCs have HWID=0x96.*/
+static bool smca_umc_bank_type(u64 ipid)
+{
+	return FIELD_GET(MCI_IPID_HWID, ipid) == 0x96;
+}
+
 static const struct smca_hwid smca_hwid_mcatypes[] = {
 	/* { bank_type, hwid_mcatype } */
 
@@ -304,7 +311,7 @@  static void smca_configure(unsigned int bank, unsigned int cpu)
 		return;
 	}
 
-	hwid_mcatype = HWID_MCATYPE(high & MCI_IPID_HWID,
+	hwid_mcatype = HWID_MCATYPE(high & MCI_IPID_HWID_OLD,
 				    (high & MCI_IPID_MCATYPE) >> 16);
 
 	for (i = 0; i < ARRAY_SIZE(smca_hwid_mcatypes); i++) {
@@ -714,14 +721,10 @@  static bool legacy_mce_is_memory_error(struct mce *m)
  */
 static bool smca_mce_is_memory_error(struct mce *m)
 {
-	enum smca_bank_types bank_type;
-
 	if (XEC(m->status, 0x3f))
 		return false;
 
-	bank_type = smca_get_bank_type(m->extcpu, m->bank);
-
-	return bank_type == SMCA_UMC || bank_type == SMCA_UMC_V2;
+	return smca_umc_bank_type(m->ipid);
 }
 
 bool amd_mce_is_memory_error(struct mce *m)