
[v2,12/16] x86/mce/amd: Support SMCA Corrected Error Interrupt

Message ID 20240404151359.47970-13-yazen.ghannam@amd.com
Series MCA Updates

Commit Message

Yazen Ghannam April 4, 2024, 3:13 p.m. UTC
AMD systems optionally support MCA Thresholding, which provides the
ability for hardware to send an interrupt when a set error threshold is
reached. The feature counts errors of all severities, but it is
commonly used to report correctable errors with an interrupt rather
than by polling.

Scalable MCA systems allow the Platform to take control of this feature.
In this case, the OS will not see the feature configuration and control
bits in the MCA_MISC* registers. The OS will not receive the MCA
Thresholding interrupt, and it will need to poll for correctable errors.

A "corrected error interrupt" will be available on Scalable MCA systems.
This will be used in the same configuration where the Platform controls
MCA Thresholding. However, the Platform will now be able to send the
MCA Thresholding interrupt to the OS.

Check for the feature bit in the MCA_CONFIG register and attempt to set
up the MCA Thresholding interrupt handler. If successful, set the feature
enable bit in the MCA_CONFIG register to indicate to the Platform that
the OS is ready for the interrupt.
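
For reference, here is that handshake condensed into a stand-alone
sketch, with the MSR accesses and setup_APIC_eilvt() replaced by stubs.
The CFG_CE_INT_PRESENT/CFG_CE_INT_EN bit positions come from the macros
this patch adds; the fake register values and the stub function are
illustrative only.

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

/* Bit positions taken from the CFG_ and INTR_CFG_ macros in this patch. */
#define CFG_CE_INT_PRESENT (1ULL << 10)
#define CFG_CE_INT_EN      (1ULL << 40)

/* Stub standing in for setup_APIC_eilvt() + threshold vector setup. */
static bool setup_thr_handler(uint64_t intr_cfg)
{
	unsigned int lvt_offset = (intr_cfg >> 12) & 0xF; /* INTR_CFG_THR_LVT_OFFSET */

	printf("threshold handler registered at LVT offset %u\n", lvt_offset);
	return true;
}

int main(void)
{
	uint64_t intr_cfg = 0x1000;               /* fake MCA_INTR_CFG: LVT offset 1 */
	uint64_t mca_config = CFG_CE_INT_PRESENT; /* fake MCA_CONFIG read */

	/* Opt in only if the Platform offers the interrupt and setup worked. */
	if ((mca_config & CFG_CE_INT_PRESENT) && setup_thr_handler(intr_cfg))
		mca_config |= CFG_CE_INT_EN;

	printf("MCA_CONFIG = %#llx (CE interrupt %s)\n",
	       (unsigned long long)mca_config,
	       (mca_config & CFG_CE_INT_EN) ? "enabled" : "left disabled, OS polls");
	return 0;
}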

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---

Notes:
    Link:
    https://lkml.kernel.org/r/20231118193248.1296798-17-yazen.ghannam@amd.com
    
    v1->v2:
    * Rebase on earlier changes. (Yazen)

 arch/x86/kernel/cpu/mce/amd.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

Patch

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 08ee647cb6ce..a81d911d608e 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -47,6 +47,7 @@ 
 /* MCA Interrupt Configuration register, one per CPU */
 #define MSR_CU_DEF_ERR		0xC0000410
 #define MSR_MCA_INTR_CFG		0xC0000410
+#define INTR_CFG_THR_LVT_OFFSET		GENMASK_ULL(15, 12)
 #define INTR_CFG_DFR_LVT_OFFSET		GENMASK_ULL(7, 4)
 #define INTR_CFG_LEGACY_DFR_INTR_TYPE	GENMASK_ULL(2, 1)
 #define INTR_TYPE_APIC			0x1
@@ -58,8 +59,10 @@ 
 #define MCI_IPID_HWID_OLD	0xFFF
 
 /* MCA_CONFIG register, one per MCA bank */
+#define CFG_CE_INT_EN			BIT_ULL(40)
 #define CFG_DFR_INT_TYPE		GENMASK_ULL(38, 37)
 #define CFG_MCAX_EN			BIT_ULL(32)
+#define CFG_CE_INT_PRESENT		BIT_ULL(10)
 #define CFG_LSB_IN_STATUS		BIT_ULL(8)
 #define CFG_DFR_INT_SUPP		BIT_ULL(5)
 #define CFG_DFR_LOG_SUPP		BIT_ULL(2)
@@ -352,6 +355,17 @@ static void smca_set_misc_banks_map(unsigned int bank, unsigned int cpu)
 
 }
 
+static bool smca_thr_handler_enabled(u64 mca_intr_cfg)
+{
+	u8 offset = FIELD_GET(INTR_CFG_THR_LVT_OFFSET, mca_intr_cfg);
+
+	if (setup_APIC_eilvt(offset, THRESHOLD_APIC_VECTOR, APIC_EILVT_MSG_FIX, 0))
+		return false;
+
+	mce_threshold_vector = amd_threshold_interrupt;
+	return true;
+}
+
 /* SMCA sets the Deferred Error Interrupt type per bank. */
 static void configure_smca_dfr(unsigned int bank, u64 *mca_config)
 {
@@ -375,7 +389,7 @@ static void configure_smca_dfr(unsigned int bank, u64 *mca_config)
 }
 
 /* Set appropriate bits in MCA_CONFIG. */
-static void configure_smca(unsigned int bank)
+static void configure_smca(unsigned int bank, u64 mca_intr_cfg)
 {
 	u64 mca_config;
 
@@ -399,6 +413,9 @@ static void configure_smca(unsigned int bank)
 	if (FIELD_GET(CFG_LSB_IN_STATUS, mca_config))
 		this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = true;
 
+	if (FIELD_GET(CFG_CE_INT_PRESENT, mca_config) && smca_thr_handler_enabled(mca_intr_cfg))
+		mca_config |= FIELD_PREP(CFG_CE_INT_EN, 0x1);
+
 	wrmsrl(MSR_AMD64_SMCA_MCx_CONFIG(bank), mca_config);
 }
 
@@ -791,7 +808,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 		if (mce_flags.smca)
 			smca_configure_old(bank, cpu);
 
-		configure_smca(bank);
+		configure_smca(bank, mca_intr_cfg);
 		disable_err_thresholding(c, bank);
 
 		for (block = 0; block < NR_BLOCKS; ++block) {