diff mbox series

[RFC,3/5] cxl/core: introduce cxl_mem_report_poison()

Message ID 20240209115417.724638-6-ruansy.fnst@fujitsu.com (mailing list archive)
State New, archived
Headers show
Series [RFC,1/2] hw/cxl/type3: add missing flag bit for GMER | expand

Commit Message

Shiyang Ruan Feb. 9, 2024, 11:54 a.m. UTC
If poison is detected(reported from cxl memdev), OS should be notified to
handle it.  Introduce this function:
  1. translate DPA to HPA;
  2. construct a MCE instance; (TODO: more details need to be filled)
  3. log it into MCE event queue;

After that, MCE mechanism can walk over its notifier chain to execute
specific handlers.

Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
---
 arch/x86/kernel/cpu/mce/core.c |  1 +
 drivers/cxl/core/mbox.c        | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+)

Comments

Dan Williams Feb. 10, 2024, 6:46 a.m. UTC | #1
Shiyang Ruan wrote:
> If poison is detected(reported from cxl memdev), OS should be notified to
> handle it.  Introduce this function:
>   1. translate DPA to HPA;
>   2. construct a MCE instance; (TODO: more details need to be filled)
>   3. log it into MCE event queue;
> 
> After that, MCE mechanism can walk over its notifier chain to execute
> specific handlers.
> 
> Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
> ---
>  arch/x86/kernel/cpu/mce/core.c |  1 +
>  drivers/cxl/core/mbox.c        | 33 +++++++++++++++++++++++++++++++++
>  2 files changed, 34 insertions(+)
> 
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index bc39252bc54f..a64c0aceb7e0 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -131,6 +131,7 @@ void mce_setup(struct mce *m)
>  	m->ppin = cpu_data(m->extcpu).ppin;
>  	m->microcode = boot_cpu_data.microcode;
>  }
> +EXPORT_SYMBOL_GPL(mce_setup);

No, mce_setup() is x86 specific and the CXL subsystem is CPU
architecture independent. My expectation is that CXL should translate
errors for edac similar to how the ACPI GHES code does it. See usage of
edac_raw_mc_handle_error() and memory_failure_queue().

Otherwise an MCE is a CPU consumption of poison event, and CXL is
reporting device-side discovery of poison.
Tony Luck Feb. 15, 2024, 1:19 a.m. UTC | #2
On Fri, Feb 09, 2024 at 07:54:15PM +0800, Shiyang Ruan wrote:
> If poison is detected(reported from cxl memdev), OS should be notified to
> handle it.  Introduce this function:
>   1. translate DPA to HPA;
>   2. construct a MCE instance; (TODO: more details need to be filled)
>   3. log it into MCE event queue;
> 
> After that, MCE mechanism can walk over its notifier chain to execute
> specific handlers.

This looks like a useful proof of concept patch to pass errors to all
the existing logging systems (console, mcelog, rasdaemon, EDAC). But
it's a bare minimum (just passing the address and dropping any other
interesting information about the error). I think we need something
more advanced that covers more CXL error types.

> Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
> ---
>  arch/x86/kernel/cpu/mce/core.c |  1 +
>  drivers/cxl/core/mbox.c        | 33 +++++++++++++++++++++++++++++++++
>  2 files changed, 34 insertions(+)
> 
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index bc39252bc54f..a64c0aceb7e0 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -131,6 +131,7 @@ void mce_setup(struct mce *m)
>  	m->ppin = cpu_data(m->extcpu).ppin;
>  	m->microcode = boot_cpu_data.microcode;
>  }
> +EXPORT_SYMBOL_GPL(mce_setup);
>  
>  DEFINE_PER_CPU(struct mce, injectm);
>  EXPORT_PER_CPU_SYMBOL_GPL(injectm);
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index 27166a411705..f9b6f50fbe80 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -4,6 +4,7 @@
>  #include <linux/debugfs.h>
>  #include <linux/ktime.h>
>  #include <linux/mutex.h>
> +#include <asm/mce.h>
>  #include <asm/unaligned.h>
>  #include <cxlpci.h>
>  #include <cxlmem.h>
> @@ -1290,6 +1291,38 @@ int cxl_set_timestamp(struct cxl_memdev_state *mds)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL);
>  
> +static void cxl_mem_report_poison(struct cxl_memdev *cxlmd,
> +				  struct cxl_poison_record *poison)
> +{
> +	struct mce m;
> +	u64 dpa = le64_to_cpu(poison->address) & CXL_POISON_START_MASK;
> +	u64 len = le64_to_cpu(poison->length), i;
> +	phys_addr_t phys_addr = cxl_memdev_dpa_to_hpa(cxlmd, dpa);
> +
> +	if (phys_addr)
> +		return;
> +
> +	/*
> +	 * Initialize struct mce.  Call preempt_disable() to avoid
> +	 * "BUG: using smp_processor_id() in preemptible" for now, not sure
> +	 * if this is a correct way.
> +	 */
> +	preempt_disable();
> +	mce_setup(&m);
> +	preempt_enable();
> +
> +	m.bank = -1;
> +	/* Fake a memory read error with unknown channel */
> +	m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV |
> +		   MCI_STATUS_MISCV | 0x9f;
> +	m.misc = (MCI_MISC_ADDR_PHYS << 6);
> +
> +	for (i = 0; i < len; i++) {
> +		m.addr = phys_addr++;
> +		mce_log(&m);

This loop looks wrong. What values do you expect for "len" (a.k.a.
poison->length)? Creating one log for each byte in the range will
be very noisy!

> +	}
> +}
> +
>  int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
>  		       struct cxl_region *cxlr)
>  {
> -- 
> 2.34.1

-Tony
Shiyang Ruan March 14, 2024, 3:23 p.m. UTC | #3
在 2024/2/10 14:46, Dan Williams 写道:
> Shiyang Ruan wrote:
>> If poison is detected(reported from cxl memdev), OS should be notified to
>> handle it.  Introduce this function:
>>    1. translate DPA to HPA;
>>    2. construct a MCE instance; (TODO: more details need to be filled)
>>    3. log it into MCE event queue;
>>
>> After that, MCE mechanism can walk over its notifier chain to execute
>> specific handlers.
>>
>> Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
>> ---
>>   arch/x86/kernel/cpu/mce/core.c |  1 +
>>   drivers/cxl/core/mbox.c        | 33 +++++++++++++++++++++++++++++++++
>>   2 files changed, 34 insertions(+)
>>
>> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
>> index bc39252bc54f..a64c0aceb7e0 100644
>> --- a/arch/x86/kernel/cpu/mce/core.c
>> +++ b/arch/x86/kernel/cpu/mce/core.c
>> @@ -131,6 +131,7 @@ void mce_setup(struct mce *m)
>>   	m->ppin = cpu_data(m->extcpu).ppin;
>>   	m->microcode = boot_cpu_data.microcode;
>>   }
>> +EXPORT_SYMBOL_GPL(mce_setup);
> 
> No, mce_setup() is x86 specific and the CXL subsystem is CPU
> architecture independent. My expectation is that CXL should translate
> errors for edac similar to how the ACPI GHES code does it. See usage of
> edac_raw_mc_handle_error() and memory_failure_queue().
> 
> Otherwise an MCE is a CPU consumption of poison event, and CXL is
> reporting device-side discovery of poison.

Yes, I misunderstood here.  I was mean to use MCE to finally call 
memory_failure(). I think memory_failure_queue() is what I need.

   void memory_failure_queue(unsigned long pfn, int flags)

But it can only queue one PFN at a time, we may need to make it support 
queuing a range of PFN.


--
Thanks,
Ruan.
diff mbox series

Patch

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index bc39252bc54f..a64c0aceb7e0 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -131,6 +131,7 @@  void mce_setup(struct mce *m)
 	m->ppin = cpu_data(m->extcpu).ppin;
 	m->microcode = boot_cpu_data.microcode;
 }
+EXPORT_SYMBOL_GPL(mce_setup);
 
 DEFINE_PER_CPU(struct mce, injectm);
 EXPORT_PER_CPU_SYMBOL_GPL(injectm);
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 27166a411705..f9b6f50fbe80 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -4,6 +4,7 @@ 
 #include <linux/debugfs.h>
 #include <linux/ktime.h>
 #include <linux/mutex.h>
+#include <asm/mce.h>
 #include <asm/unaligned.h>
 #include <cxlpci.h>
 #include <cxlmem.h>
@@ -1290,6 +1291,38 @@  int cxl_set_timestamp(struct cxl_memdev_state *mds)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, CXL);
 
+static void cxl_mem_report_poison(struct cxl_memdev *cxlmd,
+				  struct cxl_poison_record *poison)
+{
+	struct mce m;
+	u64 dpa = le64_to_cpu(poison->address) & CXL_POISON_START_MASK;
+	u64 len = le64_to_cpu(poison->length), i;
+	phys_addr_t phys_addr = cxl_memdev_dpa_to_hpa(cxlmd, dpa);
+
+	if (phys_addr)
+		return;
+
+	/*
+	 * Initialize struct mce.  Call preempt_disable() to avoid
+	 * "BUG: using smp_processor_id() in preemptible" for now, not sure
+	 * if this is a correct way.
+	 */
+	preempt_disable();
+	mce_setup(&m);
+	preempt_enable();
+
+	m.bank = -1;
+	/* Fake a memory read error with unknown channel */
+	m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV |
+		   MCI_STATUS_MISCV | 0x9f;
+	m.misc = (MCI_MISC_ADDR_PHYS << 6);
+
+	for (i = 0; i < len; i++) {
+		m.addr = phys_addr++;
+		mce_log(&m);
+	}
+}
+
 int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
 		       struct cxl_region *cxlr)
 {