diff mbox series

[v10,2/6] arm64: add support for machine check error safe

Message ID 20240129134652.4004931-3-tongtiangen@huawei.com (mailing list archive)
State New, archived
Headers show
Series arm64: add machine check safe support | expand

Commit Message

Tong Tiangen Jan. 29, 2024, 1:46 p.m. UTC
For the arm64 kernel, when it processes hardware memory errors for
synchronize notifications(do_sea()), if the errors is consumed within the
kernel, the current processing is panic. However, it is not optimal.

Take uaccess for example, if the uaccess operation fails due to memory
error, only the user process will be affected. Killing the user process and
isolating the corrupt page is a better choice.

This patch only enable machine error check framework and adds an exception
fixup before the kernel panic in do_sea().

Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
---
 arch/arm64/Kconfig               |  1 +
 arch/arm64/include/asm/extable.h |  1 +
 arch/arm64/mm/extable.c          | 16 ++++++++++++++++
 arch/arm64/mm/fault.c            | 29 ++++++++++++++++++++++++++++-
 4 files changed, 46 insertions(+), 1 deletion(-)

Comments

Mark Rutland Jan. 29, 2024, 5:51 p.m. UTC | #1
On Mon, Jan 29, 2024 at 09:46:48PM +0800, Tong Tiangen wrote:
> For the arm64 kernel, when it processes hardware memory errors for
> synchronize notifications(do_sea()), if the errors is consumed within the
> kernel, the current processing is panic. However, it is not optimal.
> 
> Take uaccess for example, if the uaccess operation fails due to memory
> error, only the user process will be affected. Killing the user process and
> isolating the corrupt page is a better choice.
> 
> This patch only enable machine error check framework and adds an exception
> fixup before the kernel panic in do_sea().
> 
> Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
> ---
>  arch/arm64/Kconfig               |  1 +
>  arch/arm64/include/asm/extable.h |  1 +
>  arch/arm64/mm/extable.c          | 16 ++++++++++++++++
>  arch/arm64/mm/fault.c            | 29 ++++++++++++++++++++++++++++-
>  4 files changed, 46 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index aa7c1d435139..2cc34b5e7abb 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -20,6 +20,7 @@ config ARM64
>  	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
>  	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
>  	select ARCH_HAS_CACHE_LINE_SIZE
> +	select ARCH_HAS_COPY_MC if ACPI_APEI_GHES
>  	select ARCH_HAS_CURRENT_STACK_POINTER
>  	select ARCH_HAS_DEBUG_VIRTUAL
>  	select ARCH_HAS_DEBUG_VM_PGTABLE
> diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h
> index 72b0e71cc3de..f80ebd0addfd 100644
> --- a/arch/arm64/include/asm/extable.h
> +++ b/arch/arm64/include/asm/extable.h
> @@ -46,4 +46,5 @@ bool ex_handler_bpf(const struct exception_table_entry *ex,
>  #endif /* !CONFIG_BPF_JIT */
>  
>  bool fixup_exception(struct pt_regs *regs);
> +bool fixup_exception_mc(struct pt_regs *regs);
>  #endif
> diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
> index 228d681a8715..478e639f8680 100644
> --- a/arch/arm64/mm/extable.c
> +++ b/arch/arm64/mm/extable.c
> @@ -76,3 +76,19 @@ bool fixup_exception(struct pt_regs *regs)
>  
>  	BUG();
>  }
> +
> +bool fixup_exception_mc(struct pt_regs *regs)

Can we please replace 'mc' with something like 'memory_error' ?

There's no "machine check" on arm64, and 'mc' is opaque regardless.

> +{
> +	const struct exception_table_entry *ex;
> +
> +	ex = search_exception_tables(instruction_pointer(regs));
> +	if (!ex)
> +		return false;
> +
> +	/*
> +	 * This is not complete, More Machine check safe extable type can
> +	 * be processed here.
> +	 */
> +
> +	return false;
> +}

As with my comment on the subsequenty patch, I'd much prefer that we handle
EX_TYPE_UACCESS_ERR_ZERO from the outset.



> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 55f6455a8284..312932dc100b 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -730,6 +730,31 @@ static int do_bad(unsigned long far, unsigned long esr, struct pt_regs *regs)
>  	return 1; /* "fault" */
>  }
>  
> +static bool arm64_do_kernel_sea(unsigned long addr, unsigned int esr,
> +				     struct pt_regs *regs, int sig, int code)
> +{
> +	if (!IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC))
> +		return false;
> +
> +	if (user_mode(regs))
> +		return false;

This function is called "arm64_do_kernel_sea"; surely the caller should *never*
call this for a SEA taken from user mode?

> +
> +	if (apei_claim_sea(regs) < 0)
> +		return false;
> +
> +	if (!fixup_exception_mc(regs))
> +		return false;
> +
> +	if (current->flags & PF_KTHREAD)
> +		return true;

I think this needs a comment; why do we allow kthreads to go on, yet kill user
threads? What about helper threads (e.g. for io_uring)?

> +
> +	set_thread_esr(0, esr);

Why do we set the ESR to 0?

Mark.

> +	arm64_force_sig_fault(sig, code, addr,
> +		"Uncorrected memory error on access to user memory\n");
> +
> +	return true;
> +}
> +
>  static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
>  {
>  	const struct fault_info *inf;
> @@ -755,7 +780,9 @@ static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
>  		 */
>  		siaddr  = untagged_addr(far);
>  	}
> -	arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
> +
> +	if (!arm64_do_kernel_sea(siaddr, esr, regs, inf->sig, inf->code))
> +		arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
>  
>  	return 0;
>  }
> -- 
> 2.25.1
>
Tong Tiangen Jan. 30, 2024, 10:57 a.m. UTC | #2
在 2024/1/30 1:51, Mark Rutland 写道:
> On Mon, Jan 29, 2024 at 09:46:48PM +0800, Tong Tiangen wrote:
>> For the arm64 kernel, when it processes hardware memory errors for
>> synchronize notifications(do_sea()), if the errors is consumed within the
>> kernel, the current processing is panic. However, it is not optimal.
>>
>> Take uaccess for example, if the uaccess operation fails due to memory
>> error, only the user process will be affected. Killing the user process and
>> isolating the corrupt page is a better choice.
>>
>> This patch only enable machine error check framework and adds an exception
>> fixup before the kernel panic in do_sea().
>>
>> Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
>> ---
>>   arch/arm64/Kconfig               |  1 +
>>   arch/arm64/include/asm/extable.h |  1 +
>>   arch/arm64/mm/extable.c          | 16 ++++++++++++++++
>>   arch/arm64/mm/fault.c            | 29 ++++++++++++++++++++++++++++-
>>   4 files changed, 46 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index aa7c1d435139..2cc34b5e7abb 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -20,6 +20,7 @@ config ARM64
>>   	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
>>   	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
>>   	select ARCH_HAS_CACHE_LINE_SIZE
>> +	select ARCH_HAS_COPY_MC if ACPI_APEI_GHES
>>   	select ARCH_HAS_CURRENT_STACK_POINTER
>>   	select ARCH_HAS_DEBUG_VIRTUAL
>>   	select ARCH_HAS_DEBUG_VM_PGTABLE
>> diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h
>> index 72b0e71cc3de..f80ebd0addfd 100644
>> --- a/arch/arm64/include/asm/extable.h
>> +++ b/arch/arm64/include/asm/extable.h
>> @@ -46,4 +46,5 @@ bool ex_handler_bpf(const struct exception_table_entry *ex,
>>   #endif /* !CONFIG_BPF_JIT */
>>   
>>   bool fixup_exception(struct pt_regs *regs);
>> +bool fixup_exception_mc(struct pt_regs *regs);
>>   #endif
>> diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
>> index 228d681a8715..478e639f8680 100644
>> --- a/arch/arm64/mm/extable.c
>> +++ b/arch/arm64/mm/extable.c
>> @@ -76,3 +76,19 @@ bool fixup_exception(struct pt_regs *regs)
>>   
>>   	BUG();
>>   }
>> +
>> +bool fixup_exception_mc(struct pt_regs *regs)
> 
> Can we please replace 'mc' with something like 'memory_error' ?
> 
> There's no "machine check" on arm64, and 'mc' is opaque regardless.

OK, It's more appropriate to use "memory_error" on arm64.

> 
>> +{
>> +	const struct exception_table_entry *ex;
>> +
>> +	ex = search_exception_tables(instruction_pointer(regs));
>> +	if (!ex)
>> +		return false;
>> +
>> +	/*
>> +	 * This is not complete, More Machine check safe extable type can
>> +	 * be processed here.
>> +	 */
>> +
>> +	return false;
>> +}
> 
> As with my comment on the subsequenty patch, I'd much prefer that we handle
> EX_TYPE_UACCESS_ERR_ZERO from the outset.

OK, In the next version, the two patches will be merged.

> 
> 
> 
>> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
>> index 55f6455a8284..312932dc100b 100644
>> --- a/arch/arm64/mm/fault.c
>> +++ b/arch/arm64/mm/fault.c
>> @@ -730,6 +730,31 @@ static int do_bad(unsigned long far, unsigned long esr, struct pt_regs *regs)
>>   	return 1; /* "fault" */
>>   }
>>   
>> +static bool arm64_do_kernel_sea(unsigned long addr, unsigned int esr,
>> +				     struct pt_regs *regs, int sig, int code)
>> +{
>> +	if (!IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC))
>> +		return false;
>> +
>> +	if (user_mode(regs))
>> +		return false;
> 
> This function is called "arm64_do_kernel_sea"; surely the caller should *never*
> call this for a SEA taken from user mode?

In do_sea(), the processing logic is as follows:
   do_sea()
   {
     [...]
     if (user_mode(regs) && apei_claim_sea(regs) == 0) {
        return 0;
     }
     [...]
     //[1]
     if (!arm64_do_kernel_sea()) {
        arm64_notify_die();
     }
   }

[1] user_mode() is still possible to go here,If user_mode() goes here,
  it indicates that the impact caused by the memory error cannot be
  processed correctly by apei_claim_sea().


In this case, only arm64_notify_die() can be used, This also maintains
the original logic of user_mode()'s processing.

> 
>> +
>> +	if (apei_claim_sea(regs) < 0)
>> +		return false;
>> +
>> +	if (!fixup_exception_mc(regs))
>> +		return false;
>> +
>> +	if (current->flags & PF_KTHREAD)
>> +		return true;
> 
> I think this needs a comment; why do we allow kthreads to go on, yet kill user
> threads? What about helper threads (e.g. for io_uring)?

If a memroy error occurs in the kernel thread, the problem is more
serious than that of the user thread. As a result, related kernel
functions, such as khugepaged, cannot run properly. kernel panic should
be a better choice at this time.

Therefore, the processing scope of this framework is limited to the user 
  thread.

> 
>> +
>> +	set_thread_esr(0, esr);
> 
> Why do we set the ESR to 0?

The purpose is to reuse the logic of arm64_notify_die() and set the 
following parameters before sending signals to users:
   current->thread.fault_address = 0;
   current->thread.fault_code = err;

I looked at the git log and found that the logic was added by this
commit:


9141300a5884 (“arm64: Provide read/write fault information in compat 
signal handlers”)

According to the description of commit message, the purpose seems to be
for aarch32.

Many thanks.
Tong.


> 
> Mark.
> 
>> +	arm64_force_sig_fault(sig, code, addr,
>> +		"Uncorrected memory error on access to user memory\n");
>> +
>> +	return true;
>> +}
>> +
>>   static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
>>   {
>>   	const struct fault_info *inf;
>> @@ -755,7 +780,9 @@ static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
>>   		 */
>>   		siaddr  = untagged_addr(far);
>>   	}
>> -	arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
>> +
>> +	if (!arm64_do_kernel_sea(siaddr, esr, regs, inf->sig, inf->code))
>> +		arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
>>   
>>   	return 0;
>>   }
>> -- 
>> 2.25.1
>>
> .
Mark Rutland Jan. 30, 2024, 1:07 p.m. UTC | #3
On Tue, Jan 30, 2024 at 06:57:24PM +0800, Tong Tiangen wrote:
> 在 2024/1/30 1:51, Mark Rutland 写道:
> > On Mon, Jan 29, 2024 at 09:46:48PM +0800, Tong Tiangen wrote:

> > > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> > > index 55f6455a8284..312932dc100b 100644
> > > --- a/arch/arm64/mm/fault.c
> > > +++ b/arch/arm64/mm/fault.c
> > > @@ -730,6 +730,31 @@ static int do_bad(unsigned long far, unsigned long esr, struct pt_regs *regs)
> > >   	return 1; /* "fault" */
> > >   }
> > > +static bool arm64_do_kernel_sea(unsigned long addr, unsigned int esr,
> > > +				     struct pt_regs *regs, int sig, int code)
> > > +{
> > > +	if (!IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC))
> > > +		return false;
> > > +
> > > +	if (user_mode(regs))
> > > +		return false;
> > 
> > This function is called "arm64_do_kernel_sea"; surely the caller should *never*
> > call this for a SEA taken from user mode?
> 
> In do_sea(), the processing logic is as follows:
>   do_sea()
>   {
>     [...]
>     if (user_mode(regs) && apei_claim_sea(regs) == 0) {
>        return 0;
>     }
>     [...]
>     //[1]
>     if (!arm64_do_kernel_sea()) {
>        arm64_notify_die();
>     }
>   }
> 
> [1] user_mode() is still possible to go here,If user_mode() goes here,
>  it indicates that the impact caused by the memory error cannot be
>  processed correctly by apei_claim_sea().
> 
> 
> In this case, only arm64_notify_die() can be used, This also maintains
> the original logic of user_mode()'s processing.

My point is that either:

(a) The name means that this should *only* be called for SEAs from a kernel
    context, and the caller should be responsible for ensuring that.

(b) The name is misleading, and the 'kernel' part should be removed from the
    name.

I prefer (a), and if you head down that route it's clear that you can get rid
of a bunch of redundant logic and remove the need for do_kernel_sea(), anyway,
e.g.

| static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
| {
|         const struct fault_info *inf = esr_to_fault_info(esr);
|         bool claimed = apei_claim_sea(regs) == 0;
|         unsigned long siaddr;
| 
|         if (claimed) {
|                 if (user_mode(regs)) {
|                         /*  
|                          * APEI claimed this as a firmware-first notification.
|                          * Some processing deferred to task_work before ret_to_user().
|                          */
|                         return 0;
|                 } else {
|                         /*
|                          * TODO: explain why this is correct.
|                          */
|                         if ((current->flags & PF_KTHREAD) &&
|                             fixup_exception_mc(regs))
|                                 return 0;
|                 }
|         }
| 
|         if (esr & ESR_ELx_FnV) {
|                 siaddr = 0;
|         } else {
|                 /*  
|                  * The architecture specifies that the tag bits of FAR_EL1 are
|                  * UNKNOWN for synchronous external aborts. Mask them out now
|                  * so that userspace doesn't see them.
|                  */
|                 siaddr  = untagged_addr(far);
|         }   
|         arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
| 
|         return 0;
| }

> > > +
> > > +	if (apei_claim_sea(regs) < 0)
> > > +		return false;
> > > +
> > > +	if (!fixup_exception_mc(regs))
> > > +		return false;
> > > +
> > > +	if (current->flags & PF_KTHREAD)
> > > +		return true;
> > 
> > I think this needs a comment; why do we allow kthreads to go on, yet kill user
> > threads? What about helper threads (e.g. for io_uring)?
> 
> If a memroy error occurs in the kernel thread, the problem is more
> serious than that of the user thread. As a result, related kernel
> functions, such as khugepaged, cannot run properly. kernel panic should
> be a better choice at this time.
> 
> Therefore, the processing scope of this framework is limited to the user
> thread.

That's reasonable, but needs to be explained in a comment.

Also, as above, I think you haven't conisderd helper threads (e.g. io_uring),
which don't have PF_KTHREAD set but do have PF_USER_WORKER set. I suspect those
need the same treatment as kthreads.

> > > +	set_thread_esr(0, esr);
> > 
> > Why do we set the ESR to 0?
> 
> The purpose is to reuse the logic of arm64_notify_die() and set the
> following parameters before sending signals to users:
>   current->thread.fault_address = 0;
>   current->thread.fault_code = err;

Ok, but there's no need to open-code that.

As per my above example, please continue to use the existing call to
arm64_notify_die() rather than open-coding bits of it.

Mark.
Tong Tiangen Jan. 30, 2024, 1:22 p.m. UTC | #4
在 2024/1/30 21:07, Mark Rutland 写道:
> On Tue, Jan 30, 2024 at 06:57:24PM +0800, Tong Tiangen wrote:
>> 在 2024/1/30 1:51, Mark Rutland 写道:
>>> On Mon, Jan 29, 2024 at 09:46:48PM +0800, Tong Tiangen wrote:
> 
>>>> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
>>>> index 55f6455a8284..312932dc100b 100644
>>>> --- a/arch/arm64/mm/fault.c
>>>> +++ b/arch/arm64/mm/fault.c
>>>> @@ -730,6 +730,31 @@ static int do_bad(unsigned long far, unsigned long esr, struct pt_regs *regs)
>>>>    	return 1; /* "fault" */
>>>>    }
>>>> +static bool arm64_do_kernel_sea(unsigned long addr, unsigned int esr,
>>>> +				     struct pt_regs *regs, int sig, int code)
>>>> +{
>>>> +	if (!IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC))
>>>> +		return false;
>>>> +
>>>> +	if (user_mode(regs))
>>>> +		return false;
>>>
>>> This function is called "arm64_do_kernel_sea"; surely the caller should *never*
>>> call this for a SEA taken from user mode?
>>
>> In do_sea(), the processing logic is as follows:
>>    do_sea()
>>    {
>>      [...]
>>      if (user_mode(regs) && apei_claim_sea(regs) == 0) {
>>         return 0;
>>      }
>>      [...]
>>      //[1]
>>      if (!arm64_do_kernel_sea()) {
>>         arm64_notify_die();
>>      }
>>    }
>>
>> [1] user_mode() is still possible to go here,If user_mode() goes here,
>>   it indicates that the impact caused by the memory error cannot be
>>   processed correctly by apei_claim_sea().
>>
>>
>> In this case, only arm64_notify_die() can be used, This also maintains
>> the original logic of user_mode()'s processing.
> 
> My point is that either:
> 
> (a) The name means that this should *only* be called for SEAs from a kernel
>      context, and the caller should be responsible for ensuring that.
> 
> (b) The name is misleading, and the 'kernel' part should be removed from the
>      name.
> 
> I prefer (a), and if you head down that route it's clear that you can get rid
> of a bunch of redundant logic and remove the need for do_kernel_sea(), anyway,
> e.g.
> 
> | static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
> | {
> |         const struct fault_info *inf = esr_to_fault_info(esr);
> |         bool claimed = apei_claim_sea(regs) == 0;
> |         unsigned long siaddr;
> |
> |         if (claimed) {
> |                 if (user_mode(regs)) {
> |                         /*
> |                          * APEI claimed this as a firmware-first notification.
> |                          * Some processing deferred to task_work before ret_to_user().
> |                          */
> |                         return 0;
> |                 } else {
> |                         /*
> |                          * TODO: explain why this is correct.
> |                          */
> |                         if ((current->flags & PF_KTHREAD) &&
> |                             fixup_exception_mc(regs))
> |                                 return 0;
> |                 }
> |         }

This code seems to be a bit more concise and avoids misleading function 
names, which I'll use in the next version:)

> |
> |         if (esr & ESR_ELx_FnV) {
> |                 siaddr = 0;
> |         } else {
> |                 /*
> |                  * The architecture specifies that the tag bits of FAR_EL1 are
> |                  * UNKNOWN for synchronous external aborts. Mask them out now
> |                  * so that userspace doesn't see them.
> |                  */
> |                 siaddr  = untagged_addr(far);
> |         }
> |         arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
> |
> |         return 0;
> | }
> 
>>>> +
>>>> +	if (apei_claim_sea(regs) < 0)
>>>> +		return false;
>>>> +
>>>> +	if (!fixup_exception_mc(regs))
>>>> +		return false;
>>>> +
>>>> +	if (current->flags & PF_KTHREAD)
>>>> +		return true;
>>>
>>> I think this needs a comment; why do we allow kthreads to go on, yet kill user
>>> threads? What about helper threads (e.g. for io_uring)?
>>
>> If a memroy error occurs in the kernel thread, the problem is more
>> serious than that of the user thread. As a result, related kernel
>> functions, such as khugepaged, cannot run properly. kernel panic should
>> be a better choice at this time.
>>
>> Therefore, the processing scope of this framework is limited to the user
>> thread.
> 
> That's reasonable, but needs to be explained in a comment.
> 
> Also, as above, I think you haven't conisderd helper threads (e.g. io_uring),
> which don't have PF_KTHREAD set but do have PF_USER_WORKER set. I suspect those
> need the same treatment as kthreads.

Okay, I'm going to investigate PF_USER_WORKER.

> 
>>>> +	set_thread_esr(0, esr);
>>>
>>> Why do we set the ESR to 0?
>>
>> The purpose is to reuse the logic of arm64_notify_die() and set the
>> following parameters before sending signals to users:
>>    current->thread.fault_address = 0;
>>    current->thread.fault_code = err;
> 
> Ok, but there's no need to open-code that.
> 
> As per my above example, please continue to use the existing call to
> arm64_notify_die() rather than open-coding bits of it.

OK.

Many thanks.
Tong.
> 
> Mark.
> .
diff mbox series

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index aa7c1d435139..2cc34b5e7abb 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -20,6 +20,7 @@  config ARM64
 	select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
 	select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
 	select ARCH_HAS_CACHE_LINE_SIZE
+	select ARCH_HAS_COPY_MC if ACPI_APEI_GHES
 	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE
diff --git a/arch/arm64/include/asm/extable.h b/arch/arm64/include/asm/extable.h
index 72b0e71cc3de..f80ebd0addfd 100644
--- a/arch/arm64/include/asm/extable.h
+++ b/arch/arm64/include/asm/extable.h
@@ -46,4 +46,5 @@  bool ex_handler_bpf(const struct exception_table_entry *ex,
 #endif /* !CONFIG_BPF_JIT */
 
 bool fixup_exception(struct pt_regs *regs);
+bool fixup_exception_mc(struct pt_regs *regs);
 #endif
diff --git a/arch/arm64/mm/extable.c b/arch/arm64/mm/extable.c
index 228d681a8715..478e639f8680 100644
--- a/arch/arm64/mm/extable.c
+++ b/arch/arm64/mm/extable.c
@@ -76,3 +76,19 @@  bool fixup_exception(struct pt_regs *regs)
 
 	BUG();
 }
+
+bool fixup_exception_mc(struct pt_regs *regs)
+{
+	const struct exception_table_entry *ex;
+
+	ex = search_exception_tables(instruction_pointer(regs));
+	if (!ex)
+		return false;
+
+	/*
+	 * This is not complete, More Machine check safe extable type can
+	 * be processed here.
+	 */
+
+	return false;
+}
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 55f6455a8284..312932dc100b 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -730,6 +730,31 @@  static int do_bad(unsigned long far, unsigned long esr, struct pt_regs *regs)
 	return 1; /* "fault" */
 }
 
+static bool arm64_do_kernel_sea(unsigned long addr, unsigned int esr,
+				     struct pt_regs *regs, int sig, int code)
+{
+	if (!IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC))
+		return false;
+
+	if (user_mode(regs))
+		return false;
+
+	if (apei_claim_sea(regs) < 0)
+		return false;
+
+	if (!fixup_exception_mc(regs))
+		return false;
+
+	if (current->flags & PF_KTHREAD)
+		return true;
+
+	set_thread_esr(0, esr);
+	arm64_force_sig_fault(sig, code, addr,
+		"Uncorrected memory error on access to user memory\n");
+
+	return true;
+}
+
 static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
 {
 	const struct fault_info *inf;
@@ -755,7 +780,9 @@  static int do_sea(unsigned long far, unsigned long esr, struct pt_regs *regs)
 		 */
 		siaddr  = untagged_addr(far);
 	}
-	arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
+
+	if (!arm64_do_kernel_sea(siaddr, esr, regs, inf->sig, inf->code))
+		arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
 
 	return 0;
 }