diff mbox series

[v2] hw/poison: Add in-use hugepage filter judgement and avoid filter page impact on mce handler

Message ID 20220217030038.1552124-1-luofei@unicloud.com (mailing list archive)
State New
Headers show
Series [v2] hw/poison: Add in-use hugepage filter judgement and avoid filter page impact on mce handler | expand

Commit Message

luofei Feb. 17, 2022, 3 a.m. UTC
After successfully obtaining the reference count of the huge
page, it is still necessary to call hwpoison_filter() to make a
filter judgement, otherwise the filter hugepage will be unmaped
and the related process may be killed.

Also when the huge page meets the filter conditions, it should
not be regarded as successful memory_failure() processing for
mce handler, but should return a value to inform the caller,
otherwise the caller regards the error page has been identified
and isolated, which may lead to calling set_mce_nospec() to change
page attribute, etc.

Signed-off-by: luofei <luofei@unicloud.com>
---
 arch/x86/kernel/cpu/mce/core.c | 22 +++++++++++-----------
 include/linux/mm.h             |  1 +
 mm/memory-failure.c            | 25 +++++++++++++++++++++++--
 3 files changed, 35 insertions(+), 13 deletions(-)

Comments

HORIGUCHI NAOYA(堀口 直也) Feb. 18, 2022, 3:08 a.m. UTC | #1
On Wed, Feb 16, 2022 at 10:00:38PM -0500, luofei wrote:
> After successfully obtaining the reference count of the huge
> page, it is still necessary to call hwpoison_filter() to make a
> filter judgement, otherwise the filter hugepage will be unmaped
> and the related process may be killed.
> 
> Also when the huge page meets the filter conditions, it should
> not be regarded as successful memory_failure() processing for
> mce handler, but should return a value to inform the caller,
> otherwise the caller regards the error page has been identified
> and isolated, which may lead to calling set_mce_nospec() to change
> page attribute, etc.
> 
> Signed-off-by: luofei <luofei@unicloud.com>

This patch seems to do two separate things (introducing MF_MCE_HANDLE,
and adding hwpoison_filter() in memory_failure_hugetlb()), so could you
separate the patch into two?

> ---
>  arch/x86/kernel/cpu/mce/core.c | 22 +++++++++++-----------
>  include/linux/mm.h             |  1 +
>  mm/memory-failure.c            | 25 +++++++++++++++++++++++--
>  3 files changed, 35 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 5818b837fd4d..c2b99c60225f 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -612,7 +612,7 @@ static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
>  		return NOTIFY_DONE;
>  
>  	pfn = mce->addr >> PAGE_SHIFT;
> -	if (!memory_failure(pfn, 0)) {
> +	if (!memory_failure(pfn, MF_MCE_HANDLE)) {
>  		set_mce_nospec(pfn, whole_page(mce));
>  		mce->kflags |= MCE_HANDLED_UC;
>  	}
> @@ -1286,7 +1286,7 @@ static void kill_me_now(struct callback_head *ch)
>  static void kill_me_maybe(struct callback_head *cb)
>  {
>  	struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
> -	int flags = MF_ACTION_REQUIRED;
> +	int flags = MF_ACTION_REQUIRED | MF_MCE_HANDLE;
>  	int ret;
>  
>  	p->mce_count = 0;
> @@ -1300,14 +1300,14 @@ static void kill_me_maybe(struct callback_head *cb)
>  		set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
>  		sync_core();
>  		return;
> -	}
> -
> -	/*
> -	 * -EHWPOISON from memory_failure() means that it already sent SIGBUS
> -	 * to the current process with the proper error info, so no need to
> -	 * send SIGBUS here again.
> -	 */
> -	if (ret == -EHWPOISON)
> +	} else if (ret == -EHWPOISON || ret == 1)
> +		/*
> +		 * -EHWPOISON from memory_failure() means that it already sent SIGBUS
> +		 * to the current process with the proper error info, so no need to
> +		 * send SIGBUS here again.
> +		 *
> +		 * 1 means it's a filter page, no need to deal with.
> +		 */

The new return code 1 seems to be handled in the same manner as -EHWPOISON,
so how about simply using -EHWPOISON as return code for the new case?
Then, the meaning of -EHWPOISON at this context would change like below:

 	/*
-	 * -EHWPOISON from memory_failure() means that it already sent SIGBUS
-	 * to the current process with the proper error info, so no need to
-	 * send SIGBUS here again.
+        * -EHWPOISON from memory_failure() means that memory_failure() did
+	 * not handle the error event for the following reason:
+	 *   - SIGBUS has already been sent to the current process with the
+	 *     proper error info, or
+	 *   - hwpoison_filter() filtered the event,
+	 * so no need to deal with it more.
 	 */


Thanks,
Naoya Horiguchi

>  		return;
>  
>  	pr_err("Memory error not recovered");
> @@ -1320,7 +1320,7 @@ static void kill_me_never(struct callback_head *cb)
>  
>  	p->mce_count = 0;
>  	pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
> -	if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
> +	if (!memory_failure(p->mce_addr >> PAGE_SHIFT, MF_MCE_HANDLE))
>  		set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
>  }
>  
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 213cc569b192..f4703f948e9a 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3188,6 +3188,7 @@ enum mf_flags {
>  	MF_MUST_KILL = 1 << 2,
>  	MF_SOFT_OFFLINE = 1 << 3,
>  	MF_UNPOISON = 1 << 4,
> +	MF_MCE_HANDLE = 1 << 5,
>  };
>  extern int memory_failure(unsigned long pfn, int flags);
>  extern void memory_failure_queue(unsigned long pfn, int flags);
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 97a9ed8f87a9..1a0bd91a685b 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1526,7 +1526,10 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
>  				if (TestClearPageHWPoison(head))
>  					num_poisoned_pages_dec();
>  				unlock_page(head);
> -				return 0;
> +				if (flags & MF_MCE_HANDLE)
> +					return 1;
> +				else
> +					return 0;
>  			}
>  			unlock_page(head);
>  			res = MF_FAILED;
> @@ -1545,6 +1548,17 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
>  	lock_page(head);
>  	page_flags = head->flags;
>  
> +	if (hwpoison_filter(p)) {
> +		if (TestClearPageHWPoison(head))
> +			num_poisoned_pages_dec();
> +		put_page(p);
> +		if (flags & MF_MCE_HANDLE)
> +			res = 1;
> +		else
> +			res = 0;
> +		goto out;
> +	}
> +
>  	/*
>  	 * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
>  	 * simply disable it. In order to make it work properly, we need
> @@ -1613,7 +1627,10 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
>  		goto out;
>  
>  	if (hwpoison_filter(page)) {
> -		rc = 0;
> +		if (flags & MF_MCE_HANDLE)
> +			rc = 1;
> +		else
> +			rc = 0;
>  		goto unlock;
>  	}
>  
> @@ -1837,6 +1854,10 @@ int memory_failure(unsigned long pfn, int flags)
>  			num_poisoned_pages_dec();
>  		unlock_page(p);
>  		put_page(p);
> +		if (flags & MF_MCE_HANDLE)
> +			res = 1;
> +		else
> +			res = 0;
>  		goto unlock_mutex;
>  	}
>  
> -- 
> 2.27.0
luofei Feb. 18, 2022, 6:30 a.m. UTC | #2
>> After successfully obtaining the reference count of the huge
>> page, it is still necessary to call hwpoison_filter() to make a
>> filter judgement, otherwise the filter hugepage will be unmaped
>> and the related process may be killed.
>>
>> Also when the huge page meets the filter conditions, it should
>> not be regarded as successful memory_failure() processing for
>> mce handler, but should return a value to inform the caller,
>> otherwise the caller regards the error page has been identified
>> and isolated, which may lead to calling set_mce_nospec() to change
>> page attribute, etc.
>>
>> Signed-off-by: luofei <luofei@unicloud.com>
>
>This patch seems to do two separate things (introducing MF_MCE_HANDLE,
>and adding hwpoison_filter() in memory_failure_hugetlb()), so could you
>separate the patch into two?

Yes, these two things are not very related, I will submit two patches to

describe them separatley. :)

>> -
>> -     /*
>> -      * -EHWPOISON from memory_failure() means that it already sent SIGBUS
>> -      * to the current process with the proper error info, so no need to
>> -      * send SIGBUS here again.
>> -      */
>> -     if (ret == -EHWPOISON)
>> +     } else if (ret == -EHWPOISON || ret == 1)
>> +             /*
>> +              * -EHWPOISON from memory_failure() means that it already sent SIGBUS
>> +              * to the current process with the proper error info, so no need to
>> +              * send SIGBUS here again.
>> +              *
>> +              * 1 means it's a filter page, no need to deal with.
>> +              */
>
>The new return code 1 seems to be handled in the same manner as -EHWPOISON,
>so how about simply using -EHWPOISON as return code for the new case?
>Then, the meaning of -EHWPOISON at this context would change like below:
>
>        /*
>-        * -EHWPOISON from memory_failure() means that it already sent SIGBUS
>-        * to the current process with the proper error info, so no need to
>-        * send SIGBUS here again.
>+        * -EHWPOISON from memory_failure() means that memory_failure() did
>+        * not handle the error event for the following reason:
>+        *   - SIGBUS has already been sent to the current process with the
>+        *     proper error info, or
>+        *   - hwpoison_filter() filtered the event,
>+        * so no need to deal with it more.
>          */


Yes, here -EHWPOISON can represent the same case, I will just use -EHWPOISON

to simplify:)


Thanks
diff mbox series

Patch

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5818b837fd4d..c2b99c60225f 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -612,7 +612,7 @@  static int uc_decode_notifier(struct notifier_block *nb, unsigned long val,
 		return NOTIFY_DONE;
 
 	pfn = mce->addr >> PAGE_SHIFT;
-	if (!memory_failure(pfn, 0)) {
+	if (!memory_failure(pfn, MF_MCE_HANDLE)) {
 		set_mce_nospec(pfn, whole_page(mce));
 		mce->kflags |= MCE_HANDLED_UC;
 	}
@@ -1286,7 +1286,7 @@  static void kill_me_now(struct callback_head *ch)
 static void kill_me_maybe(struct callback_head *cb)
 {
 	struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me);
-	int flags = MF_ACTION_REQUIRED;
+	int flags = MF_ACTION_REQUIRED | MF_MCE_HANDLE;
 	int ret;
 
 	p->mce_count = 0;
@@ -1300,14 +1300,14 @@  static void kill_me_maybe(struct callback_head *cb)
 		set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
 		sync_core();
 		return;
-	}
-
-	/*
-	 * -EHWPOISON from memory_failure() means that it already sent SIGBUS
-	 * to the current process with the proper error info, so no need to
-	 * send SIGBUS here again.
-	 */
-	if (ret == -EHWPOISON)
+	} else if (ret == -EHWPOISON || ret == 1)
+		/*
+		 * -EHWPOISON from memory_failure() means that it already sent SIGBUS
+		 * to the current process with the proper error info, so no need to
+		 * send SIGBUS here again.
+		 *
+		 * 1 means it's a filter page, no need to deal with.
+		 */
 		return;
 
 	pr_err("Memory error not recovered");
@@ -1320,7 +1320,7 @@  static void kill_me_never(struct callback_head *cb)
 
 	p->mce_count = 0;
 	pr_err("Kernel accessed poison in user space at %llx\n", p->mce_addr);
-	if (!memory_failure(p->mce_addr >> PAGE_SHIFT, 0))
+	if (!memory_failure(p->mce_addr >> PAGE_SHIFT, MF_MCE_HANDLE))
 		set_mce_nospec(p->mce_addr >> PAGE_SHIFT, p->mce_whole_page);
 }
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 213cc569b192..f4703f948e9a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3188,6 +3188,7 @@  enum mf_flags {
 	MF_MUST_KILL = 1 << 2,
 	MF_SOFT_OFFLINE = 1 << 3,
 	MF_UNPOISON = 1 << 4,
+	MF_MCE_HANDLE = 1 << 5,
 };
 extern int memory_failure(unsigned long pfn, int flags);
 extern void memory_failure_queue(unsigned long pfn, int flags);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 97a9ed8f87a9..1a0bd91a685b 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1526,7 +1526,10 @@  static int memory_failure_hugetlb(unsigned long pfn, int flags)
 				if (TestClearPageHWPoison(head))
 					num_poisoned_pages_dec();
 				unlock_page(head);
-				return 0;
+				if (flags & MF_MCE_HANDLE)
+					return 1;
+				else
+					return 0;
 			}
 			unlock_page(head);
 			res = MF_FAILED;
@@ -1545,6 +1548,17 @@  static int memory_failure_hugetlb(unsigned long pfn, int flags)
 	lock_page(head);
 	page_flags = head->flags;
 
+	if (hwpoison_filter(p)) {
+		if (TestClearPageHWPoison(head))
+			num_poisoned_pages_dec();
+		put_page(p);
+		if (flags & MF_MCE_HANDLE)
+			res = 1;
+		else
+			res = 0;
+		goto out;
+	}
+
 	/*
 	 * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so
 	 * simply disable it. In order to make it work properly, we need
@@ -1613,7 +1627,10 @@  static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 		goto out;
 
 	if (hwpoison_filter(page)) {
-		rc = 0;
+		if (flags & MF_MCE_HANDLE)
+			rc = 1;
+		else
+			rc = 0;
 		goto unlock;
 	}
 
@@ -1837,6 +1854,10 @@  int memory_failure(unsigned long pfn, int flags)
 			num_poisoned_pages_dec();
 		unlock_page(p);
 		put_page(p);
+		if (flags & MF_MCE_HANDLE)
+			res = 1;
+		else
+			res = 0;
 		goto unlock_mutex;
 	}