diff mbox series

[v2,1/3] mm/memory-failure.c: fix race with changing page compound again

Message ID 20220312074613.4798-2-linmiaohe@huawei.com (mailing list archive)
State New
Headers show
Series A few fixup patches for memory failure | expand

Commit Message

Miaohe Lin March 12, 2022, 7:46 a.m. UTC
There is a race window where we got the compound_head, the hugetlb page
could be freed to buddy, or even changed to another compound page just
before we try to get hwpoison page. Think about the below race window:
  CPU 1					  CPU 2
  memory_failure_hugetlb
  struct page *head = compound_head(p);
					  hugetlb page might be freed to
					  buddy, or even changed to another
					  compound page.

  get_hwpoison_page -- page is not what we want now...

If this race happens, just bail out. Also MF_MSG_DIFFERENT_PAGE_SIZE is
introduced to record this event.

Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
---
 include/linux/mm.h      |  1 +
 include/ras/ras_event.h |  1 +
 mm/memory-failure.c     | 12 ++++++++++++
 3 files changed, 14 insertions(+)

Comments

HORIGUCHI NAOYA(堀口 直也) March 13, 2022, 11:41 p.m. UTC | #1
On Sat, Mar 12, 2022 at 03:46:11PM +0800, Miaohe Lin wrote:
> There is a race window where we got the compound_head, the hugetlb page
> could be freed to buddy, or even changed to another compound page just
> before we try to get hwpoison page. Think about the below race window:
>   CPU 1					  CPU 2
>   memory_failure_hugetlb
>   struct page *head = compound_head(p);
> 					  hugetlb page might be freed to
> 					  buddy, or even changed to another
> 					  compound page.
> 
>   get_hwpoison_page -- page is not what we want now...
> 
> If this race happens, just bail out. Also MF_MSG_DIFFERENT_PAGE_SIZE is
> introduced to record this event.
> 
> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
> ---
>  include/linux/mm.h      |  1 +
>  include/ras/ras_event.h |  1 +
>  mm/memory-failure.c     | 12 ++++++++++++
>  3 files changed, 14 insertions(+)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index c9bada4096ac..ef98cff2b253 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3253,6 +3253,7 @@ enum mf_action_page_type {
>  	MF_MSG_BUDDY,
>  	MF_MSG_DAX,
>  	MF_MSG_UNSPLIT_THP,
> +	MF_MSG_DIFFERENT_PAGE_SIZE,
>  	MF_MSG_UNKNOWN,
>  };
>  
> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
> index d0337a41141c..1e694fd239b9 100644
> --- a/include/ras/ras_event.h
> +++ b/include/ras/ras_event.h
> @@ -374,6 +374,7 @@ TRACE_EVENT(aer_event,
>  	EM ( MF_MSG_BUDDY, "free buddy page" )				\
>  	EM ( MF_MSG_DAX, "dax page" )					\
>  	EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" )			\
> +	EM ( MF_MSG_DIFFERENT_PAGE_SIZE, "different page size" )	\
>  	EMe ( MF_MSG_UNKNOWN, "unknown page" )
>  
>  /*
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 5444a8ef4867..dabecd87ad3f 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -733,6 +733,7 @@ static const char * const action_page_types[] = {
>  	[MF_MSG_BUDDY]			= "free buddy page",
>  	[MF_MSG_DAX]			= "dax page",
>  	[MF_MSG_UNSPLIT_THP]		= "unsplit thp",
> +	[MF_MSG_DIFFERENT_PAGE_SIZE]	= "different page size",
>  	[MF_MSG_UNKNOWN]		= "unknown page",
>  };
>  
> @@ -1534,6 +1535,17 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
>  	}
>  
>  	lock_page(head);
> +
> +	/**

This comment section starting with '/**' is considered as kernel-doc comment,
maybe that's not expected because it just describes an implementation detail.
So normal comment section with '/*' would be better.

Otherwise, looks good to me.

Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>

> +	 * The page could have changed compound pages due to race window.
> +	 * If this happens just bail out.
> +	 */
> +	if (!PageHuge(p) || compound_head(p) != head) {
> +		action_result(pfn, MF_MSG_DIFFERENT_PAGE_SIZE, MF_IGNORED);
> +		res = -EBUSY;
> +		goto out;
> +	}
> +
>  	page_flags = head->flags;
>  
>  	if (hwpoison_filter(p)) {
> -- 
> 2.23.0
Miaohe Lin March 14, 2022, 1:51 a.m. UTC | #2
On 2022/3/14 7:41, HORIGUCHI NAOYA(堀口 直也) wrote:
> On Sat, Mar 12, 2022 at 03:46:11PM +0800, Miaohe Lin wrote:
>> There is a race window where we got the compound_head, the hugetlb page
>> could be freed to buddy, or even changed to another compound page just
>> before we try to get hwpoison page. Think about the below race window:
>>   CPU 1					  CPU 2
>>   memory_failure_hugetlb
>>   struct page *head = compound_head(p);
>> 					  hugetlb page might be freed to
>> 					  buddy, or even changed to another
>> 					  compound page.
>>
>>   get_hwpoison_page -- page is not what we want now...
>>
>> If this race happens, just bail out. Also MF_MSG_DIFFERENT_PAGE_SIZE is
>> introduced to record this event.
>>
>> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
>> ---
>>  include/linux/mm.h      |  1 +
>>  include/ras/ras_event.h |  1 +
>>  mm/memory-failure.c     | 12 ++++++++++++
>>  3 files changed, 14 insertions(+)
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index c9bada4096ac..ef98cff2b253 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -3253,6 +3253,7 @@ enum mf_action_page_type {
>>  	MF_MSG_BUDDY,
>>  	MF_MSG_DAX,
>>  	MF_MSG_UNSPLIT_THP,
>> +	MF_MSG_DIFFERENT_PAGE_SIZE,
>>  	MF_MSG_UNKNOWN,
>>  };
>>  
>> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
>> index d0337a41141c..1e694fd239b9 100644
>> --- a/include/ras/ras_event.h
>> +++ b/include/ras/ras_event.h
>> @@ -374,6 +374,7 @@ TRACE_EVENT(aer_event,
>>  	EM ( MF_MSG_BUDDY, "free buddy page" )				\
>>  	EM ( MF_MSG_DAX, "dax page" )					\
>>  	EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" )			\
>> +	EM ( MF_MSG_DIFFERENT_PAGE_SIZE, "different page size" )	\
>>  	EMe ( MF_MSG_UNKNOWN, "unknown page" )
>>  
>>  /*
>> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
>> index 5444a8ef4867..dabecd87ad3f 100644
>> --- a/mm/memory-failure.c
>> +++ b/mm/memory-failure.c
>> @@ -733,6 +733,7 @@ static const char * const action_page_types[] = {
>>  	[MF_MSG_BUDDY]			= "free buddy page",
>>  	[MF_MSG_DAX]			= "dax page",
>>  	[MF_MSG_UNSPLIT_THP]		= "unsplit thp",
>> +	[MF_MSG_DIFFERENT_PAGE_SIZE]	= "different page size",
>>  	[MF_MSG_UNKNOWN]		= "unknown page",
>>  };
>>  
>> @@ -1534,6 +1535,17 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
>>  	}
>>  
>>  	lock_page(head);
>> +
>> +	/**
> 
> This comment section starting with '/**' is considered as kernel-doc comment,
> maybe that's not expected because it just describes an implementation detail.
> So normal comment section with '/*' would be better.
> 

I see. Will change to use "/*".

> Otherwise, looks good to me.
> 
> Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>

Many thanks for review and comment.

> 
>> +	 * The page could have changed compound pages due to race window.
>> +	 * If this happens just bail out.
>> +	 */
>> +	if (!PageHuge(p) || compound_head(p) != head) {
>> +		action_result(pfn, MF_MSG_DIFFERENT_PAGE_SIZE, MF_IGNORED);
>> +		res = -EBUSY;
>> +		goto out;
>> +	}
>> +
>>  	page_flags = head->flags;
>>  
>>  	if (hwpoison_filter(p)) {
>> -- 
>> 2.23.0
Mike Kravetz March 14, 2022, 6:20 p.m. UTC | #3
On 3/11/22 23:46, Miaohe Lin wrote:
> There is a race window where we got the compound_head, the hugetlb page
> could be freed to buddy, or even changed to another compound page just
> before we try to get hwpoison page. Think about the below race window:
>   CPU 1					  CPU 2
>   memory_failure_hugetlb
>   struct page *head = compound_head(p);
> 					  hugetlb page might be freed to
> 					  buddy, or even changed to another
> 					  compound page.
> 
>   get_hwpoison_page -- page is not what we want now...
> 
> If this race happens, just bail out. Also MF_MSG_DIFFERENT_PAGE_SIZE is
> introduced to record this event.
> 
> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
> ---
>  include/linux/mm.h      |  1 +
>  include/ras/ras_event.h |  1 +
>  mm/memory-failure.c     | 12 ++++++++++++
>  3 files changed, 14 insertions(+)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index c9bada4096ac..ef98cff2b253 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3253,6 +3253,7 @@ enum mf_action_page_type {
>  	MF_MSG_BUDDY,
>  	MF_MSG_DAX,
>  	MF_MSG_UNSPLIT_THP,
> +	MF_MSG_DIFFERENT_PAGE_SIZE,
>  	MF_MSG_UNKNOWN,
>  };
>  
> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
> index d0337a41141c..1e694fd239b9 100644
> --- a/include/ras/ras_event.h
> +++ b/include/ras/ras_event.h
> @@ -374,6 +374,7 @@ TRACE_EVENT(aer_event,
>  	EM ( MF_MSG_BUDDY, "free buddy page" )				\
>  	EM ( MF_MSG_DAX, "dax page" )					\
>  	EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" )			\
> +	EM ( MF_MSG_DIFFERENT_PAGE_SIZE, "different page size" )	\
>  	EMe ( MF_MSG_UNKNOWN, "unknown page" )
>  
>  /*
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 5444a8ef4867..dabecd87ad3f 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -733,6 +733,7 @@ static const char * const action_page_types[] = {
>  	[MF_MSG_BUDDY]			= "free buddy page",
>  	[MF_MSG_DAX]			= "dax page",
>  	[MF_MSG_UNSPLIT_THP]		= "unsplit thp",
> +	[MF_MSG_DIFFERENT_PAGE_SIZE]	= "different page size",
>  	[MF_MSG_UNKNOWN]		= "unknown page",
>  };
>  
> @@ -1534,6 +1535,17 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
>  	}
>  
>  	lock_page(head);
> +
> +	/**
> +	 * The page could have changed compound pages due to race window.
> +	 * If this happens just bail out.
> +	 */
> +	if (!PageHuge(p) || compound_head(p) != head) {
> +		action_result(pfn, MF_MSG_DIFFERENT_PAGE_SIZE, MF_IGNORED);
> +		res = -EBUSY;

We have discussed this race in other versions of the patch.  When we encounter
the race, we have likely marked poison on the wrong page.  Correct?

Instead of printing a "different page size", would it be better to perhaps:
- Print a message that wrong page may be marked for poison?
- Clear the poison flag in the "head page" previously set?
Miaohe Lin March 15, 2022, 2:19 p.m. UTC | #4
On 2022/3/15 2:20, Mike Kravetz wrote:
> On 3/11/22 23:46, Miaohe Lin wrote:
>> There is a race window where we got the compound_head, the hugetlb page
>> could be freed to buddy, or even changed to another compound page just
>> before we try to get hwpoison page. Think about the below race window:
>>   CPU 1					  CPU 2
>>   memory_failure_hugetlb
>>   struct page *head = compound_head(p);
>> 					  hugetlb page might be freed to
>> 					  buddy, or even changed to another
>> 					  compound page.
>>
>>   get_hwpoison_page -- page is not what we want now...
>>
>> If this race happens, just bail out. Also MF_MSG_DIFFERENT_PAGE_SIZE is
>> introduced to record this event.
>>
>> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
>> ---
>>  include/linux/mm.h      |  1 +
>>  include/ras/ras_event.h |  1 +
>>  mm/memory-failure.c     | 12 ++++++++++++
>>  3 files changed, 14 insertions(+)
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index c9bada4096ac..ef98cff2b253 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -3253,6 +3253,7 @@ enum mf_action_page_type {
>>  	MF_MSG_BUDDY,
>>  	MF_MSG_DAX,
>>  	MF_MSG_UNSPLIT_THP,
>> +	MF_MSG_DIFFERENT_PAGE_SIZE,
>>  	MF_MSG_UNKNOWN,
>>  };
>>  
>> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
>> index d0337a41141c..1e694fd239b9 100644
>> --- a/include/ras/ras_event.h
>> +++ b/include/ras/ras_event.h
>> @@ -374,6 +374,7 @@ TRACE_EVENT(aer_event,
>>  	EM ( MF_MSG_BUDDY, "free buddy page" )				\
>>  	EM ( MF_MSG_DAX, "dax page" )					\
>>  	EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" )			\
>> +	EM ( MF_MSG_DIFFERENT_PAGE_SIZE, "different page size" )	\
>>  	EMe ( MF_MSG_UNKNOWN, "unknown page" )
>>  
>>  /*
>> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
>> index 5444a8ef4867..dabecd87ad3f 100644
>> --- a/mm/memory-failure.c
>> +++ b/mm/memory-failure.c
>> @@ -733,6 +733,7 @@ static const char * const action_page_types[] = {
>>  	[MF_MSG_BUDDY]			= "free buddy page",
>>  	[MF_MSG_DAX]			= "dax page",
>>  	[MF_MSG_UNSPLIT_THP]		= "unsplit thp",
>> +	[MF_MSG_DIFFERENT_PAGE_SIZE]	= "different page size",
>>  	[MF_MSG_UNKNOWN]		= "unknown page",
>>  };
>>  
>> @@ -1534,6 +1535,17 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
>>  	}
>>  
>>  	lock_page(head);
>> +
>> +	/**
>> +	 * The page could have changed compound pages due to race window.
>> +	 * If this happens just bail out.
>> +	 */
>> +	if (!PageHuge(p) || compound_head(p) != head) {
>> +		action_result(pfn, MF_MSG_DIFFERENT_PAGE_SIZE, MF_IGNORED);
>> +		res = -EBUSY;
> 
> We have discussed this race in other versions of the patch.  When we encounter
> the race, we have likely marked poison on the wrong page.  Correct?
> 

Many thanks for comment.
I assume that Naoya's patch "mm/hwpoison: set PageHWPoison after taking page lock
in memory_failure_hugetlb()" would set the PageHWPoison after the above check.
So I think the below operation is not needed as PageHWPoison is not set yet.
Does this makes sense for you?

Thanks.

> Instead of printing a "different page size", would it be better to perhaps:
> - Print a message that wrong page may be marked for poison?
> - Clear the poison flag in the "head page" previously set?
>
Yang Shi March 15, 2022, 6:19 p.m. UTC | #5
On Tue, Mar 15, 2022 at 7:19 AM Miaohe Lin <linmiaohe@huawei.com> wrote:
>
> On 2022/3/15 2:20, Mike Kravetz wrote:
> > On 3/11/22 23:46, Miaohe Lin wrote:
> >> There is a race window where we got the compound_head, the hugetlb page
> >> could be freed to buddy, or even changed to another compound page just
> >> before we try to get hwpoison page. Think about the below race window:
> >>   CPU 1                                        CPU 2
> >>   memory_failure_hugetlb
> >>   struct page *head = compound_head(p);
> >>                                        hugetlb page might be freed to
> >>                                        buddy, or even changed to another
> >>                                        compound page.
> >>
> >>   get_hwpoison_page -- page is not what we want now...
> >>
> >> If this race happens, just bail out. Also MF_MSG_DIFFERENT_PAGE_SIZE is
> >> introduced to record this event.
> >>
> >> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
> >> ---
> >>  include/linux/mm.h      |  1 +
> >>  include/ras/ras_event.h |  1 +
> >>  mm/memory-failure.c     | 12 ++++++++++++
> >>  3 files changed, 14 insertions(+)
> >>
> >> diff --git a/include/linux/mm.h b/include/linux/mm.h
> >> index c9bada4096ac..ef98cff2b253 100644
> >> --- a/include/linux/mm.h
> >> +++ b/include/linux/mm.h
> >> @@ -3253,6 +3253,7 @@ enum mf_action_page_type {
> >>      MF_MSG_BUDDY,
> >>      MF_MSG_DAX,
> >>      MF_MSG_UNSPLIT_THP,
> >> +    MF_MSG_DIFFERENT_PAGE_SIZE,
> >>      MF_MSG_UNKNOWN,
> >>  };
> >>
> >> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
> >> index d0337a41141c..1e694fd239b9 100644
> >> --- a/include/ras/ras_event.h
> >> +++ b/include/ras/ras_event.h
> >> @@ -374,6 +374,7 @@ TRACE_EVENT(aer_event,
> >>      EM ( MF_MSG_BUDDY, "free buddy page" )                          \
> >>      EM ( MF_MSG_DAX, "dax page" )                                   \
> >>      EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" )                        \
> >> +    EM ( MF_MSG_DIFFERENT_PAGE_SIZE, "different page size" )        \
> >>      EMe ( MF_MSG_UNKNOWN, "unknown page" )
> >>
> >>  /*
> >> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> >> index 5444a8ef4867..dabecd87ad3f 100644
> >> --- a/mm/memory-failure.c
> >> +++ b/mm/memory-failure.c
> >> @@ -733,6 +733,7 @@ static const char * const action_page_types[] = {
> >>      [MF_MSG_BUDDY]                  = "free buddy page",
> >>      [MF_MSG_DAX]                    = "dax page",
> >>      [MF_MSG_UNSPLIT_THP]            = "unsplit thp",
> >> +    [MF_MSG_DIFFERENT_PAGE_SIZE]    = "different page size",
> >>      [MF_MSG_UNKNOWN]                = "unknown page",
> >>  };
> >>
> >> @@ -1534,6 +1535,17 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
> >>      }
> >>
> >>      lock_page(head);
> >> +
> >> +    /**
> >> +     * The page could have changed compound pages due to race window.
> >> +     * If this happens just bail out.
> >> +     */
> >> +    if (!PageHuge(p) || compound_head(p) != head) {
> >> +            action_result(pfn, MF_MSG_DIFFERENT_PAGE_SIZE, MF_IGNORED);
> >> +            res = -EBUSY;
> >
> > We have discussed this race in other versions of the patch.  When we encounter
> > the race, we have likely marked poison on the wrong page.  Correct?
> >
>
> Many thanks for comment.
> I assume that Naoya's patch "mm/hwpoison: set PageHWPoison after taking page lock
> in memory_failure_hugetlb()" would set the PageHWPoison after the above check.
> So I think the below operation is not needed as PageHWPoison is not set yet.
> Does this makes sense for you?

I'm wondering if it might be better and helpful for review to squash
this patch with Naoya's patch together? It seems we always missed the
other part when reviewing the patches.

>
> Thanks.
>
> > Instead of printing a "different page size", would it be better to perhaps:
> > - Print a message that wrong page may be marked for poison?
> > - Clear the poison flag in the "head page" previously set?
> >
>
Miaohe Lin March 16, 2022, 8:18 a.m. UTC | #6
On 2022/3/16 2:19, Yang Shi wrote:
> On Tue, Mar 15, 2022 at 7:19 AM Miaohe Lin <linmiaohe@huawei.com> wrote:
>>
>> On 2022/3/15 2:20, Mike Kravetz wrote:
>>> On 3/11/22 23:46, Miaohe Lin wrote:
>>>> There is a race window where we got the compound_head, the hugetlb page
>>>> could be freed to buddy, or even changed to another compound page just
>>>> before we try to get hwpoison page. Think about the below race window:
>>>>   CPU 1                                        CPU 2
>>>>   memory_failure_hugetlb
>>>>   struct page *head = compound_head(p);
>>>>                                        hugetlb page might be freed to
>>>>                                        buddy, or even changed to another
>>>>                                        compound page.
>>>>
>>>>   get_hwpoison_page -- page is not what we want now...
>>>>
>>>> If this race happens, just bail out. Also MF_MSG_DIFFERENT_PAGE_SIZE is
>>>> introduced to record this event.
>>>>
>>>> Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
>>>> ---
>>>>  include/linux/mm.h      |  1 +
>>>>  include/ras/ras_event.h |  1 +
>>>>  mm/memory-failure.c     | 12 ++++++++++++
>>>>  3 files changed, 14 insertions(+)
>>>>
>>>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>>>> index c9bada4096ac..ef98cff2b253 100644
>>>> --- a/include/linux/mm.h
>>>> +++ b/include/linux/mm.h
>>>> @@ -3253,6 +3253,7 @@ enum mf_action_page_type {
>>>>      MF_MSG_BUDDY,
>>>>      MF_MSG_DAX,
>>>>      MF_MSG_UNSPLIT_THP,
>>>> +    MF_MSG_DIFFERENT_PAGE_SIZE,
>>>>      MF_MSG_UNKNOWN,
>>>>  };
>>>>
>>>> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
>>>> index d0337a41141c..1e694fd239b9 100644
>>>> --- a/include/ras/ras_event.h
>>>> +++ b/include/ras/ras_event.h
>>>> @@ -374,6 +374,7 @@ TRACE_EVENT(aer_event,
>>>>      EM ( MF_MSG_BUDDY, "free buddy page" )                          \
>>>>      EM ( MF_MSG_DAX, "dax page" )                                   \
>>>>      EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" )                        \
>>>> +    EM ( MF_MSG_DIFFERENT_PAGE_SIZE, "different page size" )        \
>>>>      EMe ( MF_MSG_UNKNOWN, "unknown page" )
>>>>
>>>>  /*
>>>> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
>>>> index 5444a8ef4867..dabecd87ad3f 100644
>>>> --- a/mm/memory-failure.c
>>>> +++ b/mm/memory-failure.c
>>>> @@ -733,6 +733,7 @@ static const char * const action_page_types[] = {
>>>>      [MF_MSG_BUDDY]                  = "free buddy page",
>>>>      [MF_MSG_DAX]                    = "dax page",
>>>>      [MF_MSG_UNSPLIT_THP]            = "unsplit thp",
>>>> +    [MF_MSG_DIFFERENT_PAGE_SIZE]    = "different page size",
>>>>      [MF_MSG_UNKNOWN]                = "unknown page",
>>>>  };
>>>>
>>>> @@ -1534,6 +1535,17 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
>>>>      }
>>>>
>>>>      lock_page(head);
>>>> +
>>>> +    /**
>>>> +     * The page could have changed compound pages due to race window.
>>>> +     * If this happens just bail out.
>>>> +     */
>>>> +    if (!PageHuge(p) || compound_head(p) != head) {
>>>> +            action_result(pfn, MF_MSG_DIFFERENT_PAGE_SIZE, MF_IGNORED);
>>>> +            res = -EBUSY;
>>>
>>> We have discussed this race in other versions of the patch.  When we encounter
>>> the race, we have likely marked poison on the wrong page.  Correct?
>>>
>>
>> Many thanks for comment.
>> I assume that Naoya's patch "mm/hwpoison: set PageHWPoison after taking page lock
>> in memory_failure_hugetlb()" would set the PageHWPoison after the above check.
>> So I think the below operation is not needed as PageHWPoison is not set yet.
>> Does this makes sense for you?
> 
> I'm wondering if it might be better and helpful for review to squash
> this patch with Naoya's patch together? It seems we always missed the
> other part when reviewing the patches.
> 

Sounds like a good idea. This would make the reviewer's life easier. I'm fine if
this patch is squashed into Naoya's patch altogether. But we might have to consult
the opinion from Naoya.

Thanks.

>>
>> Thanks.
>>
>>> Instead of printing a "different page size", would it be better to perhaps:
>>> - Print a message that wrong page may be marked for poison?
>>> - Clear the poison flag in the "head page" previously set?
>>>
>>
> .
>
HORIGUCHI NAOYA(堀口 直也) March 16, 2022, 8:30 a.m. UTC | #7
On Wed, Mar 16, 2022 at 04:18:30PM +0800, Miaohe Lin wrote:
> On 2022/3/16 2:19, Yang Shi wrote:
> > On Tue, Mar 15, 2022 at 7:19 AM Miaohe Lin <linmiaohe@huawei.com> wrote:
...
> >>
> >>
> >> Many thanks for comment.
> >> I assume that Naoya's patch "mm/hwpoison: set PageHWPoison after taking page lock
> >> in memory_failure_hugetlb()" would set the PageHWPoison after the above check.
> >> So I think the below operation is not needed as PageHWPoison is not set yet.
> >> Does this makes sense for you?
> > 
> > I'm wondering if it might be better and helpful for review to squash
> > this patch with Naoya's patch together? It seems we always missed the
> > other part when reviewing the patches.
> > 
> 
> Sounds like a good idea. This would make the reviewer's life easier. I'm fine if
> this patch is squashed into Naoya's patch altogether. But we might have to consult
> the opinion from Naoya.

I'm fine with the squashing, so I'll send v4.

Thanks,
Naoya Horiguchi
Miaohe Lin March 16, 2022, 8:41 a.m. UTC | #8
On 2022/3/16 16:30, HORIGUCHI NAOYA(堀口 直也) wrote:
> On Wed, Mar 16, 2022 at 04:18:30PM +0800, Miaohe Lin wrote:
>> On 2022/3/16 2:19, Yang Shi wrote:
>>> On Tue, Mar 15, 2022 at 7:19 AM Miaohe Lin <linmiaohe@huawei.com> wrote:
> ...
>>>>
>>>>
>>>> Many thanks for comment.
>>>> I assume that Naoya's patch "mm/hwpoison: set PageHWPoison after taking page lock
>>>> in memory_failure_hugetlb()" would set the PageHWPoison after the above check.
>>>> So I think the below operation is not needed as PageHWPoison is not set yet.
>>>> Does this makes sense for you?
>>>
>>> I'm wondering if it might be better and helpful for review to squash
>>> this patch with Naoya's patch together? It seems we always missed the
>>> other part when reviewing the patches.
>>>
>>
>> Sounds like a good idea. This would make the reviewer's life easier. I'm fine if
>> this patch is squashed into Naoya's patch altogether. But we might have to consult
>> the opinion from Naoya.
> 
> I'm fine with the squashing, so I'll send v4.

Many thanks for doing this.

So I'll send v3 later to fix the "stale commit id" in the commit log of the [PATCH v2 2/3]
mm/memory-failure.c: avoid calling invalidate_inode_page() with unexpected pages.

Thanks.

> 
> Thanks,
> Naoya Horiguchi
>
diff mbox series

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c9bada4096ac..ef98cff2b253 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3253,6 +3253,7 @@  enum mf_action_page_type {
 	MF_MSG_BUDDY,
 	MF_MSG_DAX,
 	MF_MSG_UNSPLIT_THP,
+	MF_MSG_DIFFERENT_PAGE_SIZE,
 	MF_MSG_UNKNOWN,
 };
 
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index d0337a41141c..1e694fd239b9 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -374,6 +374,7 @@  TRACE_EVENT(aer_event,
 	EM ( MF_MSG_BUDDY, "free buddy page" )				\
 	EM ( MF_MSG_DAX, "dax page" )					\
 	EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" )			\
+	EM ( MF_MSG_DIFFERENT_PAGE_SIZE, "different page size" )	\
 	EMe ( MF_MSG_UNKNOWN, "unknown page" )
 
 /*
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 5444a8ef4867..dabecd87ad3f 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -733,6 +733,7 @@  static const char * const action_page_types[] = {
 	[MF_MSG_BUDDY]			= "free buddy page",
 	[MF_MSG_DAX]			= "dax page",
 	[MF_MSG_UNSPLIT_THP]		= "unsplit thp",
+	[MF_MSG_DIFFERENT_PAGE_SIZE]	= "different page size",
 	[MF_MSG_UNKNOWN]		= "unknown page",
 };
 
@@ -1534,6 +1535,17 @@  static int memory_failure_hugetlb(unsigned long pfn, int flags)
 	}
 
 	lock_page(head);
+
+	/**
+	 * The page could have changed compound pages due to race window.
+	 * If this happens just bail out.
+	 */
+	if (!PageHuge(p) || compound_head(p) != head) {
+		action_result(pfn, MF_MSG_DIFFERENT_PAGE_SIZE, MF_IGNORED);
+		res = -EBUSY;
+		goto out;
+	}
+
 	page_flags = head->flags;
 
 	if (hwpoison_filter(p)) {