
[4/8] mm/memory-failure: Convert shake_page() to shake_folio()

Message ID 20240229212036.2160900-5-willy@infradead.org (mailing list archive)
State New
Series Some cleanups for memory-failure

Commit Message

Matthew Wilcox Feb. 29, 2024, 9:20 p.m. UTC
Removes two calls to compound_head().  Move the prototype to
internal.h; we definitely don't want code outside mm using it.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/mm.h   |  1 -
 mm/hwpoison-inject.c | 11 ++++++-----
 mm/internal.h        |  1 +
 mm/memory-failure.c  | 15 ++++++++++-----
 4 files changed, 17 insertions(+), 11 deletions(-)

Comments

Miaohe Lin March 6, 2024, 9:31 a.m. UTC | #1
On 2024/3/1 5:20, Matthew Wilcox (Oracle) wrote:
> Removes two calls to compound_head().  Move the prototype to
> internal.h; we definitely don't want code outside mm using it.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> ---
>  include/linux/mm.h   |  1 -
>  mm/hwpoison-inject.c | 11 ++++++-----
>  mm/internal.h        |  1 +
>  mm/memory-failure.c  | 15 ++++++++++-----
>  4 files changed, 17 insertions(+), 11 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 03e4841673d7..184f57cdab60 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3962,7 +3962,6 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
>  extern int memory_failure(unsigned long pfn, int flags);
>  extern void memory_failure_queue_kick(int cpu);
>  extern int unpoison_memory(unsigned long pfn);
> -extern void shake_page(struct page *p);
>  extern atomic_long_t num_poisoned_pages __read_mostly;
>  extern int soft_offline_page(unsigned long pfn, int flags);
>  #ifdef CONFIG_MEMORY_FAILURE
> diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
> index d0548e382b6b..c9d653f51e45 100644
> --- a/mm/hwpoison-inject.c
> +++ b/mm/hwpoison-inject.c
> @@ -15,7 +15,7 @@ static int hwpoison_inject(void *data, u64 val)
>  {
>  	unsigned long pfn = val;
>  	struct page *p;
> -	struct page *hpage;
> +	struct folio *folio;
>  	int err;
>  
>  	if (!capable(CAP_SYS_ADMIN))
> @@ -25,16 +25,17 @@ static int hwpoison_inject(void *data, u64 val)
>  		return -ENXIO;
>  
>  	p = pfn_to_page(pfn);
> -	hpage = compound_head(p);
> +	folio = page_folio(p);
>  
>  	if (!hwpoison_filter_enable)
>  		goto inject;
>  
> -	shake_page(hpage);
> +	shake_folio(folio);
>  	/*
>  	 * This implies unable to support non-LRU pages except free page.
>  	 */
> -	if (!PageLRU(hpage) && !PageHuge(p) && !is_free_buddy_page(p))
> +	if (!folio_test_lru(folio) && !folio_test_hugetlb(folio) &&
> +	    !is_free_buddy_page(p))
>  		return 0;
>  
>  	/*
> @@ -42,7 +43,7 @@ static int hwpoison_inject(void *data, u64 val)
>  	 * the targeted owner (or on a free page).
>  	 * memory_failure() will redo the check reliably inside page lock.
>  	 */
> -	err = hwpoison_filter(hpage);
> +	err = hwpoison_filter(&folio->page);
>  	if (err)
>  		return 0;
>  
> diff --git a/mm/internal.h b/mm/internal.h
> index 13f4d6ccccdf..b900730f43c0 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -855,6 +855,7 @@ static inline int find_next_best_node(int node, nodemask_t *used_node_mask)
>  /*
>   * mm/memory-failure.c
>   */
> +void shake_folio(struct folio *folio);
>  extern int hwpoison_filter(struct page *p);
>  
>  extern u32 hwpoison_filter_dev_major;
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 40a8964954e5..27dc21063552 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -358,20 +358,25 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
>   * Unknown page type encountered. Try to check whether it can turn PageLRU by
>   * lru_add_drain_all.
>   */
> -void shake_page(struct page *p)
> +void shake_folio(struct folio *folio)

It might not be a good idea to convert shake_page() into shake_folio(). shake_page() can
be called without holding page refcnt. So the below race could happen:

 hwpoison_inject
  folio = page_folio(p); --> assume p is 4k page [1]
  shake_folio(folio)
   folio_test_slab(folio)
    test_bit(PG_slab, folio_flags(folio, FOLIO_PF_NO_TAIL)) [2]
     VM_BUG_ON_PGFLAGS(PageTail(page), page) in folio_flags

Between [1] and [2], the page can become a tail page of a THP, so the VM_BUG_ON_PGFLAGS will trigger.
Or am I missing something?

Thanks.
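
For reference, the assertion named at [2] lives in folio_flags(); a simplified
sketch of that helper as it stood around this time (include/linux/page-flags.h,
details elided, so the exact body may differ):

static __always_inline unsigned long *folio_flags(struct folio *folio, unsigned n)
{
        struct page *page = &folio->page;

        /* This is the assertion the race above can trip: */
        VM_BUG_ON_PGFLAGS(PageTail(page), page);
        VM_BUG_ON_PGFLAGS(n > 0 && !test_bit(PG_head, &page->flags), page);
        return &page[n].flags;
}

folio_test_slab() is generated with the PF_NO_TAIL policy, so it expands to
test_bit(PG_slab, folio_flags(folio, FOLIO_PF_NO_TAIL)), which is how step [2]
of the trace ends up in this assertion.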

>  {
> -	if (PageHuge(p))
> +	if (folio_test_hugetlb(folio))
>  		return;
>  	/*
>  	 * TODO: Could shrink slab caches here if a lightweight range-based
>  	 * shrinker will be available.
>  	 */
> -	if (PageSlab(p))
> +	if (folio_test_slab(folio))
>  		return;
>  
>  	lru_add_drain_all();
>  }
> -EXPORT_SYMBOL_GPL(shake_page);
> +EXPORT_SYMBOL_GPL(shake_folio);
> +
> +static void shake_page(struct page *page)
> +{
> +	shake_folio(page_folio(page));
> +}
>  
>  static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
>  		unsigned long address)
> @@ -1639,7 +1644,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
>  	 * shake_page() again to ensure that it's flushed.
>  	 */
>  	if (mlocked)
> -		shake_page(hpage);
> +		shake_folio(folio);
>  
>  	/*
>  	 * Now that the dirty bit has been propagated to the
>
Matthew Wilcox April 8, 2024, 3:36 p.m. UTC | #2
On Wed, Mar 06, 2024 at 05:31:19PM +0800, Miaohe Lin wrote:
> > -void shake_page(struct page *p)
> > +void shake_folio(struct folio *folio)
> 
> It might not be a good idea to convert shake_page() into shake_folio(). shake_page() can
> be called without holding page refcnt. So the below race could happen:
> 
>  hwpoison_inject
>   folio = page_folio(p); --> assume p is 4k page [1]
>   shake_folio(folio)
>    folio_test_slab(folio)
>     test_bit(PG_slab, folio_flags(folio, FOLIO_PF_NO_TAIL)) [2]
>      VM_BUG_ON_PGFLAGS(PageTail(page), page) in folio_flags
> 
> Between [1] and [2], the page can become a tail page of a THP, so the VM_BUG_ON_PGFLAGS will trigger.
> Or am I missing something?

No, you're not missing anything.  This race can happen.  However, I've
removed the VM_BUG_ON for folio_test_slab() with "mm: free up PG_slab".
Now it goes through:

static inline bool PageSlab(const struct page *page)
{
        return folio_test_slab(page_folio(page));
}

static __always_inline bool folio_test_##fname(const struct folio *folio)\
{                                                                       \
        return folio_test_type(folio, PG_##lname);                      \
}                                                                       \

#define folio_test_type(folio, flag)                                    \
        ((folio->page.page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)

which has no assertion that the folio is not a tail page.  Maybe it
should, but until then we'll silently get the wrong result ;-)
Jane Chu April 8, 2024, 6:31 p.m. UTC | #3
On 4/8/2024 8:36 AM, Matthew Wilcox wrote:

> On Wed, Mar 06, 2024 at 05:31:19PM +0800, Miaohe Lin wrote:
>>> -void shake_page(struct page *p)
>>> +void shake_folio(struct folio *folio)
>> It might not be a good idea to convert shake_page() into shake_folio(). shake_page() can
>> be called without holding page refcnt. So the below race could happen:
>>
>>   hwpoison_inject
>>    folio = page_folio(p); --> assume p is 4k page [1]
>>    shake_folio(folio)
>>     folio_test_slab(folio)
>>      test_bit(PG_slab, folio_flags(folio, FOLIO_PF_NO_TAIL)) [2]
>>       VM_BUG_ON_PGFLAGS(PageTail(page), page) in folio_flags
>>
>> Between [1] and [2], the page can become a tail page of a THP, so the VM_BUG_ON_PGFLAGS will trigger.
>> Or am I missing something?
> No, you're not missing anything.  This race can happen.  However, I've
> removed the VM_BUG_ON for folio_test_slab() with "mm: free up PG_slab".
> Now it goes through:
>
> static inline bool PageSlab(const struct page *page)
> {
>          return folio_test_slab(page_folio(page));
> }
>
> static __always_inline bool folio_test_##fname(const struct folio *folio)\
> {                                                                       \
>          return folio_test_type(folio, PG_##lname);                      \
> }                                                                       \
>
> #define folio_test_type(folio, flag)                                    \
>          ((folio->page.page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
>
> which has no assertion that the folio is not a tail page.  Maybe it
> should, but until then we'll silently get the wrong result ;-)
>
In this case (4k page -> THP), the act of drain will be triggered with 
perhaps nothing to do, so it looks like a harmless 'wrong result' to me.

thanks,

-jane
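
Sketching that worst case against the shake_folio() this patch introduces (the
comments below are illustrative only, assuming the folio pointer has gone stale
as in Miaohe's trace; they are not part of the patch):

void shake_folio(struct folio *folio)
{
        if (folio_test_hugetlb(folio))
                return;
        /*
         * With "mm: free up PG_slab" applied, this reads folio->page.page_type
         * and no longer asserts that the page is not a tail page, so the race
         * can at most make the test return the wrong answer.
         */
        if (folio_test_slab(folio))
                return;
        /*
         * lru_add_drain_all() takes no folio argument; it only drains the
         * per-CPU LRU caches, which is safe whatever the page has meanwhile
         * become. At worst it is the drain "with perhaps nothing to do" that
         * Jane describes.
         */
        lru_add_drain_all();
}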
Miaohe Lin April 10, 2024, 4:01 a.m. UTC | #4
On 2024/4/9 2:31, Jane Chu wrote:
> On 4/8/2024 8:36 AM, Matthew Wilcox wrote:
> 
>> On Wed, Mar 06, 2024 at 05:31:19PM +0800, Miaohe Lin wrote:
>>>> -void shake_page(struct page *p)
>>>> +void shake_folio(struct folio *folio)
>>> It might not be a good idea to convert shake_page() into shake_folio(). shake_page() can
>>> be called without holding page refcnt. So the below race could happen:
>>>
>>>   hwpoison_inject
>>>    folio = page_folio(p); --> assume p is 4k page [1]
>>>    shake_folio(folio)
>>>     folio_test_slab(folio)
>>>      test_bit(PG_slab, folio_flags(folio, FOLIO_PF_NO_TAIL)) [2]
>>>       VM_BUG_ON_PGFLAGS(PageTail(page), page) in folio_flags
>>>
>>> Between [1] and [2], the page can become a tail page of a THP, so the VM_BUG_ON_PGFLAGS will trigger.
>>> Or am I missing something?
>> No, you're not missing anything.  This race can happen.  However, I've
>> removed the VM_BUG_ON for folio_test_slab() with "mm: free up PG_slab".
>> Now it goes through:
>>
>> static inline bool PageSlab(const struct page *page)
>> {
>>          return folio_test_slab(page_folio(page));
>> }
>>
>> static __always_inline bool folio_test_##fname(const struct folio *folio)\
>> {                                                                       \
>>          return folio_test_type(folio, PG_##lname);                      \
>> }                                                                       \
>>
>> #define folio_test_type(folio, flag)                                    \
>>          ((folio->page.page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
>>
>> which has no assertion that the folio is not a tail page.  Maybe it
>> should, but until then we'll silently get the wrong result ;-)
>>
> In this case (4k page -> THP), the act of drain will be triggered with perhaps nothing to do, so it looks like a harmless 'wrong result' to me.
> 
> thanks,

I tend to agree that with "free up PG_slab", this race should be harmless.
Thanks both.
.

> 
> -jane
> 
> .

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 03e4841673d7..184f57cdab60 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3962,7 +3962,6 @@  int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
 extern int memory_failure(unsigned long pfn, int flags);
 extern void memory_failure_queue_kick(int cpu);
 extern int unpoison_memory(unsigned long pfn);
-extern void shake_page(struct page *p);
 extern atomic_long_t num_poisoned_pages __read_mostly;
 extern int soft_offline_page(unsigned long pfn, int flags);
 #ifdef CONFIG_MEMORY_FAILURE
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index d0548e382b6b..c9d653f51e45 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -15,7 +15,7 @@  static int hwpoison_inject(void *data, u64 val)
 {
 	unsigned long pfn = val;
 	struct page *p;
-	struct page *hpage;
+	struct folio *folio;
 	int err;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -25,16 +25,17 @@  static int hwpoison_inject(void *data, u64 val)
 		return -ENXIO;
 
 	p = pfn_to_page(pfn);
-	hpage = compound_head(p);
+	folio = page_folio(p);
 
 	if (!hwpoison_filter_enable)
 		goto inject;
 
-	shake_page(hpage);
+	shake_folio(folio);
 	/*
 	 * This implies unable to support non-LRU pages except free page.
 	 */
-	if (!PageLRU(hpage) && !PageHuge(p) && !is_free_buddy_page(p))
+	if (!folio_test_lru(folio) && !folio_test_hugetlb(folio) &&
+	    !is_free_buddy_page(p))
 		return 0;
 
 	/*
@@ -42,7 +43,7 @@  static int hwpoison_inject(void *data, u64 val)
 	 * the targeted owner (or on a free page).
 	 * memory_failure() will redo the check reliably inside page lock.
 	 */
-	err = hwpoison_filter(hpage);
+	err = hwpoison_filter(&folio->page);
 	if (err)
 		return 0;
 
diff --git a/mm/internal.h b/mm/internal.h
index 13f4d6ccccdf..b900730f43c0 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -855,6 +855,7 @@  static inline int find_next_best_node(int node, nodemask_t *used_node_mask)
 /*
  * mm/memory-failure.c
  */
+void shake_folio(struct folio *folio);
 extern int hwpoison_filter(struct page *p);
 
 extern u32 hwpoison_filter_dev_major;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 40a8964954e5..27dc21063552 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -358,20 +358,25 @@  static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
  * Unknown page type encountered. Try to check whether it can turn PageLRU by
  * lru_add_drain_all.
  */
-void shake_page(struct page *p)
+void shake_folio(struct folio *folio)
 {
-	if (PageHuge(p))
+	if (folio_test_hugetlb(folio))
 		return;
 	/*
 	 * TODO: Could shrink slab caches here if a lightweight range-based
 	 * shrinker will be available.
 	 */
-	if (PageSlab(p))
+	if (folio_test_slab(folio))
 		return;
 
 	lru_add_drain_all();
 }
-EXPORT_SYMBOL_GPL(shake_page);
+EXPORT_SYMBOL_GPL(shake_folio);
+
+static void shake_page(struct page *page)
+{
+	shake_folio(page_folio(page));
+}
 
 static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
 		unsigned long address)
@@ -1639,7 +1644,7 @@  static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * shake_page() again to ensure that it's flushed.
 	 */
 	if (mlocked)
-		shake_page(hpage);
+		shake_folio(folio);
 
 	/*
 	 * Now that the dirty bit has been propagated to the