diff mbox series

[1/8] mm/memory-failure: Remove fsdax_pgoff argument from __add_to_kill

Message ID 20240229212036.2160900-2-willy@infradead.org (mailing list archive)
State New
Headers show
Series Some cleanups for memory-failure | expand

Commit Message

Matthew Wilcox (Oracle) Feb. 29, 2024, 9:20 p.m. UTC
Unify the KSM and DAX codepaths by calculating the addr in
add_to_kill_fsdax() instead of telling __add_to_kill() to calculate it.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Dan Williams <dan.j.williams@intel.com>
---
 mm/memory-failure.c | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

Comments

Miaohe Lin March 4, 2024, 12:09 p.m. UTC | #1
On 2024/3/1 5:20, Matthew Wilcox (Oracle) wrote:
> Unify the KSM and DAX codepaths by calculating the addr in
> add_to_kill_fsdax() instead of telling __add_to_kill() to calculate it.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Cc: Dan Williams <dan.j.williams@intel.com>

This patch looks good to me. Thanks.

Acked-by: Miaohe Lin <linmiaohe@huawei.com>

> ---
>  mm/memory-failure.c | 27 +++++++++------------------
>  1 file changed, 9 insertions(+), 18 deletions(-)
> 
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 9349948f1abf..9356227a50bb 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -416,21 +416,13 @@ static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
>   * not much we can do.	We just print a message and ignore otherwise.
>   */
>  
> -#define FSDAX_INVALID_PGOFF ULONG_MAX
> -
>  /*
>   * Schedule a process for later kill.
>   * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
> - *
> - * Note: @fsdax_pgoff is used only when @p is a fsdax page and a
> - * filesystem with a memory failure handler has claimed the
> - * memory_failure event. In all other cases, page->index and
> - * page->mapping are sufficient for mapping the page back to its
> - * corresponding user virtual address.
>   */
>  static void __add_to_kill(struct task_struct *tsk, struct page *p,
>  			  struct vm_area_struct *vma, struct list_head *to_kill,
> -			  unsigned long ksm_addr, pgoff_t fsdax_pgoff)
> +			  unsigned long addr)
>  {
>  	struct to_kill *tk;
>  
> @@ -440,12 +432,10 @@ static void __add_to_kill(struct task_struct *tsk, struct page *p,
>  		return;
>  	}
>  
> -	tk->addr = ksm_addr ? ksm_addr : page_address_in_vma(p, vma);
> -	if (is_zone_device_page(p)) {
> -		if (fsdax_pgoff != FSDAX_INVALID_PGOFF)
> -			tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
> +	tk->addr = addr ? addr : page_address_in_vma(p, vma);
> +	if (is_zone_device_page(p))
>  		tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
> -	} else
> +	else
>  		tk->size_shift = page_shift(compound_head(p));
>  
>  	/*
> @@ -475,7 +465,7 @@ static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
>  				  struct vm_area_struct *vma,
>  				  struct list_head *to_kill)
>  {
> -	__add_to_kill(tsk, p, vma, to_kill, 0, FSDAX_INVALID_PGOFF);
> +	__add_to_kill(tsk, p, vma, to_kill, 0);
>  }
>  
>  #ifdef CONFIG_KSM
> @@ -493,10 +483,10 @@ static bool task_in_to_kill_list(struct list_head *to_kill,
>  }
>  void add_to_kill_ksm(struct task_struct *tsk, struct page *p,
>  		     struct vm_area_struct *vma, struct list_head *to_kill,
> -		     unsigned long ksm_addr)
> +		     unsigned long addr)
>  {
>  	if (!task_in_to_kill_list(to_kill, tsk))
> -		__add_to_kill(tsk, p, vma, to_kill, ksm_addr, FSDAX_INVALID_PGOFF);
> +		__add_to_kill(tsk, p, vma, to_kill, addr);
>  }
>  #endif
>  /*
> @@ -670,7 +660,8 @@ static void add_to_kill_fsdax(struct task_struct *tsk, struct page *p,
>  			      struct vm_area_struct *vma,
>  			      struct list_head *to_kill, pgoff_t pgoff)
>  {
> -	__add_to_kill(tsk, p, vma, to_kill, 0, pgoff);
> +	unsigned long addr = vma_pgoff_address(pgoff, 1, vma);
> +	__add_to_kill(tsk, p, vma, to_kill, addr);
>  }
>  
>  /*
>
Jane Chu March 13, 2024, 2:07 a.m. UTC | #2
On 2/29/2024 1:20 PM, Matthew Wilcox (Oracle) wrote:

> Unify the KSM and DAX codepaths by calculating the addr in
> add_to_kill_fsdax() instead of telling __add_to_kill() to calculate it.
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Cc: Dan Williams <dan.j.williams@intel.com>
> ---
>   mm/memory-failure.c | 27 +++++++++------------------
>   1 file changed, 9 insertions(+), 18 deletions(-)
>
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 9349948f1abf..9356227a50bb 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -416,21 +416,13 @@ static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
>    * not much we can do.	We just print a message and ignore otherwise.
>    */
>   
> -#define FSDAX_INVALID_PGOFF ULONG_MAX
> -
>   /*
>    * Schedule a process for later kill.
>    * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
> - *
> - * Note: @fsdax_pgoff is used only when @p is a fsdax page and a
> - * filesystem with a memory failure handler has claimed the
> - * memory_failure event. In all other cases, page->index and
> - * page->mapping are sufficient for mapping the page back to its
> - * corresponding user virtual address.
>    */
>   static void __add_to_kill(struct task_struct *tsk, struct page *p,
>   			  struct vm_area_struct *vma, struct list_head *to_kill,
> -			  unsigned long ksm_addr, pgoff_t fsdax_pgoff)
> +			  unsigned long addr)
>   {
>   	struct to_kill *tk;
>   
> @@ -440,12 +432,10 @@ static void __add_to_kill(struct task_struct *tsk, struct page *p,
>   		return;
>   	}
>   
> -	tk->addr = ksm_addr ? ksm_addr : page_address_in_vma(p, vma);
> -	if (is_zone_device_page(p)) {
> -		if (fsdax_pgoff != FSDAX_INVALID_PGOFF)
> -			tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
> +	tk->addr = addr ? addr : page_address_in_vma(p, vma);
> +	if (is_zone_device_page(p))
>   		tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);

Not sure about the simplification.  The fsdax special calculation was added by

commit c36e2024957120566efd99395b5c8cc95b5175c1
Author: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Date:   Fri Jun 3 13:37:29 2022 +0800

     mm: introduce mf_dax_kill_procs() for fsdax case

     This new function is a variant of mf_generic_kill_procs that accepts a
     file, offset pair instead of a struct to support multiple files sharing a
     DAX mapping.  It is intended to be called by the file systems as part of
     the memory_failure handler after the file system performed a reverse
     mapping from the storage address to the file and file offset.

     Link: https://lkml.kernel.org/r/20220603053738.1218681-6-ruansy.fnst@fujitsu.com
[..]

@@ -354,9 +357,15 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
 	}
 
 	tk->addr = page_address_in_vma(p, vma);
-	if (is_zone_device_page(p))
-		tk->size_shift = dev_pagemap_mapping_shift(p, vma);
-	else
+	if (is_zone_device_page(p)) {
+		/*
+		 * Since page->mapping is not used for fsdax, we need
+		 * calculate the address based on the vma.
+		 */
+		if (p->pgmap->type == MEMORY_DEVICE_FS_DAX)
+			tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
+		tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
+	} else
 		tk->size_shift = page_shift(compound_head(p));

Looks like it was to deal away with this check

unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
	[..]
	} else if (vma->vm_file->f_mapping != folio->mapping) {
		return -EFAULT;
	}

(https://elixir.bootlin.com/linux/v6.8/C/ident/page_address_in_vma)

But, by providing nr=1 in vma_pgoff_address(fsdax_pgoff, 1, vma), it also avoided dealing with
wrap around, which perhaps doesn't matter? Perhaps no one runs fsdax on a 32-bit machine?
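
For reference, here is roughly what vma_pgoff_address() looks like in
mm/internal.h around this kernel version (a paraphrased sketch, not a
verbatim copy of the tree):

static inline unsigned long
vma_pgoff_address(pgoff_t pgoff, unsigned long nr_pages,
		  struct vm_area_struct *vma)
{
	unsigned long address;

	if (pgoff >= vma->vm_pgoff) {
		/* offset lies at or beyond the start of the vma */
		address = vma->vm_start +
			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
		if (address < vma->vm_start || address >= vma->vm_end)
			address = -EFAULT;
	} else if (pgoff + nr_pages - 1 >= vma->vm_pgoff) {
		/* a multi-page range that starts below the vma but
		 * straddles vm_start; this is the branch the nr argument
		 * and the wrap-around concern are about */
		address = vma->vm_start;
	} else {
		address = -EFAULT;
	}
	return address;
}

If this sketch is faithful, then with nr_pages == 1 the straddling branch
can never be taken (a pgoff below vma->vm_pgoff simply yields -EFAULT),
which is what passing nr=1 sidesteps.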

Add Shiyang Ruan for comment.

thanks,

-jane



> -	} else
> +	else
>   		tk->size_shift = page_shift(compound_head(p));
>   
>   	/*
> @@ -475,7 +465,7 @@ static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
>   				  struct vm_area_struct *vma,
>   				  struct list_head *to_kill)
>   {
> -	__add_to_kill(tsk, p, vma, to_kill, 0, FSDAX_INVALID_PGOFF);
> +	__add_to_kill(tsk, p, vma, to_kill, 0);
>   }
>   
>   #ifdef CONFIG_KSM
> @@ -493,10 +483,10 @@ static bool task_in_to_kill_list(struct list_head *to_kill,
>   }
>   void add_to_kill_ksm(struct task_struct *tsk, struct page *p,
>   		     struct vm_area_struct *vma, struct list_head *to_kill,
> -		     unsigned long ksm_addr)
> +		     unsigned long addr)
>   {
>   	if (!task_in_to_kill_list(to_kill, tsk))
> -		__add_to_kill(tsk, p, vma, to_kill, ksm_addr, FSDAX_INVALID_PGOFF);
> +		__add_to_kill(tsk, p, vma, to_kill, addr);
>   }
>   #endif
>   /*
> @@ -670,7 +660,8 @@ static void add_to_kill_fsdax(struct task_struct *tsk, struct page *p,
>   			      struct vm_area_struct *vma,
>   			      struct list_head *to_kill, pgoff_t pgoff)
>   {
> -	__add_to_kill(tsk, p, vma, to_kill, 0, pgoff);
> +	unsigned long addr = vma_pgoff_address(pgoff, 1, vma);
> +	__add_to_kill(tsk, p, vma, to_kill, addr);
>   }
>   
>   /*
Matthew Wilcox (Oracle) March 13, 2024, 3:23 a.m. UTC | #3
On Tue, Mar 12, 2024 at 07:07:10PM -0700, Jane Chu wrote:
> On 2/29/2024 1:20 PM, Matthew Wilcox (Oracle) wrote:
> 
> > Unify the KSM and DAX codepaths by calculating the addr in
> > add_to_kill_fsdax() instead of telling __add_to_kill() to calculate it.
> > 
> > Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> > Cc: Dan Williams <dan.j.williams@intel.com>
> > ---
> >   mm/memory-failure.c | 27 +++++++++------------------
> >   1 file changed, 9 insertions(+), 18 deletions(-)
> > 
> > diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> > index 9349948f1abf..9356227a50bb 100644
> > --- a/mm/memory-failure.c
> > +++ b/mm/memory-failure.c
> > @@ -416,21 +416,13 @@ static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
> >    * not much we can do.	We just print a message and ignore otherwise.
> >    */
> > -#define FSDAX_INVALID_PGOFF ULONG_MAX
> > -
> >   /*
> >    * Schedule a process for later kill.
> >    * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
> > - *
> > - * Note: @fsdax_pgoff is used only when @p is a fsdax page and a
> > - * filesystem with a memory failure handler has claimed the
> > - * memory_failure event. In all other cases, page->index and
> > - * page->mapping are sufficient for mapping the page back to its
> > - * corresponding user virtual address.
> >    */
> >   static void __add_to_kill(struct task_struct *tsk, struct page *p,
> >   			  struct vm_area_struct *vma, struct list_head *to_kill,
> > -			  unsigned long ksm_addr, pgoff_t fsdax_pgoff)
> > +			  unsigned long addr)
> >   {
> >   	struct to_kill *tk;
> > @@ -440,12 +432,10 @@ static void __add_to_kill(struct task_struct *tsk, struct page *p,
> >   		return;
> >   	}
> > -	tk->addr = ksm_addr ? ksm_addr : page_address_in_vma(p, vma);
> > -	if (is_zone_device_page(p)) {
> > -		if (fsdax_pgoff != FSDAX_INVALID_PGOFF)
> > -			tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
> > +	tk->addr = addr ? addr : page_address_in_vma(p, vma);
> > +	if (is_zone_device_page(p))
> >   		tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
> 
> Not sure about the simplification.  The fsdax special calculation was added by
> 
> commit c36e2024957120566efd99395b5c8cc95b5175c1
> Author: Shiyang Ruan <ruansy.fnst@fujitsu.com>
> Date:   Fri Jun 3 13:37:29 2022 +0800
> 
>     mm: introduce mf_dax_kill_procs() for fsdax case
> 
>     This new function is a variant of mf_generic_kill_procs that accepts a
>     file, offset pair instead of a struct to support multiple files sharing a
>     DAX mapping.  It is intended to be called by the file systems as part of
>     the memory_failure handler after the file system performed a reverse
>     mapping from the storage address to the file and file offset.
> 
>     Link: https://lkml.kernel.org/r/20220603053738.1218681-6-ruansy.fnst@fujitsu.com
> [..]
> 
> @@ -354,9 +357,15 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
>  	}
> 
>  	tk->addr = page_address_in_vma(p, vma);
> -	if (is_zone_device_page(p))
> -		tk->size_shift = dev_pagemap_mapping_shift(p, vma);
> -	else
> +	if (is_zone_device_page(p)) {
> +		/*
> +		 * Since page->mapping is not used for fsdax, we need
> +		 * calculate the address based on the vma.
> +		 */
> +		if (p->pgmap->type == MEMORY_DEVICE_FS_DAX)
> +			tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
> +		tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
> +	} else
>  		tk->size_shift = page_shift(compound_head(p));
> 
> Looks like it was to deal away with this check
> 
> unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
> {
> 	[..]
> 	} else if (vma->vm_file->f_mapping != folio->mapping) {
> 		return -EFAULT;
> 	}
> 
> (https://elixir.bootlin.com/linux/v6.8/C/ident/page_address_in_vma)
> 
> But, by providing nr=1 in vma_pgoff_address(fsdax_pgoff, 1, vma), it also avoided dealing with
> wrap around, which perhaps doesn't matter? Perhaps no one runs fsdax on a 32-bit machine?

I don't understand what you're saying is wrong with this patch.
All I'm doing (apart from the renaming of ksm_addr to addr) is moving
the vma_pgoff_address() call from __add_to_kill() to its caller.
Is there some reason this doesn't work?
Jane Chu March 13, 2024, 6:11 p.m. UTC | #4
On 3/12/2024 8:23 PM, Matthew Wilcox wrote:

> On Tue, Mar 12, 2024 at 07:07:10PM -0700, Jane Chu wrote:
>> On 2/29/2024 1:20 PM, Matthew Wilcox (Oracle) wrote:
>>
>>> Unify the KSM and DAX codepaths by calculating the addr in
>>> add_to_kill_fsdax() instead of telling __add_to_kill() to calculate it.
>>>
>>> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
>>> Cc: Dan Williams <dan.j.williams@intel.com>
>>> ---
>>>    mm/memory-failure.c | 27 +++++++++------------------
>>>    1 file changed, 9 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
>>> index 9349948f1abf..9356227a50bb 100644
>>> --- a/mm/memory-failure.c
>>> +++ b/mm/memory-failure.c
>>> @@ -416,21 +416,13 @@ static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
>>>     * not much we can do.	We just print a message and ignore otherwise.
>>>     */
>>> -#define FSDAX_INVALID_PGOFF ULONG_MAX
>>> -
>>>    /*
>>>     * Schedule a process for later kill.
>>>     * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
>>> - *
>>> - * Note: @fsdax_pgoff is used only when @p is a fsdax page and a
>>> - * filesystem with a memory failure handler has claimed the
>>> - * memory_failure event. In all other cases, page->index and
>>> - * page->mapping are sufficient for mapping the page back to its
>>> - * corresponding user virtual address.
>>>     */
>>>    static void __add_to_kill(struct task_struct *tsk, struct page *p,
>>>    			  struct vm_area_struct *vma, struct list_head *to_kill,
>>> -			  unsigned long ksm_addr, pgoff_t fsdax_pgoff)
>>> +			  unsigned long addr)
>>>    {
>>>    	struct to_kill *tk;
>>> @@ -440,12 +432,10 @@ static void __add_to_kill(struct task_struct *tsk, struct page *p,
>>>    		return;
>>>    	}
>>> -	tk->addr = ksm_addr ? ksm_addr : page_address_in_vma(p, vma);
>>> -	if (is_zone_device_page(p)) {
>>> -		if (fsdax_pgoff != FSDAX_INVALID_PGOFF)
>>> -			tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
>>> +	tk->addr = addr ? addr : page_address_in_vma(p, vma);
>>> +	if (is_zone_device_page(p))
>>>    		tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
>> Not sure about the simplification.  The fsdax special calculation was added by
>>
>> commit c36e2024957120566efd99395b5c8cc95b5175c1
>> Author: Shiyang Ruan <ruansy.fnst@fujitsu.com>
>> Date:   Fri Jun 3 13:37:29 2022 +0800
>>
>>      mm: introduce mf_dax_kill_procs() for fsdax case
>>
>>      This new function is a variant of mf_generic_kill_procs that accepts a
>>      file, offset pair instead of a struct to support multiple files sharing a
>>      DAX mapping.  It is intended to be called by the file systems as part of
>>      the memory_failure handler after the file system performed a reverse
>>      mapping from the storage address to the file and file offset.
>>
>>      Link: https://lkml.kernel.org/r/20220603053738.1218681-6-ruansy.fnst@fujitsu.com
>> [..]
>>
>> @@ -354,9 +357,15 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
>>  	}
>>
>>  	tk->addr = page_address_in_vma(p, vma);
>> -	if (is_zone_device_page(p))
>> -		tk->size_shift = dev_pagemap_mapping_shift(p, vma);
>> -	else
>> +	if (is_zone_device_page(p)) {
>> +		/*
>> +		 * Since page->mapping is not used for fsdax, we need
>> +		 * calculate the address based on the vma.
>> +		 */
>> +		if (p->pgmap->type == MEMORY_DEVICE_FS_DAX)
>> +			tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
>> +		tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
>> +	} else
>>  		tk->size_shift = page_shift(compound_head(p));
>>
>> Looks like it was to deal away with this check
>>
>> unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
>> {
>> 	[..]
>> 	} else if (vma->vm_file->f_mapping != folio->mapping) {
>> 		return -EFAULT;
>> 	}
>>
>> (https://elixir.bootlin.com/linux/v6.8/C/ident/page_address_in_vma)
>>
>> But, by providing nr=1 in vma_pgoff_address(fsdax_pgoff, 1, vma), it also avoided dealing with
>> wrap around, which perhaps doesn't matter? Perhaps no one runs fsdax on a 32-bit machine?
> I don't understand what you're saying is wrong with this patch.
> All I'm doing (apart from the renaming of ksm_addr to addr) is moving
> the vma_pgoff_address() call from __add_to_kill() to its caller.
> Is there some reason this doesn't work?

Sorry for not being clear.   What I meant to say is that, before the 
patch, page_address_in_vma(p, vma) is the means for determining 
tk->addr, except for fsdax when filesystems made a claim of wanting to 
participate in the UE handling, in that case, 
vma_pgoff_address(fsdax_pgoff, 1, vma) is used instead. The difference 
between the two means is that, although the former eventually calls the 
latter _if_ vma->vm_file->f_mapping exists and is stable, what I am 
unclear from Shiyang Ruan's earlier patch is that, he seems to be 
concerning a case where f_mapping is not reliable, hence his patch went 
straight to call vma_pgoff_address(fsdax_pgoff, 1, vma), and on top of 
that, providing nr=1 to ignore the address wrap around case.

So I don't know whether removing the fsdax special case is okay.

thanks!

-jane
Matthew Wilcox (Oracle) March 14, 2024, 3:51 a.m. UTC | #5
On Wed, Mar 13, 2024 at 11:11:04AM -0700, Jane Chu wrote:
> On 3/12/2024 8:23 PM, Matthew Wilcox wrote:
> > I don't understand what you're saying is wrong with this patch.
> > All I'm doing (apart from the renaming of ksm_addr to addr) is moving
> > the vma_pgoff_address() call from __add_to_kill() to its caller.
> > Is there some reason this doesn't work?
> 
> Sorry for not being clear.   What I meant to say is that, before the patch,
> page_address_in_vma(p, vma) is the means for determining tk->addr, except
> for fsdax when filesystems made a claim of wanting to participate in the UE
> handling, in that case, vma_pgoff_address(fsdax_pgoff, 1, vma) is used
> instead. The difference between the two means is that, although the former
> eventually calls the latter _if_ vma->vm_file->f_mapping exists and is
> stable, what I am unclear from Shiyang Ruan's earlier patch is that, he
> seems to be concerning a case where f_mapping is not reliable, hence his
> patch went straight to call vma_pgoff_address(fsdax_pgoff, 1, vma), and on
> top of that, providing nr=1 to ignore the address wrap around case.
> 
> So I don't know whether removing the fsdax special case is okay.

I don't think I'm removing the fsdax special case, just moving it.

If I subtract out the renaming from this patch, it looks like:

 	tk->addr = addr ? addr : page_address_in_vma(p, vma);
-	if (is_zone_device_page(p)) {
-		if (fsdax_pgoff != FSDAX_INVALID_PGOFF)
-			tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
+	if (is_zone_device_page(p)) {
...
 {
-	__add_to_kill(tsk, p, vma, to_kill, 0, pgoff);
+	unsigned long addr = vma_pgoff_address(pgoff, 1, vma);
+	__add_to_kill(tsk, p, vma, to_kill, addr);
 }

So instead of passing in '0' as the addr from add_to_kill_fsdax(),
we do the address lookup in add_to_kill_fsdax() and pass it in.
That means we don't call page_address_in_vma() for DAX because
addr is not 0.
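
Spelled out, the address selection after this patch reduces to the
following for the three callers (a condensed sketch of the result,
assuming the KSM and fsdax callers pass a non-zero address; not a
verbatim quote of the final code):

	/* add_to_kill_anon_file() passes addr == 0, so __add_to_kill()
	 * falls back to the page-based lookup: */
	tk->addr = page_address_in_vma(p, vma);

	/* add_to_kill_ksm() passes the address KSM already resolved: */
	tk->addr = addr;

	/* add_to_kill_fsdax() now resolves the file offset itself: */
	tk->addr = vma_pgoff_address(pgoff, 1, vma);

In every case __add_to_kill() only evaluates
"addr ? addr : page_address_in_vma(p, vma)"; the fsdax special case has
moved into its caller rather than being removed.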
Jane Chu March 14, 2024, 5:54 p.m. UTC | #6
On 3/13/2024 8:51 PM, Matthew Wilcox wrote:

> On Wed, Mar 13, 2024 at 11:11:04AM -0700, Jane Chu wrote:
>> On 3/12/2024 8:23 PM, Matthew Wilcox wrote:
>>> I don't understand what you're saying is wrong with this patch.
>>> All I'm doing (apart from the renaming of ksm_addr to addr) is moving
>>> the vma_pgoff_address() call from __add_to_kill() to its caller.
>>> Is there some reason this doesn't work?
>> Sorry for not being clear.   What I meant to say is that, before the patch,
>> page_address_in_vma(p, vma) is the means for determining tk->addr, except
>> for fsdax when filesystems made a claim of wanting to participate in the UE
>> handling, in that case, vma_pgoff_address(fsdax_pgoff, 1, vma) is used
>> instead. The difference between the two means is that, although the former
>> eventually calls the latter _if_ vma->vm_file->f_mapping exists and is
>> stable, what I am unclear from Shiyang Ruan's earlier patch is that, he
>> seems to be concerning a case where f_mapping is not reliable, hence his
>> patch went straight to call vma_pgoff_address(fsdax_pgoff, 1, vma), and on
>> top of that, providing nr=1 to ignore the address wrap around case.
>>
>> So I don't know whether removing the fsdax special case is okay.
> I don't think I'm removing the fsdax special case, just moving it.
>
> If I subtract out the renaming from this patch, it looks like:
>
>   	tk->addr = addr ? addr : page_address_in_vma(p, vma);
> -	if (is_zone_device_page(p)) {
> -		if (fsdax_pgoff != FSDAX_INVALID_PGOFF)
> -			tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
> +	if (is_zone_device_page(p)) {
> ...
>   {
> -	__add_to_kill(tsk, p, vma, to_kill, 0, pgoff);
> +	unsigned long addr = vma_pgoff_address(pgoff, 1, vma);
> +	__add_to_kill(tsk, p, vma, to_kill, addr);
>   }
>
> So instead of passing in '0' as the addr from add_to_kill_fsdax(),
> we do the address lookup in add_to_kill_fsdax() and pass it in.
> That means we don't call page_address_in_vma() for DAX because
> addr is not 0.

You're right!  I overlooked that the addr being passed in for the fsdax
case is different.

Thanks for taking the time explaining to me.

Reviewed-by: Jane Chu <jane.chu@oracle.com>

-jane
Dan Williams March 19, 2024, 12:36 a.m. UTC | #7
Matthew Wilcox (Oracle) wrote:
> Unify the KSM and DAX codepaths by calculating the addr in
> add_to_kill_fsdax() instead of telling __add_to_kill() to calculate it.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
> Cc: Dan Williams <dan.j.williams@intel.com>

Looks good, passes tests.

Reviewed-by: Dan Williams <dan.j.williams@intel.com>
diff mbox series

Patch

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 9349948f1abf..9356227a50bb 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -416,21 +416,13 @@  static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma,
  * not much we can do.	We just print a message and ignore otherwise.
  */
 
-#define FSDAX_INVALID_PGOFF ULONG_MAX
-
 /*
  * Schedule a process for later kill.
  * Uses GFP_ATOMIC allocations to avoid potential recursions in the VM.
- *
- * Note: @fsdax_pgoff is used only when @p is a fsdax page and a
- * filesystem with a memory failure handler has claimed the
- * memory_failure event. In all other cases, page->index and
- * page->mapping are sufficient for mapping the page back to its
- * corresponding user virtual address.
  */
 static void __add_to_kill(struct task_struct *tsk, struct page *p,
 			  struct vm_area_struct *vma, struct list_head *to_kill,
-			  unsigned long ksm_addr, pgoff_t fsdax_pgoff)
+			  unsigned long addr)
 {
 	struct to_kill *tk;
 
@@ -440,12 +432,10 @@  static void __add_to_kill(struct task_struct *tsk, struct page *p,
 		return;
 	}
 
-	tk->addr = ksm_addr ? ksm_addr : page_address_in_vma(p, vma);
-	if (is_zone_device_page(p)) {
-		if (fsdax_pgoff != FSDAX_INVALID_PGOFF)
-			tk->addr = vma_pgoff_address(fsdax_pgoff, 1, vma);
+	tk->addr = addr ? addr : page_address_in_vma(p, vma);
+	if (is_zone_device_page(p))
 		tk->size_shift = dev_pagemap_mapping_shift(vma, tk->addr);
-	} else
+	else
 		tk->size_shift = page_shift(compound_head(p));
 
 	/*
@@ -475,7 +465,7 @@  static void add_to_kill_anon_file(struct task_struct *tsk, struct page *p,
 				  struct vm_area_struct *vma,
 				  struct list_head *to_kill)
 {
-	__add_to_kill(tsk, p, vma, to_kill, 0, FSDAX_INVALID_PGOFF);
+	__add_to_kill(tsk, p, vma, to_kill, 0);
 }
 
 #ifdef CONFIG_KSM
@@ -493,10 +483,10 @@  static bool task_in_to_kill_list(struct list_head *to_kill,
 }
 void add_to_kill_ksm(struct task_struct *tsk, struct page *p,
 		     struct vm_area_struct *vma, struct list_head *to_kill,
-		     unsigned long ksm_addr)
+		     unsigned long addr)
 {
 	if (!task_in_to_kill_list(to_kill, tsk))
-		__add_to_kill(tsk, p, vma, to_kill, ksm_addr, FSDAX_INVALID_PGOFF);
+		__add_to_kill(tsk, p, vma, to_kill, addr);
 }
 #endif
 /*
@@ -670,7 +660,8 @@  static void add_to_kill_fsdax(struct task_struct *tsk, struct page *p,
 			      struct vm_area_struct *vma,
 			      struct list_head *to_kill, pgoff_t pgoff)
 {
-	__add_to_kill(tsk, p, vma, to_kill, 0, pgoff);
+	unsigned long addr = vma_pgoff_address(pgoff, 1, vma);
+	__add_to_kill(tsk, p, vma, to_kill, addr);
 }
 
 /*