
[v8,resend,1/2] mm: Add become_kswapd and restore_kswapd

Message ID 20201103131754.94949-2-laoar.shao@gmail.com (mailing list archive)
State New, archived
Series avoid xfs transaction reservation recursion

Commit Message

Yafang Shao Nov. 3, 2020, 1:17 p.m. UTC
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>

Since XFS needs to pretend to be kswapd in some of its worker threads,
create methods to save & restore kswapd state.  Don't bother restoring
kswapd state in kswapd -- the only time we reach this code is when we're
exiting and the task_struct is about to be destroyed anyway.

Cc: Dave Chinner <david@fromorbit.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 fs/xfs/libxfs/xfs_btree.c | 14 ++++++++------
 include/linux/sched/mm.h  | 23 +++++++++++++++++++++++
 mm/vmscan.c               | 16 +---------------
 3 files changed, 32 insertions(+), 21 deletions(-)
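
For orientation, the call pattern these helpers enable looks like the
sketch below (illustrative only: example_worker() is a hypothetical
caller, while become_kswapd(), restore_kswapd(), memalloc_nofs_save()
and memalloc_nofs_restore() are the helpers actually used in the diff):

	/* Hypothetical worker; shows the intended save/restore discipline. */
	static void example_worker(void)
	{
		unsigned long	pflags;
		int		memalloc_nofs;

		pflags = become_kswapd();		/* gain reclaim-context powers */
		memalloc_nofs = memalloc_nofs_save();	/* avoid fs reclaim recursion */

		/* ... allocation-heavy work done on behalf of reclaim ... */

		memalloc_nofs_restore(memalloc_nofs);	/* unwind in reverse order */
		restore_kswapd(pflags);			/* clear only newly-set bits */
	}

Note the reverse-order unwind; xfs_btree_split_worker() in the diff
follows the same discipline, calling become_kswapd() only when the
caller really is kswapd.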

Comments

Darrick J. Wong Nov. 3, 2020, 7:48 p.m. UTC | #1
On Tue, Nov 03, 2020 at 09:17:53PM +0800, Yafang Shao wrote:
> [...]
> diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
> index 2d25bab..a04a442 100644
> --- a/fs/xfs/libxfs/xfs_btree.c
> +++ b/fs/xfs/libxfs/xfs_btree.c
> @@ -2813,8 +2813,9 @@ struct xfs_btree_split_args {
>  {
>  	struct xfs_btree_split_args	*args = container_of(work,
>  						struct xfs_btree_split_args, work);
> +	bool			is_kswapd = args->kswapd;
>  	unsigned long		pflags;
> -	unsigned long		new_pflags = PF_MEMALLOC_NOFS;
> +	int			memalloc_nofs;
>  
>  	/*
>  	 * we are in a transaction context here, but may also be doing work
> @@ -2822,16 +2823,17 @@ struct xfs_btree_split_args {
>  	 * temporarily to ensure that we don't block waiting for memory reclaim
>  	 * in any way.
>  	 */
> -	if (args->kswapd)
> -		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
> -
> -	current_set_flags_nested(&pflags, new_pflags);
> +	if (is_kswapd)
> +		pflags = become_kswapd();
> +	memalloc_nofs = memalloc_nofs_save();
>  
>  	args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
>  					 args->key, args->curp, args->stat);
>  	complete(args->done);
>  
> -	current_restore_flags_nested(&pflags, new_pflags);
> +	memalloc_nofs_restore(memalloc_nofs);
> +	if (is_kswapd)
> +		restore_kswapd(pflags);

Note that there's a trivial merge conflict with the mrlock_t removal
series.  I'll carry the fix in the tree, assuming that everything
passes.

--D

Yafang Shao Nov. 4, 2020, 2:17 p.m. UTC | #2
On Wed, Nov 4, 2020 at 3:48 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> On Tue, Nov 03, 2020 at 09:17:53PM +0800, Yafang Shao wrote:
> > [...]
>
> Note that there's a trivial merge conflict with the mrlock_t removal
> series.  I'll carry the fix in the tree, assuming that everything
> passes.
>

This patchset is based on Andrew's tree currently.
Seems I should rebase this patchset on your tree instead of Andrew's tree?

Darrick J. Wong Nov. 4, 2020, 4:26 p.m. UTC | #3
On Wed, Nov 04, 2020 at 10:17:16PM +0800, Yafang Shao wrote:
> On Wed, Nov 4, 2020 at 3:48 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
> >
> > On Tue, Nov 03, 2020 at 09:17:53PM +0800, Yafang Shao wrote:
> > > [...]
> >
> > Note that there's a trivial merge conflict with the mrlock_t removal
> > series.  I'll carry the fix in the tree, assuming that everything
> > passes.
> >
> 
> This patchset is based on Andrew's tree currently.
> Seems I should rebase this patchset on your tree instead of Andrew's tree?

That depends on whether or not you want me to push these two patches
through the xfs tree or if they're going through Andrew (Morton?)'s
quiltset.

Frankly I'd rather take them via xfs since most of the diff is xfs...

--D

Amy Parker Nov. 4, 2020, 5:50 p.m. UTC | #4
On Wed, Nov 4, 2020 at 8:30 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> On Wed, Nov 04, 2020 at 10:17:16PM +0800, Yafang Shao wrote:
> > On Wed, Nov 4, 2020 at 3:48 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
> > >
> > > On Tue, Nov 03, 2020 at 09:17:53PM +0800, Yafang Shao wrote:
> > > > [...]
> > >
> > > Note that there's a trivial merge conflict with the mrlock_t removal
> > > series.  I'll carry the fix in the tree, assuming that everything
> > > passes.
> > >
> >
> > This patchset is based on Andrew's tree currently.
> > Seems I should rebase this patchset on your tree instead of Andrew's tree?
>
> That depends on whether or not you want me to push these two patches
> through the xfs tree or if they're going through Andrew (Morton?)'s
> quiltset.

I think they could go through either, but the XFS tree might be ever-so-slightly
better.

>
> Frankly I'd rather take them via xfs since most of the diff is xfs...

Yeah, it's an XFS patch so let's throw it through the XFS tree.

Best regards,
Amy Parker
(they/them)
Yafang Shao Nov. 5, 2020, 1:04 p.m. UTC | #5
On Thu, Nov 5, 2020 at 12:26 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
>
> On Wed, Nov 04, 2020 at 10:17:16PM +0800, Yafang Shao wrote:
> > On Wed, Nov 4, 2020 at 3:48 AM Darrick J. Wong <darrick.wong@oracle.com> wrote:
> > >
> > > On Tue, Nov 03, 2020 at 09:17:53PM +0800, Yafang Shao wrote:
> > > > [...]
> > >
> > > Note that there's a trivial merge conflict with the mrlock_t removal
> > > series.  I'll carry the fix in the tree, assuming that everything
> > > passes.
> > >
> >
> > This patchset is based on Andrew's tree currently.
> > Seems I should rebase this patchset on your tree instead of Andrew's tree?
>
> That depends on whether or not you want me to push these two patches
> through the xfs tree or if they're going through Andrew (Morton?)'s
> quiltset.
>
> Frankly I'd rather take them via xfs since most of the diff is xfs...
>

Sure, I will rebase it on the xfs tree.


Patch

diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 2d25bab..a04a442 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -2813,8 +2813,9 @@ struct xfs_btree_split_args {
 {
 	struct xfs_btree_split_args	*args = container_of(work,
 						struct xfs_btree_split_args, work);
+	bool			is_kswapd = args->kswapd;
 	unsigned long		pflags;
-	unsigned long		new_pflags = PF_MEMALLOC_NOFS;
+	int			memalloc_nofs;
 
 	/*
 	 * we are in a transaction context here, but may also be doing work
@@ -2822,16 +2823,17 @@ struct xfs_btree_split_args {
 	 * temporarily to ensure that we don't block waiting for memory reclaim
 	 * in any way.
 	 */
-	if (args->kswapd)
-		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
-
-	current_set_flags_nested(&pflags, new_pflags);
+	if (is_kswapd)
+		pflags = become_kswapd();
+	memalloc_nofs = memalloc_nofs_save();
 
 	args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
 					 args->key, args->curp, args->stat);
 	complete(args->done);
 
-	current_restore_flags_nested(&pflags, new_pflags);
+	memalloc_nofs_restore(memalloc_nofs);
+	if (is_kswapd)
+		restore_kswapd(pflags);
 }
 
 /*
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index d5ece7a..2faf03e 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -278,6 +278,29 @@ static inline void memalloc_nocma_restore(unsigned int flags)
 }
 #endif
 
+/*
+ * Tell the memory management code that this thread is working on behalf
+ * of background memory reclaim (like kswapd).  That means that it will
+ * get access to memory reserves should it need to allocate memory in
+ * order to make forward progress.  With this great power comes great
+ * responsibility to not exhaust those reserves.
+ */
+#define KSWAPD_PF_FLAGS		(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD)
+
+static inline unsigned long become_kswapd(void)
+{
+	unsigned long flags = current->flags & KSWAPD_PF_FLAGS;
+
+	current->flags |= KSWAPD_PF_FLAGS;
+
+	return flags;
+}
+
+static inline void restore_kswapd(unsigned long flags)
+{
+	current->flags &= ~(flags ^ KSWAPD_PF_FLAGS);
+}
+
 #ifdef CONFIG_MEMCG
 DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg);
 /**
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1b8f0e0..77bc1dd 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3869,19 +3869,7 @@ static int kswapd(void *p)
 	if (!cpumask_empty(cpumask))
 		set_cpus_allowed_ptr(tsk, cpumask);
 
-	/*
-	 * Tell the memory management that we're a "memory allocator",
-	 * and that if we need more memory we should get access to it
-	 * regardless (see "__alloc_pages()"). "kswapd" should
-	 * never get caught in the normal page freeing logic.
-	 *
-	 * (Kswapd normally doesn't need memory anyway, but sometimes
-	 * you need a small amount of memory in order to be able to
-	 * page out something else, and this flag essentially protects
-	 * us from recursively trying to free more memory as we're
-	 * trying to free the first piece of memory in the first place).
-	 */
-	tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
+	become_kswapd();
 	set_freezable();
 
 	WRITE_ONCE(pgdat->kswapd_order, 0);
@@ -3931,8 +3919,6 @@ static int kswapd(void *p)
 			goto kswapd_try_sleep;
 	}
 
-	tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD);
-
 	return 0;
 }
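
A note on the bit arithmetic in restore_kswapd(): become_kswapd()
returns the subset of KSWAPD_PF_FLAGS that was already set on entry, so
flags ^ KSWAPD_PF_FLAGS is exactly the set of bits become_kswapd()
newly turned on, and only those are cleared on restore; bits that were
set beforehand survive. A standalone userspace model of the same
arithmetic (the 0x7/0x1 values are stand-in masks, not the real PF_*
encodings):

	#include <assert.h>

	#define KSWAPD_PF_FLAGS	0x7UL	/* stand-in for PF_MEMALLOC|PF_SWAPWRITE|PF_KSWAPD */

	static unsigned long task_flags = 0x1UL;	/* one mask bit already set on entry */

	static unsigned long become_kswapd(void)
	{
		unsigned long flags = task_flags & KSWAPD_PF_FLAGS;	/* remember pre-set bits */

		task_flags |= KSWAPD_PF_FLAGS;
		return flags;
	}

	static void restore_kswapd(unsigned long flags)
	{
		/* flags ^ mask == the bits become_kswapd() newly set; clear only those */
		task_flags &= ~(flags ^ KSWAPD_PF_FLAGS);
	}

	int main(void)
	{
		unsigned long flags = become_kswapd();

		assert(task_flags == 0x7UL);	/* all mask bits set while acting as kswapd */
		restore_kswapd(flags);
		assert(task_flags == 0x1UL);	/* the pre-set bit survives the restore */
		return 0;
	}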