[04/30] xfs: mark inode buffers in cache
diff mbox series

Message ID 20200601214251.4167140-5-david@fromorbit.com
State Superseded
Headers show
Series
  • xfs: rework inode flushing to make inode reclaim fully asynchronous
Related show

Commit Message

Dave Chinner June 1, 2020, 9:42 p.m. UTC
From: Dave Chinner <dchinner@redhat.com>

Inode buffers always have write IO callbacks, so by marking them
directly we can avoid needing to attach ->b_iodone functions to
them. This avoids an indirect call, and makes future modifications
much simpler.

This is largely a rearrangement of the code at this point - no IO
completion functionality changes at this point, just how the
code is run is modified.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_buf.c       | 21 ++++++++++++++++-----
 fs/xfs/xfs_buf.h       | 38 +++++++++++++++++++++++++-------------
 fs/xfs/xfs_buf_item.c  | 42 +++++++++++++++++++++++++++++++-----------
 fs/xfs/xfs_buf_item.h  |  1 +
 fs/xfs/xfs_inode.c     |  2 +-
 fs/xfs/xfs_trans_buf.c |  3 +++
 6 files changed, 77 insertions(+), 30 deletions(-)

Comments

Brian Foster June 2, 2020, 4:45 p.m. UTC | #1
On Tue, Jun 02, 2020 at 07:42:25AM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> Inode buffers always have write IO callbacks, so by marking them
> directly we can avoid needing to attach ->b_iodone functions to
> them. This avoids an indirect call, and makes future modifications
> much simpler.
> 
> This is largely a rearrangement of the code at this point - no IO
> completion functionality changes at this point, just how the
> code is run is modified.
> 

Ok, I was initially thinking this patch looked incomplete in that we
continue to set ->b_iodone() on inode buffers even though we'd never
call it. Looking ahead, I see that the next few patches continue to
clean that up to eventually remove ->b_iodone(), so that addresses that.

My only other curiosity is that while there may not be any functional
difference, this technically changes callback behavior in that we set
the new flag in some contexts that don't currently attach anything to
the buffer, right? E.g., xfs_trans_inode_alloc_buf() sets the flag on
inode chunk init, which means we can write out an inode buffer without
any attached/flushed inodes. Is the intent of that to support future
changes? If so, a note about that in the commit log would be helpful.

Brian

> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/xfs_buf.c       | 21 ++++++++++++++++-----
>  fs/xfs/xfs_buf.h       | 38 +++++++++++++++++++++++++-------------
>  fs/xfs/xfs_buf_item.c  | 42 +++++++++++++++++++++++++++++++-----------
>  fs/xfs/xfs_buf_item.h  |  1 +
>  fs/xfs/xfs_inode.c     |  2 +-
>  fs/xfs/xfs_trans_buf.c |  3 +++
>  6 files changed, 77 insertions(+), 30 deletions(-)
> 
> diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> index 9c2fbb6bbf89d..fcf650575be61 100644
> --- a/fs/xfs/xfs_buf.c
> +++ b/fs/xfs/xfs_buf.c
> @@ -14,6 +14,8 @@
>  #include "xfs_mount.h"
>  #include "xfs_trace.h"
>  #include "xfs_log.h"
> +#include "xfs_trans.h"
> +#include "xfs_buf_item.h"
>  #include "xfs_errortag.h"
>  #include "xfs_error.h"
>  
> @@ -1202,12 +1204,21 @@ xfs_buf_ioend(
>  		bp->b_flags |= XBF_DONE;
>  	}
>  
> -	if (bp->b_iodone)
> +	if (read)
> +		goto out_finish;
> +
> +	if (bp->b_flags & _XBF_INODES) {
> +		xfs_buf_inode_iodone(bp);
> +		return;
> +	}
> +
> +	if (bp->b_iodone) {
>  		(*(bp->b_iodone))(bp);
> -	else if (bp->b_flags & XBF_ASYNC)
> -		xfs_buf_relse(bp);
> -	else
> -		complete(&bp->b_iowait);
> +		return;
> +	}
> +
> +out_finish:
> +	xfs_buf_ioend_finish(bp);
>  }
>  
>  static void
> diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> index 050c53b739e24..2400cb90a04c6 100644
> --- a/fs/xfs/xfs_buf.h
> +++ b/fs/xfs/xfs_buf.h
> @@ -30,15 +30,18 @@
>  #define XBF_STALE	 (1 << 6) /* buffer has been staled, do not find it */
>  #define XBF_WRITE_FAIL	 (1 << 7) /* async writes have failed on this buffer */
>  
> -/* flags used only as arguments to access routines */
> -#define XBF_TRYLOCK	 (1 << 16)/* lock requested, but do not wait */
> -#define XBF_UNMAPPED	 (1 << 17)/* do not map the buffer */
> +/* buffer type flags for write callbacks */
> +#define _XBF_INODES	 (1 << 16)/* inode buffer */
>  
>  /* flags used only internally */
>  #define _XBF_PAGES	 (1 << 20)/* backed by refcounted pages */
>  #define _XBF_KMEM	 (1 << 21)/* backed by heap memory */
>  #define _XBF_DELWRI_Q	 (1 << 22)/* buffer on a delwri queue */
>  
> +/* flags used only as arguments to access routines */
> +#define XBF_TRYLOCK	 (1 << 30)/* lock requested, but do not wait */
> +#define XBF_UNMAPPED	 (1 << 31)/* do not map the buffer */
> +
>  typedef unsigned int xfs_buf_flags_t;
>  
>  #define XFS_BUF_FLAGS \
> @@ -50,12 +53,13 @@ typedef unsigned int xfs_buf_flags_t;
>  	{ XBF_DONE,		"DONE" }, \
>  	{ XBF_STALE,		"STALE" }, \
>  	{ XBF_WRITE_FAIL,	"WRITE_FAIL" }, \
> -	{ XBF_TRYLOCK,		"TRYLOCK" },	/* should never be set */\
> -	{ XBF_UNMAPPED,		"UNMAPPED" },	/* ditto */\
> +	{ _XBF_INODES,		"INODES" }, \
>  	{ _XBF_PAGES,		"PAGES" }, \
>  	{ _XBF_KMEM,		"KMEM" }, \
> -	{ _XBF_DELWRI_Q,	"DELWRI_Q" }
> -
> +	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \
> +	/* The following interface flags should never be set */ \
> +	{ XBF_TRYLOCK,		"TRYLOCK" }, \
> +	{ XBF_UNMAPPED,		"UNMAPPED" }
>  
>  /*
>   * Internal state flags.
> @@ -257,9 +261,23 @@ extern void xfs_buf_unlock(xfs_buf_t *);
>  #define xfs_buf_islocked(bp) \
>  	((bp)->b_sema.count <= 0)
>  
> +static inline void xfs_buf_relse(xfs_buf_t *bp)
> +{
> +	xfs_buf_unlock(bp);
> +	xfs_buf_rele(bp);
> +}
> +
>  /* Buffer Read and Write Routines */
>  extern int xfs_bwrite(struct xfs_buf *bp);
>  extern void xfs_buf_ioend(struct xfs_buf *bp);
> +static inline void xfs_buf_ioend_finish(struct xfs_buf *bp)
> +{
> +	if (bp->b_flags & XBF_ASYNC)
> +		xfs_buf_relse(bp);
> +	else
> +		complete(&bp->b_iowait);
> +}
> +
>  extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error,
>  		xfs_failaddr_t failaddr);
>  #define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address)
> @@ -324,12 +342,6 @@ static inline int xfs_buf_ispinned(struct xfs_buf *bp)
>  	return atomic_read(&bp->b_pin_count);
>  }
>  
> -static inline void xfs_buf_relse(xfs_buf_t *bp)
> -{
> -	xfs_buf_unlock(bp);
> -	xfs_buf_rele(bp);
> -}
> -
>  static inline int
>  xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
>  {
> diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
> index 9e75e8d6042ec..8659cf4282a64 100644
> --- a/fs/xfs/xfs_buf_item.c
> +++ b/fs/xfs/xfs_buf_item.c
> @@ -1158,20 +1158,15 @@ xfs_buf_iodone_callback_error(
>  	return false;
>  }
>  
> -/*
> - * This is the iodone() function for buffers which have had callbacks attached
> - * to them by xfs_buf_attach_iodone(). We need to iterate the items on the
> - * callback list, mark the buffer as having no more callbacks and then push the
> - * buffer through IO completion processing.
> - */
> -void
> -xfs_buf_iodone_callbacks(
> +static void
> +xfs_buf_run_callbacks(
>  	struct xfs_buf		*bp)
>  {
> +
>  	/*
> -	 * If there is an error, process it. Some errors require us
> -	 * to run callbacks after failure processing is done so we
> -	 * detect that and take appropriate action.
> +	 * If there is an error, process it. Some errors require us to run
> +	 * callbacks after failure processing is done so we detect that and take
> +	 * appropriate action.
>  	 */
>  	if (bp->b_error && xfs_buf_iodone_callback_error(bp))
>  		return;
> @@ -1188,9 +1183,34 @@ xfs_buf_iodone_callbacks(
>  	bp->b_log_item = NULL;
>  	list_del_init(&bp->b_li_list);
>  	bp->b_iodone = NULL;
> +}
> +
> +/*
> + * This is the iodone() function for buffers which have had callbacks attached
> + * to them by xfs_buf_attach_iodone(). We need to iterate the items on the
> + * callback list, mark the buffer as having no more callbacks and then push the
> + * buffer through IO completion processing.
> + */
> +void
> +xfs_buf_iodone_callbacks(
> +	struct xfs_buf		*bp)
> +{
> +	xfs_buf_run_callbacks(bp);
>  	xfs_buf_ioend(bp);
>  }
>  
> +/*
> + * Inode buffer iodone callback function.
> + */
> +void
> +xfs_buf_inode_iodone(
> +	struct xfs_buf		*bp)
> +{
> +	xfs_buf_run_callbacks(bp);
> +	xfs_buf_ioend_finish(bp);
> +}
> +
> +
>  /*
>   * This is the iodone() function for buffers which have been
>   * logged.  It is called when they are eventually flushed out.
> diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
> index c9c57e2da9327..a342933ad9b8d 100644
> --- a/fs/xfs/xfs_buf_item.h
> +++ b/fs/xfs/xfs_buf_item.h
> @@ -59,6 +59,7 @@ void	xfs_buf_attach_iodone(struct xfs_buf *,
>  			      struct xfs_log_item *);
>  void	xfs_buf_iodone_callbacks(struct xfs_buf *);
>  void	xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
> +void	xfs_buf_inode_iodone(struct xfs_buf *);
>  bool	xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec);
>  
>  extern kmem_zone_t	*xfs_buf_item_zone;
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index ac3c8af8c9a14..d5dee57f914a9 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -3860,13 +3860,13 @@ xfs_iflush_int(
>  	 * completion on the buffer to remove the inode from the AIL and release
>  	 * the flush lock.
>  	 */
> +	bp->b_flags |= _XBF_INODES;
>  	xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
>  
>  	/* generate the checksum. */
>  	xfs_dinode_calc_crc(mp, dip);
>  
>  	ASSERT(!list_empty(&bp->b_li_list));
> -	ASSERT(bp->b_iodone != NULL);
>  	return error;
>  }
>  
> diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
> index 08174ffa21189..552d0869aa0fe 100644
> --- a/fs/xfs/xfs_trans_buf.c
> +++ b/fs/xfs/xfs_trans_buf.c
> @@ -626,6 +626,7 @@ xfs_trans_inode_buf(
>  	ASSERT(atomic_read(&bip->bli_refcount) > 0);
>  
>  	bip->bli_flags |= XFS_BLI_INODE_BUF;
> +	bp->b_flags |= _XBF_INODES;
>  	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
>  }
>  
> @@ -651,6 +652,7 @@ xfs_trans_stale_inode_buf(
>  
>  	bip->bli_flags |= XFS_BLI_STALE_INODE;
>  	bip->bli_item.li_cb = xfs_buf_iodone;
> +	bp->b_flags |= _XBF_INODES;
>  	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
>  }
>  
> @@ -675,6 +677,7 @@ xfs_trans_inode_alloc_buf(
>  	ASSERT(atomic_read(&bip->bli_refcount) > 0);
>  
>  	bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
> +	bp->b_flags |= _XBF_INODES;
>  	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
>  }
>  
> -- 
> 2.26.2.761.g0e0b3e54be
>
Darrick J. Wong June 2, 2020, 7:22 p.m. UTC | #2
On Tue, Jun 02, 2020 at 12:45:35PM -0400, Brian Foster wrote:
> On Tue, Jun 02, 2020 at 07:42:25AM +1000, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > Inode buffers always have write IO callbacks, so by marking them
> > directly we can avoid needing to attach ->b_iodone functions to
> > them. This avoids an indirect call, and makes future modifications
> > much simpler.
> > 
> > This is largely a rearrangement of the code at this point - no IO
> > completion functionality changes at this point, just how the
> > code is run is modified.
> > 
> 
> Ok, I was initially thinking this patch looked incomplete in that we
> continue to set ->b_iodone() on inode buffers even though we'd never
> call it. Looking ahead, I see that the next few patches continue to
> clean that up to eventually remove ->b_iodone(), so that addresses that.
> 
> My only other curiosity is that while there may not be any functional
> difference, this technically changes callback behavior in that we set
> the new flag in some contexts that don't currently attach anything to
> the buffer, right? E.g., xfs_trans_inode_alloc_buf() sets the flag on
> inode chunk init, which means we can write out an inode buffer without
> any attached/flushed inodes. Is the intent of that to support future
> changes? If so, a note about that in the commit log would be helpful.

I had kinda wondered that myself...  I /think/ in the
xfs_trans_inode_alloc_buf case there won't be any inodes attached
because we mark the buffer delwri (v4) or ordered (v5) so the buffer
should get written out before we ever get the chance to attach inodes;
and in the stale case, the inodes were already staled so we're done
writing them?

--D

> Brian
> 
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > ---
> >  fs/xfs/xfs_buf.c       | 21 ++++++++++++++++-----
> >  fs/xfs/xfs_buf.h       | 38 +++++++++++++++++++++++++-------------
> >  fs/xfs/xfs_buf_item.c  | 42 +++++++++++++++++++++++++++++++-----------
> >  fs/xfs/xfs_buf_item.h  |  1 +
> >  fs/xfs/xfs_inode.c     |  2 +-
> >  fs/xfs/xfs_trans_buf.c |  3 +++
> >  6 files changed, 77 insertions(+), 30 deletions(-)
> > 
> > diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
> > index 9c2fbb6bbf89d..fcf650575be61 100644
> > --- a/fs/xfs/xfs_buf.c
> > +++ b/fs/xfs/xfs_buf.c
> > @@ -14,6 +14,8 @@
> >  #include "xfs_mount.h"
> >  #include "xfs_trace.h"
> >  #include "xfs_log.h"
> > +#include "xfs_trans.h"
> > +#include "xfs_buf_item.h"
> >  #include "xfs_errortag.h"
> >  #include "xfs_error.h"
> >  
> > @@ -1202,12 +1204,21 @@ xfs_buf_ioend(
> >  		bp->b_flags |= XBF_DONE;
> >  	}
> >  
> > -	if (bp->b_iodone)
> > +	if (read)
> > +		goto out_finish;
> > +
> > +	if (bp->b_flags & _XBF_INODES) {
> > +		xfs_buf_inode_iodone(bp);
> > +		return;
> > +	}
> > +
> > +	if (bp->b_iodone) {
> >  		(*(bp->b_iodone))(bp);
> > -	else if (bp->b_flags & XBF_ASYNC)
> > -		xfs_buf_relse(bp);
> > -	else
> > -		complete(&bp->b_iowait);
> > +		return;
> > +	}
> > +
> > +out_finish:
> > +	xfs_buf_ioend_finish(bp);
> >  }
> >  
> >  static void
> > diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
> > index 050c53b739e24..2400cb90a04c6 100644
> > --- a/fs/xfs/xfs_buf.h
> > +++ b/fs/xfs/xfs_buf.h
> > @@ -30,15 +30,18 @@
> >  #define XBF_STALE	 (1 << 6) /* buffer has been staled, do not find it */
> >  #define XBF_WRITE_FAIL	 (1 << 7) /* async writes have failed on this buffer */
> >  
> > -/* flags used only as arguments to access routines */
> > -#define XBF_TRYLOCK	 (1 << 16)/* lock requested, but do not wait */
> > -#define XBF_UNMAPPED	 (1 << 17)/* do not map the buffer */
> > +/* buffer type flags for write callbacks */
> > +#define _XBF_INODES	 (1 << 16)/* inode buffer */
> >  
> >  /* flags used only internally */
> >  #define _XBF_PAGES	 (1 << 20)/* backed by refcounted pages */
> >  #define _XBF_KMEM	 (1 << 21)/* backed by heap memory */
> >  #define _XBF_DELWRI_Q	 (1 << 22)/* buffer on a delwri queue */
> >  
> > +/* flags used only as arguments to access routines */
> > +#define XBF_TRYLOCK	 (1 << 30)/* lock requested, but do not wait */
> > +#define XBF_UNMAPPED	 (1 << 31)/* do not map the buffer */
> > +
> >  typedef unsigned int xfs_buf_flags_t;
> >  
> >  #define XFS_BUF_FLAGS \
> > @@ -50,12 +53,13 @@ typedef unsigned int xfs_buf_flags_t;
> >  	{ XBF_DONE,		"DONE" }, \
> >  	{ XBF_STALE,		"STALE" }, \
> >  	{ XBF_WRITE_FAIL,	"WRITE_FAIL" }, \
> > -	{ XBF_TRYLOCK,		"TRYLOCK" },	/* should never be set */\
> > -	{ XBF_UNMAPPED,		"UNMAPPED" },	/* ditto */\
> > +	{ _XBF_INODES,		"INODES" }, \
> >  	{ _XBF_PAGES,		"PAGES" }, \
> >  	{ _XBF_KMEM,		"KMEM" }, \
> > -	{ _XBF_DELWRI_Q,	"DELWRI_Q" }
> > -
> > +	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \
> > +	/* The following interface flags should never be set */ \
> > +	{ XBF_TRYLOCK,		"TRYLOCK" }, \
> > +	{ XBF_UNMAPPED,		"UNMAPPED" }
> >  
> >  /*
> >   * Internal state flags.
> > @@ -257,9 +261,23 @@ extern void xfs_buf_unlock(xfs_buf_t *);
> >  #define xfs_buf_islocked(bp) \
> >  	((bp)->b_sema.count <= 0)
> >  
> > +static inline void xfs_buf_relse(xfs_buf_t *bp)
> > +{
> > +	xfs_buf_unlock(bp);
> > +	xfs_buf_rele(bp);
> > +}
> > +
> >  /* Buffer Read and Write Routines */
> >  extern int xfs_bwrite(struct xfs_buf *bp);
> >  extern void xfs_buf_ioend(struct xfs_buf *bp);
> > +static inline void xfs_buf_ioend_finish(struct xfs_buf *bp)
> > +{
> > +	if (bp->b_flags & XBF_ASYNC)
> > +		xfs_buf_relse(bp);
> > +	else
> > +		complete(&bp->b_iowait);
> > +}
> > +
> >  extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error,
> >  		xfs_failaddr_t failaddr);
> >  #define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address)
> > @@ -324,12 +342,6 @@ static inline int xfs_buf_ispinned(struct xfs_buf *bp)
> >  	return atomic_read(&bp->b_pin_count);
> >  }
> >  
> > -static inline void xfs_buf_relse(xfs_buf_t *bp)
> > -{
> > -	xfs_buf_unlock(bp);
> > -	xfs_buf_rele(bp);
> > -}
> > -
> >  static inline int
> >  xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
> >  {
> > diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
> > index 9e75e8d6042ec..8659cf4282a64 100644
> > --- a/fs/xfs/xfs_buf_item.c
> > +++ b/fs/xfs/xfs_buf_item.c
> > @@ -1158,20 +1158,15 @@ xfs_buf_iodone_callback_error(
> >  	return false;
> >  }
> >  
> > -/*
> > - * This is the iodone() function for buffers which have had callbacks attached
> > - * to them by xfs_buf_attach_iodone(). We need to iterate the items on the
> > - * callback list, mark the buffer as having no more callbacks and then push the
> > - * buffer through IO completion processing.
> > - */
> > -void
> > -xfs_buf_iodone_callbacks(
> > +static void
> > +xfs_buf_run_callbacks(
> >  	struct xfs_buf		*bp)
> >  {
> > +
> >  	/*
> > -	 * If there is an error, process it. Some errors require us
> > -	 * to run callbacks after failure processing is done so we
> > -	 * detect that and take appropriate action.
> > +	 * If there is an error, process it. Some errors require us to run
> > +	 * callbacks after failure processing is done so we detect that and take
> > +	 * appropriate action.
> >  	 */
> >  	if (bp->b_error && xfs_buf_iodone_callback_error(bp))
> >  		return;
> > @@ -1188,9 +1183,34 @@ xfs_buf_iodone_callbacks(
> >  	bp->b_log_item = NULL;
> >  	list_del_init(&bp->b_li_list);
> >  	bp->b_iodone = NULL;
> > +}
> > +
> > +/*
> > + * This is the iodone() function for buffers which have had callbacks attached
> > + * to them by xfs_buf_attach_iodone(). We need to iterate the items on the
> > + * callback list, mark the buffer as having no more callbacks and then push the
> > + * buffer through IO completion processing.
> > + */
> > +void
> > +xfs_buf_iodone_callbacks(
> > +	struct xfs_buf		*bp)
> > +{
> > +	xfs_buf_run_callbacks(bp);
> >  	xfs_buf_ioend(bp);
> >  }
> >  
> > +/*
> > + * Inode buffer iodone callback function.
> > + */
> > +void
> > +xfs_buf_inode_iodone(
> > +	struct xfs_buf		*bp)
> > +{
> > +	xfs_buf_run_callbacks(bp);
> > +	xfs_buf_ioend_finish(bp);
> > +}
> > +
> > +
> >  /*
> >   * This is the iodone() function for buffers which have been
> >   * logged.  It is called when they are eventually flushed out.
> > diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
> > index c9c57e2da9327..a342933ad9b8d 100644
> > --- a/fs/xfs/xfs_buf_item.h
> > +++ b/fs/xfs/xfs_buf_item.h
> > @@ -59,6 +59,7 @@ void	xfs_buf_attach_iodone(struct xfs_buf *,
> >  			      struct xfs_log_item *);
> >  void	xfs_buf_iodone_callbacks(struct xfs_buf *);
> >  void	xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
> > +void	xfs_buf_inode_iodone(struct xfs_buf *);
> >  bool	xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec);
> >  
> >  extern kmem_zone_t	*xfs_buf_item_zone;
> > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > index ac3c8af8c9a14..d5dee57f914a9 100644
> > --- a/fs/xfs/xfs_inode.c
> > +++ b/fs/xfs/xfs_inode.c
> > @@ -3860,13 +3860,13 @@ xfs_iflush_int(
> >  	 * completion on the buffer to remove the inode from the AIL and release
> >  	 * the flush lock.
> >  	 */
> > +	bp->b_flags |= _XBF_INODES;
> >  	xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
> >  
> >  	/* generate the checksum. */
> >  	xfs_dinode_calc_crc(mp, dip);
> >  
> >  	ASSERT(!list_empty(&bp->b_li_list));
> > -	ASSERT(bp->b_iodone != NULL);
> >  	return error;
> >  }
> >  
> > diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
> > index 08174ffa21189..552d0869aa0fe 100644
> > --- a/fs/xfs/xfs_trans_buf.c
> > +++ b/fs/xfs/xfs_trans_buf.c
> > @@ -626,6 +626,7 @@ xfs_trans_inode_buf(
> >  	ASSERT(atomic_read(&bip->bli_refcount) > 0);
> >  
> >  	bip->bli_flags |= XFS_BLI_INODE_BUF;
> > +	bp->b_flags |= _XBF_INODES;
> >  	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
> >  }
> >  
> > @@ -651,6 +652,7 @@ xfs_trans_stale_inode_buf(
> >  
> >  	bip->bli_flags |= XFS_BLI_STALE_INODE;
> >  	bip->bli_item.li_cb = xfs_buf_iodone;
> > +	bp->b_flags |= _XBF_INODES;
> >  	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
> >  }
> >  
> > @@ -675,6 +677,7 @@ xfs_trans_inode_alloc_buf(
> >  	ASSERT(atomic_read(&bip->bli_refcount) > 0);
> >  
> >  	bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
> > +	bp->b_flags |= _XBF_INODES;
> >  	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
> >  }
> >  
> > -- 
> > 2.26.2.761.g0e0b3e54be
> > 
>
Dave Chinner June 2, 2020, 9:29 p.m. UTC | #3
On Tue, Jun 02, 2020 at 12:45:35PM -0400, Brian Foster wrote:
> On Tue, Jun 02, 2020 at 07:42:25AM +1000, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > Inode buffers always have write IO callbacks, so by marking them
> > directly we can avoid needing to attach ->b_iodone functions to
> > them. This avoids an indirect call, and makes future modifications
> > much simpler.
> > 
> > This is largely a rearrangement of the code at this point - no IO
> > completion functionality changes at this point, just how the
> > code is run is modified.
> > 
> 
> Ok, I was initially thinking this patch looked incomplete in that we
> continue to set ->b_iodone() on inode buffers even though we'd never
> call it. Looking ahead, I see that the next few patches continue to
> clean that up to eventually remove ->b_iodone(), so that addresses that.
> 
> My only other curiosity is that while there may not be any functional
> difference, this technically changes callback behavior in that we set
> the new flag in some contexts that don't currently attach anything to
> the buffer, right? E.g., xfs_trans_inode_alloc_buf() sets the flag on
> inode chunk init, which means we can write out an inode buffer without
> any attached/flushed inodes.

Yes, it can happen, and it happens before this patch, too, because
the AIL can push the buffer log item directly and that does not
flush dirty inodes to the buffer before it writes back(*).

As it is, xfs_buf_inode_iodone() on a buffer with no inode attached
is functionally identical to the existing xfs_buf_iodone() callback
that would otherwise be done. i.e. it just runs the buffer log item
completion callback. Hence the change here rearranges code, but it
does not change behaviour at all.

(*) this is a double-write bug that this patch set does not address.
i.e. the buffer log item flushes the buffer without flushing inodes, IO
completes, then the inodes are flushed to the buffer and we do another IO to
clean them.  This is addressed by a follow-on patchset that tracks
dirty inodes via ordered cluster buffers, such that pushing the
buffer always triggers xfs_iflush_cluster() on buffers tagged
_XBF_INODES...

> Is the intent of that to support future
> changes? If so, a note about that in the commit log would be helpful.

That's part of it, as you can see from the (*) above. But the commit
log already says "..., and makes future modifications much simpler."
Was that insufficient to indicate that it will be used later on?

Cheers,

Dave.
Brian Foster June 3, 2020, 2:57 p.m. UTC | #4
On Wed, Jun 03, 2020 at 07:29:18AM +1000, Dave Chinner wrote:
> On Tue, Jun 02, 2020 at 12:45:35PM -0400, Brian Foster wrote:
> > On Tue, Jun 02, 2020 at 07:42:25AM +1000, Dave Chinner wrote:
> > > From: Dave Chinner <dchinner@redhat.com>
> > > 
> > > Inode buffers always have write IO callbacks, so by marking them
> > > directly we can avoid needing to attach ->b_iodone functions to
> > > them. This avoids an indirect call, and makes future modifications
> > > much simpler.
> > > 
> > > This is largely a rearrangement of the code at this point - no IO
> > > completion functionality changes at this point, just how the
> > > code is run is modified.
> > > 
> > 
> > Ok, I was initially thinking this patch looked incomplete in that we
> > continue to set ->b_iodone() on inode buffers even though we'd never
> > call it. Looking ahead, I see that the next few patches continue to
> > clean that up to eventually remove ->b_iodone(), so that addresses that.
> > 
> > My only other curiosity is that while there may not be any functional
> > difference, this technically changes callback behavior in that we set
> > the new flag in some contexts that don't currently attach anything to
> > the buffer, right? E.g., xfs_trans_inode_alloc_buf() sets the flag on
> > inode chunk init, which means we can write out an inode buffer without
> > any attached/flushed inodes.
> 
> Yes, it can happen, and it happens before this patch, too, because
> the AIL can push the buffer log item directly and that does not
> flush dirty inodes to the buffer before it writes back(*).
> 

I was thinking more about cases where there are actually no inodes
attached.

> As it is, xfs_buf_inode_iodone() on a buffer with no inode attached
> if functionally identical to the existing xfs_buf_iodone() callback
> that would otherwise be done. i.e. it just runs the buffer log item
> completion callback. Hence the change here rearranges code, but it
> does not change behaviour at all.
> 

Right. That's evident from the code, but doesn't help me understand
why the change is made. That's all I'm asking for...

> (*) this is a double-write bug that this patch set does not address.
> i.e. buffer log item flushes the buffer without flushing inodes, IO
> compeletes, then inode flush to the buffer and we do another IO to
> clean them.  This is addressed by a follow-on patchset that tracks
> dirty inodes via ordered cluster buffers, such that pushing the
> buffer always triggers xfs_iflush_cluster() on buffers tagged
> _XBF_INODES...
> 

Ok, interesting (but seems beyond the scope of this series).

> > Is the intent of that to support future
> > changes? If so, a note about that in the commit log would be helpful.
> 
> That's part of it, as you can see from the (*) above. But the commit
> log already says "..., and makes future modifications much simpler."
> Was that insufficient to indicate that it will be used later on?
> 

That's a rather vague hint. ;P I was more hoping for something like:
"While this is largely a refactor of existing functionality, broaden the
scope of the flag to beyond where inodes are explicitly attached because
<some actual reason>. This has the effect of possibly invoking the
callback in cases where it wouldn't have been previously, but this is
not a functional change because the callback is effectively a no-op when
inodes are not attached."

Brian

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
>
Dave Chinner June 3, 2020, 9:21 p.m. UTC | #5
On Wed, Jun 03, 2020 at 10:57:49AM -0400, Brian Foster wrote:
> On Wed, Jun 03, 2020 at 07:29:18AM +1000, Dave Chinner wrote:
> > On Tue, Jun 02, 2020 at 12:45:35PM -0400, Brian Foster wrote:
> > > On Tue, Jun 02, 2020 at 07:42:25AM +1000, Dave Chinner wrote:
> > > > From: Dave Chinner <dchinner@redhat.com>
> > > > 
> > > > Inode buffers always have write IO callbacks, so by marking them
> > > > directly we can avoid needing to attach ->b_iodone functions to
> > > > them. This avoids an indirect call, and makes future modifications
> > > > much simpler.
> > > > 
> > > > This is largely a rearrangement of the code at this point - no IO
> > > > completion functionality changes at this point, just how the
> > > > code is run is modified.
> > > > 
> > > 
> > > Ok, I was initially thinking this patch looked incomplete in that we
> > > continue to set ->b_iodone() on inode buffers even though we'd never
> > > call it. Looking ahead, I see that the next few patches continue to
> > > clean that up to eventually remove ->b_iodone(), so that addresses that.
> > > 
> > > My only other curiosity is that while there may not be any functional
> > > difference, this technically changes callback behavior in that we set
> > > the new flag in some contexts that don't currently attach anything to
> > > the buffer, right? E.g., xfs_trans_inode_alloc_buf() sets the flag on
> > > inode chunk init, which means we can write out an inode buffer without
> > > any attached/flushed inodes.
> > 
> > Yes, it can happen, and it happens before this patch, too, because
> > the AIL can push the buffer log item directly and that does not
> > flush dirty inodes to the buffer before it writes back(*).
> > 
> 
> I was thinking more about cases where there are actually no inodes
> attached.
> 
> > As it is, xfs_buf_inode_iodone() on a buffer with no inode attached
> > if functionally identical to the existing xfs_buf_iodone() callback
> > that would otherwise be done. i.e. it just runs the buffer log item
> > completion callback. Hence the change here rearranges code, but it
> > does not change behaviour at all.
> > 
> 
> Right. That's indicative from the code, but doesn't help me understand
> why the change is made. That's all I'm asking for...
> 
> > (*) this is a double-write bug that this patch set does not address.
> > i.e. buffer log item flushes the buffer without flushing inodes, IO
> > compeletes, then inode flush to the buffer and we do another IO to
> > clean them.  This is addressed by a follow-on patchset that tracks
> > dirty inodes via ordered cluster buffers, such that pushing the
> > buffer always triggers xfs_iflush_cluster() on buffers tagged
> > _XBF_INODES...
> > 
> 
> Ok, interesting (but seems beyond the scope of this series).

It is used in this series in the ail buffer resubmit code to clear
the LI_FAILED state appropriately, because inode items are treated
differently to dquot items once they track the cluster buffer...

> > > Is the intent of that to support future
> > > changes? If so, a note about that in the commit log would be helpful.
> > 
> > That's part of it, as you can see from the (*) above. But the commit
> > log already says "..., and makes future modifications much simpler."
> > Was that insufficient to indicate that it will be used later on?
> > 
> 
> That's a rather vague hint. ;P I was more hoping for something like:
> "While this is largely a refactor of existing functionality, broaden the
> scope of the flag to beyond where inodes are explicitly attached because
> <some actual reason>. This has the effect of possibly invoking the
> callback in cases where it wouldn't have been previously, but this is
> not a functional change because the callback is effectively a no-op when
> inodes are not attached."

Ok.

-Dave.

Patch
diff mbox series

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 9c2fbb6bbf89d..fcf650575be61 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -14,6 +14,8 @@ 
 #include "xfs_mount.h"
 #include "xfs_trace.h"
 #include "xfs_log.h"
+#include "xfs_trans.h"
+#include "xfs_buf_item.h"
 #include "xfs_errortag.h"
 #include "xfs_error.h"
 
@@ -1202,12 +1204,21 @@  xfs_buf_ioend(
 		bp->b_flags |= XBF_DONE;
 	}
 
-	if (bp->b_iodone)
+	if (read)
+		goto out_finish;
+
+	if (bp->b_flags & _XBF_INODES) {
+		xfs_buf_inode_iodone(bp);
+		return;
+	}
+
+	if (bp->b_iodone) {
 		(*(bp->b_iodone))(bp);
-	else if (bp->b_flags & XBF_ASYNC)
-		xfs_buf_relse(bp);
-	else
-		complete(&bp->b_iowait);
+		return;
+	}
+
+out_finish:
+	xfs_buf_ioend_finish(bp);
 }
 
 static void
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 050c53b739e24..2400cb90a04c6 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -30,15 +30,18 @@ 
 #define XBF_STALE	 (1 << 6) /* buffer has been staled, do not find it */
 #define XBF_WRITE_FAIL	 (1 << 7) /* async writes have failed on this buffer */
 
-/* flags used only as arguments to access routines */
-#define XBF_TRYLOCK	 (1 << 16)/* lock requested, but do not wait */
-#define XBF_UNMAPPED	 (1 << 17)/* do not map the buffer */
+/* buffer type flags for write callbacks */
+#define _XBF_INODES	 (1 << 16)/* inode buffer */
 
 /* flags used only internally */
 #define _XBF_PAGES	 (1 << 20)/* backed by refcounted pages */
 #define _XBF_KMEM	 (1 << 21)/* backed by heap memory */
 #define _XBF_DELWRI_Q	 (1 << 22)/* buffer on a delwri queue */
 
+/* flags used only as arguments to access routines */
+#define XBF_TRYLOCK	 (1 << 30)/* lock requested, but do not wait */
+#define XBF_UNMAPPED	 (1 << 31)/* do not map the buffer */
+
 typedef unsigned int xfs_buf_flags_t;
 
 #define XFS_BUF_FLAGS \
@@ -50,12 +53,13 @@  typedef unsigned int xfs_buf_flags_t;
 	{ XBF_DONE,		"DONE" }, \
 	{ XBF_STALE,		"STALE" }, \
 	{ XBF_WRITE_FAIL,	"WRITE_FAIL" }, \
-	{ XBF_TRYLOCK,		"TRYLOCK" },	/* should never be set */\
-	{ XBF_UNMAPPED,		"UNMAPPED" },	/* ditto */\
+	{ _XBF_INODES,		"INODES" }, \
 	{ _XBF_PAGES,		"PAGES" }, \
 	{ _XBF_KMEM,		"KMEM" }, \
-	{ _XBF_DELWRI_Q,	"DELWRI_Q" }
-
+	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \
+	/* The following interface flags should never be set */ \
+	{ XBF_TRYLOCK,		"TRYLOCK" }, \
+	{ XBF_UNMAPPED,		"UNMAPPED" }
 
 /*
  * Internal state flags.
@@ -257,9 +261,23 @@  extern void xfs_buf_unlock(xfs_buf_t *);
 #define xfs_buf_islocked(bp) \
 	((bp)->b_sema.count <= 0)
 
+static inline void xfs_buf_relse(xfs_buf_t *bp)
+{
+	xfs_buf_unlock(bp);
+	xfs_buf_rele(bp);
+}
+
 /* Buffer Read and Write Routines */
 extern int xfs_bwrite(struct xfs_buf *bp);
 extern void xfs_buf_ioend(struct xfs_buf *bp);
+static inline void xfs_buf_ioend_finish(struct xfs_buf *bp)
+{
+	if (bp->b_flags & XBF_ASYNC)
+		xfs_buf_relse(bp);
+	else
+		complete(&bp->b_iowait);
+}
+
 extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error,
 		xfs_failaddr_t failaddr);
 #define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address)
@@ -324,12 +342,6 @@  static inline int xfs_buf_ispinned(struct xfs_buf *bp)
 	return atomic_read(&bp->b_pin_count);
 }
 
-static inline void xfs_buf_relse(xfs_buf_t *bp)
-{
-	xfs_buf_unlock(bp);
-	xfs_buf_rele(bp);
-}
-
 static inline int
 xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
 {
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 9e75e8d6042ec..8659cf4282a64 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1158,20 +1158,15 @@  xfs_buf_iodone_callback_error(
 	return false;
 }
 
-/*
- * This is the iodone() function for buffers which have had callbacks attached
- * to them by xfs_buf_attach_iodone(). We need to iterate the items on the
- * callback list, mark the buffer as having no more callbacks and then push the
- * buffer through IO completion processing.
- */
-void
-xfs_buf_iodone_callbacks(
+static void
+xfs_buf_run_callbacks(
 	struct xfs_buf		*bp)
 {
+
 	/*
-	 * If there is an error, process it. Some errors require us
-	 * to run callbacks after failure processing is done so we
-	 * detect that and take appropriate action.
+	 * If there is an error, process it. Some errors require us to run
+	 * callbacks after failure processing is done so we detect that and take
+	 * appropriate action.
 	 */
 	if (bp->b_error && xfs_buf_iodone_callback_error(bp))
 		return;
@@ -1188,9 +1183,34 @@  xfs_buf_iodone_callbacks(
 	bp->b_log_item = NULL;
 	list_del_init(&bp->b_li_list);
 	bp->b_iodone = NULL;
+}
+
+/*
+ * This is the iodone() function for buffers which have had callbacks attached
+ * to them by xfs_buf_attach_iodone(). We need to iterate the items on the
+ * callback list, mark the buffer as having no more callbacks and then push the
+ * buffer through IO completion processing.
+ */
+void
+xfs_buf_iodone_callbacks(
+	struct xfs_buf		*bp)
+{
+	xfs_buf_run_callbacks(bp);
 	xfs_buf_ioend(bp);
 }
 
+/*
+ * Inode buffer iodone callback function.
+ */
+void
+xfs_buf_inode_iodone(
+	struct xfs_buf		*bp)
+{
+	xfs_buf_run_callbacks(bp);
+	xfs_buf_ioend_finish(bp);
+}
+
+
 /*
  * This is the iodone() function for buffers which have been
  * logged.  It is called when they are eventually flushed out.
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index c9c57e2da9327..a342933ad9b8d 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -59,6 +59,7 @@  void	xfs_buf_attach_iodone(struct xfs_buf *,
 			      struct xfs_log_item *);
 void	xfs_buf_iodone_callbacks(struct xfs_buf *);
 void	xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
+void	xfs_buf_inode_iodone(struct xfs_buf *);
 bool	xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec);
 
 extern kmem_zone_t	*xfs_buf_item_zone;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ac3c8af8c9a14..d5dee57f914a9 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3860,13 +3860,13 @@  xfs_iflush_int(
 	 * completion on the buffer to remove the inode from the AIL and release
 	 * the flush lock.
 	 */
+	bp->b_flags |= _XBF_INODES;
 	xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
 
 	/* generate the checksum. */
 	xfs_dinode_calc_crc(mp, dip);
 
 	ASSERT(!list_empty(&bp->b_li_list));
-	ASSERT(bp->b_iodone != NULL);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 08174ffa21189..552d0869aa0fe 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -626,6 +626,7 @@  xfs_trans_inode_buf(
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
 
 	bip->bli_flags |= XFS_BLI_INODE_BUF;
+	bp->b_flags |= _XBF_INODES;
 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
 }
 
@@ -651,6 +652,7 @@  xfs_trans_stale_inode_buf(
 
 	bip->bli_flags |= XFS_BLI_STALE_INODE;
 	bip->bli_item.li_cb = xfs_buf_iodone;
+	bp->b_flags |= _XBF_INODES;
 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
 }
 
@@ -675,6 +677,7 @@  xfs_trans_inode_alloc_buf(
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
 
 	bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
+	bp->b_flags |= _XBF_INODES;
 	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
 }