[14/24] xfs: tail updates only need to occur when LSN changes

Message ID 20190801021752.4986-15-david@fromorbit.com (mailing list archive)
State New, archived
Series mm, xfs: non-blocking inode reclaim

Commit Message

Dave Chinner Aug. 1, 2019, 2:17 a.m. UTC
From: Dave Chinner <dchinner@redhat.com>

We currently wake anything waiting on the log tail to move whenever
the log item at the tail of the log is removed. Historically this
was fine behaviour because there were very few items at any given
LSN. But with delayed logging, there may be thousands of items at
any given LSN, and we can't move the tail until they are all gone.

Hence if we are removing them in near tail-first order, we might be
waking up processes waiting on the tail LSN to change (e.g. log
space waiters) repeatedly without them being able to make progress.
This also occurs with the new sync push waiters, and can result in
thousands of spurious wakeups every second when under heavy direct
reclaim pressure.

To fix this, check that the tail LSN has actually changed on the
AIL before triggering wakeups. This will reduce the number of
spurious wakeups when doing bulk AIL removal and make this code much
more efficient.

XXX: occasionally get a temporary hang in xfs_ail_push_sync() with
this change - log force from log worker gets things moving again.
Only happens under extreme memory pressure - possibly push racing
with a tail update on an empty log. Needs further investigation.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_inode_item.c | 18 +++++++++++++-----
 fs/xfs/xfs_trans_ail.c  | 37 ++++++++++++++++++++++++++++---------
 fs/xfs/xfs_trans_priv.h |  4 ++--
 3 files changed, 43 insertions(+), 16 deletions(-)
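
In short: removing an item now reports the tail LSN it was removed
from instead of a "min item changed" boolean, and the finish step
only runs the tail update and wakeups when that LSN differs from the
new minimum. A minimal sketch of the caller-side pattern (simplified
from the patch below):

	spin_lock(&ailp->ail_lock);
	/* returns lip's LSN if it was the tail item, 0 otherwise */
	tail_lsn = xfs_ail_delete_one(ailp, lip);
	/* drops ail_lock; skips wakeups if the tail LSN is unchanged */
	xfs_ail_delete_finish(ailp, tail_lsn);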

Comments

Brian Foster Aug. 5, 2019, 5:53 p.m. UTC | #1
On Thu, Aug 01, 2019 at 12:17:42PM +1000, Dave Chinner wrote:
> [snip commit message]

Ok, this addresses the wakeup granularity issue mentioned in the
previous patch. Note that I was kind of wondering why we wouldn't base
this on the l_tail_lsn update in xlog_assign_tail_lsn_locked() as
opposed to the current approach.

For example, xlog_assign_tail_lsn_locked() could simply check the
current min item against the current l_tail_lsn before it does the
assignment and use that to trigger tail change events. If we wanted to
also filter out the other wakeups (as this patch does) then we could
just pass a bool pointer or something that returns whether the tail
actually changed.
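
A rough sketch of what that could look like (hypothetical code, not
part of this patch - the bool out parameter is just illustrative):

	xfs_lsn_t
	xlog_assign_tail_lsn_locked(
		struct xfs_mount	*mp,
		bool			*tail_changed)
	{
		struct xlog		*log = mp->m_log;
		struct xfs_log_item	*lip = xfs_ail_min(mp->m_ail);
		xfs_lsn_t		tail_lsn;

		if (lip)
			tail_lsn = lip->li_lsn;
		else
			tail_lsn = atomic64_read(&log->l_last_sync_lsn);

		if (tail_changed)
			*tail_changed =
				(tail_lsn != atomic64_read(&log->l_tail_lsn));
		atomic64_set(&log->l_tail_lsn, tail_lsn);
		return tail_lsn;
	}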

Brian

> [snip patch; the full diff appears in the Patch section below]
Dave Chinner Aug. 5, 2019, 11:28 p.m. UTC | #2
On Mon, Aug 05, 2019 at 01:53:26PM -0400, Brian Foster wrote:
> On Thu, Aug 01, 2019 at 12:17:42PM +1000, Dave Chinner wrote:
> > [snip commit message]
> 
> Ok, this addresses the wakeup granularity issue mentioned in the
> previous patch. Note that I was kind of wondering why we wouldn't base
> this on the l_tail_lsn update in xlog_assign_tail_lsn_locked() as
> opposed to the current approach.

Because I didn't think of it? :)

There's so much other stuff in this patch set that I didn't spend a
lot of time thinking about alternatives. This was a simple
code transformation that did what I wanted, and I went on to burn
brain cells on other, more complex issues that needed to be solved...

> For example, xlog_assign_tail_lsn_locked() could simply check the
> current min item against the current l_tail_lsn before it does the
> assignment and use that to trigger tail change events. If we wanted to
> also filter out the other wakeups (as this patch does) then we could
> just pass a bool pointer or something that returns whether the tail
> actually changed.

Yeah, I'll have a look at this - I might rework it as additional
patches now that the code is making decisions based on the LSN
rather than on whether the tail log item changed...

Cheers,

Dave.
Dave Chinner Aug. 6, 2019, 5:33 a.m. UTC | #3
On Tue, Aug 06, 2019 at 09:28:26AM +1000, Dave Chinner wrote:
> On Mon, Aug 05, 2019 at 01:53:26PM -0400, Brian Foster wrote:
> > On Thu, Aug 01, 2019 at 12:17:42PM +1000, Dave Chinner wrote:
> > > [snip commit message]
> > 
> > Ok, this addresses the wakeup granularity issue mentioned in the
> > previous patch. Note that I was kind of wondering why we wouldn't base
> > this on the l_tail_lsn update in xlog_assign_tail_lsn_locked() as
> > opposed to the current approach.
> 
> Because I didn't think of it? :)
> 
> There's so much other stuff in this patch set that I didn't spend a
> lot of time thinking about alternatives. This was a simple
> code transformation that did what I wanted, and I went on to burn
> brain cells on other, more complex issues that needed to be solved...
> 
> > For example, xlog_assign_tail_lsn_locked() could simply check the
> > current min item against the current l_tail_lsn before it does the
> > assignment and use that to trigger tail change events. If we wanted to
> > also filter out the other wakeups (as this patch does) then we could
> > just pass a bool pointer or something that returns whether the tail
> > actually changed.
> 
> Yeah, I'll have a look at this - I might rework it as additional
> patches now that the code is making decisions based on the LSN
> rather than on whether the tail log item changed...

Ok, this is not worth the complexity. The wakeup code has to be able
to tell the difference between a changed tail LSN and an empty AIL
so that wakeups can be issued when the AIL is finally emptied.
Unmount (xfs_ail_push_all_sync()) relies on this, and
xlog_assign_tail_lsn_locked() hides the empty AIL from the caller
by returning log->l_last_sync_lsn instead.

Hence, if we used the return value of xlog_assign_tail_lsn_locked()
as the tail LSN, the wakeup code would still have to check for an
empty AIL when the tail has changed. At that point the logic becomes
somewhat convoluted, and it's far simpler to use __xfs_ail_min_lsn(),
which returns 0 when the log is empty.
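
Concretely, a sketch of the difference (using the helper added in
this patch):

	lsn = __xfs_ail_min_lsn(ailp);		/* 0 => AIL is empty */

	lsn = xlog_assign_tail_lsn_locked(mp);	/* never 0: an empty AIL
						 * yields l_last_sync_lsn,
						 * which looks just like a
						 * normal tail advance */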

So, nice idea, but it doesn't make the code simpler or easier to
understand....

Cheers,

Dave.
Brian Foster Aug. 6, 2019, 12:53 p.m. UTC | #4
On Tue, Aug 06, 2019 at 03:33:38PM +1000, Dave Chinner wrote:
> On Tue, Aug 06, 2019 at 09:28:26AM +1000, Dave Chinner wrote:
> > On Mon, Aug 05, 2019 at 01:53:26PM -0400, Brian Foster wrote:
> > > On Thu, Aug 01, 2019 at 12:17:42PM +1000, Dave Chinner wrote:
> > > > [snip commit message]
> > > 
> > > Ok, this addresses the wakeup granularity issue mentioned in the
> > > previous patch. Note that I was kind of wondering why we wouldn't base
> > > this on the l_tail_lsn update in xlog_assign_tail_lsn_locked() as
> > > opposed to the current approach.
> > 
> > Because I didn't think of it? :)
> > 
> > There's so much other stuff in this patch set that I didn't spend a
> > lot of time thinking about alternatives. This was a simple
> > code transformation that did what I wanted, and I went on to burn
> > brain cells on other, more complex issues that needed to be solved...
> > 
> > > For example, xlog_assign_tail_lsn_locked() could simply check the
> > > current min item against the current l_tail_lsn before it does the
> > > assignment and use that to trigger tail change events. If we wanted to
> > > also filter out the other wakeups (as this patch does) then we could
> > > just pass a bool pointer or something that returns whether the tail
> > > actually changed.
> > 
> > Yeah, I'll have a look at this - I might rework it as additional
> > patches now that the code is making decisions based on the LSN
> > rather than on whether the tail log item changed...
> 
> Ok, this is not worth the complexity. The wakeup code has to be able
> to tell the difference between a changed tail LSN and an empty AIL
> so that wakeups can be issued when the AIL is finally emptied.
> Unmount (xfs_ail_push_all_sync()) relies on this, and
> xlog_assign_tail_lsn_locked() hides the empty AIL from the caller
> by returning log->l_last_sync_lsn instead.
> 

Wouldn't either case just be a wakeup from xlog_assign_tail_lsn_locked()
(which should probably be renamed if we took that approach)? It's called
when we've removed the min item from the AIL and so potentially need to
update the tail lsn. 

> Hence, if we used the return value of xlog_assign_tail_lsn_locked()
> as the tail LSN, the wakeup code would still have to check for an
> empty AIL when the tail has changed. At that point the logic becomes
> somewhat convoluted, and it's far simpler to use __xfs_ail_min_lsn(),
> which returns 0 when the log is empty.
> 
> So, nice idea, but it doesn't make the code simpler or easier to
> understand....
> 

It's not that big of a deal either way. BTW on another quick look, I
think something like xfs_ail_update_tail(ailp, old_tail) is a bit more
self-documenting than xfs_ail_delete_finish(ailp, old_lsn).

Brian

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
Dave Chinner Aug. 6, 2019, 9:11 p.m. UTC | #5
On Tue, Aug 06, 2019 at 08:53:21AM -0400, Brian Foster wrote:
> On Tue, Aug 06, 2019 at 03:33:38PM +1000, Dave Chinner wrote:
> > On Tue, Aug 06, 2019 at 09:28:26AM +1000, Dave Chinner wrote:
> > > On Mon, Aug 05, 2019 at 01:53:26PM -0400, Brian Foster wrote:
> > > > On Thu, Aug 01, 2019 at 12:17:42PM +1000, Dave Chinner wrote:
> > > > > [snip commit message]
> > > > 
> > > > Ok, this addresses the wakeup granularity issue mentioned in the
> > > > previous patch. Note that I was kind of wondering why we wouldn't base
> > > > this on the l_tail_lsn update in xlog_assign_tail_lsn_locked() as
> > > > opposed to the current approach.
> > > 
> > > Because I didn't think of it? :)
> > > 
> > > There's so much other stuff in this patch set that I didn't spend a
> > > lot of time thinking about alternatives. This was a simple
> > > code transformation that did what I wanted, and I went on to burn
> > > brain cells on other, more complex issues that needed to be solved...
> > > 
> > > > For example, xlog_assign_tail_lsn_locked() could simply check the
> > > > current min item against the current l_tail_lsn before it does the
> > > > assignment and use that to trigger tail change events. If we wanted to
> > > > also filter out the other wakeups (as this patch does) then we could
> > > > just pass a bool pointer or something that returns whether the tail
> > > > actually changed.
> > > 
> > > Yeah, I'll have a look at this - I might rework it as additional
> > > patches now that the code is making decisions based on the LSN
> > > rather than on whether the tail log item changed...
> > 
> > Ok, this is not worth the complexity. The wakeup code has to be able
> > to tell the difference between a changed tail LSN and an empty AIL
> > so that wakeups can be issued when the AIL is finally emptied.
> > Unmount (xfs_ail_push_all_sync()) relies on this, and
> > xlog_assign_tail_lsn_locked() hides the empty AIL from the caller
> > by returning log->l_last_sync_lsn instead.
> > 
> 
> Wouldn't either case just be a wakeup from xlog_assign_tail_lsn_locked()
> (which should probably be renamed if we took that approach)? It's called
> when we've removed the min item from the AIL and so potentially need to
> update the tail lsn. 

Not easily, because xlog_assign_tail_lsn_locked() is also used to
grab the current tail when we are formatting the log header during a
CIL checkpoint. We do not want to be doing wakeups there.
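
(Roughly, the two callers in question - a hedged sketch, the
checkpoint side is approximate:)

	/* AIL side: tail item removed, update the tail and wake waiters */
	xlog_assign_tail_lsn_locked(mp);
	xfs_log_space_wake(mp);

	/* log side: stamp the current tail into the checkpoint header */
	iclog->ic_header.h_tail_lsn =
			cpu_to_be64(xlog_assign_tail_lsn(mp));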

And, to tell the truth, I don't really want to screw with a function
that provides on-disk information for log recovery in this
series. That brings a whole new level of jeopardy to this patch set
I'd prefer to avoid....

> > Hence, if we used the return value of xlog_assign_tail_lsn_locked()
> > as the tail LSN, the wakeup code would still have to check for an
> > empty AIL when the tail has changed. At that point the logic becomes
> > somewhat convoluted, and it's far simpler to use __xfs_ail_min_lsn(),
> > which returns 0 when the log is empty.
> > 
> > So, nice idea, but it doesn't make the code simpler or easier to
> > understand....
> 
> It's not that big of a deal either way. BTW on another quick look, I
> think something like xfs_ail_update_tail(ailp, old_tail) is a bit more
> self-documenting than xfs_ail_delete_finish(ailp, old_lsn).

I had already renamed it to xfs_ail_update_finish() when I updated
the last patch to include the bulk update case.
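
(Presumably keeping the same LSN-based signature under the new name:

	void xfs_ail_update_finish(struct xfs_ail *ailp, xfs_lsn_t old_lsn)
			__releases(ailp->ail_lock);

so it covers both the bulk update and the delete paths.)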

Cheers,

Dave.

Patch

diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 7b942a63e992..16a7d6f752c9 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -731,19 +731,27 @@  xfs_iflush_done(
 	 * holding the lock before removing the inode from the AIL.
 	 */
 	if (need_ail) {
-		bool			mlip_changed = false;
+		xfs_lsn_t	tail_lsn = 0;
 
 		/* this is an opencoded batch version of xfs_trans_ail_delete */
 		spin_lock(&ailp->ail_lock);
 		list_for_each_entry(blip, &tmp, li_bio_list) {
 			if (INODE_ITEM(blip)->ili_logged &&
-			    blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn)
-				mlip_changed |= xfs_ail_delete_one(ailp, blip);
-			else {
+			    blip->li_lsn == INODE_ITEM(blip)->ili_flush_lsn) {
+				/*
+				 * xfs_ail_delete_finish() only cares about the
+				 * lsn of the first tail item removed, any others
+				 * will be at the same or higher lsn so we just
+				 * ignore them.
+				 */
+				xfs_lsn_t lsn = xfs_ail_delete_one(ailp, blip);
+				if (!tail_lsn && lsn)
+					tail_lsn = lsn;
+			} else {
 				xfs_clear_li_failed(blip);
 			}
 		}
-		xfs_ail_delete_finish(ailp, mlip_changed);
+		xfs_ail_delete_finish(ailp, tail_lsn);
 	}
 
 	/*
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 9e3102179221..00d66175f41a 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -108,17 +108,25 @@  xfs_ail_next(
  * We need the AIL lock in order to get a coherent read of the lsn of the last
  * item in the AIL.
  */
+static xfs_lsn_t
+__xfs_ail_min_lsn(
+	struct xfs_ail		*ailp)
+{
+	struct xfs_log_item	*lip = xfs_ail_min(ailp);
+
+	if (lip)
+		return lip->li_lsn;
+	return 0;
+}
+
 xfs_lsn_t
 xfs_ail_min_lsn(
 	struct xfs_ail		*ailp)
 {
-	xfs_lsn_t		lsn = 0;
-	struct xfs_log_item	*lip;
+	xfs_lsn_t		lsn;
 
 	spin_lock(&ailp->ail_lock);
-	lip = xfs_ail_min(ailp);
-	if (lip)
-		lsn = lip->li_lsn;
+	lsn = __xfs_ail_min_lsn(ailp);
 	spin_unlock(&ailp->ail_lock);
 
 	return lsn;
@@ -779,12 +787,20 @@  xfs_trans_ail_update_bulk(
 	}
 }
 
-bool
+/*
+ * Delete one log item from the AIL.
+ *
+ * If this item was at the tail of the AIL, return the LSN of the log item so
+ * that we can use it to check if the LSN of the tail of the log has moved
+ * when finishing up the AIL delete process in xfs_ail_delete_finish().
+ */
+xfs_lsn_t
 xfs_ail_delete_one(
 	struct xfs_ail		*ailp,
 	struct xfs_log_item	*lip)
 {
 	struct xfs_log_item	*mlip = xfs_ail_min(ailp);
+	xfs_lsn_t		lsn = lip->li_lsn;
 
 	trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
 	xfs_ail_delete(ailp, lip);
@@ -792,17 +808,20 @@  xfs_ail_delete_one(
 	clear_bit(XFS_LI_IN_AIL, &lip->li_flags);
 	lip->li_lsn = 0;
 
-	return mlip == lip;
+	if (mlip == lip)
+		return lsn;
+	return 0;
 }
 
 void
 xfs_ail_delete_finish(
 	struct xfs_ail		*ailp,
-	bool			do_tail_update) __releases(ailp->ail_lock)
+	xfs_lsn_t		old_lsn) __releases(ailp->ail_lock)
 {
 	struct xfs_mount	*mp = ailp->ail_mount;
 
-	if (!do_tail_update) {
+	/* if the tail lsn hasn't changed, don't do updates or wakeups. */
+	if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) {
 		spin_unlock(&ailp->ail_lock);
 		return;
 	}
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 5ab70b9b896f..db589bb7468d 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -92,8 +92,8 @@  xfs_trans_ail_update(
 	xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
 }
 
-bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
-void xfs_ail_delete_finish(struct xfs_ail *ailp, bool do_tail_update)
+xfs_lsn_t xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
+void xfs_ail_delete_finish(struct xfs_ail *ailp, xfs_lsn_t old_lsn)
 			__releases(ailp->ail_lock);
 void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
 		int shutdown_type);