diff mbox series

[13/24] xfs: synchronous AIL pushing

Message ID 20190801021752.4986-14-david@fromorbit.com (mailing list archive)
State Superseded
Headers show
Series mm, xfs: non-blocking inode reclaim | expand

Commit Message

Dave Chinner Aug. 1, 2019, 2:17 a.m. UTC
From: Dave Chinner <dchinner@redhat.com>

Provide an interface to push the AIL to a target LSN and wait for
the tail of the log to move past that LSN. This is used to wait for
all items older than a specific LSN to either be cleaned (written
back) or relogged to a higher LSN in the AIL. The primary use for
this is to allow IO free inode reclaim throttling.

Factor the common AIL deletion code that does all the wakeups into a
helper so we only have one copy of this somewhat tricky code to
interface with all the wakeups necessary when the LSN of the log
tail changes.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_inode_item.c | 12 +------
 fs/xfs/xfs_trans_ail.c  | 69 +++++++++++++++++++++++++++++++++--------
 fs/xfs/xfs_trans_priv.h |  6 +++-
 3 files changed, 62 insertions(+), 25 deletions(-)

Comments

Brian Foster Aug. 5, 2019, 5:51 p.m. UTC | #1
On Thu, Aug 01, 2019 at 12:17:41PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@redhat.com>
> 
> Provide an interface to push the AIL to a target LSN and wait for
> the tail of the log to move past that LSN. This is used to wait for
> all items older than a specific LSN to either be cleaned (written
> back) or relogged to a higher LSN in the AIL. The primary use for
> this is to allow IO free inode reclaim throttling.
> 
> Factor the common AIL deletion code that does all the wakeups into a
> helper so we only have one copy of this somewhat tricky code to
> interface with all the wakeups necessary when the LSN of the log
> tail changes.
> 
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---
>  fs/xfs/xfs_inode_item.c | 12 +------
>  fs/xfs/xfs_trans_ail.c  | 69 +++++++++++++++++++++++++++++++++--------
>  fs/xfs/xfs_trans_priv.h |  6 +++-
>  3 files changed, 62 insertions(+), 25 deletions(-)
> 
...
> diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
> index 6ccfd75d3c24..9e3102179221 100644
> --- a/fs/xfs/xfs_trans_ail.c
> +++ b/fs/xfs/xfs_trans_ail.c
> @@ -654,6 +654,37 @@ xfs_ail_push_all(
>  		xfs_ail_push(ailp, threshold_lsn);
>  }
>  
> +/*
> + * Push the AIL to a specific lsn and wait for it to complete.
> + */
> +void
> +xfs_ail_push_sync(
> +	struct xfs_ail		*ailp,
> +	xfs_lsn_t		threshold_lsn)
> +{
> +	struct xfs_log_item	*lip;
> +	DEFINE_WAIT(wait);
> +
> +	spin_lock(&ailp->ail_lock);
> +	while ((lip = xfs_ail_min(ailp)) != NULL) {
> +		prepare_to_wait(&ailp->ail_push, &wait, TASK_UNINTERRUPTIBLE);
> +		if (XFS_FORCED_SHUTDOWN(ailp->ail_mount) ||
> +		    XFS_LSN_CMP(threshold_lsn, lip->li_lsn) <= 0)
> +			break;
> +		/* XXX: cmpxchg? */
> +		while (XFS_LSN_CMP(threshold_lsn, ailp->ail_target) > 0)
> +			xfs_trans_ail_copy_lsn(ailp, &ailp->ail_target, &threshold_lsn);

Why the need to repeatedly copy the ail_target like this? If the push
target only ever moves forward, we should only need to do this once at
the start of the function. In fact I'm kind of wondering why we can't
just call xfs_ail_push(). If we check the tail item after grabbing the
spin lock, we should be able to avoid any races with the waker, no?

> +		wake_up_process(ailp->ail_task);
> +		spin_unlock(&ailp->ail_lock);
> +		schedule();
> +		spin_lock(&ailp->ail_lock);
> +	}
> +	spin_unlock(&ailp->ail_lock);
> +
> +	finish_wait(&ailp->ail_push, &wait);
> +}
> +
> +
>  /*
>   * Push out all items in the AIL immediately and wait until the AIL is empty.
>   */
> @@ -764,6 +795,28 @@ xfs_ail_delete_one(
>  	return mlip == lip;
>  }
>  
> +void
> +xfs_ail_delete_finish(
> +	struct xfs_ail		*ailp,
> +	bool			do_tail_update) __releases(ailp->ail_lock)
> +{
> +	struct xfs_mount	*mp = ailp->ail_mount;
> +
> +	if (!do_tail_update) {
> +		spin_unlock(&ailp->ail_lock);
> +		return;
> +	}
> +

Hmm.. so while what we really care about here are tail updates, this
logic is currently driven by removing the min ail log item. That seems
like a lot of potential churn if we're waking the pusher on every object
written back covered by a single log record / checkpoint. Perhaps we
should implement a bit more coarse wakeup logic such as only when the
tail lsn actually changes, for example?

FWIW, it also doesn't look like you've handled the case of relogged
items moving the tail forward anywhere that I can see, so we might be
missing some wakeups here. See xfs_trans_ail_update_bulk() for
additional AIL manipulation.

> +	if (!XFS_FORCED_SHUTDOWN(mp))
> +		xlog_assign_tail_lsn_locked(mp);
> +
> +	wake_up_all(&ailp->ail_push);
> +	if (list_empty(&ailp->ail_head))
> +		wake_up_all(&ailp->ail_empty);
> +	spin_unlock(&ailp->ail_lock);
> +	xfs_log_space_wake(mp);
> +}
> +
>  /**
>   * Remove a log items from the AIL
>   *
> @@ -789,10 +842,9 @@ void
>  xfs_trans_ail_delete(
>  	struct xfs_ail		*ailp,
>  	struct xfs_log_item	*lip,
> -	int			shutdown_type) __releases(ailp->ail_lock)
> +	int			shutdown_type)
>  {
>  	struct xfs_mount	*mp = ailp->ail_mount;
> -	bool			mlip_changed;
>  
>  	if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
>  		spin_unlock(&ailp->ail_lock);
> @@ -805,17 +857,7 @@ xfs_trans_ail_delete(
>  		return;
>  	}
>  
> -	mlip_changed = xfs_ail_delete_one(ailp, lip);
> -	if (mlip_changed) {
> -		if (!XFS_FORCED_SHUTDOWN(mp))
> -			xlog_assign_tail_lsn_locked(mp);
> -		if (list_empty(&ailp->ail_head))
> -			wake_up_all(&ailp->ail_empty);
> -	}
> -
> -	spin_unlock(&ailp->ail_lock);
> -	if (mlip_changed)
> -		xfs_log_space_wake(ailp->ail_mount);
> +	xfs_ail_delete_finish(ailp, xfs_ail_delete_one(ailp, lip));

Nit, but I'm not a fan of the function call buried in a function call
parameter pattern. I tend to read over it at a glance so to me it's not
worth the line of code it saves.

Brian

>  }
>  
>  int
> @@ -834,6 +876,7 @@ xfs_trans_ail_init(
>  	spin_lock_init(&ailp->ail_lock);
>  	INIT_LIST_HEAD(&ailp->ail_buf_list);
>  	init_waitqueue_head(&ailp->ail_empty);
> +	init_waitqueue_head(&ailp->ail_push);
>  
>  	ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
>  			ailp->ail_mount->m_fsname);
> diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
> index 2e073c1c4614..5ab70b9b896f 100644
> --- a/fs/xfs/xfs_trans_priv.h
> +++ b/fs/xfs/xfs_trans_priv.h
> @@ -61,6 +61,7 @@ struct xfs_ail {
>  	int			ail_log_flush;
>  	struct list_head	ail_buf_list;
>  	wait_queue_head_t	ail_empty;
> +	wait_queue_head_t	ail_push;
>  };
>  
>  /*
> @@ -92,8 +93,10 @@ xfs_trans_ail_update(
>  }
>  
>  bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
> +void xfs_ail_delete_finish(struct xfs_ail *ailp, bool do_tail_update)
> +			__releases(ailp->ail_lock);
>  void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
> -		int shutdown_type) __releases(ailp->ail_lock);
> +		int shutdown_type);
>  
>  static inline void
>  xfs_trans_ail_remove(
> @@ -111,6 +114,7 @@ xfs_trans_ail_remove(
>  }
>  
>  void			xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
> +void			xfs_ail_push_sync(struct xfs_ail *, xfs_lsn_t);
>  void			xfs_ail_push_all(struct xfs_ail *);
>  void			xfs_ail_push_all_sync(struct xfs_ail *);
>  struct xfs_log_item	*xfs_ail_min(struct xfs_ail  *ailp);
> -- 
> 2.22.0
>
Dave Chinner Aug. 5, 2019, 11:21 p.m. UTC | #2
On Mon, Aug 05, 2019 at 01:51:53PM -0400, Brian Foster wrote:
> On Thu, Aug 01, 2019 at 12:17:41PM +1000, Dave Chinner wrote:
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > Provide an interface to push the AIL to a target LSN and wait for
> > the tail of the log to move past that LSN. This is used to wait for
> > all items older than a specific LSN to either be cleaned (written
> > back) or relogged to a higher LSN in the AIL. The primary use for
> > this is to allow IO free inode reclaim throttling.
> > 
> > Factor the common AIL deletion code that does all the wakeups into a
> > helper so we only have one copy of this somewhat tricky code to
> > interface with all the wakeups necessary when the LSN of the log
> > tail changes.
> > 
> > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > ---
> >  fs/xfs/xfs_inode_item.c | 12 +------
> >  fs/xfs/xfs_trans_ail.c  | 69 +++++++++++++++++++++++++++++++++--------
> >  fs/xfs/xfs_trans_priv.h |  6 +++-
> >  3 files changed, 62 insertions(+), 25 deletions(-)
> > 
> ...
> > diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
> > index 6ccfd75d3c24..9e3102179221 100644
> > --- a/fs/xfs/xfs_trans_ail.c
> > +++ b/fs/xfs/xfs_trans_ail.c
> > @@ -654,6 +654,37 @@ xfs_ail_push_all(
> >  		xfs_ail_push(ailp, threshold_lsn);
> >  }
> >  
> > +/*
> > + * Push the AIL to a specific lsn and wait for it to complete.
> > + */
> > +void
> > +xfs_ail_push_sync(
> > +	struct xfs_ail		*ailp,
> > +	xfs_lsn_t		threshold_lsn)
> > +{
> > +	struct xfs_log_item	*lip;
> > +	DEFINE_WAIT(wait);
> > +
> > +	spin_lock(&ailp->ail_lock);
> > +	while ((lip = xfs_ail_min(ailp)) != NULL) {
> > +		prepare_to_wait(&ailp->ail_push, &wait, TASK_UNINTERRUPTIBLE);
> > +		if (XFS_FORCED_SHUTDOWN(ailp->ail_mount) ||
> > +		    XFS_LSN_CMP(threshold_lsn, lip->li_lsn) <= 0)
> > +			break;
> > +		/* XXX: cmpxchg? */
> > +		while (XFS_LSN_CMP(threshold_lsn, ailp->ail_target) > 0)
> > +			xfs_trans_ail_copy_lsn(ailp, &ailp->ail_target, &threshold_lsn);
> 
> Why the need to repeatedly copy the ail_target like this? If the push

It's a hack because the other updates are done unlocked and this
doesn't contain the memroy barriers needed to make it correct
and race free.

Hence the comment "XXX: cmpxchg" to ensure that:

	a) we only ever move the target forwards;
	b) we resolve update races in an obvious, simple manner; and
	c) we can get rid of the possibly incorrect memory
	   barriers around this (unlocked) update.

RFC. WIP. :)

> target only ever moves forward, we should only need to do this once at
> the start of the function. In fact I'm kind of wondering why we can't
> just call xfs_ail_push(). If we check the tail item after grabbing the
> spin lock, we should be able to avoid any races with the waker, no?

I didn't use xfs_ail_push() because of having to prepare to wait
between determining if the AIL is empty and checking if we need
to update the target.

I also didn't want to affect the existing xfs_ail_push() as I was
modifying the xfs_ail_push_sync() code to do what was needed.
Eventually they can probably come back together, but for now I'm not
100% sure that the code is correct and race free.

> > +void
> > +xfs_ail_delete_finish(
> > +	struct xfs_ail		*ailp,
> > +	bool			do_tail_update) __releases(ailp->ail_lock)
> > +{
> > +	struct xfs_mount	*mp = ailp->ail_mount;
> > +
> > +	if (!do_tail_update) {
> > +		spin_unlock(&ailp->ail_lock);
> > +		return;
> > +	}
> > +
> 
> Hmm.. so while what we really care about here are tail updates, this
> logic is currently driven by removing the min ail log item. That seems
> like a lot of potential churn if we're waking the pusher on every object
> written back covered by a single log record / checkpoint. Perhaps we
> should implement a bit more coarse wakeup logic such as only when the
> tail lsn actually changes, for example?

You mean the next patch?

> FWIW, it also doesn't look like you've handled the case of relogged
> items moving the tail forward anywhere that I can see, so we might be
> missing some wakeups here. See xfs_trans_ail_update_bulk() for
> additional AIL manipulation.

Good catch. That might be the race the next patch exposes :)

Cheers,

Dave.
Brian Foster Aug. 6, 2019, 12:29 p.m. UTC | #3
On Tue, Aug 06, 2019 at 09:21:32AM +1000, Dave Chinner wrote:
> On Mon, Aug 05, 2019 at 01:51:53PM -0400, Brian Foster wrote:
> > On Thu, Aug 01, 2019 at 12:17:41PM +1000, Dave Chinner wrote:
> > > From: Dave Chinner <dchinner@redhat.com>
> > > 
> > > Provide an interface to push the AIL to a target LSN and wait for
> > > the tail of the log to move past that LSN. This is used to wait for
> > > all items older than a specific LSN to either be cleaned (written
> > > back) or relogged to a higher LSN in the AIL. The primary use for
> > > this is to allow IO free inode reclaim throttling.
> > > 
> > > Factor the common AIL deletion code that does all the wakeups into a
> > > helper so we only have one copy of this somewhat tricky code to
> > > interface with all the wakeups necessary when the LSN of the log
> > > tail changes.
> > > 
> > > Signed-off-by: Dave Chinner <dchinner@redhat.com>
> > > ---
> > >  fs/xfs/xfs_inode_item.c | 12 +------
> > >  fs/xfs/xfs_trans_ail.c  | 69 +++++++++++++++++++++++++++++++++--------
> > >  fs/xfs/xfs_trans_priv.h |  6 +++-
> > >  3 files changed, 62 insertions(+), 25 deletions(-)
> > > 
> > ...
> > > diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
> > > index 6ccfd75d3c24..9e3102179221 100644
> > > --- a/fs/xfs/xfs_trans_ail.c
> > > +++ b/fs/xfs/xfs_trans_ail.c
> > > @@ -654,6 +654,37 @@ xfs_ail_push_all(
> > >  		xfs_ail_push(ailp, threshold_lsn);
> > >  }
> > >  
> > > +/*
> > > + * Push the AIL to a specific lsn and wait for it to complete.
> > > + */
> > > +void
> > > +xfs_ail_push_sync(
> > > +	struct xfs_ail		*ailp,
> > > +	xfs_lsn_t		threshold_lsn)
> > > +{
> > > +	struct xfs_log_item	*lip;
> > > +	DEFINE_WAIT(wait);
> > > +
> > > +	spin_lock(&ailp->ail_lock);
> > > +	while ((lip = xfs_ail_min(ailp)) != NULL) {
> > > +		prepare_to_wait(&ailp->ail_push, &wait, TASK_UNINTERRUPTIBLE);
> > > +		if (XFS_FORCED_SHUTDOWN(ailp->ail_mount) ||
> > > +		    XFS_LSN_CMP(threshold_lsn, lip->li_lsn) <= 0)
> > > +			break;
> > > +		/* XXX: cmpxchg? */
> > > +		while (XFS_LSN_CMP(threshold_lsn, ailp->ail_target) > 0)
> > > +			xfs_trans_ail_copy_lsn(ailp, &ailp->ail_target, &threshold_lsn);
> > 
> > Why the need to repeatedly copy the ail_target like this? If the push
> 
> It's a hack because the other updates are done unlocked and this
> doesn't contain the memroy barriers needed to make it correct
> and race free.
> 
> Hence the comment "XXX: cmpxchg" to ensure that:
> 
> 	a) we only ever move the target forwards;
> 	b) we resolve update races in an obvious, simple manner; and
> 	c) we can get rid of the possibly incorrect memory
> 	   barriers around this (unlocked) update.
> 
> RFC. WIP. :)
> 

Ack..

> > target only ever moves forward, we should only need to do this once at
> > the start of the function. In fact I'm kind of wondering why we can't
> > just call xfs_ail_push(). If we check the tail item after grabbing the
> > spin lock, we should be able to avoid any races with the waker, no?
> 
> I didn't use xfs_ail_push() because of having to prepare to wait
> between determining if the AIL is empty and checking if we need
> to update the target.
> 
> I also didn't want to affect the existing xfs_ail_push() as I was
> modifying the xfs_ail_push_sync() code to do what was needed.
> Eventually they can probably come back together, but for now I'm not
> 100% sure that the code is correct and race free.
> 

Ok, just chalk this up to general feedback around seeing if we can
improve the factoring between the several AIL pushing functions we now
have once this mechanism is working/correct.

> > > +void
> > > +xfs_ail_delete_finish(
> > > +	struct xfs_ail		*ailp,
> > > +	bool			do_tail_update) __releases(ailp->ail_lock)
> > > +{
> > > +	struct xfs_mount	*mp = ailp->ail_mount;
> > > +
> > > +	if (!do_tail_update) {
> > > +		spin_unlock(&ailp->ail_lock);
> > > +		return;
> > > +	}
> > > +
> > 
> > Hmm.. so while what we really care about here are tail updates, this
> > logic is currently driven by removing the min ail log item. That seems
> > like a lot of potential churn if we're waking the pusher on every object
> > written back covered by a single log record / checkpoint. Perhaps we
> > should implement a bit more coarse wakeup logic such as only when the
> > tail lsn actually changes, for example?
> 
> You mean the next patch?
> 

Yep, hadn't got to that point yet. FWIW, I think the next patch should
probably come before this one so there isn't a transient state with
whatever behavior results from this patch by itself.

Brian

> > FWIW, it also doesn't look like you've handled the case of relogged
> > items moving the tail forward anywhere that I can see, so we might be
> > missing some wakeups here. See xfs_trans_ail_update_bulk() for
> > additional AIL manipulation.
> 
> Good catch. That might be the race the next patch exposes :)
> 
> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
diff mbox series

Patch

diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index c9a502eed204..7b942a63e992 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -743,17 +743,7 @@  xfs_iflush_done(
 				xfs_clear_li_failed(blip);
 			}
 		}
-
-		if (mlip_changed) {
-			if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
-				xlog_assign_tail_lsn_locked(ailp->ail_mount);
-			if (list_empty(&ailp->ail_head))
-				wake_up_all(&ailp->ail_empty);
-		}
-		spin_unlock(&ailp->ail_lock);
-
-		if (mlip_changed)
-			xfs_log_space_wake(ailp->ail_mount);
+		xfs_ail_delete_finish(ailp, mlip_changed);
 	}
 
 	/*
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 6ccfd75d3c24..9e3102179221 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -654,6 +654,37 @@  xfs_ail_push_all(
 		xfs_ail_push(ailp, threshold_lsn);
 }
 
+/*
+ * Push the AIL to a specific lsn and wait for it to complete.
+ */
+void
+xfs_ail_push_sync(
+	struct xfs_ail		*ailp,
+	xfs_lsn_t		threshold_lsn)
+{
+	struct xfs_log_item	*lip;
+	DEFINE_WAIT(wait);
+
+	spin_lock(&ailp->ail_lock);
+	while ((lip = xfs_ail_min(ailp)) != NULL) {
+		prepare_to_wait(&ailp->ail_push, &wait, TASK_UNINTERRUPTIBLE);
+		if (XFS_FORCED_SHUTDOWN(ailp->ail_mount) ||
+		    XFS_LSN_CMP(threshold_lsn, lip->li_lsn) <= 0)
+			break;
+		/* XXX: cmpxchg? */
+		while (XFS_LSN_CMP(threshold_lsn, ailp->ail_target) > 0)
+			xfs_trans_ail_copy_lsn(ailp, &ailp->ail_target, &threshold_lsn);
+		wake_up_process(ailp->ail_task);
+		spin_unlock(&ailp->ail_lock);
+		schedule();
+		spin_lock(&ailp->ail_lock);
+	}
+	spin_unlock(&ailp->ail_lock);
+
+	finish_wait(&ailp->ail_push, &wait);
+}
+
+
 /*
  * Push out all items in the AIL immediately and wait until the AIL is empty.
  */
@@ -764,6 +795,28 @@  xfs_ail_delete_one(
 	return mlip == lip;
 }
 
+void
+xfs_ail_delete_finish(
+	struct xfs_ail		*ailp,
+	bool			do_tail_update) __releases(ailp->ail_lock)
+{
+	struct xfs_mount	*mp = ailp->ail_mount;
+
+	if (!do_tail_update) {
+		spin_unlock(&ailp->ail_lock);
+		return;
+	}
+
+	if (!XFS_FORCED_SHUTDOWN(mp))
+		xlog_assign_tail_lsn_locked(mp);
+
+	wake_up_all(&ailp->ail_push);
+	if (list_empty(&ailp->ail_head))
+		wake_up_all(&ailp->ail_empty);
+	spin_unlock(&ailp->ail_lock);
+	xfs_log_space_wake(mp);
+}
+
 /**
  * Remove a log items from the AIL
  *
@@ -789,10 +842,9 @@  void
 xfs_trans_ail_delete(
 	struct xfs_ail		*ailp,
 	struct xfs_log_item	*lip,
-	int			shutdown_type) __releases(ailp->ail_lock)
+	int			shutdown_type)
 {
 	struct xfs_mount	*mp = ailp->ail_mount;
-	bool			mlip_changed;
 
 	if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
 		spin_unlock(&ailp->ail_lock);
@@ -805,17 +857,7 @@  xfs_trans_ail_delete(
 		return;
 	}
 
-	mlip_changed = xfs_ail_delete_one(ailp, lip);
-	if (mlip_changed) {
-		if (!XFS_FORCED_SHUTDOWN(mp))
-			xlog_assign_tail_lsn_locked(mp);
-		if (list_empty(&ailp->ail_head))
-			wake_up_all(&ailp->ail_empty);
-	}
-
-	spin_unlock(&ailp->ail_lock);
-	if (mlip_changed)
-		xfs_log_space_wake(ailp->ail_mount);
+	xfs_ail_delete_finish(ailp, xfs_ail_delete_one(ailp, lip));
 }
 
 int
@@ -834,6 +876,7 @@  xfs_trans_ail_init(
 	spin_lock_init(&ailp->ail_lock);
 	INIT_LIST_HEAD(&ailp->ail_buf_list);
 	init_waitqueue_head(&ailp->ail_empty);
+	init_waitqueue_head(&ailp->ail_push);
 
 	ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
 			ailp->ail_mount->m_fsname);
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 2e073c1c4614..5ab70b9b896f 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -61,6 +61,7 @@  struct xfs_ail {
 	int			ail_log_flush;
 	struct list_head	ail_buf_list;
 	wait_queue_head_t	ail_empty;
+	wait_queue_head_t	ail_push;
 };
 
 /*
@@ -92,8 +93,10 @@  xfs_trans_ail_update(
 }
 
 bool xfs_ail_delete_one(struct xfs_ail *ailp, struct xfs_log_item *lip);
+void xfs_ail_delete_finish(struct xfs_ail *ailp, bool do_tail_update)
+			__releases(ailp->ail_lock);
 void xfs_trans_ail_delete(struct xfs_ail *ailp, struct xfs_log_item *lip,
-		int shutdown_type) __releases(ailp->ail_lock);
+		int shutdown_type);
 
 static inline void
 xfs_trans_ail_remove(
@@ -111,6 +114,7 @@  xfs_trans_ail_remove(
 }
 
 void			xfs_ail_push(struct xfs_ail *, xfs_lsn_t);
+void			xfs_ail_push_sync(struct xfs_ail *, xfs_lsn_t);
 void			xfs_ail_push_all(struct xfs_ail *);
 void			xfs_ail_push_all_sync(struct xfs_ail *);
 struct xfs_log_item	*xfs_ail_min(struct xfs_ail  *ailp);