diff mbox series

[09/14] xfs: move log shut down handling out of xlog_state_iodone_process_iclog

Message ID 20200316144233.900390-10-hch@lst.de (mailing list archive)
State Deferred, archived
Headers show
Series [01/14] xfs: merge xlog_cil_push into xlog_cil_push_work | expand

Commit Message

Christoph Hellwig March 16, 2020, 2:42 p.m. UTC
Move handling of a shut down log out of xlog_state_iodone_process_iclog
and into xlog_state_do_callback so that it can be moved into an entirely
separate branch.  While doing so switch to using XLOG_FORCED_SHUTDOWN to
check the shutdown condition global to the log instead of the per-iclog
flag, and make sure the comments match reality.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/xfs_log.c | 64 ++++++++++++++++++++----------------------------
 1 file changed, 26 insertions(+), 38 deletions(-)

Comments

Darrick J. Wong March 16, 2020, 9:02 p.m. UTC | #1
On Mon, Mar 16, 2020 at 03:42:28PM +0100, Christoph Hellwig wrote:
> Move handling of a shut down log out of xlog_state_iodone_process_iclog
> and into xlog_state_do_callback so that it can be moved into an entirely
> separate branch.  While doing so switch to using XLOG_FORCED_SHUTDOWN to
> check the shutdown condition global to the log instead of the per-iclog
> flag, and make sure the comments match reality.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Seems reasonable,
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>

--D

> ---
>  fs/xfs/xfs_log.c | 64 ++++++++++++++++++++----------------------------
>  1 file changed, 26 insertions(+), 38 deletions(-)
> 
> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> index c534d7007aa3..4efaa248a03d 100644
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
> @@ -2746,8 +2746,7 @@ xlog_state_do_iclog_callbacks(
>  static bool
>  xlog_state_iodone_process_iclog(
>  	struct xlog		*log,
> -	struct xlog_in_core	*iclog,
> -	bool			*ioerror)
> +	struct xlog_in_core	*iclog)
>  {
>  	xfs_lsn_t		lowest_lsn;
>  	xfs_lsn_t		header_lsn;
> @@ -2759,15 +2758,6 @@ xlog_state_iodone_process_iclog(
>  		 * Skip all iclogs in the ACTIVE & DIRTY states:
>  		 */
>  		return false;
> -	case XLOG_STATE_IOERROR:
> -		/*
> -		 * Between marking a filesystem SHUTDOWN and stopping the log,
> -		 * we do flush all iclogs to disk (if there wasn't a log I/O
> -		 * error). So, we do want things to go smoothly in case of just
> -		 * a SHUTDOWN w/o a LOG_IO_ERROR.
> -		 */
> -		*ioerror = true;
> -		return false;
>  	case XLOG_STATE_DONE_SYNC:
>  		/*
>  		 * Now that we have an iclog that is in the DONE_SYNC state, do
> @@ -2795,39 +2785,41 @@ STATIC void
>  xlog_state_do_callback(
>  	struct xlog		*log)
>  {
> -	struct xlog_in_core	*iclog;
> -	struct xlog_in_core	*first_iclog;
>  	bool			cycled_icloglock;
> -	bool			ioerror;
>  	int			flushcnt = 0;
>  	int			repeats = 0;
>  
> +	/*
> +	 * Scan all iclogs starting with the one pointed to by the log.  Reset
> +	 * this starting point each time the log is unlocked (during callbacks).
> +	 *
> +	 * Keep looping through iclogs until one full pass is made without
> +	 * running any callbacks.
> +	 *
> +	 * If the log has been shut down, still perform the callbacks once per
> +	 * iclog to abort all log items, but don't bother to restart the loop
> +	 * after dropping the log as no new callbacks can show up.
> +	 */
>  	spin_lock(&log->l_icloglock);
>  	do {
> -		/*
> -		 * Scan all iclogs starting with the one pointed to by the
> -		 * log.  Reset this starting point each time the log is
> -		 * unlocked (during callbacks).
> -		 *
> -		 * Keep looping through iclogs until one full pass is made
> -		 * without running any callbacks.
> -		 */
> -		first_iclog = log->l_iclog;
> -		iclog = log->l_iclog;
> +		struct xlog_in_core	*first_iclog = log->l_iclog;
> +		struct xlog_in_core	*iclog = first_iclog;
> +
>  		cycled_icloglock = false;
> -		ioerror = false;
>  		repeats++;
>  
>  		do {
> -			if (xlog_state_iodone_process_iclog(log, iclog,
> -							&ioerror))
> +			if (XLOG_FORCED_SHUTDOWN(log)) {
> +				xlog_state_do_iclog_callbacks(log, iclog);
> +				wake_up_all(&iclog->ic_force_wait);
> +				continue;
> +			}
> +
> +			if (xlog_state_iodone_process_iclog(log, iclog))
>  				break;
>  
> -			if (iclog->ic_state != XLOG_STATE_CALLBACK &&
> -			    iclog->ic_state != XLOG_STATE_IOERROR) {
> -				iclog = iclog->ic_next;
> +			if (iclog->ic_state != XLOG_STATE_CALLBACK)
>  				continue;
> -			}
>  
>  			/*
>  			 * Running callbacks will drop the icloglock which means
> @@ -2835,12 +2827,8 @@ xlog_state_do_callback(
>  			 */
>  			cycled_icloglock = true;
>  			xlog_state_do_iclog_callbacks(log, iclog);
> -			if (XLOG_FORCED_SHUTDOWN(log))
> -				wake_up_all(&iclog->ic_force_wait);
> -			else
> -				xlog_state_clean_iclog(log, iclog);
> -			iclog = iclog->ic_next;
> -		} while (first_iclog != iclog);
> +			xlog_state_clean_iclog(log, iclog);
> +		} while ((iclog = iclog->ic_next) != first_iclog);
>  
>  		if (repeats > 5000) {
>  			flushcnt += repeats;
> @@ -2849,7 +2837,7 @@ xlog_state_do_callback(
>  				"%s: possible infinite loop (%d iterations)",
>  				__func__, flushcnt);
>  		}
> -	} while (!ioerror && cycled_icloglock);
> +	} while (cycled_icloglock);
>  
>  	if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE ||
>  	    log->l_iclog->ic_state == XLOG_STATE_IOERROR)
> -- 
> 2.24.1
>
Brian Foster March 18, 2020, 2:48 p.m. UTC | #2
On Mon, Mar 16, 2020 at 03:42:28PM +0100, Christoph Hellwig wrote:
> Move handling of a shut down log out of xlog_state_iodone_process_iclog
> and into xlog_state_do_callback so that it can be moved into an entirely
> separate branch.  While doing so switch to using XLOG_FORCED_SHUTDOWN to
> check the shutdown condition global to the log instead of the per-iclog
> flag, and make sure the comments match reality.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/xfs/xfs_log.c | 64 ++++++++++++++++++++----------------------------
>  1 file changed, 26 insertions(+), 38 deletions(-)
> 
> diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
> index c534d7007aa3..4efaa248a03d 100644
> --- a/fs/xfs/xfs_log.c
> +++ b/fs/xfs/xfs_log.c
...
> @@ -2795,39 +2785,41 @@ STATIC void
>  xlog_state_do_callback(
>  	struct xlog		*log)
>  {
> -	struct xlog_in_core	*iclog;
> -	struct xlog_in_core	*first_iclog;
>  	bool			cycled_icloglock;
> -	bool			ioerror;
>  	int			flushcnt = 0;
>  	int			repeats = 0;
>  
> +	/*
> +	 * Scan all iclogs starting with the one pointed to by the log.  Reset
> +	 * this starting point each time the log is unlocked (during callbacks).
> +	 *
> +	 * Keep looping through iclogs until one full pass is made without
> +	 * running any callbacks.
> +	 *
> +	 * If the log has been shut down, still perform the callbacks once per
> +	 * iclog to abort all log items, but don't bother to restart the loop
> +	 * after dropping the log as no new callbacks can show up.
> +	 */

"after dropping the lock ..."

>  	spin_lock(&log->l_icloglock);
>  	do {
> -		/*
> -		 * Scan all iclogs starting with the one pointed to by the
> -		 * log.  Reset this starting point each time the log is
> -		 * unlocked (during callbacks).
> -		 *
> -		 * Keep looping through iclogs until one full pass is made
> -		 * without running any callbacks.
> -		 */
> -		first_iclog = log->l_iclog;
> -		iclog = log->l_iclog;
> +		struct xlog_in_core	*first_iclog = log->l_iclog;
> +		struct xlog_in_core	*iclog = first_iclog;
> +
>  		cycled_icloglock = false;
> -		ioerror = false;
>  		repeats++;
>  
>  		do {
> -			if (xlog_state_iodone_process_iclog(log, iclog,
> -							&ioerror))
> +			if (XLOG_FORCED_SHUTDOWN(log)) {
> +				xlog_state_do_iclog_callbacks(log, iclog);
> +				wake_up_all(&iclog->ic_force_wait);
> +				continue;
> +			}
> +

Why do we need to change the flow here? The current code looks like it
proceeds with the _do_iclog_callbacks() call below..

As it is, I also don't think this reflects the comment above because if
we catch the shutdown partway through a loop, the outer loop will
execute one more time through. That doesn't look like a problem at a
glance, but I think we should try to retain closer to existing behavior
by folding the shutdown check into the ic_state check below as well as
the outer loop conditional.

This (and the next patch) also raises the issue of whether to maintain
state validity once the iclog ioerror state goes away. Currently we see
the IOERROR state and kind of have free rein on busting through the
normal runtime logic to clear out callbacks, etc. on the iclog
regardless of what the pre-error state was. It certainly makes sense to
continue to do that based on XLOG_FORCED_SHUTDOWN(), but the iclog state
sort of provides a platform that allows us to do that because any
particular context can see it and handle an iclog with care. With
IOERROR replaced with the (potentially racy) log flag check, I think we
should try to maintain the coherence of other states wherever possible.
IOW, XLOG_FORCED_SHUTDOWN() means we can run callbacks and abort and
whatnot, but we should probably still consider and update the iclog
state as we progress (as opposed to leaving it in the DONE_SYNC state,
for example) because there's no guarantee some other context will
(always) behave just as it did with IOERROR.

Brian

> +			if (xlog_state_iodone_process_iclog(log, iclog))
>  				break;
>  
> -			if (iclog->ic_state != XLOG_STATE_CALLBACK &&
> -			    iclog->ic_state != XLOG_STATE_IOERROR) {
> -				iclog = iclog->ic_next;
> +			if (iclog->ic_state != XLOG_STATE_CALLBACK)
>  				continue;
> -			}
>  
>  			/*
>  			 * Running callbacks will drop the icloglock which means
> @@ -2835,12 +2827,8 @@ xlog_state_do_callback(
>  			 */
>  			cycled_icloglock = true;
>  			xlog_state_do_iclog_callbacks(log, iclog);
> -			if (XLOG_FORCED_SHUTDOWN(log))
> -				wake_up_all(&iclog->ic_force_wait);
> -			else
> -				xlog_state_clean_iclog(log, iclog);
> -			iclog = iclog->ic_next;
> -		} while (first_iclog != iclog);
> +			xlog_state_clean_iclog(log, iclog);
> +		} while ((iclog = iclog->ic_next) != first_iclog);
>  
>  		if (repeats > 5000) {
>  			flushcnt += repeats;
> @@ -2849,7 +2837,7 @@ xlog_state_do_callback(
>  				"%s: possible infinite loop (%d iterations)",
>  				__func__, flushcnt);
>  		}
> -	} while (!ioerror && cycled_icloglock);
> +	} while (cycled_icloglock);
>  
>  	if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE ||
>  	    log->l_iclog->ic_state == XLOG_STATE_IOERROR)
> -- 
> 2.24.1
>
Christoph Hellwig March 18, 2020, 4:34 p.m. UTC | #3
On Wed, Mar 18, 2020 at 10:48:25AM -0400, Brian Foster wrote:
> >  		do {
> > -			if (xlog_state_iodone_process_iclog(log, iclog,
> > -							&ioerror))
> > +			if (XLOG_FORCED_SHUTDOWN(log)) {
> > +				xlog_state_do_iclog_callbacks(log, iclog);
> > +				wake_up_all(&iclog->ic_force_wait);
> > +				continue;
> > +			}
> > +
> 
> Why do we need to change the flow here? The current code looks like it
> proceeds with the _do_iclog_callbacks() call below..
>
> As it is, I also don't think this reflects the comment above because if
> we catch the shutdown partway through a loop, the outer loop will
> execute one more time through. That doesn't look like a problem at a
> glance, but I think we should try to retain closer to existing behavior
> by folding the shutdown check into the ic_state check below as well as
> the outer loop conditional.

True.  I think we just need to clear cycled_icloglock in the
shutdown branch.  I prefer that flow over falling through to the
main loop body as that clearly separates out the shutdown case.

> This (and the next patch) also raises the issue of whether to maintain
> state validity once the iclog ioerror state goes away. Currently we see
> the IOERROR state and kind of have free reign on busting through the
> normal runtime logic to clear out callbacks, etc. on the iclog
> regardless of what the pre-error state was. It certainly makes sense to
> continue to do that based on XLOG_FORCED_SHUTDOWN(), but the iclog state
> sort of provides a platform that allows us to do that because any
> particular context can see it and handle an iclog with care. With
> IOERROR replaced with the (potentially racy) log flag check, I think we
> should try to maintain the coherence of other states wherever possible.
> IOW, XLOG_FORCED_SHUTDOWN() means we can run callbacks and abort and
> whatnot, but we should probably still consider and update the iclog
> state as we progress (as opposed to leaving it in the DONE_SYNC state,
> for example) because there's no guarantee some other context will
> (always) behave just as it did with IOERROR.

I actually very much disagree with that, and this series moves into
the other direction.  We only really change the states when
writing to iclogs, syncing them to disk, and I/O completion.  And
all the paths just error out at a high level when the log is shut
down, so there is no need to move the state along.  Faking state
changes when they don't correspond to the actual I/O just seems like
a really bad idea.

Also if you look at what state checks are left, they are all (except
for the debug check in xfs_log_unmount_verify_iclog) under
l_icloglock and guarded by a shutdown check.
Brian Foster March 19, 2020, 11:36 a.m. UTC | #4
On Wed, Mar 18, 2020 at 05:34:29PM +0100, Christoph Hellwig wrote:
> On Wed, Mar 18, 2020 at 10:48:25AM -0400, Brian Foster wrote:
> > >  		do {
> > > -			if (xlog_state_iodone_process_iclog(log, iclog,
> > > -							&ioerror))
> > > +			if (XLOG_FORCED_SHUTDOWN(log)) {
> > > +				xlog_state_do_iclog_callbacks(log, iclog);
> > > +				wake_up_all(&iclog->ic_force_wait);
> > > +				continue;
> > > +			}
> > > +
> > 
> > Why do we need to change the flow here? The current code looks like it
> > proceeds with the _do_iclog_callbacks() call below..
> >
> > As it is, I also don't think this reflects the comment above because if
> > we catch the shutdown partway through a loop, the outer loop will
> > execute one more time through. That doesn't look like a problem at a
> > glance, but I think we should try to retain closer to existing behavior
> > by folding the shutdown check into the ic_state check below as well as
> > the outer loop conditional.
> 
> True.  I think we just need to clear cycled_icloglock in the
> shutdown branch.  I prefer that flow over falling through to the
> main loop body as that clearly separates out the shutdown case.
> 

Sure, but a shutdown can still happen at any point so this is just a
duplicate branch to maintain.

> > This (and the next patch) also raises the issue of whether to maintain
> > state validity once the iclog ioerror state goes away. Currently we see
> > the IOERROR state and kind of have free reign on busting through the
> > normal runtime logic to clear out callbacks, etc. on the iclog
> > regardless of what the pre-error state was. It certainly makes sense to
> > continue to do that based on XLOG_FORCED_SHUTDOWN(), but the iclog state
> > sort of provides a platform that allows us to do that because any
> > particular context can see it and handle an iclog with care. With
> > IOERROR replaced with the (potentially racy) log flag check, I think we
> > should try to maintain the coherence of other states wherever possible.
> > IOW, XLOG_FORCED_SHUTDOWN() means we can run callbacks and abort and
> > whatnot, but we should probably still consider and update the iclog
> > state as we progress (as opposed to leaving it in the DONE_SYNC state,
> > for example) because there's no guarantee some other context will
> > (always) behave just as it did with IOERROR.
> 
> I actually very much disagree with that, and this series moves into
> the other direction.  We only really changes the states when
> writing to iclogs, syncing them to disk, and I/O completion.  And
> all the paths just error out at a high level when the log is shut
> down, so there is no need to move the state along.  Faking state
> changes when they don't correspond to the actual I/O just seems like
> a really bad idea.
> 

I think you're misreading me. I'm not suggesting to fake state changes.
I'd argue that's actually what the special case shutdown branch does.
And to the contrary, this patch already implements what I'm suggesting,
it's just not consistent behavior..

First, we basically already go from whatever state we're in to "logical
CALLBACK" during shutdown. This is just forcibly implemented via the
IOERROR state. With IOERROR eventually removed, this highlights things
like whether it's actually safe to make some of those arbitrary
transitions. It's actually not, because going from WANT_SYNC -> CALLBACK
is a potential use after free vector of the CIL ctx (as soon as the ctx
is added to the callback list in the CIL push code). This is yet another
functional problem that should be fixed before removing IOERROR, IMO
(and is reproducible via kasan splat, btw). At this point I think some
of these shutdown checks associated with CALLBACK are simply to ensure
IOERROR remains persistent once it's set on an iclog. We don't need to
carry that logic around if IOERROR is going away.

SYNCING -> CALLBACK is another hokey transition in the existing code,
even if it doesn't currently manifest in a bug that I can see, because
we should probably still expect (wait for) an I/O completion even
though the filesystem had shut down in the meantime. Fixing that one might
require tweaks to how the shutdown code actually works (i.e. waiting on
an I/O vs. running callbacks while in-flight). It's not immediately
clear to me what the best solution is for that, but I suspect it could
tie in with fixing the problem noted above.

With regard to this patch, consider that shutdown can happen at any
point and xlog_state_do_iclog_callbacks() cycles icloglock. That means
that as of this patch, we actually can go from IOERROR -> DIRTY and
possibly from DIRTY -> ACTIVE depending on where the iclog lies in the
list. Removing IOERROR will subtly change that behavior yet again to
make the latter transition potentially more likely.

Note that I think that's probably fine. What I'm suggesting is to just
drop the duplicate shutdown branch and instead let's evaluate whether
some of the code these checks intend to avoid is really problematic. I
don't think it is in the completion path since we're just resetting
in-core headers and such. That means we could probably just let the
iclogs fall through those state transitions naturally, reduce the number
of shutdown checks splattered throughout the code and simplify the
overall error handling logic in the process by handling all iclogs
consistently during shutdown.

> Also if you look at what state checks are left, the are all (except
> for the debug check in xfs_log_unmount_verify_iclog) under
> l_icloglock and guarded by a shutdown check.
> 

That's not quite enough IMO. I think the whole IOERROR problem is not
primarily a matter of mechanically factoring it out. It's fragile
functionality that should be fixed/simplified first before there's any
real value to removing IOERROR.

Brian
Christoph Hellwig March 19, 2020, 1:05 p.m. UTC | #5
On Thu, Mar 19, 2020 at 07:36:03AM -0400, Brian Foster wrote:
> > True.  I think we just need to clear cycled_icloglock in the
> > shutdown branch.  I prefer that flow over falling through to the
> > main loop body as that clearly separates out the shutdown case.
> > 
> 
> Sure, but a shutdown can still happen at any point so this is just a
> duplicate branch to maintain.

I don't understand.  We are in the inner loop and under l_icloglock.
The next time a shutdown can come in is when
xlog_state_do_iclog_callbacks drops l_icloglock.  That is at the end
of the inner loop, which means we will always go back to the
force shutdown check quickly.  So how is the branch duplicate?  Yes,
it also calls xlog_state_do_iclog_callbacks and does the wakeup,
but in doing that early it avoids a whole lot of complicated logic
in the previous code base.

> I think you're misreading me. I'm not suggesting to fake state changes.
> I'd argue that's actually what the special case shutdown branch does.
> And to the contrary, this patch already implements what I'm suggesting,
> it's just not consistent behavior..

I'm rather confused now.

> First, we basically already go from whatever state we're in to "logical
> CALLBACK" during shutdown. This is just forcibly implemented via the
> IOERROR state. With IOERROR eventually removed, this highlights things
> like whether it's actually safe to make some of those arbitrary
> transitions. It's actually not, because going from WANT_SYNC -> CALLBACK
> is a potential use after free vector of the CIL ctx (as soon as the ctx
> is added to the callback list in the CIL push code). This is yet another
> functional problem that should be fixed before removing IOERROR, IMO
> (and is reproducible via kasan splat, btw). At this point I think some
> of these shutdown checks associated with CALLBACK are simply to ensure
> IOERROR remains persistent once it's set on an iclog. We don't need to
> carry that logic around if IOERROR is going away.

What shutdown check associated with CALLBACK?

> SYNCING -> CALLBACK is another hokey transition in the existing code,
> even if it doesn't currently manifest in a bug that I can see, because
> we should probably still expect (wait for) an I/O completion despite
> that the filesystem had shutdown in the meantime. Fixing that one might
> require tweaks to how the shutdown code actually works (i.e. waiting on
> an I/O vs. running callbacks while in-flight). It's not immediately
> clear to me what the best solution is for that, but I suspect it could
> tie in with fixing the problem noted above.

True, actually running callbacks on various kinds of "in-flight" iclogs
seems rather dangerous.  So should I interpret your above comments
in that we should fix that first before killing off the IOERROR state?
Brian Foster March 19, 2020, 1:37 p.m. UTC | #6
On Thu, Mar 19, 2020 at 02:05:36PM +0100, Christoph Hellwig wrote:
> On Thu, Mar 19, 2020 at 07:36:03AM -0400, Brian Foster wrote:
> > > True.  I think we just need to clear cycled_icloglock in the
> > > shutdown branch.  I prefer that flow over falling through to the
> > > main loop body as that clearly separates out the shutdown case.
> > > 
> > 
> > Sure, but a shutdown can still happen at any point so this is just a
> > duplicate branch to maintain.
> 
> I don't understand.  We are in the inner loop and under l_icloglock.
> The next time a shutdown can come in is when
> xlog_state_do_iclog_callbacks drops l_icloglock.  That is at the end
> of the inner loop, which means we will always go back to the
> force shutdown check quickly.  So how is the branch duplicate?  Yes,
> it also calls xlog_state_do_iclog_callbacks and does the wakeup,
> but in doing that early it avoid a whole lot of complicated logic
> in the previous code base.
> 

We'll get back to the shutdown check for the next iclog, but not the
iclog we're running callbacks on. So basically we can be in CALLBACK, a
shutdown can set IOERROR where _do_iclog_callbacks() cycles the lock,
the callback picks up the shutdown state and aborts, and then the first
thing we do after that function returns is:

	iclog->ic_state = XLOG_STATE_DIRTY;
	xlog_state_activate_iclogs(log, &iclogs_changed);

... and thus finish the loop by reactivating the (IOERROR) iclog. So for
any particular iclog, we might process it for shutdown normally or in
the special shutdown branch based on timing.

> > I think you're misreading me. I'm not suggesting to fake state changes.
> > I'd argue that's actually what the special case shutdown branch does.
> > And to the contrary, this patch already implements what I'm suggesting,
> > it's just not consistent behavior..
> 
> I'm rather confused now.
> 

Sorry. What I'm saying can probably be simplified to the following
question: if we just removed the special shutdown branch and let the
iclogs fall through the normal completion sequence (once IOERROR is out
of the picture) during shutdown, is that actually a problem?

It seems to me it isn't (subject to testing of course). If that is true,
then that is simpler and more consistent than what we seem to be doing in
this patch, which to my eyes seems to want to maintain some of the
IOERROR functional cruft even though the state itself is being removed.
Also note I think it would be reasonable to lift it out in a
later/separate patch if that was more straightforward than reworking
these patches.

If it is a problem, I think that's a potential argument for leaving the
IOERROR state around because then the state technically has meaning.

> > First, we basically already go from whatever state we're in to "logical
> > CALLBACK" during shutdown. This is just forcibly implemented via the
> > IOERROR state. With IOERROR eventually removed, this highlights things
> > like whether it's actually safe to make some of those arbitrary
> > transitions. It's actually not, because going from WANT_SYNC -> CALLBACK
> > is a potential use after free vector of the CIL ctx (as soon as the ctx
> > is added to the callback list in the CIL push code). This is yet another
> > functional problem that should be fixed before removing IOERROR, IMO
> > (and is reproducible via kasan splat, btw). At this point I think some
> > of these shutdown checks associated with CALLBACK are simply to ensure
> > IOERROR remains persistent once it's set on an iclog. We don't need to
> > carry that logic around if IOERROR is going away.
> 
> What shutdown check associated with CALLBACK?
> 

The one(s) that issue callbacks on an IOERROR iclog (note that I'm
referring to the mainline code). Specifically the IOERROR check in
_process_iclog() and the following logic in the caller:

                if (iclog->ic_state != XLOG_STATE_CALLBACK &&
                    iclog->ic_state != XLOG_STATE_IOERROR) {
                        iclog = iclog->ic_next;
                        continue;
                }

IOW, the current code treats an IOERROR iclog as if it were CALLBACK
with the sole exception of updating ic_state.

> > SYNCING -> CALLBACK is another hokey transition in the existing code,
> > even if it doesn't currently manifest in a bug that I can see, because
> > we should probably still expect (wait for) an I/O completion despite
> > that the filesystem had shutdown in the meantime. Fixing that one might
> > require tweaks to how the shutdown code actually works (i.e. waiting on
> > an I/O vs. running callbacks while in-flight). It's not immediately
> > clear to me what the best solution is for that, but I suspect it could
> > tie in with fixing the problem noted above.
> 
> True, actually running callbacks on various kinds of "in-flight" iclogs
> seems rather dangerous.  So should I interpret your above comments
> in that we should fix that first before killing of the IOERROR state?
> 

Yes. Note that the WANT_SYNC -> CALLBACK (IOERROR) behavior is
explicitly problematic in the current code as well because the submitter
context still has the ctx while the callbacks could be freeing it.
That's a reproducible use after free in the current code.

Brian
diff mbox series

Patch

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index c534d7007aa3..4efaa248a03d 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -2746,8 +2746,7 @@  xlog_state_do_iclog_callbacks(
 static bool
 xlog_state_iodone_process_iclog(
 	struct xlog		*log,
-	struct xlog_in_core	*iclog,
-	bool			*ioerror)
+	struct xlog_in_core	*iclog)
 {
 	xfs_lsn_t		lowest_lsn;
 	xfs_lsn_t		header_lsn;
@@ -2759,15 +2758,6 @@  xlog_state_iodone_process_iclog(
 		 * Skip all iclogs in the ACTIVE & DIRTY states:
 		 */
 		return false;
-	case XLOG_STATE_IOERROR:
-		/*
-		 * Between marking a filesystem SHUTDOWN and stopping the log,
-		 * we do flush all iclogs to disk (if there wasn't a log I/O
-		 * error). So, we do want things to go smoothly in case of just
-		 * a SHUTDOWN w/o a LOG_IO_ERROR.
-		 */
-		*ioerror = true;
-		return false;
 	case XLOG_STATE_DONE_SYNC:
 		/*
 		 * Now that we have an iclog that is in the DONE_SYNC state, do
@@ -2795,39 +2785,41 @@  STATIC void
 xlog_state_do_callback(
 	struct xlog		*log)
 {
-	struct xlog_in_core	*iclog;
-	struct xlog_in_core	*first_iclog;
 	bool			cycled_icloglock;
-	bool			ioerror;
 	int			flushcnt = 0;
 	int			repeats = 0;
 
+	/*
+	 * Scan all iclogs starting with the one pointed to by the log.  Reset
+	 * this starting point each time the log is unlocked (during callbacks).
+	 *
+	 * Keep looping through iclogs until one full pass is made without
+	 * running any callbacks.
+	 *
+	 * If the log has been shut down, still perform the callbacks once per
+	 * iclog to abort all log items, but don't bother to restart the loop
+	 * after dropping the log as no new callbacks can show up.
+	 */
 	spin_lock(&log->l_icloglock);
 	do {
-		/*
-		 * Scan all iclogs starting with the one pointed to by the
-		 * log.  Reset this starting point each time the log is
-		 * unlocked (during callbacks).
-		 *
-		 * Keep looping through iclogs until one full pass is made
-		 * without running any callbacks.
-		 */
-		first_iclog = log->l_iclog;
-		iclog = log->l_iclog;
+		struct xlog_in_core	*first_iclog = log->l_iclog;
+		struct xlog_in_core	*iclog = first_iclog;
+
 		cycled_icloglock = false;
-		ioerror = false;
 		repeats++;
 
 		do {
-			if (xlog_state_iodone_process_iclog(log, iclog,
-							&ioerror))
+			if (XLOG_FORCED_SHUTDOWN(log)) {
+				xlog_state_do_iclog_callbacks(log, iclog);
+				wake_up_all(&iclog->ic_force_wait);
+				continue;
+			}
+
+			if (xlog_state_iodone_process_iclog(log, iclog))
 				break;
 
-			if (iclog->ic_state != XLOG_STATE_CALLBACK &&
-			    iclog->ic_state != XLOG_STATE_IOERROR) {
-				iclog = iclog->ic_next;
+			if (iclog->ic_state != XLOG_STATE_CALLBACK)
 				continue;
-			}
 
 			/*
 			 * Running callbacks will drop the icloglock which means
@@ -2835,12 +2827,8 @@  xlog_state_do_callback(
 			 */
 			cycled_icloglock = true;
 			xlog_state_do_iclog_callbacks(log, iclog);
-			if (XLOG_FORCED_SHUTDOWN(log))
-				wake_up_all(&iclog->ic_force_wait);
-			else
-				xlog_state_clean_iclog(log, iclog);
-			iclog = iclog->ic_next;
-		} while (first_iclog != iclog);
+			xlog_state_clean_iclog(log, iclog);
+		} while ((iclog = iclog->ic_next) != first_iclog);
 
 		if (repeats > 5000) {
 			flushcnt += repeats;
@@ -2849,7 +2837,7 @@  xlog_state_do_callback(
 				"%s: possible infinite loop (%d iterations)",
 				__func__, flushcnt);
 		}
-	} while (!ioerror && cycled_icloglock);
+	} while (cycled_icloglock);
 
 	if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE ||
 	    log->l_iclog->ic_state == XLOG_STATE_IOERROR)