
xfs: fix use-after-free on CIL context on shutdown

Message ID 20200611024503.GR2040@dread.disaster.area (mailing list archive)
State Superseded, archived
Series xfs: fix use-after-free on CIL context on shutdown

Commit Message

Dave Chinner June 11, 2020, 2:45 a.m. UTC
From: Dave Chinner <dchinner@redhat.com>

xlog_wait() on the CIL context can reference a freed context if the
waiter doesn't get scheduled before the CIL context is freed. This
can happen when a task is on the hard throttle and the CIL push
aborts due to a shutdown. This was detected by generic/019:

thread 1			thread 2

__xfs_trans_commit
 xfs_log_commit_cil
  <CIL size over hard throttle limit>
  xlog_wait
   schedule
				xlog_cil_push_work
				wake_up_all
				<shutdown aborts commit>
				xlog_cil_committed
				kmem_free

   remove_wait_queue
    spin_lock_irqsave --> UAF

Fix it by moving the wait queue to the CIL rather than keeping it
in the CIL context that gets freed on push completion. Because the
wait queue is now independent of the CIL context and we might have
multiple contexts in flight at once, only wake the waiters on the
push throttle when the context we are pushing is over the hard
throttle size threshold.

Fixes: 0e7ab7efe7745 ("xfs: Throttle commits on delayed background CIL push")
Reported-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_log_cil.c  | 10 +++++-----
 fs/xfs/xfs_log_priv.h |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)
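
For context, the sleeper here is xlog_wait() in fs/xfs/xfs_log_priv.h; a rough
sketch of its current form (its body is also quoted later in the thread) shows
where the freed memory gets touched: remove_wait_queue() takes wq->lock after
schedule() returns, and with the wait queue head embedded in the CIL context,
that lock lives in memory xlog_cil_committed() may already have freed.

static inline void
xlog_wait(
	struct wait_queue_head	*wq,
	struct spinlock		*lock)
		__releases(lock)
{
	DECLARE_WAITQUEUE(wait, current);

	add_wait_queue_exclusive(wq, &wait);
	__set_current_state(TASK_UNINTERRUPTIBLE);
	spin_unlock(lock);
	schedule();
	/* UAF: this takes wq->lock, and wq lived in the now-freed ctx */
	remove_wait_queue(wq, &wait);
}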

Comments

Brian Foster June 11, 2020, 3:11 p.m. UTC | #1
On Thu, Jun 11, 2020 at 12:45:03PM +1000, Dave Chinner wrote:
> 
> From: Dave Chinner <dchinner@redhat.com>
> 
> xlog_wait() on the CIL context can reference a freed context if the
> waiter doesn't get scheduled before the CIL context is freed. This
> can happen when a task is on the hard throttle and the CIL push
> aborts due to a shutdown. This was detected by generic/019:
> 
> thread 1			thread 2
> 
> __xfs_trans_commit
>  xfs_log_commit_cil
>   <CIL size over hard throttle limit>
>   xlog_wait
>    schedule
> 				xlog_cil_push_work
> 				wake_up_all
> 				<shutdown aborts commit>
> 				xlog_cil_committed
> 				kmem_free
> 
>    remove_wait_queue
>     spin_lock_irqsave --> UAF
> 
> Fix it by moving the wait queue to the CIL rather than keeping it
> in the CIL context that gets freed on push completion. Because the
> wait queue is now independent of the CIL context and we might have
> multiple contexts in flight at once, only wake the waiters on the
> push throttle when the context we are pushing is over the hard
> throttle size threshold.
> 
> Fixes: 0e7ab7efe7745 ("xfs: Throttle commits on delayed background CIL push")
> Reported-by: Yu Kuai <yukuai3@huawei.com>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>
> ---

Looks reasonable:

Reviewed-by: Brian Foster <bfoster@redhat.com>

>  fs/xfs/xfs_log_cil.c  | 10 +++++-----
>  fs/xfs/xfs_log_priv.h |  2 +-
>  2 files changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
> index b43f0e8f43f2e..9ed90368ab311 100644
> --- a/fs/xfs/xfs_log_cil.c
> +++ b/fs/xfs/xfs_log_cil.c
> @@ -671,7 +671,8 @@ xlog_cil_push_work(
>  	/*
>  	 * Wake up any background push waiters now this context is being pushed.
>  	 */
> -	wake_up_all(&ctx->push_wait);
> +	if (ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log))
> +		wake_up_all(&cil->xc_push_wait);
>  
>  	/*
>  	 * Check if we've anything to push. If there is nothing, then we don't
> @@ -743,13 +744,12 @@ xlog_cil_push_work(
>  
>  	/*
>  	 * initialise the new context and attach it to the CIL. Then attach
> -	 * the current context to the CIL committing lsit so it can be found
> +	 * the current context to the CIL committing list so it can be found
>  	 * during log forces to extract the commit lsn of the sequence that
>  	 * needs to be forced.
>  	 */
>  	INIT_LIST_HEAD(&new_ctx->committing);
>  	INIT_LIST_HEAD(&new_ctx->busy_extents);
> -	init_waitqueue_head(&new_ctx->push_wait);
>  	new_ctx->sequence = ctx->sequence + 1;
>  	new_ctx->cil = cil;
>  	cil->xc_ctx = new_ctx;
> @@ -937,7 +937,7 @@ xlog_cil_push_background(
>  	if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) {
>  		trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket);
>  		ASSERT(cil->xc_ctx->space_used < log->l_logsize);
> -		xlog_wait(&cil->xc_ctx->push_wait, &cil->xc_push_lock);
> +		xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock);
>  		return;
>  	}
>  
> @@ -1216,12 +1216,12 @@ xlog_cil_init(
>  	INIT_LIST_HEAD(&cil->xc_committing);
>  	spin_lock_init(&cil->xc_cil_lock);
>  	spin_lock_init(&cil->xc_push_lock);
> +	init_waitqueue_head(&cil->xc_push_wait);
>  	init_rwsem(&cil->xc_ctx_lock);
>  	init_waitqueue_head(&cil->xc_commit_wait);
>  
>  	INIT_LIST_HEAD(&ctx->committing);
>  	INIT_LIST_HEAD(&ctx->busy_extents);
> -	init_waitqueue_head(&ctx->push_wait);
>  	ctx->sequence = 1;
>  	ctx->cil = cil;
>  	cil->xc_ctx = ctx;
> diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
> index ec22c7a3867f1..75a62870b63af 100644
> --- a/fs/xfs/xfs_log_priv.h
> +++ b/fs/xfs/xfs_log_priv.h
> @@ -240,7 +240,6 @@ struct xfs_cil_ctx {
>  	struct xfs_log_vec	*lv_chain;	/* logvecs being pushed */
>  	struct list_head	iclog_entry;
>  	struct list_head	committing;	/* ctx committing list */
> -	wait_queue_head_t	push_wait;	/* background push throttle */
>  	struct work_struct	discard_endio_work;
>  };
>  
> @@ -274,6 +273,7 @@ struct xfs_cil {
>  	wait_queue_head_t	xc_commit_wait;
>  	xfs_lsn_t		xc_current_sequence;
>  	struct work_struct	xc_push_work;
> +	wait_queue_head_t	xc_push_wait;	/* background push throttle */
>  } ____cacheline_aligned_in_smp;
>  
>  /*
>
Yu Kuai June 16, 2020, 1:16 a.m. UTC | #2
On 2020/6/11 10:45, Dave Chinner wrote:
> 
> From: Dave Chinner <dchinner@redhat.com>
> 
> xlog_wait() on the CIL context can reference a freed context if the
> waiter doesn't get scheduled before the CIL context is freed. This
> can happen when a task is on the hard throttle and the CIL push
> aborts due to a shutdown. This was detected by generic/019:
> 
> thread 1			thread 2
> 
> __xfs_trans_commit
>   xfs_log_commit_cil
>    <CIL size over hard throttle limit>
>    xlog_wait
>     schedule
> 				xlog_cil_push_work
> 				wake_up_all
> 				<shutdown aborts commit>
> 				xlog_cil_committed
> 				kmem_free
> 
>     remove_wait_queue
>      spin_lock_irqsave --> UAF
> 
> Fix it by moving the wait queue to the CIL rather than keeping it
> in the CIL context that gets freed on push completion. Because the
> wait queue is now independent of the CIL context and we might have
> multiple contexts in flight at once, only wake the waiters on the
> push throttle when the context we are pushing is over the hard
> throttle size threshold.

Hi, Dave,

What do you think about the following fix:

1. use autoremove_wake_function(), and drop the remove_wait_queue()
call to avoid the UAF.
2. add finish_wait().

@@ -576,12 +576,13 @@ xlog_wait(
                 __releases(lock)
  {
         DECLARE_WAITQUEUE(wait, current);
+       wait.func = autoremove_wake_function;

         add_wait_queue_exclusive(wq, &wait);
         __set_current_state(TASK_UNINTERRUPTIBLE);
         spin_unlock(lock);
         schedule();
-       remove_wait_queue(wq, &wait);
+       finish_wait(wq, &wait);
  }

Best regards!
Yu Kuai
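
For readers not steeped in the waitqueue helpers: the suggestion relies on
autoremove_wake_function(), the stock wake callback that dequeues the wait
entry at wake-up time, roughly as sketched below (from kernel/sched/wait.c).
That is what would let the following finish_wait() avoid touching the wait
queue head on the woken path.

int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode,
			     int sync, void *key)
{
	int ret = default_wake_function(wq_entry, mode, sync, key);

	/* on a successful wakeup, drop the entry off the queue right away */
	if (ret)
		list_del_init(&wq_entry->entry);
	return ret;
}
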
Dave Chinner June 16, 2020, 2:38 a.m. UTC | #3
On Tue, Jun 16, 2020 at 09:16:09AM +0800, yukuai (C) wrote:
> On 2020/6/11 10:45, Dave Chinner wrote:
> > 
> > From: Dave Chinner <dchinner@redhat.com>
> > 
> > xlog_wait() on the CIL context can reference a freed context if the
> > waiter doesn't get scheduled before the CIL context is freed. This
> > can happen when a task is on the hard throttle and the CIL push
> > aborts due to a shutdown. This was detected by generic/019:
> > 
> > thread 1			thread 2
> > 
> > __xfs_trans_commit
> >   xfs_log_commit_cil
> >    <CIL size over hard throttle limit>
> >    xlog_wait
> >     schedule
> > 				xlog_cil_push_work
> > 				wake_up_all
> > 				<shutdown aborts commit>
> > 				xlog_cil_committed
> > 				kmem_free
> > 
> >     remove_wait_queue
> >      spin_lock_irqsave --> UAF
> > 
> > Fix it by moving the wait queue to the CIL rather than keeping it
> > in the CIL context that gets freed on push completion. Because the
> > wait queue is now independent of the CIL context and we might have
> > multiple contexts in flight at once, only wake the waiters on the
> > push throttle when the context we are pushing is over the hard
> > throttle size threshold.
> 
> Hi, Dave,
> 
> What do you think about the following fix:
> 
> 1. use autoremove_wake_function(), and drop the remove_wait_queue()
> call to avoid the UAF.
> 2. add finish_wait().
> 
> @@ -576,12 +576,13 @@ xlog_wait(
>                 __releases(lock)
>  {
>         DECLARE_WAITQUEUE(wait, current);
> +       wait.func = autoremove_wake_function;
> 
>         add_wait_queue_exclusive(wq, &wait);
>         __set_current_state(TASK_UNINTERRUPTIBLE);
>         spin_unlock(lock);
>         schedule();
> -       remove_wait_queue(wq, &wait);
> +       finish_wait(wq, &wait);
>  }

Yes, that would address this specific symptom of the problem, but it
doesn't fix the root cause: the wq can be freed while this function
sleeps. IMO, this sort of change leaves a trap for future
modifications - all the code calling xlog_wait() assumes the embedded
wq the task is sleeping on still exists after waiting, so we really
should be fixing the underlying problem: the incorrect existence
guarantee in the CIL code that you tripped over.

Cheers,

Dave.
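
To spell out why the autoremove approach only hides the symptom: finish_wait(),
roughly as sketched below (kernel/sched/wait.c), takes wq_head->lock only when
the entry is still queued, so that variant happens not to dereference the freed
context on this particular path. Every xlog_wait() caller still assumes the
wait queue head outlives the sleep, though, which is the guarantee the patch
restores by hanging the queue off the longer-lived struct xfs_cil.

void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry)
{
	unsigned long flags;

	__set_current_state(TASK_RUNNING);
	/* only lock the head if the waker did not already dequeue us */
	if (!list_empty_careful(&wq_entry->entry)) {
		spin_lock_irqsave(&wq_head->lock, flags);
		list_del_init(&wq_entry->entry);
		spin_unlock_irqrestore(&wq_head->lock, flags);
	}
}
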
Christoph Hellwig June 19, 2020, 1:46 p.m. UTC | #4
On Thu, Jun 11, 2020 at 12:45:03PM +1000, Dave Chinner wrote:
> 
> From: Dave Chinner <dchinner@redhat.com>
> 
> xlog_wait() on the CIL context can reference a freed context if the
> waiter doesn't get scheduled before the CIL context is freed. This
> can happen when a task is on the hard throttle and the CIL push
> aborts due to a shutdown. This was detected by generic/019:
> 
> thread 1			thread 2
> 
> __xfs_trans_commit
>  xfs_log_commit_cil
>   <CIL size over hard throttle limit>
>   xlog_wait
>    schedule
> 				xlog_cil_push_work
> 				wake_up_all
> 				<shutdown aborts commit>
> 				xlog_cil_committed
> 				kmem_free
> 
>    remove_wait_queue
>     spin_lock_irqsave --> UAF
> 
> Fix it by moving the wait queue to the CIL rather than keeping it
> in the CIL context that gets freed on push completion. Because the
> wait queue is now independent of the CIL context and we might have
> multiple contexts in flight at once, only wake the waiters on the
> push throttle when the context we are pushing is over the hard
> throttle size threshold.
> 
> Fixes: 0e7ab7efe7745 ("xfs: Throttle commits on delayed background CIL push")
> Reported-by: Yu Kuai <yukuai3@huawei.com>
> Signed-off-by: Dave Chinner <dchinner@redhat.com>

Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

Patch

diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index b43f0e8f43f2e..9ed90368ab311 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -671,7 +671,8 @@  xlog_cil_push_work(
 	/*
 	 * Wake up any background push waiters now this context is being pushed.
 	 */
-	wake_up_all(&ctx->push_wait);
+	if (ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log))
+		wake_up_all(&cil->xc_push_wait);
 
 	/*
 	 * Check if we've anything to push. If there is nothing, then we don't
@@ -743,13 +744,12 @@  xlog_cil_push_work(
 
 	/*
 	 * initialise the new context and attach it to the CIL. Then attach
-	 * the current context to the CIL committing lsit so it can be found
+	 * the current context to the CIL committing list so it can be found
 	 * during log forces to extract the commit lsn of the sequence that
 	 * needs to be forced.
 	 */
 	INIT_LIST_HEAD(&new_ctx->committing);
 	INIT_LIST_HEAD(&new_ctx->busy_extents);
-	init_waitqueue_head(&new_ctx->push_wait);
 	new_ctx->sequence = ctx->sequence + 1;
 	new_ctx->cil = cil;
 	cil->xc_ctx = new_ctx;
@@ -937,7 +937,7 @@  xlog_cil_push_background(
 	if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) {
 		trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket);
 		ASSERT(cil->xc_ctx->space_used < log->l_logsize);
-		xlog_wait(&cil->xc_ctx->push_wait, &cil->xc_push_lock);
+		xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock);
 		return;
 	}
 
@@ -1216,12 +1216,12 @@  xlog_cil_init(
 	INIT_LIST_HEAD(&cil->xc_committing);
 	spin_lock_init(&cil->xc_cil_lock);
 	spin_lock_init(&cil->xc_push_lock);
+	init_waitqueue_head(&cil->xc_push_wait);
 	init_rwsem(&cil->xc_ctx_lock);
 	init_waitqueue_head(&cil->xc_commit_wait);
 
 	INIT_LIST_HEAD(&ctx->committing);
 	INIT_LIST_HEAD(&ctx->busy_extents);
-	init_waitqueue_head(&ctx->push_wait);
 	ctx->sequence = 1;
 	ctx->cil = cil;
 	cil->xc_ctx = ctx;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index ec22c7a3867f1..75a62870b63af 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -240,7 +240,6 @@  struct xfs_cil_ctx {
 	struct xfs_log_vec	*lv_chain;	/* logvecs being pushed */
 	struct list_head	iclog_entry;
 	struct list_head	committing;	/* ctx committing list */
-	wait_queue_head_t	push_wait;	/* background push throttle */
 	struct work_struct	discard_endio_work;
 };
 
@@ -274,6 +273,7 @@  struct xfs_cil {
 	wait_queue_head_t	xc_commit_wait;
 	xfs_lsn_t		xc_current_sequence;
 	struct work_struct	xc_push_work;
+	wait_queue_head_t	xc_push_wait;	/* background push throttle */
 } ____cacheline_aligned_in_smp;
 
 /*