Message ID | 20200611024503.GR2040@dread.disaster.area (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | xfs: fix use-after-free on CIL context on shutdown | expand |
On Thu, Jun 11, 2020 at 12:45:03PM +1000, Dave Chinner wrote: > > From: Dave Chinner <dchinner@redhat.com> > > xlog_wait() on the CIL context can reference a freed context if the > waiter doesn't get scheduled before the CIL context is freed. This > can happen when a task is on the hard throttle and the CIL push > aborts due to a shutdown. This was detected by generic/019: > > thread 1 thread 2 > > __xfs_trans_commit > xfs_log_commit_cil > <CIL size over hard throttle limit> > xlog_wait > schedule > xlog_cil_push_work > wake_up_all > <shutdown aborts commit> > xlog_cil_committed > kmem_free > > remove_wait_queue > spin_lock_irqsave --> UAF > > Fix it by moving the wait queue to the CIL rather than keeping it in > in the CIL context that gets freed on push completion. Because the > wait queue is now independent of the CIL context and we might have > multiple contexts in flight at once, only wake the waiters on the > push throttle when the context we are pushing is over the hard > throttle size threshold. > > Fixes: 0e7ab7efe7745 ("xfs: Throttle commits on delayed background CIL push") > Reported-by: Yu Kuai <yukuai3@huawei.com> > Signed-off-by: Dave Chinner <dchinner@redhat.com> > --- Looks reasonable: Reviewed-by: Brian Foster <bfoster@redhat.com> > fs/xfs/xfs_log_cil.c | 10 +++++----- > fs/xfs/xfs_log_priv.h | 2 +- > 2 files changed, 6 insertions(+), 6 deletions(-) > > diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c > index b43f0e8f43f2e..9ed90368ab311 100644 > --- a/fs/xfs/xfs_log_cil.c > +++ b/fs/xfs/xfs_log_cil.c > @@ -671,7 +671,8 @@ xlog_cil_push_work( > /* > * Wake up any background push waiters now this context is being pushed. > */ > - wake_up_all(&ctx->push_wait); > + if (ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) > + wake_up_all(&cil->xc_push_wait); > > /* > * Check if we've anything to push. If there is nothing, then we don't > @@ -743,13 +744,12 @@ xlog_cil_push_work( > > /* > * initialise the new context and attach it to the CIL. 
Then attach > - * the current context to the CIL committing lsit so it can be found > + * the current context to the CIL committing list so it can be found > * during log forces to extract the commit lsn of the sequence that > * needs to be forced. > */ > INIT_LIST_HEAD(&new_ctx->committing); > INIT_LIST_HEAD(&new_ctx->busy_extents); > - init_waitqueue_head(&new_ctx->push_wait); > new_ctx->sequence = ctx->sequence + 1; > new_ctx->cil = cil; > cil->xc_ctx = new_ctx; > @@ -937,7 +937,7 @@ xlog_cil_push_background( > if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) { > trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket); > ASSERT(cil->xc_ctx->space_used < log->l_logsize); > - xlog_wait(&cil->xc_ctx->push_wait, &cil->xc_push_lock); > + xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock); > return; > } > > @@ -1216,12 +1216,12 @@ xlog_cil_init( > INIT_LIST_HEAD(&cil->xc_committing); > spin_lock_init(&cil->xc_cil_lock); > spin_lock_init(&cil->xc_push_lock); > + init_waitqueue_head(&cil->xc_push_wait); > init_rwsem(&cil->xc_ctx_lock); > init_waitqueue_head(&cil->xc_commit_wait); > > INIT_LIST_HEAD(&ctx->committing); > INIT_LIST_HEAD(&ctx->busy_extents); > - init_waitqueue_head(&ctx->push_wait); > ctx->sequence = 1; > ctx->cil = cil; > cil->xc_ctx = ctx; > diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h > index ec22c7a3867f1..75a62870b63af 100644 > --- a/fs/xfs/xfs_log_priv.h > +++ b/fs/xfs/xfs_log_priv.h > @@ -240,7 +240,6 @@ struct xfs_cil_ctx { > struct xfs_log_vec *lv_chain; /* logvecs being pushed */ > struct list_head iclog_entry; > struct list_head committing; /* ctx committing list */ > - wait_queue_head_t push_wait; /* background push throttle */ > struct work_struct discard_endio_work; > }; > > @@ -274,6 +273,7 @@ struct xfs_cil { > wait_queue_head_t xc_commit_wait; > xfs_lsn_t xc_current_sequence; > struct work_struct xc_push_work; > + wait_queue_head_t xc_push_wait; /* background push throttle */ > } ____cacheline_aligned_in_smp; > > /* >
On 2020/6/11 10:45, Dave Chinner wrote: > > From: Dave Chinner <dchinner@redhat.com> > > xlog_wait() on the CIL context can reference a freed context if the > waiter doesn't get scheduled before the CIL context is freed. This > can happen when a task is on the hard throttle and the CIL push > aborts due to a shutdown. This was detected by generic/019: > > thread 1 thread 2 > > __xfs_trans_commit > xfs_log_commit_cil > <CIL size over hard throttle limit> > xlog_wait > schedule > xlog_cil_push_work > wake_up_all > <shutdown aborts commit> > xlog_cil_committed > kmem_free > > remove_wait_queue > spin_lock_irqsave --> UAF > > Fix it by moving the wait queue to the CIL rather than keeping it in > in the CIL context that gets freed on push completion. Because the > wait queue is now independent of the CIL context and we might have > multiple contexts in flight at once, only wake the waiters on the > push throttle when the context we are pushing is over the hard > throttle size threshold. Hi, Dave, What do you think about the following fix: 1. use autoremove_wake_function(), and remove remove_wait_queue() to avoid UAF. 2. add finish_wait(). @@ -576,12 +576,13 @@ xlog_wait( __releases(lock) { DECLARE_WAITQUEUE(wait, current); + wait.func = autoremove_wake_function; add_wait_queue_exclusive(wq, &wait); __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock(lock); schedule(); - remove_wait_queue(wq, &wait); + finish_wait(wq, &wait); } Best regards! Yu Kuai
On Tue, Jun 16, 2020 at 09:16:09AM +0800, yukuai (C) wrote: > On 2020/6/11 10:45, Dave Chinner wrote: > > > > From: Dave Chinner <dchinner@redhat.com> > > > > xlog_wait() on the CIL context can reference a freed context if the > > waiter doesn't get scheduled before the CIL context is freed. This > > can happen when a task is on the hard throttle and the CIL push > > aborts due to a shutdown. This was detected by generic/019: > > > > thread 1 thread 2 > > > > __xfs_trans_commit > > xfs_log_commit_cil > > <CIL size over hard throttle limit> > > xlog_wait > > schedule > > xlog_cil_push_work > > wake_up_all > > <shutdown aborts commit> > > xlog_cil_committed > > kmem_free > > > > remove_wait_queue > > spin_lock_irqsave --> UAF > > > > Fix it by moving the wait queue to the CIL rather than keeping it in > > in the CIL context that gets freed on push completion. Because the > > wait queue is now independent of the CIL context and we might have > > multiple contexts in flight at once, only wake the waiters on the > > push throttle when the context we are pushing is over the hard > > throttle size threshold. > > Hi, Dave, > > How do you think about the following fix: > > 1. use autoremove_wake_func(), and remove remove_wait_queue() to > avoid UAF. > 2. add finish_wait(). > > @@ -576,12 +576,13 @@ xlog_wait( > __releases(lock) > { > DECLARE_WAITQUEUE(wait, current); > + wait.func = autoremove_wake_function; > > add_wait_queue_exclusive(wq, &wait); > __set_current_state(TASK_UNINTERRUPTIBLE); > spin_unlock(lock); > schedule(); > - remove_wait_queue(wq, &wait); > + finish_wait(wq, &wait); > } Yes, that would address this specific symptom of the problem, but it doesn't fix the problem root cause: that the wq can be freed while this function sleeps. 
IMO, this sort of change leaves a trap for future modifications - all the code calling xlog_wait() assumes the embedded wq the task is sleeping on still exists after waiting, so we really should be fixing the underlying problem: the incorrect existence guarantee in the CIL code that you tripped over. Cheers, Dave.
On Thu, Jun 11, 2020 at 12:45:03PM +1000, Dave Chinner wrote: > > From: Dave Chinner <dchinner@redhat.com> > > xlog_wait() on the CIL context can reference a freed context if the > waiter doesn't get scheduled before the CIL context is freed. This > can happen when a task is on the hard throttle and the CIL push > aborts due to a shutdown. This was detected by generic/019: > > thread 1 thread 2 > > __xfs_trans_commit > xfs_log_commit_cil > <CIL size over hard throttle limit> > xlog_wait > schedule > xlog_cil_push_work > wake_up_all > <shutdown aborts commit> > xlog_cil_committed > kmem_free > > remove_wait_queue > spin_lock_irqsave --> UAF > > Fix it by moving the wait queue to the CIL rather than keeping it in > in the CIL context that gets freed on push completion. Because the > wait queue is now independent of the CIL context and we might have > multiple contexts in flight at once, only wake the waiters on the > push throttle when the context we are pushing is over the hard > throttle size threshold. > > Fixes: 0e7ab7efe7745 ("xfs: Throttle commits on delayed background CIL push") > Reported-by: Yu Kuai <yukuai3@huawei.com> > Signed-off-by: Dave Chinner <dchinner@redhat.com> Looks good: Reviewed-by: Christoph Hellwig <hch@lst.de>
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index b43f0e8f43f2e..9ed90368ab311 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -671,7 +671,8 @@ xlog_cil_push_work( /* * Wake up any background push waiters now this context is being pushed. */ - wake_up_all(&ctx->push_wait); + if (ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) + wake_up_all(&cil->xc_push_wait); /* * Check if we've anything to push. If there is nothing, then we don't @@ -743,13 +744,12 @@ xlog_cil_push_work( /* * initialise the new context and attach it to the CIL. Then attach - * the current context to the CIL committing lsit so it can be found + * the current context to the CIL committing list so it can be found * during log forces to extract the commit lsn of the sequence that * needs to be forced. */ INIT_LIST_HEAD(&new_ctx->committing); INIT_LIST_HEAD(&new_ctx->busy_extents); - init_waitqueue_head(&new_ctx->push_wait); new_ctx->sequence = ctx->sequence + 1; new_ctx->cil = cil; cil->xc_ctx = new_ctx; @@ -937,7 +937,7 @@ xlog_cil_push_background( if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) { trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket); ASSERT(cil->xc_ctx->space_used < log->l_logsize); - xlog_wait(&cil->xc_ctx->push_wait, &cil->xc_push_lock); + xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock); return; } @@ -1216,12 +1216,12 @@ xlog_cil_init( INIT_LIST_HEAD(&cil->xc_committing); spin_lock_init(&cil->xc_cil_lock); spin_lock_init(&cil->xc_push_lock); + init_waitqueue_head(&cil->xc_push_wait); init_rwsem(&cil->xc_ctx_lock); init_waitqueue_head(&cil->xc_commit_wait); INIT_LIST_HEAD(&ctx->committing); INIT_LIST_HEAD(&ctx->busy_extents); - init_waitqueue_head(&ctx->push_wait); ctx->sequence = 1; ctx->cil = cil; cil->xc_ctx = ctx; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index ec22c7a3867f1..75a62870b63af 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -240,7 +240,6 @@ struct xfs_cil_ctx { struct 
xfs_log_vec *lv_chain; /* logvecs being pushed */ struct list_head iclog_entry; struct list_head committing; /* ctx committing list */ - wait_queue_head_t push_wait; /* background push throttle */ struct work_struct discard_endio_work; }; @@ -274,6 +273,7 @@ struct xfs_cil { wait_queue_head_t xc_commit_wait; xfs_lsn_t xc_current_sequence; struct work_struct xc_push_work; + wait_queue_head_t xc_push_wait; /* background push throttle */ } ____cacheline_aligned_in_smp; /*