Message ID | 20200417150859.14734-2-bfoster@redhat.com (mailing list archive) |
---|---|
State | Superseded, archived |
Headers | show |
Series | xfs: flush related error handling cleanups | expand |
On 4/17/20 8:08 AM, Brian Foster wrote: > Flush locked log items whose underlying buffers fail metadata > writeback are tagged with a special flag to indicate that the flush > lock is already held. This is currently implemented in the type > specific ->iop_push() callback, but the processing required for such > items is not type specific because we're only doing basic state > management on the underlying buffer. > > Factor the failed log item handling out of the inode and dquot > ->iop_push() callbacks and open code the buffer resubmit helper into > a single helper called from xfsaild_push_item(). This provides a > generic mechanism for handling failed metadata buffer writeback with > a bit less code. > > Signed-off-by: Brian Foster <bfoster@redhat.com> Ok, I traced it through, and I think the re-factor is equivalent Reviewed-by: Allison Collins <allison.henderson@oracle.com> > --- > fs/xfs/xfs_buf_item.c | 39 --------------------------------------- > fs/xfs/xfs_buf_item.h | 2 -- > fs/xfs/xfs_dquot_item.c | 15 --------------- > fs/xfs/xfs_inode_item.c | 15 --------------- > fs/xfs/xfs_trans_ail.c | 41 +++++++++++++++++++++++++++++++++++++++++ > 5 files changed, 41 insertions(+), 71 deletions(-) > > diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c > index 1545657c3ca0..8796adde2d12 100644 > --- a/fs/xfs/xfs_buf_item.c > +++ b/fs/xfs/xfs_buf_item.c > @@ -1248,42 +1248,3 @@ xfs_buf_iodone( > xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); > xfs_buf_item_free(BUF_ITEM(lip)); > } > - > -/* > - * Requeue a failed buffer for writeback. > - * > - * We clear the log item failed state here as well, but we have to be careful > - * about reference counts because the only active reference counts on the buffer > - * may be the failed log items. Hence if we clear the log item failed state > - * before queuing the buffer for IO we can release all active references to > - * the buffer and free it, leading to use after free problems in > - * xfs_buf_delwri_queue. 
It makes no difference to the buffer or log items which > - * order we process them in - the buffer is locked, and we own the buffer list > - * so nothing on them is going to change while we are performing this action. > - * > - * Hence we can safely queue the buffer for IO before we clear the failed log > - * item state, therefore always having an active reference to the buffer and > - * avoiding the transient zero-reference state that leads to use-after-free. > - * > - * Return true if the buffer was added to the buffer list, false if it was > - * already on the buffer list. > - */ > -bool > -xfs_buf_resubmit_failed_buffers( > - struct xfs_buf *bp, > - struct list_head *buffer_list) > -{ > - struct xfs_log_item *lip; > - bool ret; > - > - ret = xfs_buf_delwri_queue(bp, buffer_list); > - > - /* > - * XFS_LI_FAILED set/clear is protected by ail_lock, caller of this > - * function already have it acquired > - */ > - list_for_each_entry(lip, &bp->b_li_list, li_bio_list) > - xfs_clear_li_failed(lip); > - > - return ret; > -} > diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h > index 30114b510332..c9c57e2da932 100644 > --- a/fs/xfs/xfs_buf_item.h > +++ b/fs/xfs/xfs_buf_item.h > @@ -59,8 +59,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *, > struct xfs_log_item *); > void xfs_buf_iodone_callbacks(struct xfs_buf *); > void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); > -bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *, > - struct list_head *); > bool xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec); > > extern kmem_zone_t *xfs_buf_item_zone; > diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c > index baad1748d0d1..5a7808299a32 100644 > --- a/fs/xfs/xfs_dquot_item.c > +++ b/fs/xfs/xfs_dquot_item.c > @@ -145,21 +145,6 @@ xfs_qm_dquot_logitem_push( > if (atomic_read(&dqp->q_pincount) > 0) > return XFS_ITEM_PINNED; > > - /* > - * The buffer containing this item failed to be written back > - * previously. 
Resubmit the buffer for IO > - */ > - if (test_bit(XFS_LI_FAILED, &lip->li_flags)) { > - if (!xfs_buf_trylock(bp)) > - return XFS_ITEM_LOCKED; > - > - if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) > - rval = XFS_ITEM_FLUSHING; > - > - xfs_buf_unlock(bp); > - return rval; > - } > - > if (!xfs_dqlock_nowait(dqp)) > return XFS_ITEM_LOCKED; > > diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c > index f779cca2346f..1d4d256a2e96 100644 > --- a/fs/xfs/xfs_inode_item.c > +++ b/fs/xfs/xfs_inode_item.c > @@ -497,21 +497,6 @@ xfs_inode_item_push( > if (xfs_ipincount(ip) > 0) > return XFS_ITEM_PINNED; > > - /* > - * The buffer containing this item failed to be written back > - * previously. Resubmit the buffer for IO. > - */ > - if (test_bit(XFS_LI_FAILED, &lip->li_flags)) { > - if (!xfs_buf_trylock(bp)) > - return XFS_ITEM_LOCKED; > - > - if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) > - rval = XFS_ITEM_FLUSHING; > - > - xfs_buf_unlock(bp); > - return rval; > - } > - > if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) > return XFS_ITEM_LOCKED; > > diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c > index 564253550b75..0c709651a2c6 100644 > --- a/fs/xfs/xfs_trans_ail.c > +++ b/fs/xfs/xfs_trans_ail.c > @@ -345,6 +345,45 @@ xfs_ail_delete( > xfs_trans_ail_cursor_clear(ailp, lip); > } > > +/* > + * Requeue a failed buffer for writeback. > + * > + * We clear the log item failed state here as well, but we have to be careful > + * about reference counts because the only active reference counts on the buffer > + * may be the failed log items. Hence if we clear the log item failed state > + * before queuing the buffer for IO we can release all active references to > + * the buffer and free it, leading to use after free problems in > + * xfs_buf_delwri_queue. 
It makes no difference to the buffer or log items which > + * order we process them in - the buffer is locked, and we own the buffer list > + * so nothing on them is going to change while we are performing this action. > + * > + * Hence we can safely queue the buffer for IO before we clear the failed log > + * item state, therefore always having an active reference to the buffer and > + * avoiding the transient zero-reference state that leads to use-after-free. > + */ > +static inline int > +xfsaild_push_failed( > + struct xfs_log_item *lip, > + struct list_head *buffer_list) > +{ > + struct xfs_buf *bp = lip->li_buf; > + > + if (!xfs_buf_trylock(bp)) > + return XFS_ITEM_LOCKED; > + > + if (!xfs_buf_delwri_queue(bp, buffer_list)) { > + xfs_buf_unlock(bp); > + return XFS_ITEM_FLUSHING; > + } > + > + /* protected by ail_lock */ > + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) > + xfs_clear_li_failed(lip); > + > + xfs_buf_unlock(bp); > + return XFS_ITEM_SUCCESS; > +} > + > static inline uint > xfsaild_push_item( > struct xfs_ail *ailp, > @@ -365,6 +404,8 @@ xfsaild_push_item( > */ > if (!lip->li_ops->iop_push) > return XFS_ITEM_PINNED; > + if (test_bit(XFS_LI_FAILED, &lip->li_flags)) > + return xfsaild_push_failed(lip, &ailp->ail_buf_list); > return lip->li_ops->iop_push(lip, &ailp->ail_buf_list); > } > >
On Fri, Apr 17, 2020 at 11:08:48AM -0400, Brian Foster wrote: > Flush locked log items whose underlying buffers fail metadata > writeback are tagged with a special flag to indicate that the flush > lock is already held. This is currently implemented in the type > specific ->iop_push() callback, but the processing required for such > items is not type specific because we're only doing basic state > management on the underlying buffer. > > Factor the failed log item handling out of the inode and dquot > ->iop_push() callbacks and open code the buffer resubmit helper into > a single helper called from xfsaild_push_item(). This provides a > generic mechanism for handling failed metadata buffer writeback with > a bit less code. > > Signed-off-by: Brian Foster <bfoster@redhat.com> ..... > diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c > index 564253550b75..0c709651a2c6 100644 > --- a/fs/xfs/xfs_trans_ail.c > +++ b/fs/xfs/xfs_trans_ail.c > @@ -345,6 +345,45 @@ xfs_ail_delete( > xfs_trans_ail_cursor_clear(ailp, lip); > } > > +/* > + * Requeue a failed buffer for writeback. > + * > + * We clear the log item failed state here as well, but we have to be careful > + * about reference counts because the only active reference counts on the buffer > + * may be the failed log items. Hence if we clear the log item failed state > + * before queuing the buffer for IO we can release all active references to > + * the buffer and free it, leading to use after free problems in > + * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which > + * order we process them in - the buffer is locked, and we own the buffer list > + * so nothing on them is going to change while we are performing this action. > + * > + * Hence we can safely queue the buffer for IO before we clear the failed log > + * item state, therefore always having an active reference to the buffer and > + * avoiding the transient zero-reference state that leads to use-after-free. 
> + */ > +static inline int > +xfsaild_push_failed( Bad name IMO. Makes me think it's an action that is taken when an item push failed, not an action that resubmits a buffer that had an IO failure. i.e. "push" doesn't imply IO, and failure to push an item doesn't mean there was an IO error - it may be locked, already flushing, pinned, etc. > + struct xfs_log_item *lip, > + struct list_head *buffer_list) > +{ > + struct xfs_buf *bp = lip->li_buf; This also assumes that the log item has a buffer associated with it. So perhaps "xfsaild_resubmit_failed_buffer()" would be more appropriate here. Other than that, the code is fine. Cheers, Dave.
On Mon, Apr 20, 2020 at 12:45:56PM +1000, Dave Chinner wrote: > On Fri, Apr 17, 2020 at 11:08:48AM -0400, Brian Foster wrote: > > Flush locked log items whose underlying buffers fail metadata > > writeback are tagged with a special flag to indicate that the flush > > lock is already held. This is currently implemented in the type > > specific ->iop_push() callback, but the processing required for such > > items is not type specific because we're only doing basic state > > management on the underlying buffer. > > > > Factor the failed log item handling out of the inode and dquot > > ->iop_push() callbacks and open code the buffer resubmit helper into > > a single helper called from xfsaild_push_item(). This provides a > > generic mechanism for handling failed metadata buffer writeback with > > a bit less code. > > > > Signed-off-by: Brian Foster <bfoster@redhat.com> > ..... > > diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c > > index 564253550b75..0c709651a2c6 100644 > > --- a/fs/xfs/xfs_trans_ail.c > > +++ b/fs/xfs/xfs_trans_ail.c > > @@ -345,6 +345,45 @@ xfs_ail_delete( > > xfs_trans_ail_cursor_clear(ailp, lip); > > } > > > > +/* > > + * Requeue a failed buffer for writeback. > > + * > > + * We clear the log item failed state here as well, but we have to be careful > > + * about reference counts because the only active reference counts on the buffer > > + * may be the failed log items. Hence if we clear the log item failed state > > + * before queuing the buffer for IO we can release all active references to > > + * the buffer and free it, leading to use after free problems in > > + * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which > > + * order we process them in - the buffer is locked, and we own the buffer list > > + * so nothing on them is going to change while we are performing this action. 
> > + * > > + * Hence we can safely queue the buffer for IO before we clear the failed log > > + * item state, therefore always having an active reference to the buffer and > > + * avoiding the transient zero-reference state that leads to use-after-free. > > + */ > > +static inline int > > +xfsaild_push_failed( > > Bad name IMO. Makes me think it's an action that is taken when an > item push failed, not an action that resubmits a buffer that had an > IO failure. > > i.e. "push" doesn't imply IO, and failure to push an item doesn't > mean there was an IO error - it may be locked, already flushing, > pinned, etc. > Yeah.. > > + struct xfs_log_item *lip, > > + struct list_head *buffer_list) > > +{ > > + struct xfs_buf *bp = lip->li_buf; > > This also assumes that the log item has a buffer associated with it. > So perhaps "xfsaild_resubmit_failed_buffer()" would be more > appropriate here. > The buffer pointer is an implementation detail of failed items. It would be nice to find a way to avoid that. How about xfsaild_resubmit_item() to be consistent with the parent function (xfsaild_push_item())? Brian > Other than that, the code is fine. > > Cheers, > > Dave. > -- > Dave Chinner > david@fromorbit.com >
On Mon, Apr 20, 2020 at 09:58:25AM -0400, Brian Foster wrote: > On Mon, Apr 20, 2020 at 12:45:56PM +1000, Dave Chinner wrote: > > On Fri, Apr 17, 2020 at 11:08:48AM -0400, Brian Foster wrote: > > > Flush locked log items whose underlying buffers fail metadata > > > writeback are tagged with a special flag to indicate that the flush > > > lock is already held. This is currently implemented in the type > > > specific ->iop_push() callback, but the processing required for such > > > items is not type specific because we're only doing basic state > > > management on the underlying buffer. > > > > > > Factor the failed log item handling out of the inode and dquot > > > ->iop_push() callbacks and open code the buffer resubmit helper into > > > a single helper called from xfsaild_push_item(). This provides a > > > generic mechanism for handling failed metadata buffer writeback with > > > a bit less code. > > > > > > Signed-off-by: Brian Foster <bfoster@redhat.com> > > ..... > > > diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c > > > index 564253550b75..0c709651a2c6 100644 > > > --- a/fs/xfs/xfs_trans_ail.c > > > +++ b/fs/xfs/xfs_trans_ail.c > > > @@ -345,6 +345,45 @@ xfs_ail_delete( > > > xfs_trans_ail_cursor_clear(ailp, lip); > > > } > > > > > > +/* > > > + * Requeue a failed buffer for writeback. > > > + * > > > + * We clear the log item failed state here as well, but we have to be careful > > > + * about reference counts because the only active reference counts on the buffer > > > + * may be the failed log items. Hence if we clear the log item failed state > > > + * before queuing the buffer for IO we can release all active references to > > > + * the buffer and free it, leading to use after free problems in > > > + * xfs_buf_delwri_queue. 
It makes no difference to the buffer or log items which > > > + * order we process them in - the buffer is locked, and we own the buffer list > > > + * so nothing on them is going to change while we are performing this action. > > > + * > > > + * Hence we can safely queue the buffer for IO before we clear the failed log > > > + * item state, therefore always having an active reference to the buffer and > > > + * avoiding the transient zero-reference state that leads to use-after-free. > > > + */ > > > +static inline int > > > +xfsaild_push_failed( > > > > Bad name IMO. Makes me think it's an action that is taken when an > > item push failed, not an action that resubmits a buffer that had an > > IO failure. > > > > i.e. "push" doesn't imply IO, and failure to push an item doesn't > > mean there was an IO error - it may be locked, already flushing, > > pinned, etc. > > > > Yeah.. > > > > + struct xfs_log_item *lip, > > > + struct list_head *buffer_list) > > > +{ > > > + struct xfs_buf *bp = lip->li_buf; > > > > This also assumes that the log item has a buffer associated with it. > > So perhaps "xfsaild_resubmit_failed_buffer()" would be more > > appropriate here. > > > > The buffer pointer is an implementation detail of failed items. It would > be nice to find a way to avoid that. How about xfsaild_resubmit_item() > to be consistent with the parent function (xfsaild_push_item())? That works, too. :) Cheers, Dave.
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 1545657c3ca0..8796adde2d12 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -1248,42 +1248,3 @@ xfs_buf_iodone( xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); xfs_buf_item_free(BUF_ITEM(lip)); } - -/* - * Requeue a failed buffer for writeback. - * - * We clear the log item failed state here as well, but we have to be careful - * about reference counts because the only active reference counts on the buffer - * may be the failed log items. Hence if we clear the log item failed state - * before queuing the buffer for IO we can release all active references to - * the buffer and free it, leading to use after free problems in - * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which - * order we process them in - the buffer is locked, and we own the buffer list - * so nothing on them is going to change while we are performing this action. - * - * Hence we can safely queue the buffer for IO before we clear the failed log - * item state, therefore always having an active reference to the buffer and - * avoiding the transient zero-reference state that leads to use-after-free. - * - * Return true if the buffer was added to the buffer list, false if it was - * already on the buffer list. 
- */ -bool -xfs_buf_resubmit_failed_buffers( - struct xfs_buf *bp, - struct list_head *buffer_list) -{ - struct xfs_log_item *lip; - bool ret; - - ret = xfs_buf_delwri_queue(bp, buffer_list); - - /* - * XFS_LI_FAILED set/clear is protected by ail_lock, caller of this - * function already have it acquired - */ - list_for_each_entry(lip, &bp->b_li_list, li_bio_list) - xfs_clear_li_failed(lip); - - return ret; -} diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index 30114b510332..c9c57e2da932 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -59,8 +59,6 @@ void xfs_buf_attach_iodone(struct xfs_buf *, struct xfs_log_item *); void xfs_buf_iodone_callbacks(struct xfs_buf *); void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); -bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *, - struct list_head *); bool xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec); extern kmem_zone_t *xfs_buf_item_zone; diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index baad1748d0d1..5a7808299a32 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -145,21 +145,6 @@ xfs_qm_dquot_logitem_push( if (atomic_read(&dqp->q_pincount) > 0) return XFS_ITEM_PINNED; - /* - * The buffer containing this item failed to be written back - * previously. Resubmit the buffer for IO - */ - if (test_bit(XFS_LI_FAILED, &lip->li_flags)) { - if (!xfs_buf_trylock(bp)) - return XFS_ITEM_LOCKED; - - if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) - rval = XFS_ITEM_FLUSHING; - - xfs_buf_unlock(bp); - return rval; - } - if (!xfs_dqlock_nowait(dqp)) return XFS_ITEM_LOCKED; diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index f779cca2346f..1d4d256a2e96 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -497,21 +497,6 @@ xfs_inode_item_push( if (xfs_ipincount(ip) > 0) return XFS_ITEM_PINNED; - /* - * The buffer containing this item failed to be written back - * previously. Resubmit the buffer for IO. 
- */ - if (test_bit(XFS_LI_FAILED, &lip->li_flags)) { - if (!xfs_buf_trylock(bp)) - return XFS_ITEM_LOCKED; - - if (!xfs_buf_resubmit_failed_buffers(bp, buffer_list)) - rval = XFS_ITEM_FLUSHING; - - xfs_buf_unlock(bp); - return rval; - } - if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) return XFS_ITEM_LOCKED; diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 564253550b75..0c709651a2c6 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -345,6 +345,45 @@ xfs_ail_delete( xfs_trans_ail_cursor_clear(ailp, lip); } +/* + * Requeue a failed buffer for writeback. + * + * We clear the log item failed state here as well, but we have to be careful + * about reference counts because the only active reference counts on the buffer + * may be the failed log items. Hence if we clear the log item failed state + * before queuing the buffer for IO we can release all active references to + * the buffer and free it, leading to use after free problems in + * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which + * order we process them in - the buffer is locked, and we own the buffer list + * so nothing on them is going to change while we are performing this action. + * + * Hence we can safely queue the buffer for IO before we clear the failed log + * item state, therefore always having an active reference to the buffer and + * avoiding the transient zero-reference state that leads to use-after-free. 
+ */ +static inline int +xfsaild_push_failed( + struct xfs_log_item *lip, + struct list_head *buffer_list) +{ + struct xfs_buf *bp = lip->li_buf; + + if (!xfs_buf_trylock(bp)) + return XFS_ITEM_LOCKED; + + if (!xfs_buf_delwri_queue(bp, buffer_list)) { + xfs_buf_unlock(bp); + return XFS_ITEM_FLUSHING; + } + + /* protected by ail_lock */ + list_for_each_entry(lip, &bp->b_li_list, li_bio_list) + xfs_clear_li_failed(lip); + + xfs_buf_unlock(bp); + return XFS_ITEM_SUCCESS; +} + static inline uint xfsaild_push_item( struct xfs_ail *ailp, @@ -365,6 +404,8 @@ xfsaild_push_item( */ if (!lip->li_ops->iop_push) return XFS_ITEM_PINNED; + if (test_bit(XFS_LI_FAILED, &lip->li_flags)) + return xfsaild_push_failed(lip, &ailp->ail_buf_list); return lip->li_ops->iop_push(lip, &ailp->ail_buf_list); }
Flush locked log items whose underlying buffers fail metadata writeback are tagged with a special flag to indicate that the flush lock is already held. This is currently implemented in the type specific ->iop_push() callback, but the processing required for such items is not type specific because we're only doing basic state management on the underlying buffer. Factor the failed log item handling out of the inode and dquot ->iop_push() callbacks and open code the buffer resubmit helper into a single helper called from xfsaild_push_item(). This provides a generic mechanism for handling failed metadata buffer writeback with a bit less code. Signed-off-by: Brian Foster <bfoster@redhat.com> --- fs/xfs/xfs_buf_item.c | 39 --------------------------------------- fs/xfs/xfs_buf_item.h | 2 -- fs/xfs/xfs_dquot_item.c | 15 --------------- fs/xfs/xfs_inode_item.c | 15 --------------- fs/xfs/xfs_trans_ail.c | 41 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 41 insertions(+), 71 deletions(-)