@@ -884,20 +884,39 @@ static int __noflush_suspending(struct mapped_device *md)
return test_bit(DMF_NOFLUSH_SUSPENDING, &md->flags);
}
-static void dm_handle_requeue(struct dm_io *io)
+/* Return true if the original bio is requeued */
+static bool dm_handle_requeue(struct dm_io *io)
{
- if (io->status == BLK_STS_DM_REQUEUE) {
- struct bio *bio = io->split_bio ? io->split_bio : io->orig_bio;
- struct mapped_device *md = io->md;
+ struct bio *bio = io->split_bio ? io->split_bio : io->orig_bio;
+ bool need_requeue = (io->status == BLK_STS_DM_REQUEUE);
+ bool handle_eagain = (io->status == BLK_STS_AGAIN) &&
+ (bio->bi_opf & REQ_POLLED);
+ struct mapped_device *md = io->md;
+ bool requeued = false;
+
+ if (need_requeue || handle_eagain) {
unsigned long flags;
+
+ if (bio->bi_opf & REQ_POLLED) {
+ /*
+ * Upper layer won't help us poll split bio
+ * (io->orig_bio may only reflect a subset of the
+ * pre-split original) so clear REQ_POLLED in case
+ * of requeue.
+ */
+ bio_clear_polled(bio);
+ }
+
/*
-		 * Target requested pushing back the I/O or
+		 * Target requested pushing back the I/O or
+		 * polled IO hit BLK_STS_AGAIN.
*/
spin_lock_irqsave(&md->deferred_lock, flags);
- if (__noflush_suspending(md) &&
- !WARN_ON_ONCE(dm_is_zone_write(md, bio))) {
+ if ((__noflush_suspending(md) &&
+ !WARN_ON_ONCE(dm_is_zone_write(md, bio))) ||
+ handle_eagain) {
-			/* NOTE early return due to BLK_STS_DM_REQUEUE below */
bio_list_add_head(&md->deferred, bio);
+ requeued = true;
} else {
/*
* noflush suspend was interrupted or this is
@@ -907,6 +926,10 @@ static void dm_handle_requeue(struct dm_io *io)
}
spin_unlock_irqrestore(&md->deferred_lock, flags);
}
+
+ if (requeued)
+ queue_work(md->wq, &md->work);
+ return requeued;
}
static void dm_io_complete(struct dm_io *io)
@@ -914,8 +937,9 @@ static void dm_io_complete(struct dm_io *io)
struct bio *bio = io->split_bio ? io->split_bio : io->orig_bio;
struct mapped_device *md = io->md;
blk_status_t io_error;
+ bool requeued;
- dm_handle_requeue(io);
+ requeued = dm_handle_requeue(io);
io_error = io->status;
if (dm_io_flagged(io, DM_IO_ACCOUNTED))
@@ -936,23 +960,9 @@ static void dm_io_complete(struct dm_io *io)
if (unlikely(wq_has_sleeper(&md->wait)))
wake_up(&md->wait);
- if (io_error == BLK_STS_DM_REQUEUE || io_error == BLK_STS_AGAIN) {
- if (bio->bi_opf & REQ_POLLED) {
- /*
- * Upper layer won't help us poll split bio (io->orig_bio
- * may only reflect a subset of the pre-split original)
- * so clear REQ_POLLED in case of requeue.
- */
- bio_clear_polled(bio);
- if (io_error == BLK_STS_AGAIN) {
- /* io_uring doesn't handle BLK_STS_AGAIN (yet) */
- queue_io(md, bio);
- return;
- }
- }
- if (io_error == BLK_STS_DM_REQUEUE)
- return;
- }
+ /* We have requeued, so return now */
+ if (requeued)
+ return;
if (bio_is_flush_with_data(bio)) {
/*
In case BLK_STS_DM_REQUEUE is returned, or BLK_STS_AGAIN is returned
for POLLED io, we requeue the original bio into the deferred list and
kick md->wq to re-submit it to the block layer.

Improve the handling in the following way:

1) unify handling for BLK_STS_DM_REQUEUE and BLK_STS_AGAIN, and clear
   REQ_POLLED for BLK_STS_DM_REQUEUE too, for the sake of simplicity,
   given BLK_STS_DM_REQUEUE is very unusual

2) queue md->wq explicitly in __dm_io_complete(), so requeue handling
   becomes more robust

Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/md/dm.c | 58 +++++++++++++++++++++++++++++--------------------
 1 file changed, 34 insertions(+), 24 deletions(-)
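
For context, the work item that the new queue_work() call schedules is
the existing deferred-bio worker in drivers/md/dm.c. Roughly paraphrased
from the kernel source of this era (not part of this diff, shown only to
illustrate how a requeued bio gets re-submitted):

/*
 * Drains md->deferred and re-submits each bio; queue_work(md->wq,
 * &md->work) in dm_handle_requeue() above schedules exactly this.
 */
static void dm_wq_work(struct work_struct *work)
{
	struct mapped_device *md = container_of(work, struct mapped_device,
						work);
	struct bio *bio;

	while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
		spin_lock_irq(&md->deferred_lock);
		bio = bio_list_pop(&md->deferred);
		spin_unlock_irq(&md->deferred_lock);

		if (!bio)
			break;

		/*
		 * REQ_POLLED was cleared before requeue, so a plain
		 * re-submit is safe here.
		 */
		submit_bio_noacct(bio);
	}
}

Since dm_handle_requeue() adds the bio at the head of md->deferred after
clearing REQ_POLLED, the worker re-submits it as a normal non-polled bio.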
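
And for where BLK_STS_DM_REQUEUE typically originates: a target's
->end_io hook can ask dm core to push an io back. A minimal hypothetical
sketch (example_end_io() and example_path_recoverable() are made-up
names; the dm_endio_fn signature and the DM_ENDIO_* return codes are the
real ones from include/linux/device-mapper.h):

/*
 * Hypothetical target end_io hook: returning DM_ENDIO_REQUEUE makes
 * clone_endio() set the io status to BLK_STS_DM_REQUEUE (unless the
 * bio is a zoned write), which is the status dm_handle_requeue() acts on.
 */
static int example_end_io(struct dm_target *ti, struct bio *bio,
			  blk_status_t *error)
{
	/*
	 * example_path_recoverable() is a stand-in for target-specific
	 * retry logic.
	 */
	if (*error != BLK_STS_OK && example_path_recoverable(ti))
		return DM_ENDIO_REQUEUE;

	return DM_ENDIO_DONE;
}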