@@ -587,7 +587,9 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
io = container_of(tio, struct dm_io, tio);
io->magic = DM_IO_MAGIC;
io->status = 0;
- atomic_set(&io->io_count, 1);
+
+ /* one is for submission, the other is for completion */
+ atomic_set(&io->io_count, 2);
this_cpu_inc(*md->pending_io);
io->orig_bio = bio;
io->md = md;
@@ -937,11 +939,6 @@ static inline bool dm_tio_is_normal(struct dm_target_io *tio)
!dm_tio_flagged(tio, DM_TIO_IS_DUPLICATE_BIO));
}
-static void dm_io_inc_pending(struct dm_io *io)
-{
- atomic_inc(&io->io_count);
-}
-
/*
* Decrements the number of outstanding ios that a bio has been
* cloned into, completing the original io if necc.
@@ -1276,7 +1273,6 @@ static void __map_bio(struct bio *clone)
/*
* Map the clone.
*/
- dm_io_inc_pending(io);
tio->old_sector = clone->bi_iter.bi_sector;
if (unlikely(swap_bios_limit(ti, clone))) {
@@ -1358,11 +1354,12 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
}
}
-static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
+static int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
unsigned num_bios, unsigned *len)
{
struct bio_list blist = BIO_EMPTY_LIST;
struct bio *clone;
+ int ret = 0;
switch (num_bios) {
case 0:
@@ -1371,15 +1368,19 @@ static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
clone = alloc_tio(ci, ti, 0, len, GFP_NOIO);
dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO);
__map_bio(clone);
+ ret = 1;
break;
default:
alloc_multiple_bios(&blist, ci, ti, num_bios, len);
while ((clone = bio_list_pop(&blist))) {
dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO);
__map_bio(clone);
+ ret += 1;
}
break;
}
+
+ return ret;
}
static void __send_empty_flush(struct clone_info *ci)
@@ -1399,8 +1400,19 @@ static void __send_empty_flush(struct clone_info *ci)
ci->bio = &flush_bio;
ci->sector_count = 0;
- while ((ti = dm_table_get_target(ci->map, target_nr++)))
- __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
+ while ((ti = dm_table_get_target(ci->map, target_nr++))) {
+ int bios;
+
+ atomic_add(ti->num_flush_bios, &ci->io->io_count);
+ bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL);
+ atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
+ }
+
+ /*
+ * alloc_io() takes one extra reference for submission, so the
+ * reference won't reach 0 after the following subtraction
+ */
+ atomic_sub(1, &ci->io->io_count);
bio_uninit(ci->bio);
}
@@ -1409,6 +1421,7 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target
unsigned num_bios)
{
unsigned len;
+ int bios;
len = min_t(sector_t, ci->sector_count,
max_io_len_target_boundary(ti, dm_target_offset(ti, ci->sector)));
@@ -1420,7 +1433,13 @@ static void __send_changing_extent_only(struct clone_info *ci, struct dm_target
ci->sector += len;
ci->sector_count -= len;
- __send_duplicate_bios(ci, ti, num_bios, &len);
+ atomic_add(num_bios, &ci->io->io_count);
+ bios = __send_duplicate_bios(ci, ti, num_bios, &len);
+ /*
+ * alloc_io() takes one extra reference for submission, so the
+ * reference won't reach 0 after the following subtraction
+ */
+ atomic_sub(num_bios - bios + 1, &ci->io->io_count);
}
static bool is_abnormal_io(struct bio *bio)
@@ -1603,9 +1622,15 @@ static void dm_split_and_process_bio(struct mapped_device *md,
* Add every dm_io instance into the hlist_head which is stored in
* bio->bi_private, so that dm_poll_bio can poll them all.
*/
- if (error || !ci.submit_as_polled)
+ if (error || !ci.submit_as_polled) {
+ /*
+ * In case of submission failure, the extra reference for
+ * submitting io isn't consumed yet
+ */
+ if (error)
+ atomic_dec(&ci.io->io_count);
dm_io_dec_pending(ci.io, errno_to_blk_status(error));
- else
+ } else
dm_queue_poll_io(bio, ci.io);
}
Currently the target io's reference counter is grabbed before each call to __map_bio(). This is inefficient, since the grab can be moved into alloc_io(). It also turns the counter into the typical async io reference-counting model: one reference is for the submission side, the other is for the completion side, and the io won't be completed until both sides are done. Signed-off-by: Ming Lei <ming.lei@redhat.com> --- drivers/md/dm.c | 51 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 13 deletions(-)