diff mbox

[1/2] block: remove struct bio_batch

Message ID 1460832928-15514-2-git-send-email-hch@lst.de (mailing list archive)
State New, archived
Headers show

Commit Message

Christoph Hellwig April 16, 2016, 6:55 p.m. UTC
It can be replaced with a combination of bio_chain and submit_bio_wait.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lin <ming.l@ssi.samsung.com>
Signed-off-by: Sagi Grimberg <sagig@grimberg.me>
---
 block/blk-lib.c | 118 +++++++++++++-------------------------------------------
 1 file changed, 27 insertions(+), 91 deletions(-)

Comments

Ming Lei April 17, 2016, 2:25 p.m. UTC | #1
On Sun, Apr 17, 2016 at 2:55 AM, Christoph Hellwig <hch@lst.de> wrote:
> It can be replaced with a combination of bio_chain and submit_bio_wait.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Ming Lin <ming.l@ssi.samsung.com>
> Signed-off-by: Sagi Grimberg <sagig@grimberg.me>
> ---
>  block/blk-lib.c | 118 +++++++++++++-------------------------------------------
>  1 file changed, 27 insertions(+), 91 deletions(-)
>
> diff --git a/block/blk-lib.c b/block/blk-lib.c
> index 9ebf653..700d248 100644
> --- a/block/blk-lib.c
> +++ b/block/blk-lib.c
> @@ -9,21 +9,17 @@
>
>  #include "blk.h"
>
> -struct bio_batch {
> -       atomic_t                done;
> -       int                     error;
> -       struct completion       *wait;
> -};
> -
> -static void bio_batch_end_io(struct bio *bio)
> +static struct bio *next_bio(struct bio *bio, int rw, unsigned int nr_pages,
> +               gfp_t gfp)
>  {
> -       struct bio_batch *bb = bio->bi_private;
> +       struct bio *new = bio_alloc(gfp, nr_pages);
> +
> +       if (bio) {
> +               bio_chain(bio, new);
> +               submit_bio(rw, bio);
> +       }
>
> -       if (bio->bi_error && bio->bi_error != -EOPNOTSUPP)
> -               bb->error = bio->bi_error;
> -       if (atomic_dec_and_test(&bb->done))
> -               complete(bb->wait);
> -       bio_put(bio);
> +       return new;
>  }
>
>  /**
> @@ -40,13 +36,11 @@ static void bio_batch_end_io(struct bio *bio)
>  int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
>                 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
>  {
> -       DECLARE_COMPLETION_ONSTACK(wait);
>         struct request_queue *q = bdev_get_queue(bdev);
>         int type = REQ_WRITE | REQ_DISCARD;
>         unsigned int granularity;
>         int alignment;
> -       struct bio_batch bb;
> -       struct bio *bio;
> +       struct bio *bio = NULL;
>         int ret = 0;
>         struct blk_plug plug;
>
> @@ -66,25 +60,15 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
>                 type |= REQ_SECURE;
>         }
>
> -       atomic_set(&bb.done, 1);
> -       bb.error = 0;
> -       bb.wait = &wait;
> -
>         blk_start_plug(&plug);
>         while (nr_sects) {
>                 unsigned int req_sects;
>                 sector_t end_sect, tmp;
>
> -               bio = bio_alloc(gfp_mask, 1);
> -               if (!bio) {
> -                       ret = -ENOMEM;
> -                       break;
> -               }
> -
>                 /* Make sure bi_size doesn't overflow */
>                 req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);
>
> -               /*
> +               /**
>                  * If splitting a request, and the next starting sector would be
>                  * misaligned, stop the discard at the previous aligned sector.
>                  */
> @@ -98,18 +82,14 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
>                         req_sects = end_sect - sector;
>                 }
>
> +               bio = next_bio(bio, type, 1, gfp_mask);
>                 bio->bi_iter.bi_sector = sector;
> -               bio->bi_end_io = bio_batch_end_io;
>                 bio->bi_bdev = bdev;
> -               bio->bi_private = &bb;
>
>                 bio->bi_iter.bi_size = req_sects << 9;
>                 nr_sects -= req_sects;
>                 sector = end_sect;
>
> -               atomic_inc(&bb.done);
> -               submit_bio(type, bio);
> -
>                 /*
>                  * We can loop for a long time in here, if someone does
>                  * full device discards (like mkfs). Be nice and allow
> @@ -118,15 +98,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
>                  */
>                 cond_resched();
>         }
> +       if (bio)
> +               ret = submit_bio_wait(type, bio);
>         blk_finish_plug(&plug);

The patch itself is correct, and the idea is good.

But it can be simpler or more readable by always chaining bios into
the same parent bio, which is submitted as the last one.

Thanks,

>
> -       /* Wait for bios in-flight */
> -       if (!atomic_dec_and_test(&bb.done))
> -               wait_for_completion_io(&wait);
> -
> -       if (bb.error)
> -               return bb.error;
> -       return ret;
> +       return ret != -EOPNOTSUPP ? ret : 0;
>  }
>  EXPORT_SYMBOL(blkdev_issue_discard);
>
> @@ -145,11 +121,9 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
>                             sector_t nr_sects, gfp_t gfp_mask,
>                             struct page *page)
>  {
> -       DECLARE_COMPLETION_ONSTACK(wait);
>         struct request_queue *q = bdev_get_queue(bdev);
>         unsigned int max_write_same_sectors;
> -       struct bio_batch bb;
> -       struct bio *bio;
> +       struct bio *bio = NULL;
>         int ret = 0;
>
>         if (!q)
> @@ -158,21 +132,10 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
>         /* Ensure that max_write_same_sectors doesn't overflow bi_size */
>         max_write_same_sectors = UINT_MAX >> 9;
>
> -       atomic_set(&bb.done, 1);
> -       bb.error = 0;
> -       bb.wait = &wait;
> -
>         while (nr_sects) {
> -               bio = bio_alloc(gfp_mask, 1);
> -               if (!bio) {
> -                       ret = -ENOMEM;
> -                       break;
> -               }
> -
> +               bio = next_bio(bio, REQ_WRITE | REQ_WRITE_SAME, 1, gfp_mask);
>                 bio->bi_iter.bi_sector = sector;
> -               bio->bi_end_io = bio_batch_end_io;
>                 bio->bi_bdev = bdev;
> -               bio->bi_private = &bb;
>                 bio->bi_vcnt = 1;
>                 bio->bi_io_vec->bv_page = page;
>                 bio->bi_io_vec->bv_offset = 0;
> @@ -186,18 +149,11 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
>                         bio->bi_iter.bi_size = nr_sects << 9;
>                         nr_sects = 0;
>                 }
> -
> -               atomic_inc(&bb.done);
> -               submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
>         }
>
> -       /* Wait for bios in-flight */
> -       if (!atomic_dec_and_test(&bb.done))
> -               wait_for_completion_io(&wait);
> -
> -       if (bb.error)
> -               return bb.error;
> -       return ret;
> +       if (bio)
> +               ret = submit_bio_wait(REQ_WRITE | REQ_WRITE_SAME, bio);
> +       return ret != -EOPNOTSUPP ? ret : 0;
>  }
>  EXPORT_SYMBOL(blkdev_issue_write_same);
>
> @@ -216,28 +172,15 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
>                                   sector_t nr_sects, gfp_t gfp_mask)
>  {
>         int ret;
> -       struct bio *bio;
> -       struct bio_batch bb;
> +       struct bio *bio = NULL;
>         unsigned int sz;
> -       DECLARE_COMPLETION_ONSTACK(wait);
>
> -       atomic_set(&bb.done, 1);
> -       bb.error = 0;
> -       bb.wait = &wait;
> -
> -       ret = 0;
>         while (nr_sects != 0) {
> -               bio = bio_alloc(gfp_mask,
> -                               min(nr_sects, (sector_t)BIO_MAX_PAGES));
> -               if (!bio) {
> -                       ret = -ENOMEM;
> -                       break;
> -               }
> -
> +               bio = next_bio(bio, WRITE,
> +                               min(nr_sects, (sector_t)BIO_MAX_PAGES),
> +                               gfp_mask);
>                 bio->bi_iter.bi_sector = sector;
>                 bio->bi_bdev   = bdev;
> -               bio->bi_end_io = bio_batch_end_io;
> -               bio->bi_private = &bb;
>
>                 while (nr_sects != 0) {
>                         sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
> @@ -247,18 +190,11 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
>                         if (ret < (sz << 9))
>                                 break;
>                 }
> -               ret = 0;
> -               atomic_inc(&bb.done);
> -               submit_bio(WRITE, bio);
>         }
>
> -       /* Wait for bios in-flight */
> -       if (!atomic_dec_and_test(&bb.done))
> -               wait_for_completion_io(&wait);
> -
> -       if (bb.error)
> -               return bb.error;
> -       return ret;
> +       if (bio)
> +               return submit_bio_wait(WRITE, bio);
> +       return 0;
>  }
>
>  /**
> --
> 2.1.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-block" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig April 17, 2016, 6:07 p.m. UTC | #2
On Sun, Apr 17, 2016 at 10:25:09PM +0800, Ming Lei wrote:
> The patch itself is correct, and the idea is good.
> 
> But it can be simpler or more readable by always chaining bios into
> the same parent bio, which is submitted as the last one.

Which means we now messed up our I/O order to not be sequential
for no good reason.
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ming Lei April 17, 2016, 9:11 p.m. UTC | #3
On Mon, Apr 18, 2016 at 2:07 AM, Christoph Hellwig <hch@lst.de> wrote:
> On Sun, Apr 17, 2016 at 10:25:09PM +0800, Ming Lei wrote:
>> The patch itself is correct, and the idea is good.
>>
>> But it can be simpler or more readable by always chaining bios into
>> the same parent bio, which is submitted as the last one.
>
> Which means we now messed up our I/O order to not be sequential
> for no good reason.

From I/O order view, I don't see any difference among the two approachs,
and all are like the following, and both are similar with the way in
linus tree too.

   - submit(sector: s[0], size:sz[0])
   - submit(sector: s[1], size:sz[1])
   ...
   - submit(sector:s[n-1], size:sz[n-1])
   - submit_wait(sector:s[n], size:sz[n])

And there is always the euquation: s[i+1] = s[i] + sz[i] for i = 0,
..., n - 1, and
s[n] + sz[n] = sector + nr_sects.


Thanks,
Ming Lei
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig April 17, 2016, 9:29 p.m. UTC | #4
I'm confused on what you're proposing. Can you just show what you mean with
an incremental patch?
--
To unsubscribe from this list: send the line "unsubscribe linux-block" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Ming Lei April 18, 2016, 2:26 a.m. UTC | #5
On Mon, Apr 18, 2016 at 5:29 AM, Christoph Hellwig <hch@lst.de> wrote:
> I'm confused on what you're proposing. Can you just show what you mean with
> an incremental patch?

I mean all async bios can chain to one same parent, but looks that
implementation
can't be so clean with this one, so I am fine with it now:

        Reviewed-by: Ming Lei <tom.leiming@gmail.com>
diff mbox

Patch

diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9ebf653..700d248 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -9,21 +9,17 @@ 
 
 #include "blk.h"
 
-struct bio_batch {
-	atomic_t		done;
-	int			error;
-	struct completion	*wait;
-};
-
-static void bio_batch_end_io(struct bio *bio)
+static struct bio *next_bio(struct bio *bio, int rw, unsigned int nr_pages,
+		gfp_t gfp)
 {
-	struct bio_batch *bb = bio->bi_private;
+	struct bio *new = bio_alloc(gfp, nr_pages);
+
+	if (bio) {
+		bio_chain(bio, new);
+		submit_bio(rw, bio);
+	}
 
-	if (bio->bi_error && bio->bi_error != -EOPNOTSUPP)
-		bb->error = bio->bi_error;
-	if (atomic_dec_and_test(&bb->done))
-		complete(bb->wait);
-	bio_put(bio);
+	return new;
 }
 
 /**
@@ -40,13 +36,11 @@  static void bio_batch_end_io(struct bio *bio)
 int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
 {
-	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request_queue *q = bdev_get_queue(bdev);
 	int type = REQ_WRITE | REQ_DISCARD;
 	unsigned int granularity;
 	int alignment;
-	struct bio_batch bb;
-	struct bio *bio;
+	struct bio *bio = NULL;
 	int ret = 0;
 	struct blk_plug plug;
 
@@ -66,25 +60,15 @@  int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		type |= REQ_SECURE;
 	}
 
-	atomic_set(&bb.done, 1);
-	bb.error = 0;
-	bb.wait = &wait;
-
 	blk_start_plug(&plug);
 	while (nr_sects) {
 		unsigned int req_sects;
 		sector_t end_sect, tmp;
 
-		bio = bio_alloc(gfp_mask, 1);
-		if (!bio) {
-			ret = -ENOMEM;
-			break;
-		}
-
 		/* Make sure bi_size doesn't overflow */
 		req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);
 
-		/*
+		/**
 		 * If splitting a request, and the next starting sector would be
 		 * misaligned, stop the discard at the previous aligned sector.
 		 */
@@ -98,18 +82,14 @@  int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 			req_sects = end_sect - sector;
 		}
 
+		bio = next_bio(bio, type, 1, gfp_mask);
 		bio->bi_iter.bi_sector = sector;
-		bio->bi_end_io = bio_batch_end_io;
 		bio->bi_bdev = bdev;
-		bio->bi_private = &bb;
 
 		bio->bi_iter.bi_size = req_sects << 9;
 		nr_sects -= req_sects;
 		sector = end_sect;
 
-		atomic_inc(&bb.done);
-		submit_bio(type, bio);
-
 		/*
 		 * We can loop for a long time in here, if someone does
 		 * full device discards (like mkfs). Be nice and allow
@@ -118,15 +98,11 @@  int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 		 */
 		cond_resched();
 	}
+	if (bio)
+		ret = submit_bio_wait(type, bio);
 	blk_finish_plug(&plug);
 
-	/* Wait for bios in-flight */
-	if (!atomic_dec_and_test(&bb.done))
-		wait_for_completion_io(&wait);
-
-	if (bb.error)
-		return bb.error;
-	return ret;
+	return ret != -EOPNOTSUPP ? ret : 0;
 }
 EXPORT_SYMBOL(blkdev_issue_discard);
 
@@ -145,11 +121,9 @@  int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 			    sector_t nr_sects, gfp_t gfp_mask,
 			    struct page *page)
 {
-	DECLARE_COMPLETION_ONSTACK(wait);
 	struct request_queue *q = bdev_get_queue(bdev);
 	unsigned int max_write_same_sectors;
-	struct bio_batch bb;
-	struct bio *bio;
+	struct bio *bio = NULL;
 	int ret = 0;
 
 	if (!q)
@@ -158,21 +132,10 @@  int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 	/* Ensure that max_write_same_sectors doesn't overflow bi_size */
 	max_write_same_sectors = UINT_MAX >> 9;
 
-	atomic_set(&bb.done, 1);
-	bb.error = 0;
-	bb.wait = &wait;
-
 	while (nr_sects) {
-		bio = bio_alloc(gfp_mask, 1);
-		if (!bio) {
-			ret = -ENOMEM;
-			break;
-		}
-
+		bio = next_bio(bio, REQ_WRITE | REQ_WRITE_SAME, 1, gfp_mask);
 		bio->bi_iter.bi_sector = sector;
-		bio->bi_end_io = bio_batch_end_io;
 		bio->bi_bdev = bdev;
-		bio->bi_private = &bb;
 		bio->bi_vcnt = 1;
 		bio->bi_io_vec->bv_page = page;
 		bio->bi_io_vec->bv_offset = 0;
@@ -186,18 +149,11 @@  int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 			bio->bi_iter.bi_size = nr_sects << 9;
 			nr_sects = 0;
 		}
-
-		atomic_inc(&bb.done);
-		submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
 	}
 
-	/* Wait for bios in-flight */
-	if (!atomic_dec_and_test(&bb.done))
-		wait_for_completion_io(&wait);
-
-	if (bb.error)
-		return bb.error;
-	return ret;
+	if (bio)
+		ret = submit_bio_wait(REQ_WRITE | REQ_WRITE_SAME, bio);
+	return ret != -EOPNOTSUPP ? ret : 0;
 }
 EXPORT_SYMBOL(blkdev_issue_write_same);
 
@@ -216,28 +172,15 @@  static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 				  sector_t nr_sects, gfp_t gfp_mask)
 {
 	int ret;
-	struct bio *bio;
-	struct bio_batch bb;
+	struct bio *bio = NULL;
 	unsigned int sz;
-	DECLARE_COMPLETION_ONSTACK(wait);
 
-	atomic_set(&bb.done, 1);
-	bb.error = 0;
-	bb.wait = &wait;
-
-	ret = 0;
 	while (nr_sects != 0) {
-		bio = bio_alloc(gfp_mask,
-				min(nr_sects, (sector_t)BIO_MAX_PAGES));
-		if (!bio) {
-			ret = -ENOMEM;
-			break;
-		}
-
+		bio = next_bio(bio, WRITE,
+				min(nr_sects, (sector_t)BIO_MAX_PAGES),
+				gfp_mask);
 		bio->bi_iter.bi_sector = sector;
 		bio->bi_bdev   = bdev;
-		bio->bi_end_io = bio_batch_end_io;
-		bio->bi_private = &bb;
 
 		while (nr_sects != 0) {
 			sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
@@ -247,18 +190,11 @@  static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 			if (ret < (sz << 9))
 				break;
 		}
-		ret = 0;
-		atomic_inc(&bb.done);
-		submit_bio(WRITE, bio);
 	}
 
-	/* Wait for bios in-flight */
-	if (!atomic_dec_and_test(&bb.done))
-		wait_for_completion_io(&wait);
-
-	if (bb.error)
-		return bb.error;
-	return ret;
+	if (bio)
+		return submit_bio_wait(WRITE, bio);
+	return 0;
 }
 
 /**