diff mbox series

[12/16] block: add single bio async direct IO helper

Message ID c8d2d919894fd0112f21723a9cb50b6c7cbd9613.1634676157.git.asml.silence@gmail.com (mailing list archive)
State New, archived
Headers show
Series block optimisation round | expand

Commit Message

Pavel Begunkov Oct. 19, 2021, 9:24 p.m. UTC
As with __blkdev_direct_IO_simple(), we can implement direct IO more
efficiently if there is only one bio. Add __blkdev_direct_IO_async() and
blkdev_bio_end_io_async(). This patch brings me from 4.45-4.5 MIOPS with
nullblk to 4.7+.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
 block/fops.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 87 insertions(+), 3 deletions(-)

Comments

Christoph Hellwig Oct. 20, 2021, 6:36 a.m. UTC | #1
On Tue, Oct 19, 2021 at 10:24:21PM +0100, Pavel Begunkov wrote:
> +	bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
> +	dio = container_of(bio, struct blkdev_dio, bio);
> +	__bio_set_dev(bio, bdev);
> +	bio->bi_iter.bi_sector = pos >> 9;

SECTOR_SHIFT.

> +	bio->bi_write_hint = iocb->ki_hint;
> +	bio->bi_end_io = blkdev_bio_end_io_async;
> +	bio->bi_ioprio = iocb->ki_ioprio;
> +	dio->flags = 0;
> +	dio->iocb = iocb;
> +
> +	ret = bio_iov_iter_get_pages(bio, iter);
> +	if (unlikely(ret)) {
> +		bio->bi_status = BLK_STS_IOERR;
> +		bio_endio(bio);
> +		return BLK_STS_IOERR;

This function does not return a blk_status_t, so this is wrong (and
sparse should have complained).  I also don't think the error path
here should go through the bio for error handling but just do a put and
return the error.

> +	if (iov_iter_rw(iter) == READ) {
> +		bio->bi_opf = REQ_OP_READ;
> +		if (iter_is_iovec(iter)) {
> +			dio->flags |= DIO_SHOULD_DIRTY;
> +			bio_set_pages_dirty(bio);
> +		}
> +	} else {
> +		bio->bi_opf = dio_bio_write_op(iocb);
> +		task_io_account_write(bio->bi_iter.bi_size);
> +	}
> +
> +	if (iocb->ki_flags & IOCB_NOWAIT)
> +		bio->bi_opf |= REQ_NOWAIT;

This code is entirely duplicated, please move it into an (inline) helper.

> +	/*
> +	 * Don't plug for HIPRI/polled IO, as those should go straight
> +	 * to issue
> +	 */

This comment seems misplaced as the function does not use plugging at
all.
Pavel Begunkov Oct. 20, 2021, 12:35 p.m. UTC | #2
On 10/20/21 07:36, Christoph Hellwig wrote:
> On Tue, Oct 19, 2021 at 10:24:21PM +0100, Pavel Begunkov wrote:
>> +	bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
>> +	dio = container_of(bio, struct blkdev_dio, bio);
>> +	__bio_set_dev(bio, bdev);
>> +	bio->bi_iter.bi_sector = pos >> 9;
> 
> SECTOR_SHIFT.
> 
>> +	bio->bi_write_hint = iocb->ki_hint;
>> +	bio->bi_end_io = blkdev_bio_end_io_async;
>> +	bio->bi_ioprio = iocb->ki_ioprio;
>> +	dio->flags = 0;
>> +	dio->iocb = iocb;
>> +
>> +	ret = bio_iov_iter_get_pages(bio, iter);
>> +	if (unlikely(ret)) {
>> +		bio->bi_status = BLK_STS_IOERR;
>> +		bio_endio(bio);
>> +		return BLK_STS_IOERR;
> 
> This function does not return a blk_status_t, so this is wrong (and
> sparse should have complained).  I also don't think the error path
> here should go through the bio for error handling but just do a put and
> return the error.

My bad, following __blkdev_direct_IO() it was intended to be
blk_status_to_errno(BLK_STS_IOERR), but just return is much
better.

> 
>> +	if (iov_iter_rw(iter) == READ) {
>> +		bio->bi_opf = REQ_OP_READ;
>> +		if (iter_is_iovec(iter)) {
>> +			dio->flags |= DIO_SHOULD_DIRTY;
>> +			bio_set_pages_dirty(bio);
>> +		}
>> +	} else {
>> +		bio->bi_opf = dio_bio_write_op(iocb);
>> +		task_io_account_write(bio->bi_iter.bi_size);
>> +	}
>> +
>> +	if (iocb->ki_flags & IOCB_NOWAIT)
>> +		bio->bi_opf |= REQ_NOWAIT;
> 
> This code is entirely duplicated, please move it into an (inline) helper.

I'll try it out, thanks

>> +	/*
>> +	 * Don't plug for HIPRI/polled IO, as those should go straight
>> +	 * to issue
>> +	 */
> 
> This comment seems misplaced as the function does not use plugging at
> all.

will kill it
diff mbox series

Patch

diff --git a/block/fops.c b/block/fops.c
index 7cf98db0595a..0f1332374756 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -305,6 +305,88 @@  static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	return ret;
 }
 
+static void blkdev_bio_end_io_async(struct bio *bio)
+{
+	struct blkdev_dio *dio = container_of(bio, struct blkdev_dio, bio);
+	struct kiocb *iocb = dio->iocb;
+	ssize_t ret;
+
+	if (likely(!bio->bi_status)) {
+		ret = dio->size;
+		iocb->ki_pos += ret;
+	} else {
+		ret = blk_status_to_errno(bio->bi_status);
+	}
+
+	iocb->ki_complete(iocb, ret, 0);
+
+	if (dio->flags & DIO_SHOULD_DIRTY) {
+		bio_check_pages_dirty(bio);
+	} else {
+		bio_release_pages(bio, false);
+		bio_put(bio);
+	}
+}
+
+static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
+					struct iov_iter *iter,
+					unsigned int nr_pages)
+{
+	struct block_device *bdev = iocb->ki_filp->private_data;
+	struct blkdev_dio *dio;
+	struct bio *bio;
+	loff_t pos = iocb->ki_pos;
+	int ret = 0;
+
+	if ((pos | iov_iter_alignment(iter)) &
+	    (bdev_logical_block_size(bdev) - 1))
+		return -EINVAL;
+
+	bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
+	dio = container_of(bio, struct blkdev_dio, bio);
+	__bio_set_dev(bio, bdev);
+	bio->bi_iter.bi_sector = pos >> 9;
+	bio->bi_write_hint = iocb->ki_hint;
+	bio->bi_end_io = blkdev_bio_end_io_async;
+	bio->bi_ioprio = iocb->ki_ioprio;
+	dio->flags = 0;
+	dio->iocb = iocb;
+
+	ret = bio_iov_iter_get_pages(bio, iter);
+	if (unlikely(ret)) {
+		bio->bi_status = BLK_STS_IOERR;
+		bio_endio(bio);
+		return BLK_STS_IOERR;
+	}
+	dio->size = bio->bi_iter.bi_size;
+
+	if (iov_iter_rw(iter) == READ) {
+		bio->bi_opf = REQ_OP_READ;
+		if (iter_is_iovec(iter)) {
+			dio->flags |= DIO_SHOULD_DIRTY;
+			bio_set_pages_dirty(bio);
+		}
+	} else {
+		bio->bi_opf = dio_bio_write_op(iocb);
+		task_io_account_write(bio->bi_iter.bi_size);
+	}
+
+	if (iocb->ki_flags & IOCB_NOWAIT)
+		bio->bi_opf |= REQ_NOWAIT;
+	/*
+	 * Don't plug for HIPRI/polled IO, as those should go straight
+	 * to issue
+	 */
+	if (iocb->ki_flags & IOCB_HIPRI) {
+		bio_set_polled(bio, iocb);
+		submit_bio(bio);
+		WRITE_ONCE(iocb->private, bio);
+	} else {
+		submit_bio(bio);
+	}
+	return -EIOCBQUEUED;
+}
+
 static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 {
 	unsigned int nr_pages;
@@ -313,9 +395,11 @@  static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 		return 0;
 
 	nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
-	if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS)
-		return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
-
+	if (likely(nr_pages <= BIO_MAX_VECS)) {
+		if (is_sync_kiocb(iocb))
+			return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
+		return __blkdev_direct_IO_async(iocb, iter, nr_pages);
+	}
 	return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
 }