Message ID | c8d2d919894fd0112f21723a9cb50b6c7cbd9613.1634676157.git.asml.silence@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | block optimisation round | expand |
On Tue, Oct 19, 2021 at 10:24:21PM +0100, Pavel Begunkov wrote: > + bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool); > + dio = container_of(bio, struct blkdev_dio, bio); > + __bio_set_dev(bio, bdev); > + bio->bi_iter.bi_sector = pos >> 9; SECTOR_SHIFT. > + bio->bi_write_hint = iocb->ki_hint; > + bio->bi_end_io = blkdev_bio_end_io_async; > + bio->bi_ioprio = iocb->ki_ioprio; > + dio->flags = 0; > + dio->iocb = iocb; > + > + ret = bio_iov_iter_get_pages(bio, iter); > + if (unlikely(ret)) { > + bio->bi_status = BLK_STS_IOERR; > + bio_endio(bio); > + return BLK_STS_IOERR; This function does not return a blk_status_t, so this is wrong (and sparse should have complained). I also don't think the error path here should go through the bio for error handling but just do a put and return the error. > + if (iov_iter_rw(iter) == READ) { > + bio->bi_opf = REQ_OP_READ; > + if (iter_is_iovec(iter)) { > + dio->flags |= DIO_SHOULD_DIRTY; > + bio_set_pages_dirty(bio); > + } > + } else { > + bio->bi_opf = dio_bio_write_op(iocb); > + task_io_account_write(bio->bi_iter.bi_size); > + } > + > + if (iocb->ki_flags & IOCB_NOWAIT) > + bio->bi_opf |= REQ_NOWAIT; This code is entirely duplicated, please move it into an (inline) helper. > + /* > + * Don't plug for HIPRI/polled IO, as those should go straight > + * to issue > + */ This comment seems misplaced as the function does not use plugging at all.
On 10/20/21 07:36, Christoph Hellwig wrote: > On Tue, Oct 19, 2021 at 10:24:21PM +0100, Pavel Begunkov wrote: >> + bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool); >> + dio = container_of(bio, struct blkdev_dio, bio); >> + __bio_set_dev(bio, bdev); >> + bio->bi_iter.bi_sector = pos >> 9; > > SECTOR_SHIFT. > >> + bio->bi_write_hint = iocb->ki_hint; >> + bio->bi_end_io = blkdev_bio_end_io_async; >> + bio->bi_ioprio = iocb->ki_ioprio; >> + dio->flags = 0; >> + dio->iocb = iocb; >> + >> + ret = bio_iov_iter_get_pages(bio, iter); >> + if (unlikely(ret)) { >> + bio->bi_status = BLK_STS_IOERR; >> + bio_endio(bio); >> + return BLK_STS_IOERR; > > This function does not return a blk_status_t, so this is wrong (and > sparse should have complained). I also don't think the error path > here should go through the bio for error handling but just do a put and > return the error. My bad, following __blkdev_direct_IO() it was intended to be blk_status_to_errno(BLK_STS_IOERR), but just returning the error is much better. > >> + if (iov_iter_rw(iter) == READ) { >> + bio->bi_opf = REQ_OP_READ; >> + if (iter_is_iovec(iter)) { >> + dio->flags |= DIO_SHOULD_DIRTY; >> + bio_set_pages_dirty(bio); >> + } >> + } else { >> + bio->bi_opf = dio_bio_write_op(iocb); >> + task_io_account_write(bio->bi_iter.bi_size); >> + } >> + >> + if (iocb->ki_flags & IOCB_NOWAIT) >> + bio->bi_opf |= REQ_NOWAIT; > > This code is entirely duplicated, please move it into an (inline) helper. I'll try it out, thanks >> + /* >> + * Don't plug for HIPRI/polled IO, as those should go straight >> + * to issue >> + */ > > This comment seems misplaced as the function does not use plugging at > all. will kill it
diff --git a/block/fops.c b/block/fops.c index 7cf98db0595a..0f1332374756 100644 --- a/block/fops.c +++ b/block/fops.c @@ -305,6 +305,88 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, return ret; } +static void blkdev_bio_end_io_async(struct bio *bio) +{ + struct blkdev_dio *dio = container_of(bio, struct blkdev_dio, bio); + struct kiocb *iocb = dio->iocb; + ssize_t ret; + + if (likely(!bio->bi_status)) { + ret = dio->size; + iocb->ki_pos += ret; + } else { + ret = blk_status_to_errno(bio->bi_status); + } + + iocb->ki_complete(iocb, ret, 0); + + if (dio->flags & DIO_SHOULD_DIRTY) { + bio_check_pages_dirty(bio); + } else { + bio_release_pages(bio, false); + bio_put(bio); + } +} + +static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, + struct iov_iter *iter, + unsigned int nr_pages) +{ + struct block_device *bdev = iocb->ki_filp->private_data; + struct blkdev_dio *dio; + struct bio *bio; + loff_t pos = iocb->ki_pos; + int ret = 0; + + if ((pos | iov_iter_alignment(iter)) & + (bdev_logical_block_size(bdev) - 1)) + return -EINVAL; + + bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool); + dio = container_of(bio, struct blkdev_dio, bio); + __bio_set_dev(bio, bdev); + bio->bi_iter.bi_sector = pos >> 9; + bio->bi_write_hint = iocb->ki_hint; + bio->bi_end_io = blkdev_bio_end_io_async; + bio->bi_ioprio = iocb->ki_ioprio; + dio->flags = 0; + dio->iocb = iocb; + + ret = bio_iov_iter_get_pages(bio, iter); + if (unlikely(ret)) { + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); + return BLK_STS_IOERR; + } + dio->size = bio->bi_iter.bi_size; + + if (iov_iter_rw(iter) == READ) { + bio->bi_opf = REQ_OP_READ; + if (iter_is_iovec(iter)) { + dio->flags |= DIO_SHOULD_DIRTY; + bio_set_pages_dirty(bio); + } + } else { + bio->bi_opf = dio_bio_write_op(iocb); + task_io_account_write(bio->bi_iter.bi_size); + } + + if (iocb->ki_flags & IOCB_NOWAIT) + bio->bi_opf |= REQ_NOWAIT; + /* + * Don't plug for HIPRI/polled IO, as those should go straight 
+ * to issue + */ + if (iocb->ki_flags & IOCB_HIPRI) { + bio_set_polled(bio, iocb); + submit_bio(bio); + WRITE_ONCE(iocb->private, bio); + } else { + submit_bio(bio); + } + return -EIOCBQUEUED; +} + static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { unsigned int nr_pages; @@ -313,9 +395,11 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) return 0; nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1); - if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS) - return __blkdev_direct_IO_simple(iocb, iter, nr_pages); - + if (likely(nr_pages <= BIO_MAX_VECS)) { + if (is_sync_kiocb(iocb)) + return __blkdev_direct_IO_simple(iocb, iter, nr_pages); + return __blkdev_direct_IO_async(iocb, iter, nr_pages); + } return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages)); }
As with __blkdev_direct_IO_simple(), we can implement direct IO more efficiently if there is only one bio. Add __blkdev_direct_IO_async() and blkdev_bio_end_io_async(). This patch brings me from 4.45-4.5 MIOPS with nullblk to 4.7+. Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> --- block/fops.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 3 deletions(-)