Message ID | 20241206221801.790690-7-kbusch@meta.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | block write streams with nvme fdp | expand |
On 12/6/24 23:17, Keith Busch wrote:
> From: Christoph Hellwig <hch@lst.de>
>
> Export statx information about the number and granularity of write
> streams, use the per-kiocb write hint and map temperature hints
> to write streams (which is a bit questionable, but this shows how it is
> done).
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> Signed-off-by: Keith Busch <kbusch@kernel.org>
> ---
>   block/bdev.c |  6 ++++++
>   block/fops.c | 23 +++++++++++++++++++++++
>   2 files changed, 29 insertions(+)
>
Reviewed-by: Hannes Reinecke <hare@suse.de>

Cheers,

Hannes
On 06/12/24 02:17PM, Keith Busch wrote:
>From: Christoph Hellwig <hch@lst.de>
>
>Export statx information about the number and granularity of write
>streams, use the per-kiocb write hint and map temperature hints
>to write streams (which is a bit questionable, but this shows how it is
>done).
>
>Signed-off-by: Christoph Hellwig <hch@lst.de>
>Signed-off-by: Keith Busch <kbusch@kernel.org>
>---
> block/bdev.c |  6 ++++++
> block/fops.c | 23 +++++++++++++++++++++++
> 2 files changed, 29 insertions(+)
>
>diff --git a/block/bdev.c b/block/bdev.c
>index 738e3c8457e7f..c23245f1fdfe3 100644
>--- a/block/bdev.c
>+++ b/block/bdev.c
>@@ -1296,6 +1296,12 @@ void bdev_statx(struct path *path, struct kstat *stat,
> 		stat->result_mask |= STATX_DIOALIGN;
> 	}
>
>+	if ((request_mask & STATX_WRITE_STREAM) &&

We may not reach this point if the user application sets neither
STATX_DIOALIGN nor STATX_WRITE_ATOMIC.

>+	    bdev_max_write_streams(bdev)) {
>+		stat->write_stream_max = bdev_max_write_streams(bdev);
>+		stat->result_mask |= STATX_WRITE_STREAM;

statx will show a value of 0 for write_stream_granularity.

Below is a fix that might help:

diff --git a/block/bdev.c b/block/bdev.c
index c23245f1fdfe..290577e20457 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -1275,7 +1275,8 @@ void bdev_statx(struct path *path, struct kstat *stat,
 	struct inode *backing_inode;
 	struct block_device *bdev;

-	if (!(request_mask & (STATX_DIOALIGN | STATX_WRITE_ATOMIC)))
+	if (!(request_mask & (STATX_DIOALIGN | STATX_WRITE_ATOMIC |
+			      STATX_WRITE_STREAM)))
 		return;

 	backing_inode = d_backing_inode(path->dentry);
@@ -1299,6 +1300,7 @@ void bdev_statx(struct path *path, struct kstat *stat,
 	if ((request_mask & STATX_WRITE_STREAM) &&
 	    bdev_max_write_streams(bdev)) {
 		stat->write_stream_max = bdev_max_write_streams(bdev);
+		stat->write_stream_granularity = bdev_write_stream_granularity(bdev);
 		stat->result_mask |= STATX_WRITE_STREAM;
 	}
diff --git a/block/bdev.c b/block/bdev.c
index 738e3c8457e7f..c23245f1fdfe3 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -1296,6 +1296,12 @@ void bdev_statx(struct path *path, struct kstat *stat,
 		stat->result_mask |= STATX_DIOALIGN;
 	}

+	if ((request_mask & STATX_WRITE_STREAM) &&
+	    bdev_max_write_streams(bdev)) {
+		stat->write_stream_max = bdev_max_write_streams(bdev);
+		stat->result_mask |= STATX_WRITE_STREAM;
+	}
+
 	if (request_mask & STATX_WRITE_ATOMIC && bdev_can_atomic_write(bdev)) {
 		struct request_queue *bd_queue = bdev->bd_queue;

diff --git a/block/fops.c b/block/fops.c
index 6d5c4fc5a2168..f16aa39bf5bad 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -73,6 +73,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 	}
 	bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
 	bio.bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
+	bio.bi_write_stream = iocb->ki_write_stream;
 	bio.bi_ioprio = iocb->ki_ioprio;
 	if (iocb->ki_flags & IOCB_ATOMIC)
 		bio.bi_opf |= REQ_ATOMIC;
@@ -206,6 +207,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	for (;;) {
 		bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
 		bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
+		bio->bi_write_stream = iocb->ki_write_stream;
 		bio->bi_private = dio;
 		bio->bi_end_io = blkdev_bio_end_io;
 		bio->bi_ioprio = iocb->ki_ioprio;
@@ -333,6 +335,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	dio->iocb = iocb;
 	bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
 	bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
+	bio->bi_write_stream = iocb->ki_write_stream;
 	bio->bi_end_io = blkdev_bio_end_io_async;
 	bio->bi_ioprio = iocb->ki_ioprio;

@@ -398,6 +401,26 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	if (blkdev_dio_invalid(bdev, iocb, iter))
 		return -EINVAL;

+	if (iov_iter_rw(iter) == WRITE) {
+		u16 max_write_streams = bdev_max_write_streams(bdev);
+
+		if (iocb->ki_write_stream) {
+			if (iocb->ki_write_stream > max_write_streams)
+				return -EINVAL;
+		} else if (max_write_streams) {
+			enum rw_hint write_hint =
+				file_inode(iocb->ki_filp)->i_write_hint;
+
+			/*
+			 * Just use the write hint as write stream for block
+			 * device writes. This assumes no file system is
+			 * mounted that would use the streams differently.
+			 */
+			if (write_hint <= max_write_streams)
+				iocb->ki_write_stream = write_hint;
+		}
+	}
+
 	nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
 	if (likely(nr_pages <= BIO_MAX_VECS)) {
 		if (is_sync_kiocb(iocb))