diff mbox series

[v8,05/19] block, fs: Restore the per-bio/request data lifetime fields

Message ID 20231219000815.2739120-6-bvanassche@acm.org (mailing list archive)
State New, archived
Headers show
Series Pass data lifetime information to SCSI disk devices | expand

Commit Message

Bart Van Assche Dec. 19, 2023, 12:07 a.m. UTC
Restore support for passing data lifetime information from filesystems to
block drivers. This patch reverts commit b179c98f7697 ("block: Remove
request.write_hint") and commit c75e707fe1aa ("block: remove the
per-bio/request write hint").

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/bio.c                 |  2 ++
 block/blk-crypto-fallback.c |  1 +
 block/blk-merge.c           |  8 ++++++++
 block/blk-mq.c              |  2 ++
 block/bounce.c              |  1 +
 block/fops.c                |  3 +++
 fs/buffer.c                 | 12 ++++++++----
 fs/direct-io.c              |  2 ++
 fs/iomap/buffered-io.c      |  2 ++
 fs/iomap/direct-io.c        |  1 +
 fs/mpage.c                  |  1 +
 include/linux/blk-mq.h      |  2 ++
 include/linux/blk_types.h   |  2 ++
 13 files changed, 35 insertions(+), 4 deletions(-)

Comments

Kanchan Joshi Jan. 22, 2024, 9:23 a.m. UTC | #1
On 12/19/2023 5:37 AM, Bart Van Assche wrote:

> diff --git a/block/fops.c b/block/fops.c
> index 0abaac705daf..787ce52bc2c6 100644
> --- a/block/fops.c
> +++ b/block/fops.c
> @@ -73,6 +73,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
>   		bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
>   	}
>   	bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
> +	bio.bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
>   	bio.bi_ioprio = iocb->ki_ioprio;
>   
>   	ret = bio_iov_iter_get_pages(&bio, iter);
> @@ -203,6 +204,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
>   
>   	for (;;) {
>   		bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
> +		bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
>   		bio->bi_private = dio;
>   		bio->bi_end_io = blkdev_bio_end_io;
>   		bio->bi_ioprio = iocb->ki_ioprio;
> @@ -321,6 +323,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
>   	dio->flags = 0;
>   	dio->iocb = iocb;
>   	bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
> +	bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;

This (and two more places above) should rather be changed to:

bio.bi_write_hint = bdev_file_inode(iocb->ki_filp)->i_write_hint;

Note that at other places too (e.g., blkdev_fallocate, blkdev_mmap, 
blkdev_lseek) bdev inode is used and not file inode.
Bart Van Assche Jan. 22, 2024, 8:05 p.m. UTC | #2
On 1/22/24 01:23, Kanchan Joshi wrote:
> On 12/19/2023 5:37 AM, Bart Van Assche wrote:
> 
>> diff --git a/block/fops.c b/block/fops.c
>> index 0abaac705daf..787ce52bc2c6 100644
>> --- a/block/fops.c
>> +++ b/block/fops.c
>> @@ -73,6 +73,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
>>    		bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
>>    	}
>>    	bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
>> +	bio.bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
>>    	bio.bi_ioprio = iocb->ki_ioprio;
>>    
>>    	ret = bio_iov_iter_get_pages(&bio, iter);
>> @@ -203,6 +204,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
>>    
>>    	for (;;) {
>>    		bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
>> +		bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
>>    		bio->bi_private = dio;
>>    		bio->bi_end_io = blkdev_bio_end_io;
>>    		bio->bi_ioprio = iocb->ki_ioprio;
>> @@ -321,6 +323,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
>>    	dio->flags = 0;
>>    	dio->iocb = iocb;
>>    	bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
>> +	bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
> 
> This (and two more places above) should rather be changed to:
> 
> bio.bi_write_hint = bdev_file_inode(iocb->ki_filp)->i_write_hint;
> 
> Note that at other places too (e.g., blkdev_fallocate, blkdev_mmap,
> blkdev_lseek) bdev inode is used and not file inode.

Why should this code be changed? The above code has been tested and
works fine.

Thanks,

Bart.
Kanchan Joshi Jan. 23, 2024, 12:35 p.m. UTC | #3
On 1/23/2024 1:35 AM, Bart Van Assche wrote:
> On 1/22/24 01:23, Kanchan Joshi wrote:
>> On 12/19/2023 5:37 AM, Bart Van Assche wrote:
>>
>>> diff --git a/block/fops.c b/block/fops.c
>>> index 0abaac705daf..787ce52bc2c6 100644
>>> --- a/block/fops.c
>>> +++ b/block/fops.c
>>> @@ -73,6 +73,7 @@ static ssize_t __blkdev_direct_IO_simple(struct 
>>> kiocb *iocb,
>>>            bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
>>>        }
>>>        bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
>>> +    bio.bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
>>>        bio.bi_ioprio = iocb->ki_ioprio;
>>>        ret = bio_iov_iter_get_pages(&bio, iter);
>>> @@ -203,6 +204,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb 
>>> *iocb, struct iov_iter *iter,
>>>        for (;;) {
>>>            bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
>>> +        bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
>>>            bio->bi_private = dio;
>>>            bio->bi_end_io = blkdev_bio_end_io;
>>>            bio->bi_ioprio = iocb->ki_ioprio;
>>> @@ -321,6 +323,7 @@ static ssize_t __blkdev_direct_IO_async(struct 
>>> kiocb *iocb,
>>>        dio->flags = 0;
>>>        dio->iocb = iocb;
>>>        bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
>>> +    bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
>>
>> This (and two more places above) should rather be changed to:
>>
>> bio.bi_write_hint = bdev_file_inode(iocb->ki_filp)->i_write_hint;
>>
>> Note that at other places too (e.g., blkdev_fallocate, blkdev_mmap,
>> blkdev_lseek) bdev inode is used and not file inode.
> 
> Why should this code be changed?

Because this file is for raw block IO, and bdev_file_inode is the right 
inode to be used.

> The above code has been tested and
> works fine.

At the cost of inviting some extra work. Because this patch used 
file_inode, the patch 6 needs to set the hint on two inodes.
If we use bdev_file_inode, this whole thing becomes clean.
Bart Van Assche Jan. 23, 2024, 3:59 p.m. UTC | #4
On 1/23/24 04:35, Kanchan Joshi wrote:
> At the cost of inviting some extra work. Because this patch used
> file_inode, the patch 6 needs to set the hint on two inodes.
> If we use bdev_file_inode, this whole thing becomes clean.

The idea of accessing block devices only in the F_SET_RW_HINT
implementation is wrong because it involves a layering violation.
With the current patch series data lifetime information is
available to filesystems like Fuse. If the F_SET_RW_HINT
implementation would iterate over block devices, no data lifetime
information would be available to filesystems like Fuse.

Bart.
diff mbox series

Patch

diff --git a/block/bio.c b/block/bio.c
index 816d412c06e9..755fcde5cb66 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -251,6 +251,7 @@  void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
 	bio->bi_opf = opf;
 	bio->bi_flags = 0;
 	bio->bi_ioprio = 0;
+	bio->bi_write_hint = 0;
 	bio->bi_status = 0;
 	bio->bi_iter.bi_sector = 0;
 	bio->bi_iter.bi_size = 0;
@@ -813,6 +814,7 @@  static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
 {
 	bio_set_flag(bio, BIO_CLONED);
 	bio->bi_ioprio = bio_src->bi_ioprio;
+	bio->bi_write_hint = bio_src->bi_write_hint;
 	bio->bi_iter = bio_src->bi_iter;
 
 	if (bio->bi_bdev) {
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index e6468eab2681..b1e7415f8439 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -172,6 +172,7 @@  static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
 	if (bio_flagged(bio_src, BIO_REMAPPED))
 		bio_set_flag(bio, BIO_REMAPPED);
 	bio->bi_ioprio		= bio_src->bi_ioprio;
+	bio->bi_write_hint	= bio_src->bi_write_hint;
 	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
 	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 65e75efa9bd3..65bbbf6cf1fe 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -814,6 +814,10 @@  static struct request *attempt_merge(struct request_queue *q,
 	if (rq_data_dir(req) != rq_data_dir(next))
 		return NULL;
 
+	/* Don't merge requests with different write hints. */
+	if (req->write_hint != next->write_hint)
+		return NULL;
+
 	if (req->ioprio != next->ioprio)
 		return NULL;
 
@@ -941,6 +945,10 @@  bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 	if (!bio_crypt_rq_ctx_compatible(rq, bio))
 		return false;
 
+	/* Don't merge requests with different write hints. */
+	if (rq->write_hint != bio->bi_write_hint)
+		return false;
+
 	if (rq->ioprio != bio_prio(bio))
 		return false;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e2d11183f62e..081df9faf499 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2551,6 +2551,7 @@  static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
 		rq->cmd_flags |= REQ_FAILFAST_MASK;
 
 	rq->__sector = bio->bi_iter.bi_sector;
+	rq->write_hint = bio->bi_write_hint;
 	blk_rq_bio_prep(rq, bio, nr_segs);
 
 	/* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */
@@ -3143,6 +3144,7 @@  int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 	}
 	rq->nr_phys_segments = rq_src->nr_phys_segments;
 	rq->ioprio = rq_src->ioprio;
+	rq->write_hint = rq_src->write_hint;
 
 	if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
 		goto free_and_out;
diff --git a/block/bounce.c b/block/bounce.c
index 7cfcb242f9a1..d6a5219f29dd 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -169,6 +169,7 @@  static struct bio *bounce_clone_bio(struct bio *bio_src)
 	if (bio_flagged(bio_src, BIO_REMAPPED))
 		bio_set_flag(bio, BIO_REMAPPED);
 	bio->bi_ioprio		= bio_src->bi_ioprio;
+	bio->bi_write_hint	= bio_src->bi_write_hint;
 	bio->bi_iter.bi_sector	= bio_src->bi_iter.bi_sector;
 	bio->bi_iter.bi_size	= bio_src->bi_iter.bi_size;
 
diff --git a/block/fops.c b/block/fops.c
index 0abaac705daf..787ce52bc2c6 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -73,6 +73,7 @@  static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
 		bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
 	}
 	bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
+	bio.bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
 	bio.bi_ioprio = iocb->ki_ioprio;
 
 	ret = bio_iov_iter_get_pages(&bio, iter);
@@ -203,6 +204,7 @@  static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 
 	for (;;) {
 		bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
+		bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
 		bio->bi_private = dio;
 		bio->bi_end_io = blkdev_bio_end_io;
 		bio->bi_ioprio = iocb->ki_ioprio;
@@ -321,6 +323,7 @@  static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
 	dio->flags = 0;
 	dio->iocb = iocb;
 	bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
+	bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
 	bio->bi_end_io = blkdev_bio_end_io_async;
 	bio->bi_ioprio = iocb->ki_ioprio;
 
diff --git a/fs/buffer.c b/fs/buffer.c
index 967f34b70aa8..76834398e713 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -55,7 +55,7 @@ 
 
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
 static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
-			  struct writeback_control *wbc);
+			  enum rw_hint hint, struct writeback_control *wbc);
 
 #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
 
@@ -1903,7 +1903,8 @@  int __block_write_full_folio(struct inode *inode, struct folio *folio,
 	do {
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
-			submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
+			submit_bh_wbc(REQ_OP_WRITE | write_flags, bh,
+				      inode->i_write_hint, wbc);
 			nr_underway++;
 		}
 		bh = next;
@@ -1957,7 +1958,8 @@  int __block_write_full_folio(struct inode *inode, struct folio *folio,
 		struct buffer_head *next = bh->b_this_page;
 		if (buffer_async_write(bh)) {
 			clear_buffer_dirty(bh);
-			submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
+			submit_bh_wbc(REQ_OP_WRITE | write_flags, bh,
+				      inode->i_write_hint, wbc);
 			nr_underway++;
 		}
 		bh = next;
@@ -2777,6 +2779,7 @@  static void end_bio_bh_io_sync(struct bio *bio)
 }
 
 static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
+			  enum rw_hint write_hint,
 			  struct writeback_control *wbc)
 {
 	const enum req_op op = opf & REQ_OP_MASK;
@@ -2804,6 +2807,7 @@  static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
 	fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
 
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+	bio->bi_write_hint = write_hint;
 
 	__bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
 
@@ -2823,7 +2827,7 @@  static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
 
 void submit_bh(blk_opf_t opf, struct buffer_head *bh)
 {
-	submit_bh_wbc(opf, bh, NULL);
+	submit_bh_wbc(opf, bh, WRITE_LIFE_NOT_SET, NULL);
 }
 EXPORT_SYMBOL(submit_bh);
 
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 20533266ade6..5261ab8bcdaa 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -410,6 +410,8 @@  dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
 		bio->bi_end_io = dio_bio_end_io;
 	if (dio->is_pinned)
 		bio_set_flag(bio, BIO_PAGE_PINNED);
+	bio->bi_write_hint = file_inode(dio->iocb->ki_filp)->i_write_hint;
+
 	sdio->bio = bio;
 	sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
 }
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index f72df2babe56..191eb575485e 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1677,6 +1677,7 @@  iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
 			       REQ_OP_WRITE | wbc_to_write_flags(wbc),
 			       GFP_NOFS, &iomap_ioend_bioset);
 	bio->bi_iter.bi_sector = sector;
+	bio->bi_write_hint = inode->i_write_hint;
 	wbc_init_bio(wbc, bio);
 
 	ioend = container_of(bio, struct iomap_ioend, io_inline_bio);
@@ -1707,6 +1708,7 @@  iomap_chain_bio(struct bio *prev)
 	new = bio_alloc(prev->bi_bdev, BIO_MAX_VECS, prev->bi_opf, GFP_NOFS);
 	bio_clone_blkg_association(new, prev);
 	new->bi_iter.bi_sector = bio_end_sector(prev);
+	new->bi_write_hint = prev->bi_write_hint;
 
 	bio_chain(prev, new);
 	bio_get(prev);		/* for iomap_finish_ioend */
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index bcd3f8cf5ea4..f3b43d223a46 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -380,6 +380,7 @@  static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 		fscrypt_set_bio_crypt_ctx(bio, inode, pos >> inode->i_blkbits,
 					  GFP_KERNEL);
 		bio->bi_iter.bi_sector = iomap_sector(iomap, pos);
+		bio->bi_write_hint = inode->i_write_hint;
 		bio->bi_ioprio = dio->iocb->ki_ioprio;
 		bio->bi_private = dio;
 		bio->bi_end_io = iomap_dio_bio_end_io;
diff --git a/fs/mpage.c b/fs/mpage.c
index ffb064ed9d04..268785f2bb53 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -611,6 +611,7 @@  static int __mpage_writepage(struct folio *folio, struct writeback_control *wbc,
 				GFP_NOFS);
 		bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9);
 		wbc_init_bio(wbc, bio);
+		bio->bi_write_hint = inode->i_write_hint;
 	}
 
 	/*
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 1ab3081c82ed..479f26af76bd 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -8,6 +8,7 @@ 
 #include <linux/scatterlist.h>
 #include <linux/prefetch.h>
 #include <linux/srcu.h>
+#include <linux/rw_hint.h>
 
 struct blk_mq_tags;
 struct blk_flush_queue;
@@ -135,6 +136,7 @@  struct request {
 	struct blk_crypto_keyslot *crypt_keyslot;
 #endif
 
+	enum rw_hint write_hint;
 	unsigned short ioprio;
 
 	enum mq_rq_state state;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index d5c5e59ddbd2..8410957f4313 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -10,6 +10,7 @@ 
 #include <linux/bvec.h>
 #include <linux/device.h>
 #include <linux/ktime.h>
+#include <linux/rw_hint.h>
 
 struct bio_set;
 struct bio;
@@ -269,6 +270,7 @@  struct bio {
 						 */
 	unsigned short		bi_flags;	/* BIO_* below */
 	unsigned short		bi_ioprio;
+	enum rw_hint		bi_write_hint;
 	blk_status_t		bi_status;
 	atomic_t		__bi_remaining;