@@ -1208,6 +1208,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
struct page **pages = (struct page **)bv;
+ unsigned int lbas = bdev_logical_block_size(bio->bi_bdev);
ssize_t size, left;
unsigned len, i;
size_t offset;
@@ -1226,10 +1227,32 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
* more pages than bi_max_vecs allows, so we have to ALIGN_DOWN the
* result to ensure the bio's total size is correct. The remainder of
* the iov data will be picked up in the next bio iteration.
+ *
+ * Partial sector reads can break the iov length expectations, since
+ * only dma_alignment granularity is required. The code enforces only 1
+ * segment in that case, which simplifies the following logic. We don't
+ * need to consider individual segment lengths since the skip and
+ * truncate bytes are guaranteed to align the total length to the block
+ * size.
*/
size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
- if (size > 0)
- size = ALIGN_DOWN(size, bdev_logical_block_size(bio->bi_bdev));
+ if (size > 0) {
+ /*
+ * If size doesn't reach the end of the iov with bit buckets, align
+ * the total size (bytes already in the bio plus new pages) down to
+ * the block size so the truncation can't cut into the read data.
+ */
+ if (bio_flagged(bio, BIO_BIT_BUCKET)) {
+ if (size != iov_iter_count(iter)) {
+ size_t total_size = size + bio->bi_iter.bi_size;
+
+ total_size = ALIGN_DOWN(total_size, lbas);
+ size = total_size - bio->bi_iter.bi_size;
+ }
+ } else {
+ size = ALIGN_DOWN(size, lbas);
+ }
+ }
if (unlikely(size <= 0))
return size ? size : -EFAULT;
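
As a rough illustration of the alignment arithmetic above (not part of the patch; the values are invented for the example), the bytes already queued in the bio and the newly pinned bytes are aligned down as a whole, so the trailing bit-bucket truncation lands on a logical-block boundary instead of cutting into the data the caller asked for:

/* Standalone sketch of the ALIGN_DOWN step; power-of-two block size assumed. */
#include <stdio.h>

#define ALIGN_DOWN(x, a)	((x) & ~((size_t)(a) - 1))

int main(void)
{
	unsigned int lbas = 512;	/* logical block size */
	size_t bi_size = 1536;		/* bytes already in the bio */
	size_t size = 700;		/* bytes just pinned from the iov */

	/* Align the combined length, then charge the remainder back to size. */
	size_t total_size = ALIGN_DOWN(size + bi_size, lbas);
	size = total_size - bi_size;

	printf("trimmed size: %zu\n", size);	/* prints 512 */
	return 0;
}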
@@ -1602,6 +1625,8 @@ struct bio *bio_split(struct bio *bio, int sectors,
if (bio_flagged(bio, BIO_TRACE_COMPLETION))
bio_set_flag(split, BIO_TRACE_COMPLETION);
+ if (bio_flagged(bio, BIO_BIT_BUCKET))
+ bio_set_flag(split, BIO_BIT_BUCKET);
return split;
}
@@ -73,6 +73,9 @@ struct kmem_cache *blk_requestq_srcu_cachep;
*/
static struct workqueue_struct *kblockd_workqueue;
+struct page *blk_bb_page;
+EXPORT_SYMBOL_GPL(blk_bb_page);
+
/**
* blk_queue_flag_set - atomically set a queue flag
* @flag: flag to be set
@@ -1228,5 +1231,7 @@ int __init blk_dev_init(void)
blk_debugfs_root = debugfs_create_dir("block", NULL);
+ blk_bb_page = ZERO_PAGE(0);
+
return 0;
}
@@ -281,7 +281,8 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
* If the queue doesn't support SG gaps and adding this
* offset would create a gap, disallow it.
*/
- if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
+ if (!bio_flagged(bio, BIO_BIT_BUCKET) && bvprvp &&
+ bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
goto split;
if (nsegs < max_segs &&
@@ -2425,6 +2425,8 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
if (bio->bi_opf & REQ_RAHEAD)
rq->cmd_flags |= REQ_FAILFAST_MASK;
+ if (bio_flagged(bio, BIO_BIT_BUCKET))
+ rq->rq_flags |= RQF_BIT_BUCKET;
rq->__sector = bio->bi_iter.bi_sector;
blk_rq_bio_prep(rq, bio, nr_segs);
@@ -22,6 +22,8 @@ typedef __u32 __bitwise req_flags_t;
/* drive already may have started this one */
#define RQF_STARTED ((__force req_flags_t)(1 << 1))
+/* request has bit bucket payload */
+#define RQF_BIT_BUCKET ((__force req_flags_t)(1 << 2))
/* may not be passed by ioscheduler */
#define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3))
/* request for flush sequence */
@@ -332,6 +332,7 @@ enum {
BIO_QOS_MERGED, /* but went through rq_qos merge path */
BIO_REMAPPED,
BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */
+ BIO_BIT_BUCKET, /* contains one or more bit bucket pages */
BIO_FLAG_LAST
};
@@ -44,6 +44,7 @@ struct blk_crypto_profile;
extern const struct device_type disk_type;
extern struct device_type part_type;
extern struct class block_class;
+extern struct page *blk_bb_page;
/* Must be consistent with blk_mq_poll_stats_bkt() */
#define BLK_MQ_POLL_STATS_BKTS 16
@@ -580,6 +581,7 @@ struct request_queue {
#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */
#define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */
#define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */
+#define QUEUE_FLAG_BIT_BUCKET 31 /* device supports read bit buckets */
#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_SAME_COMP) | \
@@ -621,6 +623,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
#define blk_queue_nowait(q) test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
#define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags)
+#define blk_queue_bb(q) test_bit(QUEUE_FLAG_BIT_BUCKET, &(q)->queue_flags)
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);
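
A driver whose hardware can skip unwanted bytes would advertise the capability with the new queue flag. A minimal sketch, assuming a hypothetical driver queue-setup path (the example_* names are invented; blk_queue_flag_set() is the existing helper):

#include <linux/blkdev.h>

/* Hypothetical driver hook: declare bit-bucket support at queue setup. */
static void example_init_queue(struct request_queue *q)
{
	blk_queue_flag_set(QUEUE_FLAG_BIT_BUCKET, q);
}

/* Submitters can then gate the optimization on the new test macro. */
static bool example_may_bit_bucket(struct request_queue *q)
{
	return blk_queue_bb(q);
}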
@@ -1566,4 +1569,14 @@ struct io_comp_batch {
#define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { }
+static inline void blk_add_bb_page(struct bio *bio, int len)
+{
+ bio_set_flag(bio, BIO_BIT_BUCKET);
+ get_page(blk_bb_page);
+ bio_add_page(bio, blk_bb_page, len, 0);
+}
+static inline bool blk_is_bit_bucket(struct page *page)
+{
+ return page == blk_bb_page;
+}
#endif /* _LINUX_BLKDEV_H */
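
To show how the two new helpers fit together, here is a minimal sketch (not from the patch; the example_* names and the head/tail byte counts are assumed to come from a hypothetical caller doing an unaligned read): unwanted bytes around the real data are directed at the shared bit-bucket page, and a driver walking the resulting request can recognize those segments and tell the hardware to discard them.

#include <linux/blkdev.h>
#include <linux/bio.h>

/* Hypothetical: pad an unaligned read out to whole logical blocks. */
static void example_pad_read(struct bio *bio, unsigned int head_bytes,
			     unsigned int tail_bytes)
{
	if (head_bytes)
		blk_add_bb_page(bio, head_bytes);	/* discard before the data */

	/* ... the caller adds its real data pages here ... */

	if (tail_bytes)
		blk_add_bb_page(bio, tail_bytes);	/* discard after the data */
}

/* Hypothetical: a driver mapping the request skips bucket segments. */
static bool example_segment_is_bucket(struct bio_vec *bv)
{
	return blk_is_bit_bucket(bv->bv_page);
}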