@@ -1208,6 +1208,7 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
struct page **pages = (struct page **)bv;
+ unsigned int lbas = bdev_logical_block_size(bio->bi_bdev);
ssize_t size, left;
unsigned len, i;
size_t offset;
@@ -1226,10 +1227,32 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
* more pages than bi_max_vecs allows, so we have to ALIGN_DOWN the
* result to ensure the bio's total size is correct. The remainder of
* the iov data will be picked up in the next bio iteration.
+ *
+ * Partial sector reads can break the iov length expectations, since
+ * only dma_alignment granularity is required. The code enforces only 1
+ * segment in that case, which simplifies the following logic. We don't
+ * need to consider individual segment lengths since the skip and
+ * truncate bytes are guaranteed to align the total length to the block
+ * size.
*/
size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
- if (size > 0)
- size = ALIGN_DOWN(size, bdev_logical_block_size(bio->bi_bdev));
+ if (size > 0) {
+ /*
+ * If size doesn't reach the end of the iov with bit buckets, align
+ * the total size (bytes already in the bio plus new pages) down to
+ * the block size so the truncation can't cut into the read data.
+ */
+ if (bio_flagged(bio, BIO_BIT_BUCKET)) {
+ if (size != iov_iter_count(iter)) {
+ size_t total_size = size + bio->bi_iter.bi_size;
+
+ total_size = ALIGN_DOWN(total_size, lbas);
+ size = total_size - bio->bi_iter.bi_size;
+ }
+ } else {
+ size = ALIGN_DOWN(size, lbas);
+ }
+ }
if (unlikely(size <= 0))
return size ? size : -EFAULT;
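
As a rough illustration of the alignment arithmetic above (not part of the patch; the values are invented for the example), the bytes already queued in the bio and the newly pinned bytes are aligned down as a whole, so the trailing bit-bucket truncation lands on a logical-block boundary instead of cutting into the data the caller asked for:

/* Standalone sketch of the ALIGN_DOWN step; power-of-two block size assumed. */
#include <stdio.h>

#define ALIGN_DOWN(x, a)	((x) & ~((size_t)(a) - 1))

int main(void)
{
	unsigned int lbas = 512;	/* logical block size */
	size_t bi_size = 1536;		/* bytes already in the bio */
	size_t size = 700;		/* bytes just pinned from the iov */

	/* Align the combined length, then charge the remainder back to size. */
	size_t total_size = ALIGN_DOWN(size + bi_size, lbas);
	size = total_size - bi_size;

	printf("trimmed size: %zu\n", size);	/* prints 512 */
	return 0;
}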
@@ -1602,6 +1625,8 @@ struct bio *bio_split(struct bio *bio, int sectors,
if (bio_flagged(bio, BIO_TRACE_COMPLETION))
bio_set_flag(split, BIO_TRACE_COMPLETION);
+ if (bio_flagged(bio, BIO_BIT_BUCKET))
+ bio_set_flag(split, BIO_BIT_BUCKET);
return split;
}
@@ -73,6 +73,9 @@ struct kmem_cache *blk_requestq_srcu_cachep;
*/
static struct workqueue_struct *kblockd_workqueue;
+struct page *blk_bb_page;
+EXPORT_SYMBOL_GPL(blk_bb_page);
+
/**
* blk_queue_flag_set - atomically set a queue flag
* @flag: flag to be set
@@ -1228,5 +1231,7 @@ int __init blk_dev_init(void)
blk_debugfs_root = debugfs_create_dir("block", NULL);
+ blk_bb_page = ZERO_PAGE(0);
+
return 0;
}
@@ -281,7 +281,8 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
* If the queue doesn't support SG gaps and adding this
* offset would create a gap, disallow it.
*/
- if (bvprvp && bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
+ if (!bio_flagged(bio, BIO_BIT_BUCKET) && bvprvp &&
+ bvec_gap_to_prev(q, bvprvp, bv.bv_offset))
goto split;
if (nsegs < max_segs &&
@@ -2425,6 +2425,8 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
if (bio->bi_opf & REQ_RAHEAD)
rq->cmd_flags |= REQ_FAILFAST_MASK;
+ if (bio_flagged(bio, BIO_BIT_BUCKET))
+ rq->rq_flags |= RQF_BIT_BUCKET;
rq->__sector = bio->bi_iter.bi_sector;
blk_rq_bio_prep(rq, bio, nr_segs);
@@ -22,6 +22,8 @@ typedef __u32 __bitwise req_flags_t;
/* drive already may have started this one */
#define RQF_STARTED ((__force req_flags_t)(1 << 1))
+/* request has bit bucket payload */
+#define RQF_BIT_BUCKET ((__force req_flags_t)(1 << 2))
/* may not be passed by ioscheduler */
#define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3))
/* request for flush sequence */
@@ -332,6 +332,7 @@ enum {
BIO_QOS_MERGED, /* but went through rq_qos merge path */
BIO_REMAPPED,
BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */
+ BIO_BIT_BUCKET, /* contains one or more bit bucket pages */
BIO_FLAG_LAST
};
@@ -44,6 +44,7 @@ struct blk_crypto_profile;
extern const struct device_type disk_type;
extern struct device_type part_type;
extern struct class block_class;
+extern struct page *blk_bb_page;
/* Must be consistent with blk_mq_poll_stats_bkt() */
#define BLK_MQ_POLL_STATS_BKTS 16
@@ -580,6 +581,7 @@ struct request_queue {
#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */
#define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */
#define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */
+#define QUEUE_FLAG_BIT_BUCKET 31 /* device supports read bit buckets */
#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_SAME_COMP) | \
@@ -621,6 +623,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
#define blk_queue_nowait(q) test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
#define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags)
+#define blk_queue_bb(q) test_bit(QUEUE_FLAG_BIT_BUCKET, &(q)->queue_flags)
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);
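
A driver whose hardware can skip unwanted bytes would advertise the capability with the new queue flag. A minimal sketch, assuming a hypothetical driver queue-setup path (the example_* names are invented; blk_queue_flag_set() is the existing helper):

#include <linux/blkdev.h>

/* Hypothetical driver hook: declare bit-bucket support at queue setup. */
static void example_init_queue(struct request_queue *q)
{
	blk_queue_flag_set(QUEUE_FLAG_BIT_BUCKET, q);
}

/* Submitters can then gate the optimization on the new test macro. */
static bool example_may_bit_bucket(struct request_queue *q)
{
	return blk_queue_bb(q);
}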
@@ -1566,4 +1569,14 @@ struct io_comp_batch {
#define DEFINE_IO_COMP_BATCH(name) struct io_comp_batch name = { }
+static inline void blk_add_bb_page(struct bio *bio, int len)
+{
+ bio_set_flag(bio, BIO_BIT_BUCKET);
+ get_page(blk_bb_page);
+ bio_add_page(bio, blk_bb_page, len, 0);
+}
+static inline bool blk_is_bit_bucket(struct page *page)
+{
+ return page == blk_bb_page;
+}
#endif /* _LINUX_BLKDEV_H */
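
To show how the two new helpers fit together, here is a minimal sketch (not from the patch; the example_* names and the head/tail byte counts are assumed to come from a hypothetical caller doing an unaligned read): unwanted bytes around the real data are directed at the shared bit-bucket page, and a driver walking the resulting request can recognize those segments and tell the hardware to discard them.

#include <linux/blkdev.h>
#include <linux/bio.h>

/* Hypothetical: pad an unaligned read out to whole logical blocks. */
static void example_pad_read(struct bio *bio, unsigned int head_bytes,
			     unsigned int tail_bytes)
{
	if (head_bytes)
		blk_add_bb_page(bio, head_bytes);	/* discard before the data */

	/* ... the caller adds its real data pages here ... */

	if (tail_bytes)
		blk_add_bb_page(bio, tail_bytes);	/* discard after the data */
}

/* Hypothetical: a driver mapping the request skips bucket segments. */
static bool example_segment_is_bucket(struct bio_vec *bv)
{
	return blk_is_bit_bucket(bv->bv_page);
}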