diff mbox series

[PATCHv9,2/7] block: introduce max_write_hints queue limit

Message ID 20241025213645.3464331-3-kbusch@meta.com (mailing list archive)
State New
Headers show
Series write hints with nvme fdp, scsi streams | expand

Commit Message

Keith Busch Oct. 25, 2024, 9:36 p.m. UTC
From: Keith Busch <kbusch@kernel.org>

Drivers with hardware that supports write streams need a way to export how
many are available so applications can generically query this.

Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 Documentation/ABI/stable/sysfs-block |  7 +++++++
 block/blk-settings.c                 |  3 +++
 block/blk-sysfs.c                    |  3 +++
 include/linux/blkdev.h               | 12 ++++++++++++
 4 files changed, 25 insertions(+)

Comments

Christoph Hellwig Oct. 28, 2024, 11:51 a.m. UTC | #1
On Fri, Oct 25, 2024 at 02:36:40PM -0700, Keith Busch wrote:
> +static inline unsigned short bdev_max_write_hints(struct block_device *bdev)
> +{
> +	return queue_max_write_hints(bdev_get_queue(bdev));
> +}

As pointed out by Bart last time, you can't simply give the write hints
to all block devices.  Assume we'd want to wire up write-stream-based
separation for f2fs (which btw would be a good demonstration), and you'd
have two different f2fs file systems on separate partitions that'd
now start sharing the write streams if they simply started from stream
1.  Same for our pending XFS data placement work.
Christoph Hellwig Oct. 28, 2024, 11:52 a.m. UTC | #2
On Mon, Oct 28, 2024 at 12:51:32PM +0100, Christoph Hellwig wrote:
> As pointed out by Bart last time, you can't simply give the write hints
> to all block devices.  Assume we'd want to wire up write-stream-based
> separation for f2fs (which btw would be a good demonstration), and you'd
> have two different f2fs file systems on separate partitions that'd
> now start sharing the write streams if they simply started from stream
> 1.  Same for our pending XFS data placement work.

And I'm an idiot and should have looked at the next patch first.
Sorry for that.
diff mbox series

Patch

diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 8353611107154..f2db2cabb8e75 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -506,6 +506,13 @@  Description:
 		[RO] Maximum size in bytes of a single element in a DMA
 		scatter/gather list.
 
+What:		/sys/block/<disk>/queue/max_write_hints
+Date:		October 2024
+Contact:	linux-block@vger.kernel.org
+Description:
+		[RO] Maximum number of write hints supported, 0 if not
+		supported. If supported, valid values are 1 through
+		max_write_hints, inclusive.
 
 What:		/sys/block/<disk>/queue/max_segments
 Date:		March 2010
diff --git a/block/blk-settings.c b/block/blk-settings.c
index a446654ddee5e..921fb4d334fa4 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -43,6 +43,7 @@  void blk_set_stacking_limits(struct queue_limits *lim)
 	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
 
 	/* Inherit limits from component devices */
+	lim->max_write_hints = USHRT_MAX;
 	lim->max_segments = USHRT_MAX;
 	lim->max_discard_segments = USHRT_MAX;
 	lim->max_hw_sectors = UINT_MAX;
@@ -544,6 +545,8 @@  int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	t->max_segment_size = min_not_zero(t->max_segment_size,
 					   b->max_segment_size);
 
+	t->max_write_hints = min(t->max_write_hints, b->max_write_hints);
+
 	alignment = queue_limit_alignment_offset(b, start);
 
 	/* Bottom device has different alignment.  Check that it is
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 741b95dfdbf6f..85f48ca461049 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -104,6 +104,7 @@  QUEUE_SYSFS_LIMIT_SHOW(max_segments)
 QUEUE_SYSFS_LIMIT_SHOW(max_discard_segments)
 QUEUE_SYSFS_LIMIT_SHOW(max_integrity_segments)
 QUEUE_SYSFS_LIMIT_SHOW(max_segment_size)
+QUEUE_SYSFS_LIMIT_SHOW(max_write_hints)
 QUEUE_SYSFS_LIMIT_SHOW(logical_block_size)
 QUEUE_SYSFS_LIMIT_SHOW(physical_block_size)
 QUEUE_SYSFS_LIMIT_SHOW(chunk_sectors)
@@ -457,6 +458,7 @@  QUEUE_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
 QUEUE_RO_ENTRY(queue_max_segments, "max_segments");
 QUEUE_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
 QUEUE_RO_ENTRY(queue_max_segment_size, "max_segment_size");
+QUEUE_RO_ENTRY(queue_max_write_hints, "max_write_hints");
 QUEUE_RW_LOAD_MODULE_ENTRY(elv_iosched, "scheduler");
 
 QUEUE_RO_ENTRY(queue_logical_block_size, "logical_block_size");
@@ -591,6 +593,7 @@  static struct attribute *queue_attrs[] = {
 	&queue_max_discard_segments_entry.attr,
 	&queue_max_integrity_segments_entry.attr,
 	&queue_max_segment_size_entry.attr,
+	&queue_max_write_hints_entry.attr,
 	&queue_hw_sector_size_entry.attr,
 	&queue_logical_block_size_entry.attr,
 	&queue_physical_block_size_entry.attr,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 55bec14fe55f9..a8ad41ee07234 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -393,6 +393,8 @@  struct queue_limits {
 	unsigned short		max_integrity_segments;
 	unsigned short		max_discard_segments;
 
+	unsigned short		max_write_hints;
+
 	unsigned int		max_open_zones;
 	unsigned int		max_active_zones;
 
@@ -1183,6 +1185,11 @@  static inline unsigned short queue_max_segments(const struct request_queue *q)
 	return q->limits.max_segments;
 }
 
+static inline unsigned short queue_max_write_hints(struct request_queue *q)
+{
+	return q->limits.max_write_hints;
+}
+
 static inline unsigned short queue_max_discard_segments(const struct request_queue *q)
 {
 	return q->limits.max_discard_segments;
@@ -1230,6 +1237,11 @@  static inline unsigned int bdev_max_segments(struct block_device *bdev)
 	return queue_max_segments(bdev_get_queue(bdev));
 }
 
+static inline unsigned short bdev_max_write_hints(struct block_device *bdev)
+{
+	return queue_max_write_hints(bdev_get_queue(bdev));
+}
+
 static inline unsigned queue_logical_block_size(const struct request_queue *q)
 {
 	return q->limits.logical_block_size;