diff mbox series

[RFC,v2,1/8] block: introduce BLK_FEAT_WRITE_ZEROES_UNMAP to queue limits features

Message ID 20250115114637.2705887-2-yi.zhang@huaweicloud.com (mailing list archive)
State New
Headers show
Series [RFC,v2,1/8] block: introduce BLK_FEAT_WRITE_ZEROES_UNMAP to queue limits features | expand

Commit Message

Zhang Yi Jan. 15, 2025, 11:46 a.m. UTC
From: Zhang Yi <yi.zhang@huawei.com>

Currently, it's hard to know whether the storage device supports unmap
write zeroes. We cannot determine it only by checking if the disk
supports the write zeroes command, as for some HDDs that do submit
actual zeros to the disk media even if they claim to support the write
zeroes command, but that should be very slow.

Therefor, add a new queue limit feature, BLK_FEAT_WRITE_ZEROES_UNMAP and
the corresponding sysfs entry, to indicate whether the block device
explicitly supports the unmapped write zeroes command. Each device
driver should set this bit if it is certain that the attached disk
supports this command. If the bit is not set, the disk either does not
support it, or its support status is unknown.

For the stacked devices cases, the BLK_FEAT_WRITE_ZEROES_UNMAP should be
supported both by the stacking driver and all underlying devices.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
 Documentation/ABI/stable/sysfs-block | 14 ++++++++++++++
 block/blk-settings.c                 |  6 ++++++
 block/blk-sysfs.c                    |  3 +++
 include/linux/blkdev.h               |  3 +++
 4 files changed, 26 insertions(+)
diff mbox series

Patch

diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 0cceb2badc83..ab4117cefd9a 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -722,6 +722,20 @@  Description:
 		0, write zeroes is not supported by the device.
 
 
+What:		/sys/block/<disk>/queue/write_zeroes_unmap
+Date:		January 2025
+Contact:	Zhang Yi <yi.zhang@huawei.com>
+Description:
+		[RO] Devices that explicitly support the unmap write zeroes
+		operation in which a single write zeroes request with the unmap
+		bit set to zero out the range of contiguous blocks on storage
+		by freeing blocks, rather than writing physical zeroes to the
+		media. If write_zeroes_unmap is 1, this indicates that the
+		device explicitly supports the write zero command. Otherwise,
+		the device either does not support it, or its support status is
+		unknown.
+
+
 What:		/sys/block/<disk>/queue/zone_append_max_bytes
 Date:		May 2020
 Contact:	linux-block@vger.kernel.org
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 8f09e33f41f6..a8bf2f8f0634 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -652,6 +652,8 @@  int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		t->features &= ~BLK_FEAT_NOWAIT;
 	if (!(b->features & BLK_FEAT_POLL))
 		t->features &= ~BLK_FEAT_POLL;
+	if (!(b->features & BLK_FEAT_WRITE_ZEROES_UNMAP))
+		t->features &= ~BLK_FEAT_WRITE_ZEROES_UNMAP;
 
 	t->flags |= (b->flags & BLK_FLAG_MISALIGNED);
 
@@ -774,6 +776,10 @@  int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 		t->zone_write_granularity = 0;
 		t->max_zone_append_sectors = 0;
 	}
+
+	if (!t->max_write_zeroes_sectors)
+		t->features &= ~BLK_FEAT_WRITE_ZEROES_UNMAP;
+
 	blk_stack_atomic_writes_limits(t, b);
 
 	return ret;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 767598e719ab..13f22bee19d2 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -248,6 +248,7 @@  static ssize_t queue_##_name##_show(struct gendisk *disk, char *page)	\
 QUEUE_SYSFS_FEATURE_SHOW(poll, BLK_FEAT_POLL);
 QUEUE_SYSFS_FEATURE_SHOW(fua, BLK_FEAT_FUA);
 QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX);
+QUEUE_SYSFS_FEATURE_SHOW(write_zeroes_unmap, BLK_FEAT_WRITE_ZEROES_UNMAP);
 
 static ssize_t queue_zoned_show(struct gendisk *disk, char *page)
 {
@@ -468,6 +469,7 @@  QUEUE_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes");
 
 QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
 QUEUE_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes");
+QUEUE_RO_ENTRY(queue_write_zeroes_unmap, "write_zeroes_unmap");
 QUEUE_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes");
 QUEUE_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
 
@@ -615,6 +617,7 @@  static struct attribute *queue_attrs[] = {
 	&queue_poll_delay_entry.attr,
 	&queue_virt_boundary_mask_entry.attr,
 	&queue_dma_alignment_entry.attr,
+	&queue_write_zeroes_unmap_entry.attr,
 	NULL,
 };
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 378d3a1a22fc..14ba1e2709bb 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -335,6 +335,9 @@  typedef unsigned int __bitwise blk_features_t;
 #define BLK_FEAT_ATOMIC_WRITES_STACKED \
 	((__force blk_features_t)(1u << 16))
 
+/* supports unmap write zeroes command */
+#define BLK_FEAT_WRITE_ZEROES_UNMAP	((__force blk_features_t)(1u << 17))
+
 /*
  * Flags automatically inherited when stacking limits.
  */