diff mbox series

block: enable passthrough command statistics

Message ID 20241002210744.72321-1-kbusch@meta.com (mailing list archive)
State New, archived
Headers show
Series block: enable passthrough command statistics | expand

Commit Message

Keith Busch Oct. 2, 2024, 9:07 p.m. UTC
From: Keith Busch <kbusch@kernel.org>

Applications using the passthrough interfaces for advance protocol IO
want to continue seeing the disk stats. These requests had been fenced
off from this block layer feature. While the block layer doesn't
necessarily know what a passthrough command does, we do know the data
size and direction, which is enough to account for the command's stats.

Since tracking these has the potential to produce unexpected results,
the passthrough stats are locked behind a new queue feature flag that
needs to be enabled using the /sys/block/<dev>/queue/iostats attribute.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
This is based off the recently created for-6.13/block tree here:

  https://git.kernel.dk/cgit/linux-block/log/?h=for-6.13/block

 Documentation/ABI/stable/sysfs-block |  4 +++-
 block/blk-mq.c                       | 17 +++++++++++++-
 block/blk-sysfs.c                    | 35 +++++++++++++++++++++++++++-
 include/linux/blkdev.h               |  4 ++++
 4 files changed, 57 insertions(+), 3 deletions(-)

Comments

Jens Axboe Oct. 3, 2024, 1:30 a.m. UTC | #1
On 10/2/24 3:07 PM, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
> 
> Applications using the passthrough interfaces for advance protocol IO
> want to continue seeing the disk stats. These requests had been fenced
> off from this block layer feature. While the block layer doesn't
> necessarily know what a passthrough command does, we do know the data
> size and direction, which is enough to account for the command's stats.
> 
> Since tracking these has the potential to produce unexpected results,
> the passthrough stats are locked behind a new queue feature flag that
> needs to be enabled using the /sys/block/<dev>/queue/iostats attribute.

Looks good go me in terms of allowing passthrough stats, and adding the
0/1/2 for iostats (like we do for nomerge too, for example). Minor nit
below.

> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index e85941bec857b..99f438beb6c67 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -246,7 +246,6 @@ static ssize_t queue_##_name##_store(struct gendisk *disk,		\
>  
>  QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL)
>  QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM)
> -QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT)
>  QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES);
>  
>  #define QUEUE_SYSFS_FEATURE_SHOW(_name, _feature)			\
> @@ -272,6 +271,40 @@ static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
>  	return queue_var_show(disk_nr_zones(disk), page);
>  }
>  
> +static ssize_t queue_iostats_show(struct gendisk *disk, char *page)
> +{
> +	return queue_var_show((bool)blk_queue_passthrough_stat(disk->queue) +
> +			      (bool)blk_queue_io_stat(disk->queue), page);
> +}
> +
> +static ssize_t queue_iostats_store(struct gendisk *disk, const char *page,
> +				   size_t count)
> +{
> +	struct queue_limits lim;
> +	unsigned long ios;
> +	ssize_t ret;
> +
> +	ret = queue_var_store(&ios, page, count);
> +	if (ret < 0)
> +		return ret;
> +
> +	lim = queue_limits_start_update(disk->queue);
> +	if (!ios)
> +		lim.features &= ~(BLK_FEAT_IO_STAT | BLK_FEAT_PASSTHROUGH_STAT);
> +	else if (ios == 2)
> +		lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_PASSTHROUGH_STAT;
> +	else if (ios == 1) {
> +		lim.features |= BLK_FEAT_IO_STAT;
> +		lim.features &= ~BLK_FEAT_PASSTHROUGH_STAT;
> +	}

Nit: use braces for all of them, if one requires it. And might be
cleaner to simply do:

	lim = queue_limits_start_update(disk->queue);
	lim.features &= ~(BLK_FEAT_IO_STAT | BLK_FEAT_PASSTHROUGH_STAT);
	if (ios == 2)
		lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_PASSTHROUGH_STAT;
	else if (ios == 1)
		lim.features |= BLK_FEAT_IO_STAT;

and then I guess the braces comment no longer applies.
Christoph Hellwig Oct. 3, 2024, 1 p.m. UTC | #2
On Wed, Oct 02, 2024 at 02:07:44PM -0700, Keith Busch wrote:
> +		accounting of the disk. Set to 0 to disable all stats. Set to 1
> +		to enable block IO stats. Set to 2 to enable passthrough stats
> +		in addition to block IO.

Jens' reply suggest he likes this interface, but I have to say I
already hated it with a passion for the merges - overloading a
previously boolean file with a numberic value is not exactly an
intuitive interface.  Is a new sysfs file for this really a problem?

> +	if (!bio)
> +		return false;
> +	if (!bio->bi_bdev)
> +		return false;
> +	if (blk_rq_bytes(req) & (bdev_logical_block_size(bio->bi_bdev) - 1))
> +		return false;

I understand why all these conditions are there, because basically
they'd break the current code to collect stats.  But I think this needs
a comment explaining why they are there, and why the statistics are
still useful without the requests matching them.

> +	lim = queue_limits_start_update(disk->queue);
> +	if (!ios)
> +		lim.features &= ~(BLK_FEAT_IO_STAT | BLK_FEAT_PASSTHROUGH_STAT);
> +	else if (ios == 2)
> +		lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_PASSTHROUGH_STAT;
> +	else if (ios == 1) {
> +		lim.features |= BLK_FEAT_IO_STAT;
> +		lim.features &= ~BLK_FEAT_PASSTHROUGH_STAT;

BLK_FEAT_IO_STAT is in ->features because drivers need to opt into it,
but BLK_FEAT_PASSTHROUGH_STAT is purely a flag triggered by sysfs and
should go into ->flags.
Jens Axboe Oct. 3, 2024, 1:20 p.m. UTC | #3
On 10/3/24 7:00 AM, Christoph Hellwig wrote:
> On Wed, Oct 02, 2024 at 02:07:44PM -0700, Keith Busch wrote:
>> +		accounting of the disk. Set to 0 to disable all stats. Set to 1
>> +		to enable block IO stats. Set to 2 to enable passthrough stats
>> +		in addition to block IO.
> 
> Jens' reply suggest he likes this interface, but I have to say I
> already hated it with a passion for the merges - overloading a
> previously boolean file with a numberic value is not exactly an
> intuitive interface.  Is a new sysfs file for this really a problem?

We can do a new file as well, like iostats_passthrough. Don't really
feel strongly about that part.
diff mbox series

Patch

diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index cea8856f798dd..bcf6ebedc9199 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -422,7 +422,9 @@  Date:		January 2009
 Contact:	linux-block@vger.kernel.org
 Description:
 		[RW] This file is used to control (on/off) the iostats
-		accounting of the disk.
+		accounting of the disk. Set to 0 to disable all stats. Set to 1
+		to enable block IO stats. Set to 2 to enable passthrough stats
+		in addition to block IO.
 
 
 What:		/sys/block/<disk>/queue/logical_block_size
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ee6cde39e52b7..55809f4bd09e3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -994,13 +994,28 @@  static inline void blk_account_io_done(struct request *req, u64 now)
 	}
 }
 
+static inline bool blk_rq_passthrough_stats(struct request *req)
+{
+	struct bio *bio = req->bio;
+
+	if (!blk_queue_passthrough_stat(req->q))
+		return false;
+	if (!bio)
+		return false;
+	if (!bio->bi_bdev)
+		return false;
+	if (blk_rq_bytes(req) & (bdev_logical_block_size(bio->bi_bdev) - 1))
+		return false;
+	return true;
+}
+
 static inline void blk_account_io_start(struct request *req)
 {
 	trace_block_io_start(req);
 
 	if (!blk_queue_io_stat(req->q))
 		return;
-	if (blk_rq_is_passthrough(req))
+	if (blk_rq_is_passthrough(req) && !blk_rq_passthrough_stats(req))
 		return;
 
 	req->rq_flags |= RQF_IO_STAT;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e85941bec857b..99f438beb6c67 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -246,7 +246,6 @@  static ssize_t queue_##_name##_store(struct gendisk *disk,		\
 
 QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL)
 QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM)
-QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT)
 QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES);
 
 #define QUEUE_SYSFS_FEATURE_SHOW(_name, _feature)			\
@@ -272,6 +271,40 @@  static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
 	return queue_var_show(disk_nr_zones(disk), page);
 }
 
+static ssize_t queue_iostats_show(struct gendisk *disk, char *page)
+{
+	return queue_var_show((bool)blk_queue_passthrough_stat(disk->queue) +
+			      (bool)blk_queue_io_stat(disk->queue), page);
+}
+
+static ssize_t queue_iostats_store(struct gendisk *disk, const char *page,
+				   size_t count)
+{
+	struct queue_limits lim;
+	unsigned long ios;
+	ssize_t ret;
+
+	ret = queue_var_store(&ios, page, count);
+	if (ret < 0)
+		return ret;
+
+	lim = queue_limits_start_update(disk->queue);
+	if (!ios)
+		lim.features &= ~(BLK_FEAT_IO_STAT | BLK_FEAT_PASSTHROUGH_STAT);
+	else if (ios == 2)
+		lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_PASSTHROUGH_STAT;
+	else if (ios == 1) {
+		lim.features |= BLK_FEAT_IO_STAT;
+		lim.features &= ~BLK_FEAT_PASSTHROUGH_STAT;
+	}
+
+	ret = queue_limits_commit_update(disk->queue, &lim);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
 static ssize_t queue_nomerges_show(struct gendisk *disk, char *page)
 {
 	return queue_var_show((blk_queue_nomerges(disk->queue) << 1) |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 50c3b959da281..9a66334a6e356 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -332,6 +332,8 @@  typedef unsigned int __bitwise blk_features_t;
 #define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \
 	((__force blk_features_t)(1u << 15))
 
+#define BLK_FEAT_PASSTHROUGH_STAT	((__force blk_features_t)(1u << 16))
+
 /*
  * Flags automatically inherited when stacking limits.
  */
@@ -617,6 +619,8 @@  void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
 	test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
 #define blk_queue_nonrot(q)	(!((q)->limits.features & BLK_FEAT_ROTATIONAL))
 #define blk_queue_io_stat(q)	((q)->limits.features & BLK_FEAT_IO_STAT)
+#define blk_queue_passthrough_stat(q)	\
+	((q)->limits.features & BLK_FEAT_PASSTHROUGH_STAT)
 #define blk_queue_dax(q)	((q)->limits.features & BLK_FEAT_DAX)
 #define blk_queue_pci_p2pdma(q)	((q)->limits.features & BLK_FEAT_PCI_P2PDMA)
 #ifdef CONFIG_BLK_RQ_ALLOC_TIME