diff mbox series

[RFC,V2,2/4] block: add a statistic table for io sector

Message ID 20200713211321.21123-3-guoqing.jiang@cloud.ionos.com
State New, archived
Headers show
Series block: add two statistic tables | expand

Commit Message

Guoqing Jiang July 13, 2020, 9:13 p.m. UTC
With the sector table, we can know the distribution of
different IO sizes issued from the upper layer, which gives us
the opportunity to tune performance based on the most
frequently issued IOs.

Signed-off-by: Florian-Ewald Mueller <florian-ewald.mueller@cloud.ionos.com>
Signed-off-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
---
 block/Kconfig             |  3 ++-
 block/blk-core.c          | 16 ++++++++++++++++
 block/genhd.c             | 21 +++++++++++++++++++++
 include/linux/part_stat.h |  3 ++-
 4 files changed, 41 insertions(+), 2 deletions(-)

Comments

Aleksei Marov Aug. 11, 2020, 3:04 p.m. UTC | #1
Is it possible to collect the very same stats (distribution of sizes and
distribution of latency) with eBPF tracing instead of static maps in the kernel?
Like using https://github.com/iovisor/bcc/tree/master/tools/
* biolatency for lat distribution
* bitesize for size distribution
Please have a look at these and similar tools (biolatpcts, biosnoop). Check the
examples they provide:
https://github.com/iovisor/bcc/blob/master/tools/bitesize_example.txt
https://github.com/iovisor/bcc/blob/master/tools/biolatency_example.txt
Let me know how they differ from your stats.

Best Regards
Aleksei Marov

On Mon, 2020-07-13 at 23:13 +0200, Guoqing Jiang wrote:
> With the sector table, so we can know the distribution of
> different IO size from upper layer, which means we could
> have the opportunity to tune the performance based on the
> mostly issued IOs.
> 
> Signed-off-by: Florian-Ewald Mueller <florian-ewald.mueller@cloud.ionos.com>
> Signed-off-by: Guoqing Jiang <guoqing.jiang@cloud.ionos.com>
> ---
>  block/Kconfig             |  3 ++-
>  block/blk-core.c          | 16 ++++++++++++++++
>  block/genhd.c             | 21 +++++++++++++++++++++
>  include/linux/part_stat.h |  3 ++-
>  4 files changed, 41 insertions(+), 2 deletions(-)
> 
> diff --git a/block/Kconfig b/block/Kconfig
> index 360f63111e2d..c9b9f99152d8 100644
> --- a/block/Kconfig
> +++ b/block/Kconfig
> @@ -180,7 +180,8 @@ config BLK_ADDITIONAL_DISKSTAT
>  	bool "Block layer additional diskstat"
>  	default n
>  	help
> -	Enabling this option adds io latency statistics for each block device.
> +	Enabling this option adds io latency and io size statistics for each
> +	block device.
>  
>  	If unsure, say N.
>  
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 036eb04782de..b67aedfbcefc 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -1437,6 +1437,16 @@ static void blk_additional_latency(struct hd_struct
> *part, const int sgrp,
>  	part_stat_inc(part, latency_table[idx][sgrp]);
>  
>  }
> +
> +static void blk_additional_sector(struct hd_struct *part, const int sgrp,
> +				  unsigned int sectors)
> +{
> +	unsigned int KB = sectors / 2, idx;
> +
> +	idx = (KB > 0) ? ilog2(KB) : 0;
> +	idx = (idx > (ADD_STAT_NUM - 1)) ? (ADD_STAT_NUM - 1) : idx;
> +	part_stat_inc(part, size_table[idx][sgrp]);
> +}
>  #endif
>  
>  static void blk_account_io_completion(struct request *req, unsigned int
> bytes)
> @@ -1447,6 +1457,9 @@ static void blk_account_io_completion(struct request
> *req, unsigned int bytes)
>  
>  		part_stat_lock();
>  		part = req->part;
> +#ifdef CONFIG_BLK_ADDITIONAL_DISKSTAT
> +		blk_additional_sector(part, sgrp, bytes >> SECTOR_SHIFT);
> +#endif
>  		part_stat_add(part, sectors[sgrp], bytes >> 9);
>  		part_stat_unlock();
>  	}
> @@ -1502,6 +1515,9 @@ unsigned long disk_start_io_acct(struct gendisk *disk,
> unsigned int sectors,
>  	update_io_ticks(part, now, false);
>  	part_stat_inc(part, ios[sgrp]);
>  	part_stat_add(part, sectors[sgrp], sectors);
> +#ifdef CONFIG_BLK_ADDITIONAL_DISKSTAT
> +	blk_additional_sector(part, sgrp, sectors);
> +#endif
>  	part_stat_local_inc(part, in_flight[op_is_write(op)]);
>  	part_stat_unlock();
>  
> diff --git a/block/genhd.c b/block/genhd.c
> index f5d2f110fb34..cb9394521a8f 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -1441,6 +1441,26 @@ static ssize_t io_latency_show(struct device *dev,
> struct device_attribute *attr
>  
>  static struct device_attribute dev_attr_io_latency =
>  	__ATTR(io_latency, 0444, io_latency_show, NULL);
> +
> +static ssize_t io_size_show(struct device *dev, struct device_attribute
> *attr, char *buf)
> +{
> +	struct hd_struct *p = dev_to_part(dev);
> +	size_t count = 0;
> +	int i, sgrp;
> +
> +	for (i = 0; i < ADD_STAT_NUM; i++) {
> +		count += scnprintf(buf + count, PAGE_SIZE - count, "%5d KB: ", 1
> << i);
> +		for (sgrp = 0; sgrp < NR_STAT_GROUPS; sgrp++)
> +			count += scnprintf(buf + count, PAGE_SIZE - count, "%lu
> ",
> +					   part_stat_read(p,
> size_table[i][sgrp]));
> +		count += scnprintf(buf + count, PAGE_SIZE - count, "\n");
> +	}
> +
> +	return count;
> +}
> +
> +static struct device_attribute dev_attr_io_size =
> +	__ATTR(io_size, 0444, io_size_show, NULL);
>  #endif
>  
>  static struct attribute *disk_attrs[] = {
> @@ -1464,6 +1484,7 @@ static struct attribute *disk_attrs[] = {
>  #endif
>  #ifdef CONFIG_BLK_ADDITIONAL_DISKSTAT
>  	&dev_attr_io_latency.attr,
> +	&dev_attr_io_size.attr,
>  #endif
>  	NULL
>  };
> diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
> index fe3def8c69d7..2b056cd70d1f 100644
> --- a/include/linux/part_stat.h
> +++ b/include/linux/part_stat.h
> @@ -11,10 +11,11 @@ struct disk_stats {
>  	unsigned long merges[NR_STAT_GROUPS];
>  #ifdef CONFIG_BLK_ADDITIONAL_DISKSTAT
>  /*
> - * We measure latency (ms) for 1, 2, ..., 1024 and >=1024.
> + * We measure latency (ms) and size (sector) for 1, 2, ..., 1024 and >=1024.
>   */
>  #define ADD_STAT_NUM	12
>  	unsigned long latency_table[ADD_STAT_NUM][NR_STAT_GROUPS];
> +	unsigned long size_table[ADD_STAT_NUM][NR_STAT_GROUPS];
>  #endif
>  	unsigned long io_ticks;
>  	local_t in_flight[2];
Guoqing Jiang Aug. 11, 2020, 3:48 p.m. UTC | #2
On 8/11/20 5:04 PM, Aleksei Marov wrote:
> Is it possible to collect the very same stats (distribution of sizes and
> distribution of lat) without having static maps in kernel but with eBPF tracing?
> Like using https://github.com/iovisor/bcc/tree/master/tools/
> * biolatency for lat distribution
> * bitesize for size distribution
> Please, have a look at these and similar tools (biolatpcts, biosnoop). Check the
> examples they have
> https://github.com/iovisor/bcc/blob/master/tools/bitesize_example.txt
> https://github.com/iovisor/bcc/blob/master/tools/biolatency_example.txt
> Let me know what is the difference comparing to your stats.

The difference is the cost; please see the link below.

https://marc.info/?l=linux-block&m=159458634517068&w=2

Thanks,
Guoqing
diff mbox series

Patch

diff --git a/block/Kconfig b/block/Kconfig
index 360f63111e2d..c9b9f99152d8 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -180,7 +180,8 @@  config BLK_ADDITIONAL_DISKSTAT
 	bool "Block layer additional diskstat"
 	default n
 	help
-	Enabling this option adds io latency statistics for each block device.
+	Enabling this option adds io latency and io size statistics for each
+	block device.
 
 	If unsure, say N.
 
diff --git a/block/blk-core.c b/block/blk-core.c
index 036eb04782de..b67aedfbcefc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1437,6 +1437,16 @@  static void blk_additional_latency(struct hd_struct *part, const int sgrp,
 	part_stat_inc(part, latency_table[idx][sgrp]);
 
 }
+
+static void blk_additional_sector(struct hd_struct *part, const int sgrp,
+				  unsigned int sectors)
+{
+	unsigned int KB = sectors / 2, idx;
+
+	idx = (KB > 0) ? ilog2(KB) : 0;
+	idx = (idx > (ADD_STAT_NUM - 1)) ? (ADD_STAT_NUM - 1) : idx;
+	part_stat_inc(part, size_table[idx][sgrp]);
+}
 #endif
 
 static void blk_account_io_completion(struct request *req, unsigned int bytes)
@@ -1447,6 +1457,9 @@  static void blk_account_io_completion(struct request *req, unsigned int bytes)
 
 		part_stat_lock();
 		part = req->part;
+#ifdef CONFIG_BLK_ADDITIONAL_DISKSTAT
+		blk_additional_sector(part, sgrp, bytes >> SECTOR_SHIFT);
+#endif
 		part_stat_add(part, sectors[sgrp], bytes >> 9);
 		part_stat_unlock();
 	}
@@ -1502,6 +1515,9 @@  unsigned long disk_start_io_acct(struct gendisk *disk, unsigned int sectors,
 	update_io_ticks(part, now, false);
 	part_stat_inc(part, ios[sgrp]);
 	part_stat_add(part, sectors[sgrp], sectors);
+#ifdef CONFIG_BLK_ADDITIONAL_DISKSTAT
+	blk_additional_sector(part, sgrp, sectors);
+#endif
 	part_stat_local_inc(part, in_flight[op_is_write(op)]);
 	part_stat_unlock();
 
diff --git a/block/genhd.c b/block/genhd.c
index f5d2f110fb34..cb9394521a8f 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1441,6 +1441,26 @@  static ssize_t io_latency_show(struct device *dev, struct device_attribute *attr
 
 static struct device_attribute dev_attr_io_latency =
 	__ATTR(io_latency, 0444, io_latency_show, NULL);
+
+static ssize_t io_size_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct hd_struct *p = dev_to_part(dev);
+	size_t count = 0;
+	int i, sgrp;
+
+	for (i = 0; i < ADD_STAT_NUM; i++) {
+		count += scnprintf(buf + count, PAGE_SIZE - count, "%5d KB: ", 1 << i);
+		for (sgrp = 0; sgrp < NR_STAT_GROUPS; sgrp++)
+			count += scnprintf(buf + count, PAGE_SIZE - count, "%lu ",
+					   part_stat_read(p, size_table[i][sgrp]));
+		count += scnprintf(buf + count, PAGE_SIZE - count, "\n");
+	}
+
+	return count;
+}
+
+static struct device_attribute dev_attr_io_size =
+	__ATTR(io_size, 0444, io_size_show, NULL);
 #endif
 
 static struct attribute *disk_attrs[] = {
@@ -1464,6 +1484,7 @@  static struct attribute *disk_attrs[] = {
 #endif
 #ifdef CONFIG_BLK_ADDITIONAL_DISKSTAT
 	&dev_attr_io_latency.attr,
+	&dev_attr_io_size.attr,
 #endif
 	NULL
 };
diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h
index fe3def8c69d7..2b056cd70d1f 100644
--- a/include/linux/part_stat.h
+++ b/include/linux/part_stat.h
@@ -11,10 +11,11 @@  struct disk_stats {
 	unsigned long merges[NR_STAT_GROUPS];
 #ifdef CONFIG_BLK_ADDITIONAL_DISKSTAT
 /*
- * We measure latency (ms) for 1, 2, ..., 1024 and >=1024.
+ * We measure latency (ms) and size (KB) for 1, 2, ..., 1024 and >=1024.
  */
 #define ADD_STAT_NUM	12
 	unsigned long latency_table[ADD_STAT_NUM][NR_STAT_GROUPS];
+	unsigned long size_table[ADD_STAT_NUM][NR_STAT_GROUPS];
 #endif
 	unsigned long io_ticks;
 	local_t in_flight[2];