@@ -358,8 +358,12 @@ What: /sys/block/<disk>/queue/iostats
Date: January 2009
Contact: linux-block@vger.kernel.org
Description:
- [RW] This file is used to control (on/off) the iostats
- accounting of the disk.
+	[RW] This file is used to control the iostats accounting of the
+	disk. If this value is 0, iostats accounting is disabled; if
+	this value is 1, iostats accounting is enabled, but io_ticks is
+	accounted by sampling and may be inaccurate; if this value is 2,
+	iostats accounting is enabled and io_ticks is accounted
+	precisely, at the cost of slightly higher overhead.
What: /sys/block/<disk>/queue/logical_block_size
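
To experiment with the new knob from userspace, a minimal sketch (not part of the patch) is shown below: it selects precise accounting and reads the accumulated io_ticks back. The device name "sda" and the stat file layout (io_ticks is the tenth field, in milliseconds, per Documentation/block/stat.rst) are assumptions for illustration.

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned long long stat[11] = { 0 };
	FILE *f;
	int i;

	/* 0 = off, 1 = sampled io_ticks, 2 = precise io_ticks */
	f = fopen("/sys/block/sda/queue/iostats", "w");
	if (!f || fprintf(f, "2\n") < 0) {
		perror("iostats");
		return EXIT_FAILURE;
	}
	fclose(f);

	f = fopen("/sys/block/sda/stat", "r");
	if (!f) {
		perror("stat");
		return EXIT_FAILURE;
	}
	for (i = 0; i < 11 && fscanf(f, "%llu", &stat[i]) == 1; i++)
		;
	fclose(f);

	/* field 10 (index 9) is io_ticks: time the disk was busy, in ms */
	printf("io_ticks: %llu ms\n", stat[9]);
	return 0;
}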
@@ -956,9 +956,13 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
unsigned long stamp;
again:
stamp = READ_ONCE(part->bd_stamp);
- if (unlikely(time_after(now, stamp))) {
- if (likely(try_cmpxchg(&part->bd_stamp, &stamp, now)))
- __part_stat_add(part, io_ticks, end ? now - stamp : 1);
+ if (unlikely(time_after(now, stamp)) &&
+ likely(try_cmpxchg(&part->bd_stamp, &stamp, now))) {
+ if (end || (blk_queue_precise_io_stat(part->bd_queue) &&
+ part_in_flight(part)))
+ __part_stat_add(part, io_ticks, now - stamp);
+ else
+ __part_stat_add(part, io_ticks, 1);
}
if (part->bd_partno) {
part = bdev_whole(part);
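
To make the difference concrete, here is a hedged userspace model (plain C, not kernel code) of the branch above, with the lockless try_cmpxchg loop elided. io_ticks is only updated when a request starts or completes: in sampled mode a stamp window that only sees request starts contributes a single jiffy no matter how long it was, while precise mode charges the whole span since the last stamp whenever requests are still in flight.

#include <stdio.h>
#include <stdbool.h>

static unsigned long stamp, io_ticks;

/* mirrors the update_io_ticks() branch above, minus the cmpxchg */
static void update(unsigned long now, bool end, bool precise, int inflight)
{
	if (now <= stamp)
		return;
	if (end || (precise && inflight))
		io_ticks += now - stamp;	/* charge the whole busy span */
	else
		io_ticks += 1;			/* sampled: one jiffy per window */
	stamp = now;
}

int main(void)
{
	for (int precise = 0; precise <= 1; precise++) {
		stamp = io_ticks = 0;
		/* one request in flight from t=0 to t=100, plus a short
		 * request from t=50 to t=51; the disk is busy throughout */
		update(0, false, precise, 0);	/* long rq starts */
		update(50, false, precise, 1);	/* short rq starts */
		update(51, true, precise, 2);	/* short rq completes */
		update(100, true, precise, 1);	/* long rq completes */
		printf("%s: io_ticks = %lu of 100 busy jiffies\n",
		       precise ? "precise" : "sampled", io_ticks);
	}
	return 0;	/* prints 51 for sampled, 100 for precise */
}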
@@ -779,6 +779,9 @@ static void blk_account_io_merge_request(struct request *req)
if (blk_do_io_stat(req)) {
part_stat_lock();
part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
+ if (req->rq_flags & RQF_PRECISE_IO_STAT)
+ part_stat_local_dec(req->part,
+ in_flight[op_is_write(req_op(req))]);
part_stat_unlock();
}
}
@@ -360,8 +360,11 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
if (data->flags & BLK_MQ_REQ_PM)
data->rq_flags |= RQF_PM;
- if (blk_queue_io_stat(q))
+ if (blk_queue_io_stat(q)) {
data->rq_flags |= RQF_IO_STAT;
+ if (blk_queue_precise_io_stat(q))
+ data->rq_flags |= RQF_PRECISE_IO_STAT;
+ }
rq->rq_flags = data->rq_flags;
if (data->rq_flags & RQF_SCHED_TAGS) {
@@ -999,6 +1002,9 @@ static inline void blk_account_io_done(struct request *req, u64 now)
update_io_ticks(req->part, jiffies, true);
part_stat_inc(req->part, ios[sgrp]);
part_stat_add(req->part, nsecs[sgrp], now - req->start_time_ns);
+ if (req->rq_flags & RQF_PRECISE_IO_STAT)
+ part_stat_local_dec(req->part,
+ in_flight[op_is_write(req_op(req))]);
part_stat_unlock();
}
}
@@ -1006,7 +1012,6 @@ static inline void blk_account_io_done(struct request *req, u64 now)
static inline void blk_account_io_start(struct request *req)
{
trace_block_io_start(req);
-
if (blk_do_io_stat(req)) {
/*
* All non-passthrough requests are created from a bio with one
@@ -1021,6 +1026,9 @@ static inline void blk_account_io_start(struct request *req)
part_stat_lock();
update_io_ticks(req->part, jiffies, false);
+ if (req->rq_flags & RQF_PRECISE_IO_STAT)
+ part_stat_local_inc(req->part,
+ in_flight[op_is_write(req_op(req))]);
part_stat_unlock();
}
}
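
Taken together, the accounting hunks above keep the new per-CPU in_flight counter balanced: a request increments it once in blk_account_io_start() and decrements it exactly once, either in blk_account_io_done() or, for a request that is merged away and therefore never completes on its own, in blk_account_io_merge_request(). Because each path tests the RQF_PRECISE_IO_STAT snapshot taken at allocation rather than the live queue flag, flipping the sysfs knob while requests are in flight cannot unbalance the counter. A minimal userspace model of that invariant, with hypothetical names:

#include <stdio.h>
#include <stdbool.h>

static int in_flight;
static bool queue_precise = true;	/* models QUEUE_FLAG_PRECISE_IO_STAT */

struct rq { bool precise; };		/* models RQF_PRECISE_IO_STAT */

static struct rq alloc_rq(void)		/* blk_mq_rq_ctx_init() + io start */
{
	struct rq rq = { .precise = queue_precise };	/* snapshot */

	if (rq.precise)
		in_flight++;
	return rq;
}

static void end_rq(struct rq *rq)	/* completion or merge */
{
	if (rq->precise)		/* test the snapshot, not the knob */
		in_flight--;
}

int main(void)
{
	struct rq a = alloc_rq();
	struct rq b = alloc_rq();

	queue_precise = false;		/* knob flipped mid-flight */
	end_rq(&b);			/* b merged into a */
	end_rq(&a);			/* a completes */
	printf("in flight: %d\n", in_flight);	/* 0: still balanced */
	return 0;
}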
@@ -303,7 +303,6 @@ queue_##name##_store(struct request_queue *q, const char *page, size_t count) \
QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1);
QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0);
-QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0);
QUEUE_SYSFS_BIT_FNS(stable_writes, STABLE_WRITES, 0);
#undef QUEUE_SYSFS_BIT_FNS
@@ -468,6 +467,45 @@ static ssize_t queue_dax_show(struct request_queue *q, char *page)
return queue_var_show(blk_queue_dax(q), page);
}
+static ssize_t queue_iostats_show(struct request_queue *q, char *page)
+{
+ int val = 0;
+
+ if (blk_queue_io_stat(q))
+ val = blk_queue_precise_io_stat(q) ? 2 : 1;
+
+	return sprintf(page, "%d\n", val);
+}
+
+static ssize_t
+queue_iostats_store(struct request_queue *q, const char *page, size_t count)
+{
+	unsigned long nr;
+	ssize_t ret = queue_var_store(&nr, page, count);
+
+ if (ret < 0)
+ return ret;
+
+ switch (nr) {
+ case 0:
+ blk_queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
+ blk_queue_flag_clear(QUEUE_FLAG_PRECISE_IO_STAT, q);
+ break;
+ case 1:
+ blk_queue_flag_set(QUEUE_FLAG_IO_STAT, q);
+ blk_queue_flag_clear(QUEUE_FLAG_PRECISE_IO_STAT, q);
+ break;
+ case 2:
+ blk_queue_flag_set(QUEUE_FLAG_IO_STAT, q);
+ blk_queue_flag_set(QUEUE_FLAG_PRECISE_IO_STAT, q);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return count;
+}
+
#define QUEUE_RO_ENTRY(_prefix, _name) \
static struct queue_sysfs_entry _prefix##_entry = { \
.attr = { .name = _name, .mode = 0444 }, \
@@ -489,6 +527,7 @@ QUEUE_RO_ENTRY(queue_max_segments, "max_segments");
QUEUE_RO_ENTRY(queue_max_integrity_segments, "max_integrity_segments");
QUEUE_RO_ENTRY(queue_max_segment_size, "max_segment_size");
QUEUE_RW_ENTRY(elv_iosched, "scheduler");
+QUEUE_RW_ENTRY(queue_iostats, "iostats");
QUEUE_RO_ENTRY(queue_logical_block_size, "logical_block_size");
QUEUE_RO_ENTRY(queue_physical_block_size, "physical_block_size");
@@ -534,7 +573,6 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = {
};
QUEUE_RW_ENTRY(queue_nonrot, "rotational");
-QUEUE_RW_ENTRY(queue_iostats, "iostats");
QUEUE_RW_ENTRY(queue_random, "add_random");
QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
@@ -344,6 +344,7 @@ static inline bool blk_do_io_stat(struct request *rq)
}
void update_io_ticks(struct block_device *part, unsigned long now, bool end);
+unsigned int part_in_flight(struct block_device *part);
static inline void req_set_nomerge(struct request_queue *q, struct request *req)
{
@@ -118,7 +118,7 @@ static void part_stat_read_all(struct block_device *part,
}
}
-static unsigned int part_in_flight(struct block_device *part)
+unsigned int part_in_flight(struct block_device *part)
{
unsigned int inflight = 0;
int cpu;
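
A note on part_in_flight(), which the first hunk now calls: requests may complete on a different CPU than the one that started them, so an individual CPU's in_flight counter can legitimately go negative and only the sum across CPUs is meaningful. A hedged userspace model (NR_CPUS and the clamp against transiently negative sums are illustrative assumptions, since the function body is unchanged and not shown here):

#include <stdio.h>

#define NR_CPUS 4			/* illustrative */

static int in_flight[NR_CPUS][2];	/* [cpu][op_is_write] */

static unsigned int part_in_flight_model(void)
{
	int sum = 0;

	for (int cpu = 0; cpu < NR_CPUS; cpu++)
		sum += in_flight[cpu][0] + in_flight[cpu][1];
	return sum < 0 ? 0 : sum;	/* clamp a transiently negative sum */
}

int main(void)
{
	in_flight[0][1]++;	/* write started on CPU0 ... */
	in_flight[2][1]--;	/* ... and completed on CPU2 */
	in_flight[1][0]++;	/* read still in flight on CPU1 */
	printf("in flight: %u\n", part_in_flight_model());	/* prints 1 */
	return 0;
}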
@@ -44,6 +44,8 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_QUIET ((__force req_flags_t)(1 << 11))
/* account into disk and partition IO statistics */
#define RQF_IO_STAT ((__force req_flags_t)(1 << 13))
+/* account into disk and partition IO statistics precisely */
+#define RQF_PRECISE_IO_STAT ((__force req_flags_t)(1 << 14))
/* runtime pm request */
#define RQF_PM ((__force req_flags_t)(1 << 15))
/* on IO scheduler merge hash */
@@ -517,6 +517,7 @@ struct request_queue {
#define QUEUE_FLAG_NONROT 6 /* non-rotational device (SSD) */
#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
#define QUEUE_FLAG_IO_STAT 7 /* do disk/partitions IO accounting */
+#define QUEUE_FLAG_PRECISE_IO_STAT 8 /* do disk/partitions IO accounting precisely */
#define QUEUE_FLAG_NOXMERGES 9 /* No extended merges */
#define QUEUE_FLAG_ADD_RANDOM 10 /* Contributes to random pool */
#define QUEUE_FLAG_SYNCHRONOUS 11 /* always completes in submit context */
@@ -557,6 +558,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_stable_writes(q) \
test_bit(QUEUE_FLAG_STABLE_WRITES, &(q)->queue_flags)
#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
+#define blk_queue_precise_io_stat(q) \
+ test_bit(QUEUE_FLAG_PRECISE_IO_STAT, &(q)->queue_flags)
#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
#define blk_queue_zone_resetall(q) \
test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags)