@@ -852,6 +852,10 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, struct bio *bio);
int blk_init_allocated_queue(struct request_queue *q)
{
+ q->stats = blk_alloc_queue_stats();
+ if (!q->stats)
+ return -ENOMEM;
+
q->fq = blk_alloc_flush_queue(q, NUMA_NO_NODE, q->cmd_size);
if (!q->fq)
return -ENOMEM;
@@ -2692,7 +2696,7 @@ void blk_finish_request(struct request *req, int error)
struct request_queue *q = req->q;
if (req->rq_flags & RQF_STATS)
- blk_stat_add(&q->rq_stats[rq_data_dir(req)], req);
+ blk_stat_add(req);
if (req->rq_flags & RQF_QUEUED)
blk_queue_end_tag(q, req);
@@ -39,6 +39,10 @@
static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);
+static void blk_mq_poll_stats_start(struct request_queue *q);
+static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb,
+ struct blk_stats *stats);
+
/*
* Check if any of the ctx's have pending work in this hardware queue
*/
@@ -432,15 +436,8 @@ static void blk_mq_ipi_complete_request(struct request *rq)
static void blk_mq_stat_add(struct request *rq)
{
if (rq->rq_flags & RQF_STATS) {
- /*
- * We could rq->mq_ctx here, but there's less of a risk
- * of races if we have the completion event add the stats
- * to the local software queue.
- */
- struct blk_mq_ctx *ctx;
-
- ctx = __blk_mq_get_ctx(rq->q, raw_smp_processor_id());
- blk_stat_add(&ctx->stat[rq_data_dir(rq)], rq);
+ blk_mq_poll_stats_start(rq->q);
+ blk_stat_add(rq);
}
}
@@ -2039,8 +2036,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
spin_lock_init(&__ctx->lock);
INIT_LIST_HEAD(&__ctx->rq_list);
__ctx->queue = q;
- blk_stat_init(&__ctx->stat[BLK_STAT_READ]);
- blk_stat_init(&__ctx->stat[BLK_STAT_WRITE]);
/* If the cpu isn't online, the cpu is mapped to first hctx */
if (!cpu_online(i))
@@ -2338,6 +2333,14 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
/* mark the queue as mq asap */
q->mq_ops = set->ops;
+ q->stats = blk_alloc_queue_stats();
+ if (!q->stats)
+ goto err_exit;
+
+ q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn, q);
+ if (!q->poll_cb)
+ goto err_exit;
+
q->queue_ctx = alloc_percpu(struct blk_mq_ctx);
if (!q->queue_ctx)
goto err_exit;
@@ -2739,28 +2742,54 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues)
}
EXPORT_SYMBOL_GPL(blk_mq_update_nr_hw_queues);
+/* Enable polling stats and return whether they were already enabled. */
+static bool blk_poll_stats_enable(struct request_queue *q)
+{
+ if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
+ test_and_set_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
+ return true;
+ blk_stat_add_callback(q, q->poll_cb);
+ return false;
+}
+
+static void blk_mq_poll_stats_start(struct request_queue *q)
+{
+ /*
+ * We don't arm the callback if polling stats are not enabled or the
+ * callback has already been armed.
+ */
+ if (!test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags) ||
+ timer_pending(&q->poll_cb->timer))
+ return;
+
+ blk_stat_arm_callback(q->poll_cb, jiffies + msecs_to_jiffies(100));
+}
+
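+/*
+ * A polling stats window has expired: save any direction that actually
+ * collected samples into q->poll_stats for blk_mq_poll_nsecs() to consume.
+ */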
+static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb,
+ struct blk_stats *stats)
+{
+ struct request_queue *q = cb->data;
+
+ if (stats->read.nr_samples)
+ q->poll_stats.read = stats->read;
+ if (stats->write.nr_samples)
+ q->poll_stats.write = stats->write;
+}
+
static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
struct blk_mq_hw_ctx *hctx,
struct request *rq)
{
- struct blk_stats stats;
unsigned long ret = 0;
/*
* If stats collection isn't on, don't sleep but turn it on for
* future users
*/
- if (!blk_stat_enable(q))
+ if (!blk_poll_stats_enable(q))
return 0;
/*
- * We don't have to do this once per IO, should optimize this
- * to just use the current window of stats until it changes
- */
- memset(&stats, 0, sizeof(stats));
- blk_hctx_stat_get(hctx, &stats);
-
- /*
* As an optimistic guess, use half of the mean service time
* for this type of request. We can (and should) make this smarter.
* For instance, if the completion latencies are tight, we can
@@ -2768,10 +2797,10 @@ static unsigned long blk_mq_poll_nsecs(struct request_queue *q,
* important on devices where the completion latencies are longer
* than ~10 usec.
*/
- if (req_op(rq) == REQ_OP_READ && stats.read.nr_samples)
- ret = (stats.read.mean + 1) / 2;
- else if (req_op(rq) == REQ_OP_WRITE && stats.write.nr_samples)
- ret = (stats.write.mean + 1) / 2;
+ if (req_op(rq) == REQ_OP_READ && q->poll_stats.read.nr_samples)
+ ret = (q->poll_stats.read.mean + 1) / 2;
+ else if (req_op(rq) == REQ_OP_WRITE && q->poll_stats.write.nr_samples)
+ ret = (q->poll_stats.write.mean + 1) / 2;
return ret;
}
@@ -20,7 +20,6 @@ struct blk_mq_ctx {
/* incremented at completion time */
unsigned long ____cacheline_aligned_in_smp rq_completed[2];
- struct blk_rq_stat stat[2];
struct request_queue *queue;
struct kobject kobj;
@@ -4,11 +4,24 @@
* Copyright (C) 2016 Jens Axboe
*/
#include <linux/kernel.h>
+#include <linux/rculist.h>
#include <linux/blk-mq.h>
#include "blk-stat.h"
#include "blk-mq.h"
+struct blk_queue_stats {
+ struct list_head callbacks;
+ spinlock_t lock;
+};
+
+static void blk_stat_init(struct blk_rq_stat *stat)
+{
+ stat->min = -1ULL;
+ stat->max = stat->nr_samples = stat->mean = 0;
+ stat->batch = stat->nr_batch = 0;
+}
+
static void blk_stat_flush_batch(struct blk_rq_stat *stat)
{
const s32 nr_batch = READ_ONCE(stat->nr_batch);
@@ -48,209 +61,164 @@ static void blk_stat_sum(struct blk_rq_stat *dst, struct blk_rq_stat *src)
dst->nr_samples += src->nr_samples;
}
-static void blk_mq_stat_get(struct request_queue *q, struct blk_stats *stats)
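+/*
+ * Fold one sample into the running batch, flushing first if the sum would
+ * overflow or the batch is full.
+ */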
+static void __blk_stat_add(struct blk_rq_stat *stat, u64 value)
{
- struct blk_mq_hw_ctx *hctx;
- struct blk_mq_ctx *ctx;
- uint64_t latest = 0;
- int i, j, nr;
-
- blk_stat_init(&stats->read);
- blk_stat_init(&stats->write);
-
- nr = 0;
- do {
- uint64_t newest = 0;
-
- queue_for_each_hw_ctx(q, hctx, i) {
- hctx_for_each_ctx(hctx, ctx, j) {
- blk_stat_flush_batch(&ctx->stat[BLK_STAT_READ]);
- blk_stat_flush_batch(&ctx->stat[BLK_STAT_WRITE]);
-
- if (!ctx->stat[BLK_STAT_READ].nr_samples &&
- !ctx->stat[BLK_STAT_WRITE].nr_samples)
- continue;
- if (ctx->stat[BLK_STAT_READ].time > newest)
- newest = ctx->stat[BLK_STAT_READ].time;
- if (ctx->stat[BLK_STAT_WRITE].time > newest)
- newest = ctx->stat[BLK_STAT_WRITE].time;
- }
- }
+ stat->min = min(stat->min, value);
+ stat->max = max(stat->max, value);
- /*
- * No samples
- */
- if (!newest)
- break;
-
- if (newest > latest)
- latest = newest;
-
- queue_for_each_hw_ctx(q, hctx, i) {
- hctx_for_each_ctx(hctx, ctx, j) {
- if (ctx->stat[BLK_STAT_READ].time == newest) {
- blk_stat_sum(&stats->read,
- &ctx->stat[BLK_STAT_READ]);
- nr++;
- }
- if (ctx->stat[BLK_STAT_WRITE].time == newest) {
- blk_stat_sum(&stats->write,
- &ctx->stat[BLK_STAT_WRITE]);
- nr++;
- }
- }
- }
- /*
- * If we race on finding an entry, just loop back again.
- * Should be very rare.
- */
- } while (!nr);
+ if (stat->batch + value < stat->batch ||
+ stat->nr_batch + 1 == BLK_RQ_STAT_BATCH)
+ blk_stat_flush_batch(stat);
- stats->read.time = stats->write.time = latest;
+ stat->batch += value;
+ stat->nr_batch++;
}
-void blk_queue_stat_get(struct request_queue *q, struct blk_stats *stats)
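+/* Return the calling CPU's stat bucket for the given data direction. */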
+static struct blk_rq_stat *blk_stat_ddir(struct blk_stats __percpu *stats,
+ int ddir)
{
- if (q->mq_ops)
- blk_mq_stat_get(q, stats);
- else {
- blk_stat_flush_batch(&q->rq_stats[BLK_STAT_READ]);
- blk_stat_flush_batch(&q->rq_stats[BLK_STAT_WRITE]);
- memcpy(&stats->read, &q->rq_stats[BLK_STAT_READ],
- sizeof(struct blk_rq_stat));
- memcpy(&stats->write, &q->rq_stats[BLK_STAT_WRITE],
- sizeof(struct blk_rq_stat));
- }
+ if (ddir == READ)
+ return &this_cpu_ptr(stats)->read;
+ else
+ return &this_cpu_ptr(stats)->write;
}
-void blk_hctx_stat_get(struct blk_mq_hw_ctx *hctx, struct blk_stats *stats)
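+/*
+ * Record the completion latency of a request with every callback whose
+ * collection window is currently armed.
+ */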
+void blk_stat_add(struct request *rq)
{
- struct blk_mq_ctx *ctx;
- unsigned int i, nr;
-
- nr = 0;
- do {
- uint64_t newest = 0;
-
- hctx_for_each_ctx(hctx, ctx, i) {
- blk_stat_flush_batch(&ctx->stat[BLK_STAT_READ]);
- blk_stat_flush_batch(&ctx->stat[BLK_STAT_WRITE]);
+ struct request_queue *q = rq->q;
+ struct blk_stat_callback *cb;
+ struct blk_rq_stat *stat;
+ int ddir = rq_data_dir(rq);
+ s64 now, value;
- if (!ctx->stat[BLK_STAT_READ].nr_samples &&
- !ctx->stat[BLK_STAT_WRITE].nr_samples)
- continue;
+ now = __blk_stat_time(ktime_to_ns(ktime_get()));
+ if (now < blk_stat_time(&rq->issue_stat))
+ return;
- if (ctx->stat[BLK_STAT_READ].time > newest)
- newest = ctx->stat[BLK_STAT_READ].time;
- if (ctx->stat[BLK_STAT_WRITE].time > newest)
- newest = ctx->stat[BLK_STAT_WRITE].time;
- }
+ value = now - blk_stat_time(&rq->issue_stat);
- if (!newest)
- break;
-
- hctx_for_each_ctx(hctx, ctx, i) {
- if (ctx->stat[BLK_STAT_READ].time == newest) {
- blk_stat_sum(&stats->read,
- &ctx->stat[BLK_STAT_READ]);
- nr++;
- }
- if (ctx->stat[BLK_STAT_WRITE].time == newest) {
- blk_stat_sum(&stats->write,
- &ctx->stat[BLK_STAT_WRITE]);
- nr++;
- }
+ rcu_read_lock();
+ list_for_each_entry_rcu(cb, &q->stats->callbacks, list) {
+ if (timer_pending(&cb->timer)) {
+ stat = blk_stat_ddir(cb->stats, ddir);
+ __blk_stat_add(stat, value);
}
- /*
- * If we race on finding an entry, just loop back again.
- * Should be very rare, as the window is only updated
- * occasionally
- */
- } while (!nr);
+ }
+ rcu_read_unlock();
}
-static void __blk_stat_init(struct blk_rq_stat *stat, s64 time_now)
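+/*
+ * A collection window has expired: sum the per-cpu buckets into one result,
+ * reset them, and hand the aggregate to the callback function.
+ */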
+static void blk_stat_timer_fn(unsigned long data)
{
- stat->min = -1ULL;
- stat->max = stat->nr_samples = stat->mean = 0;
- stat->batch = stat->nr_batch = 0;
- stat->time = time_now & BLK_STAT_NSEC_MASK;
-}
+ struct blk_stat_callback *cb = (void *)data;
+ struct blk_stats stats;
+ int i;
-void blk_stat_init(struct blk_rq_stat *stat)
-{
- __blk_stat_init(stat, ktime_to_ns(ktime_get()));
-}
+ blk_stat_init(&stats.read);
+ blk_stat_init(&stats.write);
-static bool __blk_stat_is_current(struct blk_rq_stat *stat, s64 now)
-{
- return (now & BLK_STAT_NSEC_MASK) == (stat->time & BLK_STAT_NSEC_MASK);
+ for_each_online_cpu(i) {
+ struct blk_stats *cpu_stats;
+
+ cpu_stats = per_cpu_ptr(cb->stats, i);
+ blk_stat_sum(&stats.read, &cpu_stats->read);
+ blk_stat_sum(&stats.write, &cpu_stats->write);
+ blk_stat_init(&cpu_stats->read);
+ blk_stat_init(&cpu_stats->write);
+ }
+
+ cb->fn(cb, &stats);
}
-bool blk_stat_is_current(struct blk_rq_stat *stat)
+struct blk_stat_callback *
+blk_stat_alloc_callback(void (*fn)(struct blk_stat_callback *, struct blk_stats *),
+ void *data)
{
- return __blk_stat_is_current(stat, ktime_to_ns(ktime_get()));
+ struct blk_stat_callback *cb;
+
+ cb = kmalloc(sizeof(*cb), GFP_KERNEL);
+ if (!cb)
+ return NULL;
+
+ cb->stats = alloc_percpu(struct blk_stats);
+ if (!cb->stats) {
+ kfree(cb);
+ return NULL;
+ }
+
+ cb->fn = fn;
+ cb->data = data;
+ setup_timer(&cb->timer, blk_stat_timer_fn, (unsigned long)cb);
+
+ return cb;
}
+EXPORT_SYMBOL_GPL(blk_stat_alloc_callback);
-void blk_stat_add(struct blk_rq_stat *stat, struct request *rq)
+void blk_stat_add_callback(struct request_queue *q,
+ struct blk_stat_callback *cb)
{
- s64 now, value;
+ int i;
- now = __blk_stat_time(ktime_to_ns(ktime_get()));
- if (now < blk_stat_time(&rq->issue_stat))
- return;
+ for_each_possible_cpu(i) {
+ struct blk_stats *stats = per_cpu_ptr(cb->stats, i);
- if (!__blk_stat_is_current(stat, now))
- __blk_stat_init(stat, now);
+ blk_stat_init(&stats->read);
+ blk_stat_init(&stats->write);
+ }
- value = now - blk_stat_time(&rq->issue_stat);
- if (value > stat->max)
- stat->max = value;
- if (value < stat->min)
- stat->min = value;
+ spin_lock(&q->stats->lock);
+ list_add_tail_rcu(&cb->list, &q->stats->callbacks);
+ set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+ spin_unlock(&q->stats->lock);
+}
+EXPORT_SYMBOL_GPL(blk_stat_add_callback);
- if (stat->batch + value < stat->batch ||
- stat->nr_batch + 1 == BLK_RQ_STAT_BATCH)
- blk_stat_flush_batch(stat);
+void blk_stat_remove_callback(struct request_queue *q,
+ struct blk_stat_callback *cb)
+{
+ spin_lock(&q->stats->lock);
+ list_del_rcu(&cb->list);
+ if (list_empty(&q->stats->callbacks))
+ clear_bit(QUEUE_FLAG_STATS, &q->queue_flags);
+ spin_unlock(&q->stats->lock);
- stat->batch += value;
- stat->nr_batch++;
+ del_timer_sync(&cb->timer);
}
+EXPORT_SYMBOL_GPL(blk_stat_remove_callback);
-void blk_stat_clear(struct request_queue *q)
+static void blk_stat_free_callback_rcu(struct rcu_head *head)
{
- if (q->mq_ops) {
- struct blk_mq_hw_ctx *hctx;
- struct blk_mq_ctx *ctx;
- int i, j;
-
- queue_for_each_hw_ctx(q, hctx, i) {
- hctx_for_each_ctx(hctx, ctx, j) {
- blk_stat_init(&ctx->stat[BLK_STAT_READ]);
- blk_stat_init(&ctx->stat[BLK_STAT_WRITE]);
- }
- }
- } else {
- blk_stat_init(&q->rq_stats[BLK_STAT_READ]);
- blk_stat_init(&q->rq_stats[BLK_STAT_WRITE]);
- }
+ struct blk_stat_callback *cb;
+
+ cb = container_of(head, struct blk_stat_callback, rcu);
+ free_percpu(cb->stats);
}
-void blk_stat_set_issue_time(struct blk_issue_stat *stat)
+void blk_stat_free_callback(struct blk_stat_callback *cb)
{
- stat->time = (stat->time & BLK_STAT_MASK) |
- (ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK);
+ call_rcu(&cb->rcu, blk_stat_free_callback_rcu);
}
+EXPORT_SYMBOL_GPL(blk_stat_free_callback);
-/*
- * Enable stat tracking, return whether it was enabled
- */
-bool blk_stat_enable(struct request_queue *q)
+struct blk_queue_stats *blk_alloc_queue_stats(void)
{
- if (!test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
- set_bit(QUEUE_FLAG_STATS, &q->queue_flags);
- return false;
- }
+ struct blk_queue_stats *stats;
+
+ stats = kmalloc(sizeof(*stats), GFP_KERNEL);
+ if (!stats)
+ return NULL;
+
+ INIT_LIST_HEAD(&stats->callbacks);
+ spin_lock_init(&stats->lock);
+
+ return stats;
+}
+
+void blk_free_queue_stats(struct blk_queue_stats *stats)
+{
+ if (!stats)
+ return;
+
+ WARN_ON(!list_empty(&stats->callbacks));
- return true;
+ kfree(stats);
}
@@ -1,11 +1,11 @@
#ifndef BLK_STAT_H
#define BLK_STAT_H
-/*
- * ~0.13s window as a power-of-2 (2^27 nsecs)
- */
-#define BLK_STAT_NSEC 134217728ULL
-#define BLK_STAT_NSEC_MASK ~(BLK_STAT_NSEC - 1)
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include <linux/ktime.h>
+#include <linux/rcupdate.h>
+#include <linux/timer.h>
/*
* Upper 3 bits can be used elsewhere
@@ -15,19 +15,27 @@
#define BLK_STAT_TIME_MASK ((1ULL << BLK_STAT_SHIFT) - 1)
#define BLK_STAT_MASK ~BLK_STAT_TIME_MASK
-enum {
- BLK_STAT_READ = 0,
- BLK_STAT_WRITE,
+#define BLK_RQ_STAT_BATCH 64
+
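+/**
+ * struct blk_stat_callback - Block statistics callback.
+ * @list: Entry in the request queue's list of callbacks.
+ * @timer: Timer that closes the current collection window.
+ * @stats: Per-cpu buckets filled by blk_stat_add() while the window is open.
+ * @fn: Function called with the aggregated window when the timer expires.
+ * @data: Private data for @fn.
+ * @rcu: RCU head used to defer freeing until readers are done.
+ */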
+struct blk_stat_callback {
+ struct list_head list;
+ struct timer_list timer;
+ struct blk_stats __percpu *stats;
+ void (*fn)(struct blk_stat_callback *, struct blk_stats *);
+ void *data;
+ struct rcu_head rcu;
};
-void blk_stat_add(struct blk_rq_stat *, struct request *);
-void blk_hctx_stat_get(struct blk_mq_hw_ctx *, struct blk_stats *);
-void blk_queue_stat_get(struct request_queue *, struct blk_stats *);
-void blk_stat_clear(struct request_queue *);
-void blk_stat_init(struct blk_rq_stat *);
-bool blk_stat_is_current(struct blk_rq_stat *);
-void blk_stat_set_issue_time(struct blk_issue_stat *);
-bool blk_stat_enable(struct request_queue *);
+struct blk_queue_stats *blk_alloc_queue_stats(void);
+void blk_free_queue_stats(struct blk_queue_stats *);
+
+void blk_stat_add(struct request *);
+
+static inline void blk_stat_set_issue_time(struct blk_issue_stat *stat)
+{
+ stat->time = ((stat->time & BLK_STAT_MASK) |
+ (ktime_to_ns(ktime_get()) & BLK_STAT_TIME_MASK));
+}
static inline u64 __blk_stat_time(u64 time)
{
@@ -39,4 +47,62 @@ static inline u64 blk_stat_time(struct blk_issue_stat *stat)
return __blk_stat_time(stat->time);
}
+/**
+ * blk_stat_alloc_callback() - Allocate a block statistics callback.
+ * @fn: Callback function.
+ * @data: Value for the @data field of the &struct blk_stat_callback.
+ *
+ * Return: &struct blk_stat_callback on success or NULL on ENOMEM.
+ */
+struct blk_stat_callback *
+blk_stat_alloc_callback(void (*fn)(struct blk_stat_callback *, struct blk_stats *),
+ void *data);
+
+/**
+ * blk_stat_add_callback() - Add a block statistics callback to be run on a
+ * request queue.
+ * @q: The request queue.
+ * @cb: The callback.
+ *
+ * Note that a single &struct blk_stat_callback can only be added to a single
+ * &struct request_queue.
+ */
+void blk_stat_add_callback(struct request_queue *q,
+ struct blk_stat_callback *cb);
+
+/**
+ * blk_stat_remove_callback() - Remove a block statistics callback from a
+ * request queue.
+ * @q: The request queue.
+ * @cb: The callback.
+ *
+ * When this returns, the callback is not running on any CPUs and will not be
+ * called again unless re-added.
+ */
+void blk_stat_remove_callback(struct request_queue *q,
+ struct blk_stat_callback *cb);
+
+/**
+ * blk_stat_free_callback() - Free a block statistics callback.
+ * @cb: The callback.
+ *
+ * @cb may be NULL, in which case this does nothing. If it is not NULL, @cb must
+ * not be associated with a request queue. I.e., if it was previously added with
+ * blk_stat_add_callback(), it must also have been removed since then with
+ * blk_stat_remove_callback().
+ */
+void blk_stat_free_callback(struct blk_stat_callback *cb);
+
+/**
+ * blk_stat_arm_callback() - Set a block statistics callback to fire at a given
+ * time.
+ * @cb: The callback.
+ * @expires: The time (in jiffies) at which to run.
+ */
+static inline void blk_stat_arm_callback(struct blk_stat_callback *cb,
+ unsigned long expires)
+{
+ mod_timer(&cb->timer, expires);
+}
+
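+/*
+ * Typical lifecycle (illustrative sketch; my_fn and my_data are placeholders,
+ * not part of this interface):
+ *
+ *     cb = blk_stat_alloc_callback(my_fn, my_data);
+ *     blk_stat_add_callback(q, cb);
+ *     blk_stat_arm_callback(cb, jiffies + msecs_to_jiffies(100));
+ *     ... my_fn() runs with the aggregated window when the timer expires ...
+ *     blk_stat_remove_callback(q, cb);
+ *     blk_stat_free_callback(cb);
+ */
+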
#endif
@@ -785,6 +785,9 @@ static void blk_release_queue(struct kobject *kobj)
container_of(kobj, struct request_queue, kobj);
wbt_exit(q);
+ if (test_bit(QUEUE_FLAG_POLL_STATS, &q->queue_flags))
+ blk_stat_remove_callback(q, q->poll_cb);
+ blk_stat_free_callback(q->poll_cb);
bdi_put(q->backing_dev_info);
blkcg_exit_queue(q);
@@ -793,6 +796,8 @@ static void blk_release_queue(struct kobject *kobj)
elevator_exit(q->elevator);
}
+ blk_free_queue_stats(q->stats);
+
blk_exit_rl(&q->root_rl);
if (q->queue_tags)
@@ -277,7 +277,7 @@ enum {
LAT_EXCEEDED,
};
-static int __latency_exceeded(struct rq_wb *rwb, struct blk_stats *stats)
+static int latency_exceeded(struct rq_wb *rwb, struct blk_stats *stats)
{
struct backing_dev_info *bdi = rwb->queue->backing_dev_info;
u64 thislat;
@@ -308,8 +308,8 @@ static int __latency_exceeded(struct rq_wb *rwb, struct blk_stats *stats)
* waited or still has writes in flights, consider us doing
* just writes as well.
*/
- if ((stats->write.nr_samples && blk_stat_is_current(&stats->read)) ||
- wb_recent_wait(rwb) || wbt_inflight(rwb))
+ if (stats->write.nr_samples || wb_recent_wait(rwb) ||
+ wbt_inflight(rwb))
return LAT_UNKNOWN_WRITES;
return LAT_UNKNOWN;
}
@@ -329,14 +329,6 @@ static int __latency_exceeded(struct rq_wb *rwb, struct blk_stats *stats)
return LAT_OK;
}
-static int latency_exceeded(struct rq_wb *rwb)
-{
- struct blk_stats stats;
-
- blk_queue_stat_get(rwb->queue, &stats);
- return __latency_exceeded(rwb, &stats);
-}
-
static void rwb_trace_step(struct rq_wb *rwb, const char *msg)
{
struct backing_dev_info *bdi = rwb->queue->backing_dev_info;
@@ -355,7 +347,6 @@ static void scale_up(struct rq_wb *rwb)
rwb->scale_step--;
rwb->unknown_cnt = 0;
- blk_stat_clear(rwb->queue);
rwb->scaled_max = calc_wb_limits(rwb);
@@ -385,7 +376,6 @@ static void scale_down(struct rq_wb *rwb, bool hard_throttle)
rwb->scaled_max = false;
rwb->unknown_cnt = 0;
- blk_stat_clear(rwb->queue);
calc_wb_limits(rwb);
rwb_trace_step(rwb, "step down");
}
@@ -412,16 +402,16 @@ static void rwb_arm_timer(struct rq_wb *rwb)
}
expires = jiffies + nsecs_to_jiffies(rwb->cur_win_nsec);
- mod_timer(&rwb->window_timer, expires);
+ blk_stat_arm_callback(rwb->cb, expires);
}
-static void wb_timer_fn(unsigned long data)
+static void wb_timer_fn(struct blk_stat_callback *cb, struct blk_stats *stats)
{
- struct rq_wb *rwb = (struct rq_wb *) data;
+ struct rq_wb *rwb = cb->data;
unsigned int inflight = wbt_inflight(rwb);
int status;
- status = latency_exceeded(rwb);
+ status = latency_exceeded(rwb, stats);
trace_wbt_timer(rwb->queue->backing_dev_info, status, rwb->scale_step,
inflight);
@@ -614,7 +604,7 @@ enum wbt_flags wbt_wait(struct rq_wb *rwb, struct bio *bio, spinlock_t *lock)
__wbt_wait(rwb, bio->bi_opf, lock);
- if (!timer_pending(&rwb->window_timer))
+ if (!timer_pending(&rwb->cb->timer))
rwb_arm_timer(rwb);
if (current_is_kswapd())
@@ -675,7 +665,7 @@ void wbt_disable_default(struct request_queue *q)
struct rq_wb *rwb = q->rq_wb;
if (rwb && rwb->enable_state == WBT_STATE_ON_DEFAULT) {
- del_timer_sync(&rwb->window_timer);
+ blk_stat_remove_callback(q, rwb->cb);
rwb->win_nsec = rwb->min_lat_nsec = 0;
wbt_update_limits(rwb);
}
@@ -699,24 +689,23 @@ int wbt_init(struct request_queue *q)
struct rq_wb *rwb;
int i;
- /*
- * For now, we depend on the stats window being larger than
- * our monitoring window. Ensure that this isn't inadvertently
- * violated.
- */
- BUILD_BUG_ON(RWB_WINDOW_NSEC > BLK_STAT_NSEC);
BUILD_BUG_ON(WBT_NR_BITS > BLK_STAT_RES_BITS);
rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
if (!rwb)
return -ENOMEM;
+ rwb->cb = blk_stat_alloc_callback(wb_timer_fn, rwb);
+ if (!rwb->cb) {
+ kfree(rwb);
+ return -ENOMEM;
+ }
+
for (i = 0; i < WBT_NUM_RWQ; i++) {
atomic_set(&rwb->rq_wait[i].inflight, 0);
init_waitqueue_head(&rwb->rq_wait[i].wait);
}
- setup_timer(&rwb->window_timer, wb_timer_fn, (unsigned long) rwb);
rwb->wc = 1;
rwb->queue_depth = RWB_DEF_DEPTH;
rwb->last_comp = rwb->last_issue = jiffies;
@@ -726,10 +715,10 @@ int wbt_init(struct request_queue *q)
wbt_update_limits(rwb);
/*
- * Assign rwb, and turn on stats tracking for this queue
+ * Assign rwb and add the stats callback.
*/
q->rq_wb = rwb;
- blk_stat_enable(q);
+ blk_stat_add_callback(q, rwb->cb);
rwb->min_lat_nsec = wbt_default_latency_nsec(q);
@@ -744,7 +733,8 @@ void wbt_exit(struct request_queue *q)
struct rq_wb *rwb = q->rq_wb;
if (rwb) {
- del_timer_sync(&rwb->window_timer);
+ blk_stat_remove_callback(q, rwb->cb);
+ blk_stat_free_callback(rwb->cb);
q->rq_wb = NULL;
kfree(rwb);
}
@@ -81,7 +81,7 @@ struct rq_wb {
u64 win_nsec; /* default window size */
u64 cur_win_nsec; /* current window size */
- struct timer_list window_timer;
+ struct blk_stat_callback *cb;
s64 sync_issue;
void *sync_cookie;
@@ -287,8 +287,6 @@ struct blk_issue_stat {
u64 time;
};
-#define BLK_RQ_STAT_BATCH 64
-
struct blk_rq_stat {
s64 mean;
u64 min;
@@ -296,7 +294,6 @@ struct blk_rq_stat {
s32 nr_samples;
s32 nr_batch;
u64 batch;
- s64 time;
};
struct blk_stats {
@@ -40,6 +40,8 @@ struct blkcg_gq;
struct blk_flush_queue;
struct pr_ops;
struct rq_wb;
+struct blk_queue_stats;
+struct blk_stat_callback;
#define BLKDEV_MIN_RQ 4
#define BLKDEV_MAX_RQ 128 /* Default maximum */
@@ -388,6 +390,7 @@ struct request_queue {
int nr_rqs[2]; /* # allocated [a]sync rqs */
int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */
+ struct blk_queue_stats *stats;
struct rq_wb *rq_wb;
/*
@@ -505,8 +508,6 @@ struct request_queue {
unsigned int nr_sorted;
unsigned int in_flight[2];
- struct blk_rq_stat rq_stats[2];
-
/*
* Number of active block driver functions for which blk_drain_queue()
* must wait. Must be incremented around functions that unlock the
@@ -516,6 +517,10 @@ struct request_queue {
unsigned int rq_timeout;
int poll_nsec;
+
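+ /* hybrid polling: stats callback and the latest window it captured */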
+ struct blk_stat_callback *poll_cb;
+ struct blk_stats poll_stats;
+
struct timer_list timeout;
struct work_struct timeout_work;
struct list_head timeout_list;
@@ -611,6 +616,7 @@ struct request_queue {
#define QUEUE_FLAG_DAX 26 /* device supports DAX */
#define QUEUE_FLAG_STATS 27 /* track rq completion times */
#define QUEUE_FLAG_RESTART 28 /* queue needs restart at completion */
+#define QUEUE_FLAG_POLL_STATS 29 /* collecting stats for hybrid polling */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \