@@ -88,11 +88,19 @@ void blk_abort_request(struct request *req)
}
EXPORT_SYMBOL_GPL(blk_abort_request);
+/*
+ * Just a rough estimate, we don't care about specific values for timeouts.
+ */
+static inline unsigned long blk_round_jiffies(unsigned long j)
+{
+ return (j + CONFIG_HZ_ROUGH_MASK) + 1;
+}
+
unsigned long blk_rq_timeout(unsigned long timeout)
{
unsigned long maxt;
- maxt = round_jiffies_up(jiffies + BLK_MAX_TIMEOUT);
+ maxt = blk_round_jiffies(jiffies + BLK_MAX_TIMEOUT);
if (time_after(timeout, maxt))
timeout = maxt;
@@ -129,7 +137,7 @@ void blk_add_timer(struct request *req)
* than an existing one, modify the timer. Round up to next nearest
* second.
*/
- expiry = blk_rq_timeout(round_jiffies_up(expiry));
+ expiry = blk_rq_timeout(blk_round_jiffies(expiry));
if (!timer_pending(&q->timeout) ||
time_before(expiry, q->timeout.expires)) {
@@ -55,5 +55,11 @@ config HZ
default 300 if HZ_300
default 1000 if HZ_1000
+config HZ_ROUGH_MASK
+ int
+ default 127 if HZ_100
+ default 255 if HZ_250 || HZ_300
+ default 1023 if HZ_1000
+
config SCHED_HRTICK
def_bool HIGH_RES_TIMERS
In doing high IOPS testing, blk-mq is generally pretty well optimized. There are a few things that stuck out as using more CPU than what is really warranted, and one thing is the round_jiffies_up() that we do twice for each request. That accounts for about 0.8% of the CPU in my testing. We can make this cheaper by avoiding an integer division, by just adding a rough HZ mask that we can AND with instead. The timeouts are only on a second granularity already, we don't have to be that accurate here and this patch barely changes that. All we care about is nice grouping. Signed-off-by: Jens Axboe <axboe@kernel.dk> ---