Message ID | 20180731203647.19864-1-dennisszhou@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | block: make iolatency avg_lat exponentially decay | expand |
Hi Dennis, this generally looks good to me. Just two small nit picks: On Tue, Jul 31, 2018 at 01:36:47PM -0700, Dennis Zhou wrote: > @@ -135,6 +135,24 @@ struct iolatency_grp { > struct child_latency_info child_lat; > }; > > +#define BLKIOLATENCY_MIN_WIN_SIZE (100 * NSEC_PER_MSEC) > +#define BLKIOLATENCY_MAX_WIN_SIZE NSEC_PER_SEC > +/* > + * These are the constants used to fake the fixed-point moving average > + * calculation just like load average. The latency window is bucketed to > + * try to approximately calculate average latency for the last 1 minute. > + */ > +#define BLKIOLATENCY_NR_EXP_FACTORS 5 > +#define BLKIOLATENCY_EXP_BUCKET_SIZE (BLKIOLATENCY_MAX_WIN_SIZE / \ > + (BLKIOLATENCY_NR_EXP_FACTORS - 1)) > +static const u64 iolatency_exp_factors[BLKIOLATENCY_NR_EXP_FACTORS] = { > + 2045, // exp(1/600) - 600 samples > + 2039, // exp(1/240) - 240 samples > + 2031, // exp(1/120) - 120 samples > + 2023, // exp(1/80) - 80 samples > + 2014, // exp(1/60) - 60 samples Might be useful to drop the FIXED_1 name in a comment here. It says "fixed-point", and "load average", but since the numbers are directly in relationship to that constant, it'd be good to name it I think. > @@ -462,7 +480,7 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now) > struct child_latency_info *lat_info; > struct blk_rq_stat stat; > unsigned long flags; > - int cpu; > + int cpu, exp_idx; > > blk_rq_stat_init(&stat); > preempt_disable(); > @@ -480,11 +498,10 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now) > > lat_info = &parent->child_lat; > > - iolat->total_lat_avg = > - div64_u64((iolat->total_lat_avg * iolat->total_lat_nr) + > - stat.mean, iolat->total_lat_nr + 1); > - > - iolat->total_lat_nr++; > + exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1, > + iolat->cur_win_nsec / BLKIOLATENCY_EXP_BUCKET_SIZE); > + CALC_LOAD(iolat->total_lat_avg, iolatency_exp_factors[exp_idx], > + stat.mean); The load average keeps the running value in fixed point presentation to avoid rounding errors. I guess because this is IO time in ns, the values are so much higher than the FIXED_1 denominator (2048) that rounding errors are negligible, and we don't need to bother with it. Can you mention that in a comment, please?
Hi Johannes, On Tue, Jul 31, 2018 at 05:21:50PM -0400, Johannes Weiner wrote: > Hi Dennis, > > this generally looks good to me. Just two small nit picks: > > On Tue, Jul 31, 2018 at 01:36:47PM -0700, Dennis Zhou wrote: > > @@ -135,6 +135,24 @@ struct iolatency_grp { > > struct child_latency_info child_lat; > > }; > > > > +#define BLKIOLATENCY_MIN_WIN_SIZE (100 * NSEC_PER_MSEC) > > +#define BLKIOLATENCY_MAX_WIN_SIZE NSEC_PER_SEC > > +/* > > + * These are the constants used to fake the fixed-point moving average > > + * calculation just like load average. The latency window is bucketed to > > + * try to approximately calculate average latency for the last 1 minute. > > + */ > > +#define BLKIOLATENCY_NR_EXP_FACTORS 5 > > +#define BLKIOLATENCY_EXP_BUCKET_SIZE (BLKIOLATENCY_MAX_WIN_SIZE / \ > > + (BLKIOLATENCY_NR_EXP_FACTORS - 1)) > > +static const u64 iolatency_exp_factors[BLKIOLATENCY_NR_EXP_FACTORS] = { > > + 2045, // exp(1/600) - 600 samples > > + 2039, // exp(1/240) - 240 samples > > + 2031, // exp(1/120) - 120 samples > > + 2023, // exp(1/80) - 80 samples > > + 2014, // exp(1/60) - 60 samples > > Might be useful to drop the FIXED_1 name in a comment here. It says > "fixed-point", and "load average", but since the numbers are directly > in relationship to that constant, it'd be good to name it I think. > I've added a comment in v2 that points out FIXED_1 and mentions its value is 2048. I also explained a little more about the samples and binding the 1/exp window. > > + exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1, > > + iolat->cur_win_nsec / BLKIOLATENCY_EXP_BUCKET_SIZE); > > + CALC_LOAD(iolat->total_lat_avg, iolatency_exp_factors[exp_idx], > > + stat.mean); > > The load average keeps the running value in fixed point presentation > to avoid rounding errors. I guess because this is IO time in ns, the > values are so much higher than the FIXED_1 denominator (2048) that > rounding errors are negligible, and we don't need to bother with it. > > Can you mention that in a comment, please? I've added what you said here in a comment above this. Thanks, Dennis
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index bb59b2929e0d..1db3244eac05 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -69,6 +69,7 @@ #include <linux/module.h> #include <linux/timer.h> #include <linux/memcontrol.h> +#include <linux/sched/loadavg.h> #include <linux/sched/signal.h> #include <trace/events/block.h> #include "blk-rq-qos.h" @@ -127,7 +128,6 @@ struct iolatency_grp { /* total running average of our io latency. */ u64 total_lat_avg; - u64 total_lat_nr; /* Our current number of IO's for the last summation. */ u64 nr_samples; @@ -135,6 +135,24 @@ struct iolatency_grp { struct child_latency_info child_lat; }; +#define BLKIOLATENCY_MIN_WIN_SIZE (100 * NSEC_PER_MSEC) +#define BLKIOLATENCY_MAX_WIN_SIZE NSEC_PER_SEC +/* + * These are the constants used to fake the fixed-point moving average + * calculation just like load average. The latency window is bucketed to + * try to approximately calculate average latency for the last 1 minute. + */ +#define BLKIOLATENCY_NR_EXP_FACTORS 5 +#define BLKIOLATENCY_EXP_BUCKET_SIZE (BLKIOLATENCY_MAX_WIN_SIZE / \ + (BLKIOLATENCY_NR_EXP_FACTORS - 1)) +static const u64 iolatency_exp_factors[BLKIOLATENCY_NR_EXP_FACTORS] = { + 2045, // exp(1/600) - 600 samples + 2039, // exp(1/240) - 240 samples + 2031, // exp(1/120) - 120 samples + 2023, // exp(1/80) - 80 samples + 2014, // exp(1/60) - 60 samples +}; + static inline struct iolatency_grp *pd_to_lat(struct blkg_policy_data *pd) { return pd ? container_of(pd, struct iolatency_grp, pd) : NULL; @@ -462,7 +480,7 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now) struct child_latency_info *lat_info; struct blk_rq_stat stat; unsigned long flags; - int cpu; + int cpu, exp_idx; blk_rq_stat_init(&stat); preempt_disable(); @@ -480,11 +498,10 @@ static void iolatency_check_latencies(struct iolatency_grp *iolat, u64 now) lat_info = &parent->child_lat; - iolat->total_lat_avg = - div64_u64((iolat->total_lat_avg * iolat->total_lat_nr) + - stat.mean, iolat->total_lat_nr + 1); - - iolat->total_lat_nr++; + exp_idx = min_t(int, BLKIOLATENCY_NR_EXP_FACTORS - 1, + iolat->cur_win_nsec / BLKIOLATENCY_EXP_BUCKET_SIZE); + CALC_LOAD(iolat->total_lat_avg, iolatency_exp_factors[exp_idx], + stat.mean); /* Everything is ok and we don't need to adjust the scale. */ if (stat.mean <= iolat->min_lat_nsec && @@ -700,8 +717,9 @@ static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) u64 oldval = iolat->min_lat_nsec; iolat->min_lat_nsec = val; - iolat->cur_win_nsec = max_t(u64, val << 4, 100 * NSEC_PER_MSEC); - iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec, NSEC_PER_SEC); + iolat->cur_win_nsec = max_t(u64, val << 4, BLKIOLATENCY_MIN_WIN_SIZE); + iolat->cur_win_nsec = min_t(u64, iolat->cur_win_nsec, + BLKIOLATENCY_MAX_WIN_SIZE); if (!oldval && val) atomic_inc(&blkiolat->enabled); @@ -811,13 +829,14 @@ static size_t iolatency_pd_stat(struct blkg_policy_data *pd, char *buf, { struct iolatency_grp *iolat = pd_to_lat(pd); unsigned long long avg_lat = div64_u64(iolat->total_lat_avg, NSEC_PER_USEC); + unsigned long long cur_win = div64_u64(iolat->cur_win_nsec, NSEC_PER_MSEC); if (iolat->rq_depth.max_depth == UINT_MAX) - return scnprintf(buf, size, " depth=max avg_lat=%llu", - avg_lat); + return scnprintf(buf, size, " depth=max avg_lat=%llu win=%llu", + avg_lat, cur_win); - return scnprintf(buf, size, " depth=%u avg_lat=%llu", - iolat->rq_depth.max_depth, avg_lat); + return scnprintf(buf, size, " depth=%u avg_lat=%llu win=%llu", + iolat->rq_depth.max_depth, avg_lat, cur_win); }