diff mbox series

[RFC,v3,04/14] mm: use part per 1000 for bdi ratios.

Message ID 20221024190603.3987969-5-shr@devkernel.io (mailing list archive)
State New
Headers show
Series mm/block: add bdi sysfs knobs | expand

Commit Message

Stefan Roesch Oct. 24, 2022, 7:05 p.m. UTC
To get finer granularity for ratio calculations, use parts per 1000
instead of percent. This is especially important if we want to
automatically convert byte values to ratios. Otherwise the values that
are actually used can be quite different. This is also important for
machines with more main memory (1% of 256GB is already 2.5GB).

Signed-off-by: Stefan Roesch <shr@devkernel.io>
---
 include/linux/backing-dev.h |  3 +++
 mm/backing-dev.c            |  6 +++---
 mm/page-writeback.c         | 15 +++++++++------
 3 files changed, 15 insertions(+), 9 deletions(-)

Comments

Andrew Morton Nov. 16, 2022, 9:29 p.m. UTC | #1
On Mon, 24 Oct 2022 12:05:53 -0700 Stefan Roesch <shr@devkernel.io> wrote:

> To get finer granularity for ratio calculations use part per 1000
> instead of percentiles. This is especially important if we want to
> automatically convert byte values to ratios. Otherwise the values that
> are actually used can be quite different. This is also important for
> machines with more main memory (1% of 256GB is already 2.5GB).
> 
> ...
> 

This changes an existing userspace interface, doesn't it? 
/sys/class/bdi/<bdi>/min_ratio.  Can't do that!

We could add a new interface to do the same thing, I guess.
/sys/class/bdi/<bdi>/min_ratio_fine or whatever.

We might want to go for more than 100->1000, too.  Otherwise in a few
years we'll be adding /sys/class/bdi/<bdi>/min_ratio_even_finer.

Also, this patch forgot to update
Documentation/ABI/testing/sysfs-class-bdi.
Stefan Roesch Nov. 19, 2022, 12:03 a.m. UTC | #2
Andrew Morton <akpm@linux-foundation.org> writes:

> On Mon, 24 Oct 2022 12:05:53 -0700 Stefan Roesch <shr@devkernel.io> wrote:
>
>> To get finer granularity for ratio calculations use part per 1000
>> instead of percentiles. This is especially important if we want to
>> automatically convert byte values to ratios. Otherwise the values that
>> are actually used can be quite different. This is also important for
>> machines with more main memory (1% of 256GB is already 2.5GB).
>>
>> ...
>>
>
> This changes an existing userspace interface, doesn't it?
> /sys/class/bdi/<bdi>/min_ratio.  Can't do that!
>

It does not change the user interface. It maintains the percent values
in the min_ratio and max_ratio knobs.

For instance:

-BDI_SHOW(min_ratio, bdi->min_ratio)
+BDI_SHOW(min_ratio, bdi->min_ratio / BDI_RATIO_SCALE)

> We could add a new interface to do the same thing, I guess.
> /sys/class/bdi/<bdi>/min_ratio_fine or whatever.
>
> We might want to go for more than 100->1000, too.  Otherwise in a few
> years we'll be adding /sys/class/bdi/<bdi>/min_ratio_even_finer.
>

The next version of the patch series will also add min_ratio_fine and
max_ratio_fine. This ensures that ratio values can also be specified
with a finer granularity.

> Also, this patch forgot to update
> Documentation/ABI/testing/sysfs-class-bdi.

The existing user behavior is not changed, only the internal
calculation.
diff mbox series

Patch

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 9c984ffc8a0a..19fe0e605ed8 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -102,6 +102,9 @@  static inline unsigned long wb_stat_error(void)
 #endif
 }
 
+/* BDI ratios are stored in parts per 1000 (percent * BDI_RATIO_SCALE). */
+#define BDI_RATIO_SCALE 10
+
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
 int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index a0899cce72ef..90fa517123dc 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -178,7 +178,7 @@  static ssize_t min_ratio_store(struct device *dev,
 
 	return ret;
 }
-BDI_SHOW(min_ratio, bdi->min_ratio)
+BDI_SHOW(min_ratio, bdi->min_ratio / BDI_RATIO_SCALE)
 
 static ssize_t max_ratio_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
@@ -197,7 +197,7 @@  static ssize_t max_ratio_store(struct device *dev,
 
 	return ret;
 }
-BDI_SHOW(max_ratio, bdi->max_ratio)
+BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE)
 
 static ssize_t stable_pages_required_show(struct device *dev,
 					  struct device_attribute *attr,
@@ -809,7 +809,7 @@  int bdi_init(struct backing_dev_info *bdi)
 
 	kref_init(&bdi->refcnt);
 	bdi->min_ratio = 0;
-	bdi->max_ratio = 100;
+	bdi->max_ratio = 100 * BDI_RATIO_SCALE;
 	bdi->max_prop_frac = FPROP_FRAC_BASE;
 	INIT_LIST_HEAD(&bdi->bdi_list);
 	INIT_LIST_HEAD(&bdi->wb_list);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e22aae0ecacd..4d5383d4da45 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -197,7 +197,7 @@  static void wb_min_max_ratio(struct bdi_writeback *wb,
 			min *= this_bw;
 			min = div64_ul(min, tot_bw);
 		}
-		if (max < 100) {
+		if (max < 100 * BDI_RATIO_SCALE) {
 			max *= this_bw;
 			max = div64_ul(max, tot_bw);
 		}
@@ -655,6 +655,8 @@  int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 	unsigned int delta;
 	int ret = 0;
 
+	min_ratio *= BDI_RATIO_SCALE;
+
 	spin_lock_bh(&bdi_lock);
 	if (min_ratio > bdi->max_ratio) {
 		ret = -EINVAL;
@@ -665,7 +667,7 @@  int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 			bdi->min_ratio = min_ratio;
 		} else {
 			delta = min_ratio - bdi->min_ratio;
-			if (bdi_min_ratio + delta < 100) {
+			if (bdi_min_ratio + delta < 100 * BDI_RATIO_SCALE) {
 				bdi_min_ratio += delta;
 				bdi->min_ratio = min_ratio;
 			} else {
@@ -684,6 +686,7 @@  int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
 
 	if (max_ratio > 100)
 		return -EINVAL;
+	max_ratio *= BDI_RATIO_SCALE;
 
 	spin_lock_bh(&bdi_lock);
 	if (bdi->min_ratio > max_ratio) {
@@ -776,15 +779,15 @@  static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
 	fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
 			      &numerator, &denominator);
 
-	wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
+	wb_thresh = (thresh * (100 * BDI_RATIO_SCALE - bdi_min_ratio)) / (100 * BDI_RATIO_SCALE);
 	wb_thresh *= numerator;
 	wb_thresh = div64_ul(wb_thresh, denominator);
 
 	wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);
 
-	wb_thresh += (thresh * wb_min_ratio) / 100;
-	if (wb_thresh > (thresh * wb_max_ratio) / 100)
-		wb_thresh = thresh * wb_max_ratio / 100;
+	wb_thresh += (thresh * wb_min_ratio) / (100 * BDI_RATIO_SCALE);
+	if (wb_thresh > (thresh * wb_max_ratio) / (100 * BDI_RATIO_SCALE))
+		wb_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE);
 
 	return wb_thresh;
 }