diff mbox series

[mm,v2,1/3] mm: prepare for swap over-high accounting and penalty calculation

Message ID 20200511225516.2431921-2-kuba@kernel.org (mailing list archive)
State New, archived
Headers show
Series memcg: Slow down swap allocation as the available space gets depleted | expand

Commit Message

Jakub Kicinski May 11, 2020, 10:55 p.m. UTC
Slice the memory overage calculation logic a little bit so we can
reuse it to apply a similar penalty to the swap. The logic which
accesses the memory-specific fields (use and high values) has to
be taken out of calculate_high_delay().

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 mm/memcontrol.c | 62 ++++++++++++++++++++++++++++---------------------
 1 file changed, 35 insertions(+), 27 deletions(-)

Comments

Michal Hocko May 12, 2020, 7:08 a.m. UTC | #1
On Mon 11-05-20 15:55:14, Jakub Kicinski wrote:
> Slice the memory overage calculation logic a little bit so we can
> reuse it to apply a similar penalty to the swap. The logic which
> accesses the memory-specific fields (use and high values) has to
> be taken out of calculate_high_delay().
> 
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Acked-by: Michal Hocko <mhocko@suse.com>

some recommendations below.

> ---
>  mm/memcontrol.c | 62 ++++++++++++++++++++++++++++---------------------
>  1 file changed, 35 insertions(+), 27 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 05dcb72314b5..8a9b671c3249 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -2321,41 +2321,48 @@ static void high_work_func(struct work_struct *work)
>   #define MEMCG_DELAY_PRECISION_SHIFT 20
>   #define MEMCG_DELAY_SCALING_SHIFT 14
>  
> -/*
> - * Get the number of jiffies that we should penalise a mischievous cgroup which
> - * is exceeding its memory.high by checking both it and its ancestors.
> - */
> -static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
> -					  unsigned int nr_pages)
> +static u64 calculate_overage(unsigned long usage, unsigned long high)

the naming is slightly confusing. I would consider the return value
to be in memory units rather than time because I would read it as
overage of high. calculate_throttle_penalty would be more clear to me.

>  {
> -	unsigned long penalty_jiffies;
> -	u64 max_overage = 0;
> -
> -	do {
> -		unsigned long usage, high;
> -		u64 overage;
> +	u64 overage;
>  
> -		usage = page_counter_read(&memcg->memory);
> -		high = READ_ONCE(memcg->high);
> +	if (usage <= high)
> +		return 0;
>  
> -		if (usage <= high)
> -			continue;
> +	/*
> +	 * Prevent division by 0 in overage calculation by acting as if
> +	 * it was a threshold of 1 page
> +	 */
> +	high = max(high, 1UL);
>  
> -		/*
> -		 * Prevent division by 0 in overage calculation by acting as if
> -		 * it was a threshold of 1 page
> -		 */
> -		high = max(high, 1UL);
> +	overage = usage - high;
> +	overage <<= MEMCG_DELAY_PRECISION_SHIFT;
> +	return div64_u64(overage, high);
> +}
>  
> -		overage = usage - high;
> -		overage <<= MEMCG_DELAY_PRECISION_SHIFT;
> -		overage = div64_u64(overage, high);
> +static u64 mem_find_max_overage(struct mem_cgroup *memcg)

This would then become find_high_throttle_penalty

> +{
> +	u64 overage, max_overage = 0;
>  
> -		if (overage > max_overage)
> -			max_overage = overage;
> +	do {
> +		overage = calculate_overage(page_counter_read(&memcg->memory),
> +					    READ_ONCE(memcg->high));
> +		max_overage = max(overage, max_overage);
>  	} while ((memcg = parent_mem_cgroup(memcg)) &&
>  		 !mem_cgroup_is_root(memcg));
>  
> +	return max_overage;
> +}
> +
> +/*
> + * Get the number of jiffies that we should penalise a mischievous cgroup which
> + * is exceeding its memory.high by checking both it and its ancestors.
> + */
> +static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
> +					  unsigned int nr_pages,
> +					  u64 max_overage)
> +{
> +	unsigned long penalty_jiffies;
> +
>  	if (!max_overage)
>  		return 0;
>  
> @@ -2411,7 +2418,8 @@ void mem_cgroup_handle_over_high(void)
>  	 * memory.high is breached and reclaim is unable to keep up. Throttle
>  	 * allocators proactively to slow down excessive growth.
>  	 */
> -	penalty_jiffies = calculate_high_delay(memcg, nr_pages);
> +	penalty_jiffies = calculate_high_delay(memcg, nr_pages,
> +					       mem_find_max_overage(memcg));
>  
>  	/*
>  	 * Don't sleep if the amount of jiffies this memcg owes us is so low
> -- 
> 2.25.4
Jakub Kicinski May 12, 2020, 5:28 p.m. UTC | #2
On Tue, 12 May 2020 09:08:58 +0200 Michal Hocko wrote:
> On Mon 11-05-20 15:55:14, Jakub Kicinski wrote:
> > Slice the memory overage calculation logic a little bit so we can
> > reuse it to apply a similar penalty to the swap. The logic which
> > accesses the memory-specific fields (use and high values) has to
> > be taken out of calculate_high_delay().
> > 
> > Signed-off-by: Jakub Kicinski <kuba@kernel.org>  
> 
> Acked-by: Michal Hocko <mhocko@suse.com>
> 
> some recommendations below.

Thank you!

> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 05dcb72314b5..8a9b671c3249 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -2321,41 +2321,48 @@ static void high_work_func(struct work_struct *work)
> >   #define MEMCG_DELAY_PRECISION_SHIFT 20
> >   #define MEMCG_DELAY_SCALING_SHIFT 14
> >  
> > -/*
> > - * Get the number of jiffies that we should penalise a mischievous cgroup which
> > - * is exceeding its memory.high by checking both it and its ancestors.
> > - */
> > -static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
> > -					  unsigned int nr_pages)
> > +static u64 calculate_overage(unsigned long usage, unsigned long high)  
> 
> the naming is slightly confusing. I would consider the return value
> to be in memory units rather than time because I would read it as
> overage of high. calculate_throttle_penalty would be more clear to me.

Hm. The unit is the fraction of high. Here is the code, it's quite hard
to read in diff form (I should have used --histogram, sorry):

static u64 calculate_overage(unsigned long usage, unsigned long high)
{
	u64 overage;

	if (usage <= high)
		return 0;

	/*
	 * Prevent division by 0 in overage calculation by acting as if
	 * it was a threshold of 1 page
	 */
	high = max(high, 1UL);

	overage = usage - high;
	overage <<= MEMCG_DELAY_PRECISION_SHIFT;
	return div64_u64(overage, high);
}

calculate_throttle_penalty() sounds like it returns time. How about
something like calc_overage_frac() ? Or calc_overage_perc()?
(abbreviating to "calc" so the caller fits on a line)
Michal Hocko May 13, 2020, 8:06 a.m. UTC | #3
On Tue 12-05-20 10:28:19, Jakub Kicinski wrote:
> On Tue, 12 May 2020 09:08:58 +0200 Michal Hocko wrote:
> > On Mon 11-05-20 15:55:14, Jakub Kicinski wrote:
> > > Slice the memory overage calculation logic a little bit so we can
> > > reuse it to apply a similar penalty to the swap. The logic which
> > > accesses the memory-specific fields (use and high values) has to
> > > be taken out of calculate_high_delay().
> > > 
> > > Signed-off-by: Jakub Kicinski <kuba@kernel.org>  
> > 
> > Acked-by: Michal Hocko <mhocko@suse.com>
> > 
> > some recommendations below.
> 
> Thank you!
> 
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index 05dcb72314b5..8a9b671c3249 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > @@ -2321,41 +2321,48 @@ static void high_work_func(struct work_struct *work)
> > >   #define MEMCG_DELAY_PRECISION_SHIFT 20
> > >   #define MEMCG_DELAY_SCALING_SHIFT 14
> > >  
> > > -/*
> > > - * Get the number of jiffies that we should penalise a mischievous cgroup which
> > > - * is exceeding its memory.high by checking both it and its ancestors.
> > > - */
> > > -static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
> > > -					  unsigned int nr_pages)
> > > +static u64 calculate_overage(unsigned long usage, unsigned long high)  
> > 
> > the naming is slightly confusing. I would consider the return value
> > to be in memory units rather than time because I would read it as
> > overage of high. calculate_throttle_penalty would be more clear to me.
> 
> Hm. The unit is the fraction of high. Here is the code, it's quite hard
> to read in diff form (I should have used --histogram, sorry):

Yeah, I have checked the resulting code.

> static u64 calculate_overage(unsigned long usage, unsigned long high)
> {
> 	u64 overage;
> 
> 	if (usage <= high)
> 		return 0;
> 
> 	/*
> 	 * Prevent division by 0 in overage calculation by acting as if
> 	 * it was a threshold of 1 page
> 	 */
> 	high = max(high, 1UL);
> 
> 	overage = usage - high;
> 	overage <<= MEMCG_DELAY_PRECISION_SHIFT;
> 	return div64_u64(overage, high);
> }
> 
> calculate_throttle_penalty() sounds like it returns time. How about
> something like calc_overage_frac() ? Or calc_overage_perc()?
> (abbreviating to "calc" so the caller fits on a line)

heh, naming is hard and not the most important thing in the world. So if
_penalty doesn't really sound good to you then let's just stick with
what you've had. I do not really like the _perc/_frac much more because
this is more about the implementation of the function than the
intention. We shouldn't really care whether the throttling is based on
overage scaled linearly (aka fraction) or by other means. The
implementation might change in the future.
diff mbox series

Patch

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 05dcb72314b5..8a9b671c3249 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2321,41 +2321,48 @@  static void high_work_func(struct work_struct *work)
  #define MEMCG_DELAY_PRECISION_SHIFT 20
  #define MEMCG_DELAY_SCALING_SHIFT 14
 
-/*
- * Get the number of jiffies that we should penalise a mischievous cgroup which
- * is exceeding its memory.high by checking both it and its ancestors.
- */
-static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
-					  unsigned int nr_pages)
+static u64 calculate_overage(unsigned long usage, unsigned long high)
 {
-	unsigned long penalty_jiffies;
-	u64 max_overage = 0;
-
-	do {
-		unsigned long usage, high;
-		u64 overage;
+	u64 overage;
 
-		usage = page_counter_read(&memcg->memory);
-		high = READ_ONCE(memcg->high);
+	if (usage <= high)
+		return 0;
 
-		if (usage <= high)
-			continue;
+	/*
+	 * Prevent division by 0 in overage calculation by acting as if
+	 * it was a threshold of 1 page
+	 */
+	high = max(high, 1UL);
 
-		/*
-		 * Prevent division by 0 in overage calculation by acting as if
-		 * it was a threshold of 1 page
-		 */
-		high = max(high, 1UL);
+	overage = usage - high;
+	overage <<= MEMCG_DELAY_PRECISION_SHIFT;
+	return div64_u64(overage, high);
+}
 
-		overage = usage - high;
-		overage <<= MEMCG_DELAY_PRECISION_SHIFT;
-		overage = div64_u64(overage, high);
+static u64 mem_find_max_overage(struct mem_cgroup *memcg)
+{
+	u64 overage, max_overage = 0;
 
-		if (overage > max_overage)
-			max_overage = overage;
+	do {
+		overage = calculate_overage(page_counter_read(&memcg->memory),
+					    READ_ONCE(memcg->high));
+		max_overage = max(overage, max_overage);
 	} while ((memcg = parent_mem_cgroup(memcg)) &&
 		 !mem_cgroup_is_root(memcg));
 
+	return max_overage;
+}
+
+/*
+ * Get the number of jiffies that we should penalise a mischievous cgroup which
+ * is exceeding its memory.high by checking both it and its ancestors.
+ */
+static unsigned long calculate_high_delay(struct mem_cgroup *memcg,
+					  unsigned int nr_pages,
+					  u64 max_overage)
+{
+	unsigned long penalty_jiffies;
+
 	if (!max_overage)
 		return 0;
 
@@ -2411,7 +2418,8 @@  void mem_cgroup_handle_over_high(void)
 	 * memory.high is breached and reclaim is unable to keep up. Throttle
 	 * allocators proactively to slow down excessive growth.
 	 */
-	penalty_jiffies = calculate_high_delay(memcg, nr_pages);
+	penalty_jiffies = calculate_high_delay(memcg, nr_pages,
+					       mem_find_max_overage(memcg));
 
 	/*
 	 * Don't sleep if the amount of jiffies this memcg owes us is so low