Message ID | 20200511225516.2431921-2-kuba@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | memcg: Slow down swap allocation as the available space gets depleted | expand |
On Mon 11-05-20 15:55:14, Jakub Kicinski wrote: > Slice the memory overage calculation logic a little bit so we can > reuse it to apply a similar penalty to the swap. The logic which > accesses the memory-specific fields (use and high values) has to > be taken out of calculate_high_delay(). > > Signed-off-by: Jakub Kicinski <kuba@kernel.org> Acked-by: Michal Hocko <mhocko@suse.com> Some recommendations below. > --- > mm/memcontrol.c | 62 ++++++++++++++++++++++++++++--------------------- > 1 file changed, 35 insertions(+), 27 deletions(-) > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index 05dcb72314b5..8a9b671c3249 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -2321,41 +2321,48 @@ static void high_work_func(struct work_struct *work) > #define MEMCG_DELAY_PRECISION_SHIFT 20 > #define MEMCG_DELAY_SCALING_SHIFT 14 > > -/* > - * Get the number of jiffies that we should penalise a mischievous cgroup which > - * is exceeding its memory.high by checking both it and its ancestors. > - */ > -static unsigned long calculate_high_delay(struct mem_cgroup *memcg, > - unsigned int nr_pages) > +static u64 calculate_overage(unsigned long usage, unsigned long high) The naming is slightly confusing. I would consider the return value to be in memory units rather than time because I would read it as overage of high. calculate_throttle_penalty would be more clear to me. 
> { > - unsigned long penalty_jiffies; > - u64 max_overage = 0; > - > - do { > - unsigned long usage, high; > - u64 overage; > + u64 overage; > > - usage = page_counter_read(&memcg->memory); > - high = READ_ONCE(memcg->high); > + if (usage <= high) > + return 0; > > - if (usage <= high) > - continue; > + /* > + * Prevent division by 0 in overage calculation by acting as if > + * it was a threshold of 1 page > + */ > + high = max(high, 1UL); > > - /* > - * Prevent division by 0 in overage calculation by acting as if > - * it was a threshold of 1 page > - */ > - high = max(high, 1UL); > + overage = usage - high; > + overage <<= MEMCG_DELAY_PRECISION_SHIFT; > + return div64_u64(overage, high); > +} > > - overage = usage - high; > - overage <<= MEMCG_DELAY_PRECISION_SHIFT; > - overage = div64_u64(overage, high); > +static u64 mem_find_max_overage(struct mem_cgroup *memcg) This would then become find_high_throttle_penalty > +{ > + u64 overage, max_overage = 0; > > - if (overage > max_overage) > - max_overage = overage; > + do { > + overage = calculate_overage(page_counter_read(&memcg->memory), > + READ_ONCE(memcg->high)); > + max_overage = max(overage, max_overage); > } while ((memcg = parent_mem_cgroup(memcg)) && > !mem_cgroup_is_root(memcg)); > > + return max_overage; > +} > + > +/* > + * Get the number of jiffies that we should penalise a mischievous cgroup which > + * is exceeding its memory.high by checking both it and its ancestors. > + */ > +static unsigned long calculate_high_delay(struct mem_cgroup *memcg, > + unsigned int nr_pages, > + u64 max_overage) > +{ > + unsigned long penalty_jiffies; > + > if (!max_overage) > return 0; > > @@ -2411,7 +2418,8 @@ void mem_cgroup_handle_over_high(void) > * memory.high is breached and reclaim is unable to keep up. Throttle > * allocators proactively to slow down excessive growth. 
> */ > - penalty_jiffies = calculate_high_delay(memcg, nr_pages); > + penalty_jiffies = calculate_high_delay(memcg, nr_pages, > + mem_find_max_overage(memcg)); > > /* > * Don't sleep if the amount of jiffies this memcg owes us is so low > -- > 2.25.4
On Tue, 12 May 2020 09:08:58 +0200 Michal Hocko wrote: > On Mon 11-05-20 15:55:14, Jakub Kicinski wrote: > > Slice the memory overage calculation logic a little bit so we can > > reuse it to apply a similar penalty to the swap. The logic which > > accesses the memory-specific fields (use and high values) has to > > be taken out of calculate_high_delay(). > > > > Signed-off-by: Jakub Kicinski <kuba@kernel.org> > > Acked-by: Michal Hocko <mhocko@suse.com> > > some recommendations below. Thank you! > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > > index 05dcb72314b5..8a9b671c3249 100644 > > --- a/mm/memcontrol.c > > +++ b/mm/memcontrol.c > > @@ -2321,41 +2321,48 @@ static void high_work_func(struct work_struct *work) > > #define MEMCG_DELAY_PRECISION_SHIFT 20 > > #define MEMCG_DELAY_SCALING_SHIFT 14 > > > > -/* > > - * Get the number of jiffies that we should penalise a mischievous cgroup which > > - * is exceeding its memory.high by checking both it and its ancestors. > > - */ > > -static unsigned long calculate_high_delay(struct mem_cgroup *memcg, > > - unsigned int nr_pages) > > +static u64 calculate_overage(unsigned long usage, unsigned long high) > > the naming is slightly confusing. I would concider the return value > to be in memory units rather than time because I would read it as > overrage of high. calculate_throttle_penalty would be more clear to me. Hm. The unit is the fraction of high. Here is the code, it's quite hard to read in diff form (I should have used --histogram, sorry): static u64 calculate_overage(unsigned long usage, unsigned long high) { u64 overage; if (usage <= high) return 0; /* * Prevent division by 0 in overage calculation by acting as if * it was a threshold of 1 page */ high = max(high, 1UL); overage = usage - high; overage <<= MEMCG_DELAY_PRECISION_SHIFT; return div64_u64(overage, high); } calculate_throttle_penalty() sounds like it returns time. How about something like calc_overage_frac() ? Or calc_overage_perc()? 
(abbreviating to "calc" so the caller fits on a line)
On Tue 12-05-20 10:28:19, Jakub Kicinski wrote: > On Tue, 12 May 2020 09:08:58 +0200 Michal Hocko wrote: > > On Mon 11-05-20 15:55:14, Jakub Kicinski wrote: > > > Slice the memory overage calculation logic a little bit so we can > > > reuse it to apply a similar penalty to the swap. The logic which > > > accesses the memory-specific fields (use and high values) has to > > > be taken out of calculate_high_delay(). > > > > > > Signed-off-by: Jakub Kicinski <kuba@kernel.org> > > > > Acked-by: Michal Hocko <mhocko@suse.com> > > > > some recommendations below. > > Thank you! > > > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > > > index 05dcb72314b5..8a9b671c3249 100644 > > > --- a/mm/memcontrol.c > > > +++ b/mm/memcontrol.c > > > @@ -2321,41 +2321,48 @@ static void high_work_func(struct work_struct *work) > > > #define MEMCG_DELAY_PRECISION_SHIFT 20 > > > #define MEMCG_DELAY_SCALING_SHIFT 14 > > > > > > -/* > > > - * Get the number of jiffies that we should penalise a mischievous cgroup which > > > - * is exceeding its memory.high by checking both it and its ancestors. > > > - */ > > > -static unsigned long calculate_high_delay(struct mem_cgroup *memcg, > > > - unsigned int nr_pages) > > > +static u64 calculate_overage(unsigned long usage, unsigned long high) > > > > the naming is slightly confusing. I would concider the return value > > to be in memory units rather than time because I would read it as > > overrage of high. calculate_throttle_penalty would be more clear to me. > > Hm. The unit is the fraction of high. Here is the code, it's quite hard > to read in diff form (I should have used --histogram, sorry): Yeah, I have checked the resulting code. 
> static u64 calculate_overage(unsigned long usage, unsigned long high) > { > u64 overage; > > if (usage <= high) > return 0; > > /* > * Prevent division by 0 in overage calculation by acting as if > * it was a threshold of 1 page > */ > high = max(high, 1UL); > > overage = usage - high; > overage <<= MEMCG_DELAY_PRECISION_SHIFT; > return div64_u64(overage, high); > } > > calculate_throttle_penalty() sounds like it returns time. How about > something like calc_overage_frac() ? Or calc_overage_perc()? > (abbreviating to "calc" so the caller fits on a line) heh, naming is hard and not the most important thing in the world. So if _penalty doesn't really sound good to you then let's just stick with what you've had. I do not really like the _perc/_frac much more because this is more about the implementation of the function than the intention. We shouldn't really care whether the throttling is based on overage scaled linearly (aka fraction) or by other means. The implementation might change in the future.
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 05dcb72314b5..8a9b671c3249 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2321,41 +2321,48 @@ static void high_work_func(struct work_struct *work) #define MEMCG_DELAY_PRECISION_SHIFT 20 #define MEMCG_DELAY_SCALING_SHIFT 14 -/* - * Get the number of jiffies that we should penalise a mischievous cgroup which - * is exceeding its memory.high by checking both it and its ancestors. - */ -static unsigned long calculate_high_delay(struct mem_cgroup *memcg, - unsigned int nr_pages) +static u64 calculate_overage(unsigned long usage, unsigned long high) { - unsigned long penalty_jiffies; - u64 max_overage = 0; - - do { - unsigned long usage, high; - u64 overage; + u64 overage; - usage = page_counter_read(&memcg->memory); - high = READ_ONCE(memcg->high); + if (usage <= high) + return 0; - if (usage <= high) - continue; + /* + * Prevent division by 0 in overage calculation by acting as if + * it was a threshold of 1 page + */ + high = max(high, 1UL); - /* - * Prevent division by 0 in overage calculation by acting as if - * it was a threshold of 1 page - */ - high = max(high, 1UL); + overage = usage - high; + overage <<= MEMCG_DELAY_PRECISION_SHIFT; + return div64_u64(overage, high); +} - overage = usage - high; - overage <<= MEMCG_DELAY_PRECISION_SHIFT; - overage = div64_u64(overage, high); +static u64 mem_find_max_overage(struct mem_cgroup *memcg) +{ + u64 overage, max_overage = 0; - if (overage > max_overage) - max_overage = overage; + do { + overage = calculate_overage(page_counter_read(&memcg->memory), + READ_ONCE(memcg->high)); + max_overage = max(overage, max_overage); } while ((memcg = parent_mem_cgroup(memcg)) && !mem_cgroup_is_root(memcg)); + return max_overage; +} + +/* + * Get the number of jiffies that we should penalise a mischievous cgroup which + * is exceeding its memory.high by checking both it and its ancestors. 
+ */ +static unsigned long calculate_high_delay(struct mem_cgroup *memcg, + unsigned int nr_pages, + u64 max_overage) +{ + unsigned long penalty_jiffies; + if (!max_overage) return 0; @@ -2411,7 +2418,8 @@ void mem_cgroup_handle_over_high(void) * memory.high is breached and reclaim is unable to keep up. Throttle * allocators proactively to slow down excessive growth. */ - penalty_jiffies = calculate_high_delay(memcg, nr_pages); + penalty_jiffies = calculate_high_delay(memcg, nr_pages, + mem_find_max_overage(memcg)); /* * Don't sleep if the amount of jiffies this memcg owes us is so low
Slice the memory overage calculation logic a little bit so we can reuse it to apply a similar penalty to the swap. The logic which accesses the memory-specific fields (usage and high values) has to be taken out of calculate_high_delay(). Signed-off-by: Jakub Kicinski <kuba@kernel.org> --- mm/memcontrol.c | 62 ++++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 27 deletions(-)