@@ -13,6 +13,7 @@ struct page_counter {
* memcg->memory.usage is a hot member of struct mem_cgroup.
*/
atomic_long_t usage;
+ struct mem_cgroup *memcg; /* memcg that owns this counter */
CACHELINE_PADDING(_pad1_);
/* effective memory.min and memory.min usage tracking */
@@ -25,6 +26,10 @@ struct page_counter {
atomic_long_t low_usage;
atomic_long_t children_low_usage;
+	unsigned long elocallow;	/* effective memory.locallow */
+ atomic_long_t locallow_usage;
+ atomic_long_t children_locallow_usage;
+
unsigned long watermark;
/* Latest cg2 reset watermark */
unsigned long local_watermark;
@@ -36,6 +41,7 @@ struct page_counter {
bool protection_support;
unsigned long min;
unsigned long low;
+ unsigned long locallow;
unsigned long high;
unsigned long max;
struct page_counter *parent;
@@ -52,12 +58,13 @@ struct page_counter {
*/
static inline void page_counter_init(struct page_counter *counter,
struct page_counter *parent,
- bool protection_support)
+ bool protection_support, struct mem_cgroup *memcg)
{
counter->usage = (atomic_long_t)ATOMIC_LONG_INIT(0);
counter->max = PAGE_COUNTER_MAX;
counter->parent = parent;
counter->protection_support = protection_support;
+ counter->memcg = memcg;
}
static inline unsigned long page_counter_read(struct page_counter *counter)
@@ -72,7 +79,8 @@ bool page_counter_try_charge(struct page_counter *counter,
struct page_counter **fail);
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages);
void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages);
-void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages);
+void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages,
+ unsigned long nr_pages_local);
static inline void page_counter_set_high(struct page_counter *counter,
unsigned long nr_pages)
@@ -99,11 +107,11 @@ static inline void page_counter_reset_watermark(struct page_counter *counter)
#ifdef CONFIG_MEMCG
void page_counter_calculate_protection(struct page_counter *root,
struct page_counter *counter,
- bool recursive_protection);
+				       bool recursive_protection, bool is_local);
#else
static inline void page_counter_calculate_protection(struct page_counter *root,
struct page_counter *counter,
- bool recursive_protection) {}
+						    bool recursive_protection, bool is_local) {}
#endif
#endif /* _LINUX_PAGE_COUNTER_H */
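Note: the mm/page_counter.c hunks below consume get_cgroup_local_usage(), which this series only forward-declares. A minimal sketch of such a helper, assuming node 0 is the local tier and that "local usage" means the memcg's anon plus file pages there (NR_ANON_MAPPED/NR_FILE_PAGES are my assumption, not taken from the patch):

/* Sketch only -- not part of this patch. */
unsigned long get_cgroup_local_usage(struct mem_cgroup *memcg, bool flush)
{
	struct lruvec *lruvec;

	if (!memcg)
		return 0;

	if (flush)
		mem_cgroup_flush_stats(memcg);

	lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(0));
	return lruvec_page_state(lruvec, NR_ANON_MAPPED) +
	       lruvec_page_state(lruvec, NR_FILE_PAGES);
}

Only the memory counter is initialized with a non-NULL memcg below, and protection_support gates every caller, so the NULL check is belt-and-suspenders.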
@@ -114,10 +114,10 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
}
page_counter_init(hugetlb_cgroup_counter_from_cgroup(h_cgroup,
idx),
- fault_parent, false);
+ fault_parent, false, NULL);
page_counter_init(
hugetlb_cgroup_counter_from_cgroup_rsvd(h_cgroup, idx),
- rsvd_parent, false);
+ rsvd_parent, false, NULL);
limit = round_down(PAGE_COUNTER_MAX,
pages_per_huge_page(&hstates[idx]));
@@ -1497,6 +1497,9 @@ static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
vm_event_name(memcg_vm_event_stat[i]),
memcg_events(memcg, memcg_vm_event_stat[i]));
}
+
+ seq_buf_printf(s, "local_usage %lu\n",
+ get_cgroup_local_usage(memcg, true));
}
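For illustration, memory.stat would then end with the new line (the value here is made up):

  $ tail -1 /sys/fs/cgroup/test/memory.stat
  local_usage 262144

One thing to double-check: the surrounding stats are reported in bytes, so if get_cgroup_local_usage() returns pages, a PAGE_SIZE scaling is probably wanted here.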
static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s)
@@ -3597,8 +3600,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
if (parent) {
WRITE_ONCE(memcg->swappiness, mem_cgroup_swappiness(parent));
- page_counter_init(&memcg->memory, &parent->memory, true);
- page_counter_init(&memcg->swap, &parent->swap, false);
+ page_counter_init(&memcg->memory, &parent->memory, true, memcg);
+ page_counter_init(&memcg->swap, &parent->swap, false, NULL);
#ifdef CONFIG_MEMCG_V1
WRITE_ONCE(memcg->oom_kill_disable, READ_ONCE(parent->oom_kill_disable));
page_counter_init(&memcg->kmem, &parent->kmem, false);
@@ -3607,8 +3610,8 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
} else {
init_memcg_stats();
init_memcg_events();
- page_counter_init(&memcg->memory, NULL, true);
- page_counter_init(&memcg->swap, NULL, false);
+ page_counter_init(&memcg->memory, NULL, true, memcg);
+ page_counter_init(&memcg->swap, NULL, false, NULL);
#ifdef CONFIG_MEMCG_V1
page_counter_init(&memcg->kmem, NULL, false);
page_counter_init(&memcg->tcpmem, NULL, false);
@@ -3677,7 +3680,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
memcg1_css_offline(memcg);
page_counter_set_min(&memcg->memory, 0);
- page_counter_set_low(&memcg->memory, 0);
+ page_counter_set_low(&memcg->memory, 0, 0);
zswap_memcg_offline_cleanup(memcg);
@@ -3748,7 +3751,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css)
page_counter_set_max(&memcg->tcpmem, PAGE_COUNTER_MAX);
#endif
page_counter_set_min(&memcg->memory, 0);
- page_counter_set_low(&memcg->memory, 0);
+ page_counter_set_low(&memcg->memory, 0, 0);
page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
memcg1_soft_limit_reset(memcg);
page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
@@ -4051,6 +4054,12 @@ static ssize_t memory_min_write(struct kernfs_open_file *of,
return nbytes;
}
+static int memory_locallow_show(struct seq_file *m, void *v)
+{
+ return seq_puts_memcg_tunable(m,
+ READ_ONCE(mem_cgroup_from_seq(m)->memory.locallow));
+}
+
static int memory_low_show(struct seq_file *m, void *v)
{
return seq_puts_memcg_tunable(m,
@@ -4061,7 +4070,8 @@ static ssize_t memory_low_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
- unsigned long low;
+ struct sysinfo si;
+ unsigned long low, locallow, local_capacity, total_capacity;
int err;
buf = strstrip(buf);
@@ -4069,7 +4079,15 @@ static ssize_t memory_low_write(struct kernfs_open_file *of,
if (err)
return err;
- page_counter_set_low(&memcg->memory, low);
+	/* Prototype assumption: node 0 is the local node, node 1 the remote. */
+	si_meminfo_node(&si, 0);
+	local_capacity = si.totalram;	/* In pages. */
+	total_capacity = local_capacity;
+	si_meminfo_node(&si, 1);
+	total_capacity += si.totalram;
+	locallow = mult_frac(low, local_capacity, total_capacity);	/* avoids overflow for "max" */
+
+ page_counter_set_low(&memcg->memory, low, locallow);
return nbytes;
}
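Worked example of the split above: with 16 GiB on node 0 and 48 GiB on node 1, writing 8G to memory.low gives locallow = 8G * 16/64 = 2G. Also note si_meminfo_node() is only built under CONFIG_NUMA. The two-node assumption could be lifted without changing the policy; a sketch, with split_low_to_local() being a hypothetical helper that still assumes node 0 is local:

static unsigned long split_low_to_local(unsigned long low)
{
	struct sysinfo si;
	unsigned long local_capacity = 0, total_capacity = 0;
	int nid;

	for_each_online_node(nid) {
		si_meminfo_node(&si, nid);
		total_capacity += si.totalram;
		if (nid == 0)	/* assumed local tier */
			local_capacity = si.totalram;
	}

	return total_capacity ?
		mult_frac(low, local_capacity, total_capacity) : 0;
}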
@@ -4394,6 +4412,11 @@ static struct cftype memory_files[] = {
.seq_show = memory_low_show,
.write = memory_low_write,
},
+ {
+ .name = "locallow",
+ .flags = CFTYPE_NOT_ON_ROOT,
+ .seq_show = memory_locallow_show,
+ },
{
.name = "high",
.flags = CFTYPE_NOT_ON_ROOT,
@@ -4483,7 +4506,8 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root,
if (!root)
root = root_mem_cgroup;
- page_counter_calculate_protection(&root->memory, &memcg->memory, recursive_protection);
+ page_counter_calculate_protection(&root->memory, &memcg->memory,
+ recursive_protection, false);
}
static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg,
@@ -18,8 +18,10 @@ static bool track_protection(struct page_counter *c)
return c->protection_support;
}
+extern unsigned long get_cgroup_local_usage(struct mem_cgroup *memcg, bool flush);
+
static void propagate_protected_usage(struct page_counter *c,
- unsigned long usage)
+ unsigned long usage, unsigned long local_usage)
{
unsigned long protected, old_protected;
long delta;
@@ -44,6 +46,15 @@ static void propagate_protected_usage(struct page_counter *c,
if (delta)
atomic_long_add(delta, &c->parent->children_low_usage);
}
+
+ protected = min(local_usage, READ_ONCE(c->locallow));
+ old_protected = atomic_long_read(&c->locallow_usage);
+ if (protected != old_protected) {
+ old_protected = atomic_long_xchg(&c->locallow_usage, protected);
+ delta = protected - old_protected;
+ if (delta)
+ atomic_long_add(delta, &c->parent->children_locallow_usage);
+ }
}
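The new block deliberately mirrors the existing min/low propagation: cache the clamped value with an xchg, then push only the delta to the parent, so concurrent chargers keep children_locallow_usage consistent without a lock. With made-up numbers, locallow = 30 pages and local usage growing from 10 to 50:

	protected = min(50, 30)             = 30
	delta     = 30 - old_protected (10) = 20
	parent->children_locallow_usage    += 20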
/**
@@ -63,7 +74,8 @@ void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
atomic_long_set(&counter->usage, new);
}
if (track_protection(counter))
- propagate_protected_usage(counter, new);
+ propagate_protected_usage(counter, new,
+ get_cgroup_local_usage(counter->memcg, false));
}
/**
@@ -83,7 +95,8 @@ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
new = atomic_long_add_return(nr_pages, &c->usage);
if (protection)
- propagate_protected_usage(c, new);
+ propagate_protected_usage(c, new,
+				get_cgroup_local_usage(c->memcg, false));
/*
* This is indeed racy, but we can live with some
* inaccuracy in the watermark.
@@ -151,7 +164,8 @@ bool page_counter_try_charge(struct page_counter *counter,
goto failed;
}
if (protection)
- propagate_protected_usage(c, new);
+ propagate_protected_usage(c, new,
+				get_cgroup_local_usage(c->memcg, false));
/* see comment on page_counter_charge */
if (new > READ_ONCE(c->local_watermark)) {
@@ -238,7 +252,8 @@ void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
WRITE_ONCE(counter->min, nr_pages);
for (c = counter; c; c = c->parent)
- propagate_protected_usage(c, atomic_long_read(&c->usage));
+ propagate_protected_usage(c, atomic_long_read(&c->usage),
+			get_cgroup_local_usage(c->memcg, false));
}
/**
@@ -248,14 +263,17 @@ void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
*
* The caller must serialize invocations on the same counter.
*/
-void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
+void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages,
+ unsigned long nr_pages_local)
{
struct page_counter *c;
WRITE_ONCE(counter->low, nr_pages);
+ WRITE_ONCE(counter->locallow, nr_pages_local);
for (c = counter; c; c = c->parent)
- propagate_protected_usage(c, atomic_long_read(&c->usage));
+ propagate_protected_usage(c, atomic_long_read(&c->usage),
+			get_cgroup_local_usage(c->memcg, false));
}
/**
@@ -421,9 +439,9 @@ static unsigned long effective_protection(unsigned long usage,
*/
void page_counter_calculate_protection(struct page_counter *root,
struct page_counter *counter,
- bool recursive_protection)
+				       bool recursive_protection, bool is_local)
{
- unsigned long usage, parent_usage;
+ unsigned long usage, parent_usage, local_usage, parent_local_usage;
struct page_counter *parent = counter->parent;
/*
@@ -437,16 +455,19 @@ void page_counter_calculate_protection(struct page_counter *root,
return;
usage = page_counter_read(counter);
- if (!usage)
+ local_usage = get_cgroup_local_usage(counter->memcg, true);
+	if (!usage || (is_local && !local_usage))
return;
if (parent == root) {
counter->emin = READ_ONCE(counter->min);
counter->elow = READ_ONCE(counter->low);
+ counter->elocallow = READ_ONCE(counter->locallow);
return;
}
parent_usage = page_counter_read(parent);
+ parent_local_usage = get_cgroup_local_usage(parent->memcg, true);
WRITE_ONCE(counter->emin, effective_protection(usage, parent_usage,
READ_ONCE(counter->min),
@@ -454,7 +475,16 @@ void page_counter_calculate_protection(struct page_counter *root,
atomic_long_read(&parent->children_min_usage),
recursive_protection));
- WRITE_ONCE(counter->elow, effective_protection(usage, parent_usage,
+ if (is_local)
+ WRITE_ONCE(counter->elocallow,
+ effective_protection(local_usage, parent_local_usage,
+ READ_ONCE(counter->locallow),
+ READ_ONCE(parent->elocallow),
+ atomic_long_read(&parent->children_locallow_usage),
+ recursive_protection));
+ else
+ WRITE_ONCE(counter->elow,
+ effective_protection(usage, parent_usage,
READ_ONCE(counter->low),
READ_ONCE(parent->elow),
atomic_long_read(&parent->children_low_usage),
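As converted here, mem_cgroup_calculate_protection() always passes false, so nothing computes elocallow yet; presumably a local-node reclaim or demotion path in a later patch requests the local variant. A hypothetical caller, for shape only:

/* Hypothetical -- no such caller exists in this series. */
static void mem_cgroup_calculate_local_protection(struct mem_cgroup *root,
						  struct mem_cgroup *memcg,
						  bool recursive_protection)
{
	if (!root)
		root = root_mem_cgroup;
	page_counter_calculate_protection(&root->memory, &memcg->memory,
					  recursive_protection, true);
}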