@@ -1144,7 +1144,7 @@ static void blkcg_rstat_flush(struct cgroup_subsys_state *css, int cpu)
/*
* We source root cgroup stats from the system-wide stats to avoid
* tracking the same information twice and incurring overhead when no
- * cgroups are defined. For that reason, cgroup_rstat_flush in
+ * cgroups are defined. For that reason, css_rstat_flush in
* blkcg_print_stat does not actually fill out the iostat in the root
* cgroup's blkcg_gq.
*
@@ -1253,7 +1253,7 @@ static int blkcg_print_stat(struct seq_file *sf, void *v)
if (!seq_css(sf)->parent)
blkcg_fill_root_iostats();
else
- cgroup_rstat_flush(blkcg->css.cgroup);
+ css_rstat_flush(&blkcg->css);
rcu_read_lock();
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
@@ -2243,7 +2243,7 @@ void blk_cgroup_bio_start(struct bio *bio)
}
u64_stats_update_end_irqrestore(&bis->sync, flags);
- cgroup_rstat_updated(blkcg->css.cgroup, cpu);
+ css_rstat_updated(&blkcg->css, cpu);
put_cpu();
}
@@ -536,7 +536,7 @@ struct cgroup {
/*
* A singly-linked list of cgroup structures to be rstat flushed.
* This is a scratch field to be used exclusively by
- * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock.
+ * css_rstat_flush_locked() and protected by cgroup_rstat_lock.
*/
struct cgroup *rstat_flush_next;
@@ -693,8 +693,8 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
/*
* cgroup scalable recursive statistics.
*/
-void cgroup_rstat_updated(struct cgroup *cgrp, int cpu);
-void cgroup_rstat_flush(struct cgroup *cgrp);
+void css_rstat_updated(struct cgroup_subsys_state *css, int cpu);
+void css_rstat_flush(struct cgroup_subsys_state *css);
/*
* Basic resource stats.
@@ -270,8 +270,8 @@ int cgroup_task_count(const struct cgroup *cgrp);
/*
* rstat.c
*/
-int cgroup_rstat_init(struct cgroup *cgrp);
-void cgroup_rstat_exit(struct cgroup *cgrp);
+int css_rstat_init(struct cgroup_subsys_state *css);
+void css_rstat_exit(struct cgroup_subsys_state *css);
void cgroup_rstat_boot(void);
void cgroup_base_stat_cputime_show(struct seq_file *seq);
@@ -1375,7 +1375,7 @@ static void cgroup_destroy_root(struct cgroup_root *root)
cgroup_unlock();
- cgroup_rstat_exit(cgrp);
+ css_rstat_exit(&cgrp->self);
kernfs_destroy_root(root->kf_root);
cgroup_free_root(root);
}
@@ -2150,7 +2150,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
if (ret)
goto destroy_root;
- ret = cgroup_rstat_init(root_cgrp);
+ ret = css_rstat_init(&root_cgrp->self);
if (ret)
goto destroy_root;
@@ -2192,7 +2192,7 @@ int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
goto out;
exit_stats:
- cgroup_rstat_exit(root_cgrp);
+ css_rstat_exit(&root_cgrp->self);
destroy_root:
kernfs_destroy_root(root->kf_root);
root->kf_root = NULL;
@@ -5449,7 +5449,7 @@ static void css_free_rwork_fn(struct work_struct *work)
cgroup_put(cgroup_parent(cgrp));
kernfs_put(cgrp->kn);
psi_cgroup_free(cgrp);
- cgroup_rstat_exit(cgrp);
+ css_rstat_exit(css);
kfree(cgrp);
} else {
/*
@@ -5479,7 +5479,7 @@ static void css_release_work_fn(struct work_struct *work)
/* css release path */
if (!list_empty(&css->rstat_css_node)) {
- cgroup_rstat_flush(cgrp);
+ css_rstat_flush(css);
list_del_rcu(&css->rstat_css_node);
}
@@ -5507,7 +5507,7 @@ static void css_release_work_fn(struct work_struct *work)
/* cgroup release path */
TRACE_CGROUP_PATH(release, cgrp);
- cgroup_rstat_flush(cgrp);
+ css_rstat_flush(css);
spin_lock_irq(&css_set_lock);
for (tcgrp = cgroup_parent(cgrp); tcgrp;
@@ -5700,17 +5700,13 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
if (ret)
goto out_free_cgrp;
- ret = cgroup_rstat_init(cgrp);
- if (ret)
- goto out_cancel_ref;
-
/* create the directory */
kn = kernfs_create_dir_ns(parent->kn, name, mode,
current_fsuid(), current_fsgid(),
cgrp, NULL);
if (IS_ERR(kn)) {
ret = PTR_ERR(kn);
- goto out_stat_exit;
+ goto out_cancel_ref;
}
cgrp->kn = kn;
@@ -5720,6 +5716,14 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
cgrp->root = root;
cgrp->level = level;
+ /*
+ * Now that init_cgroup_housekeeping() has been called and cgrp->self
+ * is setup, it is safe to perform rstat initialization on it.
+ */
+ ret = css_rstat_init(&cgrp->self);
+ if (ret)
+ goto out_kernfs_remove;
+
ret = psi_cgroup_alloc(cgrp);
if (ret)
- goto out_kernfs_remove;
+ goto out_stat_exit;
@@ -5790,10 +5794,10 @@ static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
out_psi_free:
psi_cgroup_free(cgrp);
+out_stat_exit:
+ css_rstat_exit(&cgrp->self);
out_kernfs_remove:
kernfs_remove(cgrp->kn);
-out_stat_exit:
- cgroup_rstat_exit(cgrp);
out_cancel_ref:
percpu_ref_exit(&cgrp->self.refcnt);
out_free_cgrp:
@@ -34,9 +34,10 @@ static struct cgroup_rstat_base_cpu *cgroup_rstat_base_cpu(
* operations without handling high-frequency fast-path "update" events.
*/
static __always_inline
-unsigned long _cgroup_rstat_cpu_lock(raw_spinlock_t *cpu_lock, int cpu,
- struct cgroup *cgrp, const bool fast_path)
+unsigned long _css_rstat_cpu_lock(raw_spinlock_t *cpu_lock, int cpu,
+ struct cgroup_subsys_state *css, const bool fast_path)
{
+ struct cgroup *cgrp = css->cgroup;
unsigned long flags;
bool contended;
@@ -67,10 +68,12 @@ unsigned long _cgroup_rstat_cpu_lock(raw_spinlock_t *cpu_lock, int cpu,
}
static __always_inline
-void _cgroup_rstat_cpu_unlock(raw_spinlock_t *cpu_lock, int cpu,
- struct cgroup *cgrp, unsigned long flags,
+void _css_rstat_cpu_unlock(raw_spinlock_t *cpu_lock, int cpu,
+ struct cgroup_subsys_state *css, unsigned long flags,
const bool fast_path)
{
+ struct cgroup *cgrp = css->cgroup;
+
if (fast_path)
trace_cgroup_rstat_cpu_unlock_fastpath(cgrp, cpu, false);
else
@@ -80,16 +83,17 @@ void _cgroup_rstat_cpu_unlock(raw_spinlock_t *cpu_lock, int cpu,
}
/**
- * cgroup_rstat_updated - keep track of updated rstat_cpu
- * @cgrp: target cgroup
+ * css_rstat_updated - keep track of updated rstat_cpu
+ * @css: target cgroup subsystem state
* @cpu: cpu on which rstat_cpu was updated
*
- * @cgrp's rstat_cpu on @cpu was updated. Put it on the parent's matching
- * rstat_cpu->updated_children list. See the comment on top of
+ * @css->cgroup's rstat_cpu on @cpu was updated. Put it on the parent's
+ * matching rstat_cpu->updated_children list. See the comment on top of
* cgroup_rstat_cpu definition for details.
*/
-__bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
+__bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
{
+ struct cgroup *cgrp = css->cgroup;
raw_spinlock_t *cpu_lock = per_cpu_ptr(&cgroup_rstat_cpu_lock, cpu);
unsigned long flags;
@@ -104,7 +108,7 @@ __bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
if (data_race(cgroup_rstat_cpu(cgrp, cpu)->updated_next))
return;
- flags = _cgroup_rstat_cpu_lock(cpu_lock, cpu, cgrp, true);
+ flags = _css_rstat_cpu_lock(cpu_lock, cpu, css, true);
/* put @cgrp and all ancestors on the corresponding updated lists */
while (true) {
@@ -132,7 +136,7 @@ __bpf_kfunc void cgroup_rstat_updated(struct cgroup *cgrp, int cpu)
cgrp = parent;
}
- _cgroup_rstat_cpu_unlock(cpu_lock, cpu, cgrp, flags, true);
+ _css_rstat_cpu_unlock(cpu_lock, cpu, css, flags, true);
}
/**
@@ -213,7 +217,7 @@ static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
struct cgroup *head = NULL, *parent, *child;
unsigned long flags;
- flags = _cgroup_rstat_cpu_lock(cpu_lock, cpu, root, false);
+ flags = _css_rstat_cpu_lock(cpu_lock, cpu, &root->self, false);
/* Return NULL if this subtree is not on-list */
if (!rstatc->updated_next)
@@ -250,14 +254,14 @@ static struct cgroup *cgroup_rstat_updated_list(struct cgroup *root, int cpu)
if (child != root)
head = cgroup_rstat_push_children(head, child, cpu);
unlock_ret:
- _cgroup_rstat_cpu_unlock(cpu_lock, cpu, root, flags, false);
+ _css_rstat_cpu_unlock(cpu_lock, cpu, &root->self, flags, false);
return head;
}
/*
* A hook for bpf stat collectors to attach to and flush their stats.
- * Together with providing bpf kfuncs for cgroup_rstat_updated() and
- * cgroup_rstat_flush(), this enables a complete workflow where bpf progs that
+ * Together with providing bpf kfuncs for css_rstat_updated() and
+ * css_rstat_flush(), this enables a complete workflow where bpf progs that
* collect cgroup stats can integrate with rstat for efficient flushing.
*
* A static noinline declaration here could cause the compiler to optimize away
@@ -285,9 +289,11 @@ __bpf_hook_end();
* value -1 is used when obtaining the main lock else this is the CPU
* number processed last.
*/
-static inline void __cgroup_rstat_lock(struct cgroup *cgrp, int cpu_in_loop)
+static inline void __css_rstat_lock(struct cgroup_subsys_state *css,
+ int cpu_in_loop)
__acquires(&cgroup_rstat_lock)
{
+ struct cgroup *cgrp = css->cgroup;
bool contended;
contended = !spin_trylock_irq(&cgroup_rstat_lock);
@@ -298,36 +304,41 @@ static inline void __cgroup_rstat_lock(struct cgroup *cgrp, int cpu_in_loop)
trace_cgroup_rstat_locked(cgrp, cpu_in_loop, contended);
}
-static inline void __cgroup_rstat_unlock(struct cgroup *cgrp, int cpu_in_loop)
+static inline void __css_rstat_unlock(struct cgroup_subsys_state *css,
+ int cpu_in_loop)
__releases(&cgroup_rstat_lock)
{
+ struct cgroup *cgrp = css->cgroup;
+
trace_cgroup_rstat_unlock(cgrp, cpu_in_loop, false);
spin_unlock_irq(&cgroup_rstat_lock);
}
/**
- * cgroup_rstat_flush - flush stats in @cgrp's subtree
- * @cgrp: target cgroup
+ * css_rstat_flush - flush stats in @css->cgroup's subtree
+ * @css: target cgroup subsystem state
*
- * Collect all per-cpu stats in @cgrp's subtree into the global counters
+ * Collect all per-cpu stats in @css->cgroup's subtree into the global counters
* and propagate them upwards. After this function returns, all cgroups in
* the subtree have up-to-date ->stat.
*
- * This also gets all cgroups in the subtree including @cgrp off the
+ * This also gets all cgroups in the subtree including @css->cgroup off the
* ->updated_children lists.
*
* This function may block.
*/
-__bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp)
+__bpf_kfunc void css_rstat_flush(struct cgroup_subsys_state *css)
{
+ struct cgroup *cgrp = css->cgroup;
int cpu;
might_sleep();
for_each_possible_cpu(cpu) {
- struct cgroup *pos = cgroup_rstat_updated_list(cgrp, cpu);
+ struct cgroup *pos;
/* Reacquire for each CPU to avoid disabling IRQs too long */
- __cgroup_rstat_lock(cgrp, cpu);
+ __css_rstat_lock(css, cpu);
+ pos = cgroup_rstat_updated_list(cgrp, cpu);
for (; pos; pos = pos->rstat_flush_next) {
struct cgroup_subsys_state *css;
@@ -340,14 +351,15 @@ __bpf_kfunc void cgroup_rstat_flush(struct cgroup *cgrp)
css->ss->css_rstat_flush(css, cpu);
rcu_read_unlock();
}
- __cgroup_rstat_unlock(cgrp, cpu);
+ __css_rstat_unlock(css, cpu);
if (!cond_resched())
cpu_relax();
}
}
-int cgroup_rstat_init(struct cgroup *cgrp)
+int css_rstat_init(struct cgroup_subsys_state *css)
{
+ struct cgroup *cgrp = css->cgroup;
int cpu;
/* the root cgrp has rstat_cpu preallocated */
@@ -378,11 +390,12 @@ int cgroup_rstat_init(struct cgroup *cgrp)
return 0;
}
-void cgroup_rstat_exit(struct cgroup *cgrp)
+void css_rstat_exit(struct cgroup_subsys_state *css)
{
+ struct cgroup *cgrp = css->cgroup;
int cpu;
- cgroup_rstat_flush(cgrp);
+ css_rstat_flush(&cgrp->self);
/* sanity check */
for_each_possible_cpu(cpu) {
@@ -489,7 +502,7 @@ static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
unsigned long flags)
{
u64_stats_update_end_irqrestore(&rstatbc->bsync, flags);
- cgroup_rstat_updated(cgrp, smp_processor_id());
+ css_rstat_updated(&cgrp->self, smp_processor_id());
put_cpu_ptr(rstatbc);
}
@@ -591,12 +604,12 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
struct cgroup_base_stat bstat;
if (cgroup_parent(cgrp)) {
- cgroup_rstat_flush(cgrp);
- __cgroup_rstat_lock(cgrp, -1);
+ css_rstat_flush(&cgrp->self);
+ __css_rstat_lock(&cgrp->self, -1);
bstat = cgrp->bstat;
cputime_adjust(&cgrp->bstat.cputime, &cgrp->prev_cputime,
&bstat.cputime.utime, &bstat.cputime.stime);
- __cgroup_rstat_unlock(cgrp, -1);
+ __css_rstat_unlock(&cgrp->self, -1);
} else {
root_cgroup_cputime(&bstat);
}
@@ -618,10 +631,10 @@ void cgroup_base_stat_cputime_show(struct seq_file *seq)
cgroup_force_idle_show(seq, &bstat);
}
-/* Add bpf kfuncs for cgroup_rstat_updated() and cgroup_rstat_flush() */
+/* Add bpf kfuncs for css_rstat_updated() and css_rstat_flush() */
BTF_KFUNCS_START(bpf_rstat_kfunc_ids)
-BTF_ID_FLAGS(func, cgroup_rstat_updated)
-BTF_ID_FLAGS(func, cgroup_rstat_flush, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, css_rstat_updated)
+BTF_ID_FLAGS(func, css_rstat_flush, KF_SLEEPABLE)
BTF_KFUNCS_END(bpf_rstat_kfunc_ids)
static const struct btf_kfunc_id_set bpf_rstat_kfunc_set = {
@@ -582,7 +582,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, int val)
if (!val)
return;
- cgroup_rstat_updated(memcg->css.cgroup, cpu);
+ css_rstat_updated(&memcg->css, cpu);
statc = this_cpu_ptr(memcg->vmstats_percpu);
for (; statc; statc = statc->parent) {
stats_updates = READ_ONCE(statc->stats_updates) + abs(val);
@@ -614,7 +614,7 @@ static void __mem_cgroup_flush_stats(struct mem_cgroup *memcg, bool force)
if (mem_cgroup_is_root(memcg))
WRITE_ONCE(flush_last_time, jiffies_64);
- cgroup_rstat_flush(memcg->css.cgroup);
+ css_rstat_flush(&memcg->css);
}
/*
@@ -37,8 +37,9 @@ struct {
__type(value, struct attach_counter);
} attach_counters SEC(".maps");
-extern void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
-extern void cgroup_rstat_flush(struct cgroup *cgrp) __ksym;
+extern void css_rstat_updated(
+ struct cgroup_subsys_state *css, int cpu) __ksym;
+extern void css_rstat_flush(struct cgroup_subsys_state *css) __ksym;
static uint64_t cgroup_id(struct cgroup *cgrp)
{
@@ -75,7 +76,7 @@ int BPF_PROG(counter, struct cgroup *dst_cgrp, struct task_struct *leader,
else if (create_percpu_attach_counter(cg_id, 1))
return 0;
- cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id());
+ css_rstat_updated(&dst_cgrp->self, bpf_get_smp_processor_id());
return 0;
}
@@ -141,7 +142,7 @@ int BPF_PROG(dumper, struct bpf_iter_meta *meta, struct cgroup *cgrp)
return 1;
/* Flush the stats to make sure we get the most updated numbers */
- cgroup_rstat_flush(cgrp);
+ css_rstat_flush(&cgrp->self);
total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id);
if (!total_counter) {
This non-functional change serves as preparation for moving to subsystem-based rstat trees. To simplify future commits, change the signatures of existing cgroup-based rstat functions to become css-based and rename them to reflect that. Though the signatures have changed, the implementations have not. Within these functions use the css->cgroup pointer to obtain the associated cgroup and allow code to function the same just as it did before this patch. At applicable call sites, pass the subsystem-specific css pointer as an argument or pass a pointer to cgroup::self if not in subsystem context. Note that cgroup_rstat_updated_list() and cgroup_rstat_push_children() are not altered yet since there would be a larger amount of css to cgroup conversions which may overcomplicate the code at this intermediate phase. Signed-off-by: JP Kobryn <inwardvessel@gmail.com> --- block/blk-cgroup.c | 6 +- include/linux/cgroup-defs.h | 2 +- include/linux/cgroup.h | 4 +- kernel/cgroup/cgroup-internal.h | 4 +- kernel/cgroup/cgroup.c | 30 ++++--- kernel/cgroup/rstat.c | 83 +++++++++++-------- mm/memcontrol.c | 4 +- .../bpf/progs/cgroup_hierarchical_stats.c | 9 +- 8 files changed, 80 insertions(+), 62 deletions(-)