@@ -20,6 +20,7 @@
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/close_range.h>
+#include <linux/misc_cgroup.h>
#include <net/sock.h>
#include "internal.h"
@@ -318,6 +319,45 @@ static unsigned int sane_fdtable_size(struct fdtable *fdt, unsigned int max_fds)
return ALIGN(min(count, max_fds), BITS_PER_LONG);
}
+#ifdef CONFIG_CGROUP_MISC
+/*
+ * Charge @count open fds to the misc cgroup that owns @files
+ * (files->mcg).  Returns 0 on success, or a negative errno when the
+ * cgroup's (or an ancestor's) "nofile" limit would be exceeded.
+ */
+static int charge_current_fds(struct files_struct *files, unsigned int count)
+{
+	return misc_cg_try_charge(MISC_CG_RES_NOFILE, files->mcg, count);
+}
+
+/* Return @count previously charged fds to the owning misc cgroup. */
+static void uncharge_current_fds(struct files_struct *files, unsigned int count)
+{
+	misc_cg_uncharge(MISC_CG_RES_NOFILE, files->mcg, count);
+}
+
+/*
+ * Make @newf own a reference on the current task's misc cgroup; the
+ * reference is dropped by files_put_misc_cg().
+ */
+static void files_get_misc_cg(struct files_struct *newf)
+{
+	newf->mcg = get_current_misc_cg();
+}
+
+/* Drop the misc cgroup reference taken by files_get_misc_cg(). */
+static void files_put_misc_cg(struct files_struct *newf)
+{
+	put_misc_cg(newf->mcg);
+}
+#else
+/* !CONFIG_CGROUP_MISC stubs: charging always succeeds and is a no-op. */
+static int charge_current_fds(struct files_struct *files, unsigned int count)
+{
+	return 0;
+}
+
+static void uncharge_current_fds(struct files_struct *files, unsigned int count)
+{
+}
+
+static void files_get_misc_cg(struct files_struct *newf)
+{
+}
+
+static void files_put_misc_cg(struct files_struct *newf)
+{
+}
+#endif
+
/*
* Allocate a new files structure and copy contents from the
* passed in files structure.
@@ -341,6 +381,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
newf->resize_in_progress = false;
init_waitqueue_head(&newf->resize_wait);
newf->next_fd = 0;
+ files_get_misc_cg(newf);
new_fdt = &newf->fdtab;
new_fdt->max_fds = NR_OPEN_DEFAULT;
new_fdt->close_on_exec = newf->close_on_exec_init;
@@ -350,6 +391,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
spin_lock(&oldf->file_lock);
old_fdt = files_fdtable(oldf);
+
open_files = sane_fdtable_size(old_fdt, max_fds);
/*
@@ -411,9 +453,22 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int
rcu_assign_pointer(newf->fdt, new_fdt);
- return newf;
+ if (!charge_current_fds(newf, count_open_files(new_fdt)))
+ return newf;
+
+ new_fds = new_fdt->fd;
+ for (i = open_files; i != 0; i--) {
+ struct file *f = *new_fds++;
+
+ if (f)
+ fput(f);
+ }
+ if (new_fdt != &newf->fdtab)
+ __free_fdtable(new_fdt);
+ *errorp = -EMFILE;
out_release:
+ files_put_misc_cg(newf);
kmem_cache_free(files_cachep, newf);
out:
return NULL;
@@ -439,6 +494,7 @@ static struct fdtable *close_files(struct files_struct * files)
if (set & 1) {
struct file * file = xchg(&fdt->fd[i], NULL);
if (file) {
+ uncharge_current_fds(files, 1);
filp_close(file, files);
cond_resched();
}
@@ -448,6 +504,8 @@ static struct fdtable *close_files(struct files_struct * files)
}
}
+ files_put_misc_cg(files);
+
return fdt;
}
@@ -542,6 +600,10 @@ static int alloc_fd(unsigned start, unsigned end, unsigned flags)
if (error)
goto repeat;
+ error = -EMFILE;
+ if (charge_current_fds(files, 1) < 0)
+ goto out;
+
if (start <= files->next_fd)
files->next_fd = fd + 1;
@@ -578,6 +640,8 @@ EXPORT_SYMBOL(get_unused_fd_flags);
static void __put_unused_fd(struct files_struct *files, unsigned int fd)
{
struct fdtable *fdt = files_fdtable(files);
+ if (test_bit(fd, fdt->open_fds))
+ uncharge_current_fds(files, 1);
__clear_open_fd(fd, fdt);
if (fd < files->next_fd)
files->next_fd = fd;
@@ -1248,7 +1312,7 @@ __releases(&files->file_lock)
*/
fdt = files_fdtable(files);
tofree = fdt->fd[fd];
- if (!tofree && fd_is_open(fd, fdt))
+ if (!tofree && (fd_is_open(fd, fdt) || charge_current_fds(files, 1) < 0))
goto Ebusy;
get_file(file);
rcu_assign_pointer(fdt->fd[fd], file);
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include <linux/init.h>
#include <linux/fs.h>
+#include <linux/misc_cgroup.h>
#include <linux/atomic.h>
@@ -65,6 +66,9 @@ struct files_struct {
unsigned long open_fds_init[1];
unsigned long full_fds_bits_init[1];
struct file __rcu * fd_array[NR_OPEN_DEFAULT];
+#ifdef CONFIG_CGROUP_MISC
+ struct misc_cg *mcg;
+#endif
};
struct file_operations;
@@ -18,6 +18,7 @@ enum misc_res_type {
/* AMD SEV-ES ASIDs resource */
MISC_CG_RES_SEV_ES,
#endif
+ MISC_CG_RES_NOFILE,
MISC_CG_RES_TYPES
};
@@ -12,6 +12,8 @@
#include <linux/atomic.h>
#include <linux/slab.h>
#include <linux/misc_cgroup.h>
+#include <linux/mm.h>
+#include <linux/fdtable.h>
#define MAX_STR "max"
#define MAX_NUM U64_MAX
@@ -24,6 +26,7 @@ static const char *const misc_res_name[] = {
/* AMD SEV-ES ASIDs resource */
"sev_es",
#endif
+ "nofile",
};
/* Root misc cgroup */
@@ -37,7 +40,9 @@ static struct misc_cg root_cg;
* more than the actual capacity. We are using Limits resource distribution
* model of cgroup for miscellaneous controller.
*/
-static u64 misc_res_capacity[MISC_CG_RES_TYPES];
+static u64 misc_res_capacity[MISC_CG_RES_TYPES] = {
+ [MISC_CG_RES_NOFILE] = MAX_NUM,
+};
/**
* parent_misc() - Get the parent of the passed misc cgroup.
@@ -445,10 +450,203 @@ static void misc_cg_free(struct cgroup_subsys_state *css)
kfree(css_misc(css));
}
+/*
+ * Roll back the charge moves made by misc_cg_can_attach() for every
+ * task in @tset that precedes @stop (@stop == NULL reverts the whole
+ * set): re-charge the task's still-current (old) cgroup, uncharge the
+ * destination, and point files->mcg back at the old cgroup.
+ *
+ * NOTE(review): misc_cg_charge() (an unconditional, cannot-fail charge)
+ * is not declared in the header changes visible here -- presumably
+ * added elsewhere in this series; verify it exists.
+ */
+static void revert_attach_until(struct cgroup_taskset *tset, struct task_struct *stop)
+{
+	struct task_struct *task;
+	struct cgroup_subsys_state *dst_css;
+
+	cgroup_taskset_for_each(task, dst_css, tset) {
+		struct misc_cg *misc, *old_misc;
+		struct cgroup_subsys_state *old_css;
+		struct files_struct *files;
+		struct fdtable *fdt;
+		unsigned long nofile;
+
+		/* Tasks at and after @stop were never moved. */
+		if (task == stop)
+			break;
+
+		misc = css_misc(dst_css);
+		old_css = task_css(task, misc_cgrp_id);
+		old_misc = css_misc(old_css);
+
+		/* can_attach() skipped this task; nothing to undo. */
+		if (misc == old_misc)
+			continue;
+
+		/* task_lock() pins task->files; file_lock protects fdt/mcg. */
+		task_lock(task);
+		files = task->files;
+		spin_lock(&files->file_lock);
+		fdt = files_fdtable(files);
+
+		/* Shared files_struct already reverted (or never moved). */
+		if (old_misc == files->mcg)
+			goto done;
+
+		WARN_ON_ONCE(misc != files->mcg);
+
+		nofile = count_open_files(fdt);
+		misc_cg_charge(MISC_CG_RES_NOFILE, old_misc, nofile);
+		misc_cg_uncharge(MISC_CG_RES_NOFILE, misc, nofile);
+
+		/* Move the css reference back to the old cgroup. */
+		put_misc_cg(files->mcg);
+		css_get(old_css);
+		files->mcg = old_misc;
+
+done:
+		spin_unlock(&files->file_lock);
+		task_unlock(task);
+	}
+}
+
+/*
+ * ->can_attach: move the "nofile" charge for each migrating task's
+ * files_struct from its current misc cgroup to the destination cgroup.
+ * Fails the whole migration if any destination limit would be
+ * exceeded; anything already moved is rolled back via
+ * revert_attach_until().
+ */
+static int misc_cg_can_attach(struct cgroup_taskset *tset)
+{
+	struct task_struct *task;
+	struct cgroup_subsys_state *dst_css;
+
+	cgroup_taskset_for_each(task, dst_css, tset) {
+		struct misc_cg *misc, *old_misc;
+		struct cgroup_subsys_state *old_css;
+		unsigned long nofile;
+		struct files_struct *files;
+		struct fdtable *fdt;
+		int ret;
+
+		misc = css_misc(dst_css);
+		old_css = task_css(task, misc_cgrp_id);
+		old_misc = css_misc(old_css);
+
+		/* Task is not actually changing misc cgroup. */
+		if (misc == old_misc)
+			continue;
+
+		/* task_lock() pins task->files; file_lock protects fdt/mcg. */
+		task_lock(task);
+		files = task->files;
+		spin_lock(&files->file_lock);
+		fdt = files_fdtable(files);
+
+		/*
+		 * If this task->files was already in the right place (either
+		 * because of dup_fd() or because some other thread had already
+		 * migrated it), we don't need to do anything.
+		 */
+		if (misc == files->mcg)
+			goto done;
+
+		/*
+		 * NOTE(review): can this fire for a files_struct shared
+		 * (CLONE_FILES) with a task sitting in a third cgroup?
+		 * Confirm before relying on the WARN being impossible.
+		 */
+		WARN_ON_ONCE(old_misc != files->mcg);
+
+		nofile = count_open_files(fdt);
+		ret = misc_cg_try_charge(MISC_CG_RES_NOFILE, misc, nofile);
+		if (ret < 0) {
+			spin_unlock(&files->file_lock);
+			task_unlock(task);
+			/* Undo every task moved before this one. */
+			revert_attach_until(tset, task);
+			return ret;
+		}
+		misc_cg_uncharge(MISC_CG_RES_NOFILE, old_misc, nofile);
+
+		/*
+		 * let's ref the new table, install it, and
+		 * deref the old one.
+		 */
+		put_misc_cg(files->mcg);
+		css_get(dst_css);
+		files->mcg = misc;
+
+done:
+		spin_unlock(&files->file_lock);
+		task_unlock(task);
+
+	}
+
+	return 0;
+}
+
+/*
+ * ->cancel_attach: another controller vetoed the migration after our
+ * can_attach() already moved charges, so roll back the entire taskset
+ * (a NULL @stop means "revert everything").
+ */
+static void misc_cg_cancel_attach(struct cgroup_taskset *tset)
+{
+	revert_attach_until(tset, NULL);
+}
+
+/*
+ * ->can_fork: move the new task's fds charge from the init css_set
+ * (where cgroup_fork()/dup_fd() left it) to the destination cgroup.
+ * Returns 0 on success or the misc_cg_try_charge() error, which aborts
+ * the fork.
+ *
+ * Fix: task_get_css() takes a css reference that the original never
+ * released; drop it with css_put() once the sanity check is done,
+ * otherwise every fork leaks a reference on the init misc css.
+ */
+static int misc_cg_can_fork(struct task_struct *task, struct css_set *cset)
+{
+	struct misc_cg *dst_misc, *init_misc;
+	struct files_struct *files;
+	struct fdtable *fdt;
+	unsigned long nofile;
+	struct cgroup_subsys_state *dst_css, *cur_css;
+	int ret;
+
+	init_misc = css_misc(init_css_set.subsys[misc_cgrp_id]);
+	cur_css = task_get_css(task, misc_cgrp_id);
+
+	/* A freshly forked task must still sit in the init css_set. */
+	WARN_ON_ONCE(init_misc != css_misc(cur_css));
+	/* The reference was only needed for the check above. */
+	css_put(cur_css);
+
+	dst_css = cset->subsys[misc_cgrp_id];
+	dst_misc = css_misc(dst_css);
+
+	/*
+	 * When forking, tasks are initially put into the init_css_set (see
+	 * cgroup_fork()). Then, we do a dup_fd() and charge init_css_set for
+	 * the new task's fds. We need to migrate from the init_css_set to the
+	 * target one so we can charge the right place.
+	 */
+	task_lock(task);
+	files = task->files;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+
+	ret = 0;
+	/*
+	 * Already owned by the destination cgroup (e.g. CLONE_FILES with a
+	 * sibling migrated first) -- nothing to move.
+	 *
+	 * NOTE(review): no charge is taken in this case, yet
+	 * misc_cg_cancel_fork() uncharges unconditionally; confirm the
+	 * pairing cannot underflow for shared files_structs.
+	 */
+	if (files->mcg == dst_misc)
+		goto out;
+
+	nofile = count_open_files(fdt);
+	ret = misc_cg_try_charge(MISC_CG_RES_NOFILE, dst_misc, nofile);
+	if (ret < 0)
+		goto out;
+
+	/* Transfer the existing charge away from the init cgroup... */
+	misc_cg_uncharge(MISC_CG_RES_NOFILE, init_misc, nofile);
+
+	/* ...and make the files_struct own a ref on the destination css. */
+	put_misc_cg(files->mcg);
+	css_get(dst_css);
+	files->mcg = dst_misc;
+
+out:
+	spin_unlock(&files->file_lock);
+	task_unlock(task);
+
+	return ret;
+}
+
+/*
+ * ->cancel_fork: the fork is being aborted after misc_cg_can_fork()
+ * succeeded; drop the fds charge that can_fork moved onto the
+ * destination cgroup.  No re-charge of the init cgroup is needed
+ * because the aborted child is going away.
+ *
+ * NOTE(review): this uncharges @dst_misc unconditionally, but
+ * can_fork() charges nothing when files->mcg was already @dst_misc
+ * (shared files_struct) -- verify this cannot underflow the counter.
+ * NOTE(review): files->mcg is also left pointing at the destination
+ * cgroup rather than being restored; confirm that is intended for
+ * CLONE_FILES forks where @files outlives the aborted child.
+ */
+static void misc_cg_cancel_fork(struct task_struct *task, struct css_set *cset)
+{
+	struct misc_cg *dst_misc;
+	struct files_struct *files;
+	struct fdtable *fdt;
+	unsigned long nofile;
+	struct cgroup_subsys_state *dst_css;
+
+	dst_css = cset->subsys[misc_cgrp_id];
+	dst_misc = css_misc(dst_css);
+
+	/* Same locking as can_fork(): pin task->files, then its fdtable. */
+	task_lock(task);
+	files = task->files;
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+
+	/*
+	 * we don't need to re-charge anyone, since this fork is going away.
+	 */
+	nofile = count_open_files(fdt);
+	misc_cg_uncharge(MISC_CG_RES_NOFILE, dst_misc, nofile);
+	spin_unlock(&files->file_lock);
+	task_unlock(task);
+}
+
/* Cgroup controller callbacks */
struct cgroup_subsys misc_cgrp_subsys = {
.css_alloc = misc_cg_alloc,
.css_free = misc_cg_free,
.legacy_cftypes = misc_cg_files,
.dfl_cftypes = misc_cg_files,
+ .can_attach = misc_cg_can_attach,
+ .cancel_attach = misc_cg_cancel_attach,
+ .can_fork = misc_cg_can_fork,
+ .cancel_fork = misc_cg_cancel_fork,
};