@@ -194,6 +194,7 @@ read the file /proc/PID/status:
CapBnd: ffffffffffffffff
NoNewPrivs: 0
Seccomp: 0
+ ModulesAutoloadMode: 0
voluntary_ctxt_switches: 0
nonvoluntary_ctxt_switches: 1
@@ -267,6 +268,8 @@ Table 1-2: Contents of the status files (as of 4.8)
CapBnd bitmap of capabilities bounding set
NoNewPrivs no_new_privs, like prctl(PR_GET_NO_NEW_PRIV, ...)
Seccomp seccomp mode, like prctl(PR_GET_SECCOMP, ...)
+ ModulesAutoloadMode modules auto-load mode, like
+ prctl(PR_GET_MODULES_AUTOLOAD_MODE, ...)
Cpus_allowed mask of CPUs on which this process may run
Cpus_allowed_list Same as previous, but in "list format"
Mems_allowed mask of memory nodes allowed to this process
@@ -17,6 +17,7 @@ place where this information is gathered.
:maxdepth: 2
no_new_privs
+ modules_autoload_mode
seccomp_filter
unshare
new file mode 100644
@@ -0,0 +1,115 @@
+======================================
+Per-task module auto-load restrictions
+======================================
+
+
+Introduction
+============
+
+Usually, a request for a kernel feature that is implemented by a module
+that is not yet loaded may trigger the automatic module loading feature,
+which transparently satisfies userspace and provides numerous other
+features as they are needed. In this case an implicit kernel module load
+operation happens.
+
+In most cases, loading or unloading a kernel module is an explicit
+operation, and programs are required to have the ``CAP_SYS_MODULE``
+capability to perform it. However, with implicit module loading, no
+capabilities are required, or only ``CAP_NET_ADMIN`` in the rare cases where
+the module has the 'netdev-%s' alias. Historically this was always the case,
+as automatic module loading is one of the most important and transparent
+operations of Linux: users expect that their programs just work. Yet recent
+cases showed that this can be abused by unprivileged users or attackers to
+load modules that were not updated, or modules that contain bugs and
+vulnerabilities.
+
+Currently most Linux code is in the form of modules; hence, being able to
+control automatic module loading is in some cases as important as the
+operation itself, especially in contexts where Linux is used in
+different appliances.
+
+Restricting automatic module loading gives administrators the
+appropriate time to update modules, or to deny module autoloading in advance.
+In a container or sandbox world where apps can be moved from one context to
+another, the ability to prevent some containers or apps from loading extra
+kernel modules avoids exposing kernel interfaces that may not receive the
+same care as other parts of the core. The DCCP vulnerability CVE-2017-6074,
+which can be triggered by unprivileged users, and CVE-2017-7184 in the XFRM
+framework are real examples where users or programs were able to expose
+such kernel interfaces and escape their sandbox.
+
+The per-task ``modules_autoload_mode`` allows automatic module loading to
+be restricted per task, preventing the kernel from exposing more of its
+interface. This is particularly useful for containers and sandboxes: as
+noted above, they are prevented from affecting the rest of the system,
+while the system's functionality is unaffected and automatic module
+loading remains available to other tasks.
+
+
+Usage
+=====
+
+When the kernel is compiled with modules support ``CONFIG_MODULES``, then:
+
+``PR_SET_MODULES_AUTOLOAD_MODE``:
+ Set the current task ``modules_autoload_mode``. When a module
+ auto-load request is triggered by current task, then the
+ operation has first to satisfy the per-task access mode before
+ attempting to implicitly load the module. As an example,
+ automatic loading of modules that contain bugs or vulnerabilities
+ can be restricted and unprivileged users can no longer abuse such
+ interfaces. Once set, this setting is inherited across ``fork(2)``,
+ ``clone(2)`` and ``execve(2)``.
+
+ Prior to use, the task must call ``prctl(PR_SET_NO_NEW_PRIVS, 1)``
+ or run with ``CAP_SYS_ADMIN`` privileges in its namespace. If
+ neither condition holds, ``-EACCES`` will be returned. This requirement
+ ensures that unprivileged programs cannot affect the behaviour of, or
+ surprise, privileged children.
+
+ Usage:
+ ``prctl(PR_SET_MODULES_AUTOLOAD_MODE, mode, 0, 0, 0);``
+
+ The 'mode' argument supports the following values:
+ 0 There are no restrictions, usually the default unless set
+ by parent.
+ 1 The task must have ``CAP_SYS_MODULE`` to be able to trigger a
+ module auto-load operation, or ``CAP_NET_ADMIN`` for modules
+ with a 'netdev-%s' alias.
+ 2 Automatic module loading is disabled for the current task.
+
+ The mode may only be increased, never decreased, thus ensuring
+ that once applied, processes can never relax their setting.
+
+
+ Returned values:
+ 0 On success.
+ ``-EINVAL`` If 'mode' is not valid, or the operation is not
+ supported.
+ ``-EACCES`` If the task neither has ``CAP_SYS_ADMIN`` in its namespace
+ nor is running with ``no_new_privs``.
+ ``-EPERM`` If 'mode' is less strict than current task
+ ``modules_autoload_mode``.
+
+
+ Note that even if the per-task ``modules_autoload_mode`` allows
+ auto-loading the corresponding modules, automatic module loading
+ may still fail due to the global sysctl ``modules_autoload_mode``.
+ For more details please see Documentation/sysctl/kernel.txt,
+ section "modules_autoload_mode".
+
+
+ When a request to a kernel module is denied, the module name with the
+ corresponding process name and its pid are logged. Administrators can
+ use such information to explicitly load the appropriate modules.
+
+
+``PR_GET_MODULES_AUTOLOAD_MODE``:
+ Return the current task ``modules_autoload_mode``.
+
+ Usage:
+ ``prctl(PR_GET_MODULES_AUTOLOAD_MODE, 0, 0, 0, 0);``
+
+ Returned values:
+ mode The task's ``modules_autoload_mode``
+ ``-ENOSYS`` If the kernel was compiled without ``CONFIG_MODULES``.
@@ -88,6 +88,7 @@
#include <linux/string_helpers.h>
#include <linux/user_namespace.h>
#include <linux/fs_struct.h>
+#include <linux/module.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -346,10 +347,15 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
{
+ int autoload = task_modules_autoload_mode(p);
+
seq_put_decimal_ull(m, "NoNewPrivs:\t", task_no_new_privs(p));
#ifdef CONFIG_SECCOMP
seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
#endif
+ if (autoload != -ENOSYS) /* -ENOSYS: built without CONFIG_MODULES */
+ seq_put_decimal_ull(m, "\nModulesAutoloadMode:\t", autoload);
+
seq_putc(m, '\n');
}
@@ -159,6 +159,13 @@ extern struct cred init_cred;
# define INIT_CGROUP_SCHED(tsk)
#endif
+#ifdef CONFIG_MODULES
+# define INIT_MODULES_AUTOLOAD_MODE(tsk) \
+ .modules_autoload_mode = 0, /* mode 0: no restrictions */
+#else /* !CONFIG_MODULES: task_struct has no modules_autoload_mode field */
+# define INIT_MODULES_AUTOLOAD_MODE(tsk)
+#endif /* CONFIG_MODULES */
+
#ifdef CONFIG_PERF_EVENTS
# define INIT_PERF_EVENTS(tsk) \
.perf_event_mutex = \
@@ -257,6 +264,7 @@ extern struct cred init_cred;
.tasks = LIST_HEAD_INIT(tsk.tasks), \
INIT_PUSHABLE_TASKS(tsk) \
INIT_CGROUP_SCHED(tsk) \
+ INIT_MODULES_AUTOLOAD_MODE(tsk) \
.ptraced = LIST_HEAD_INIT(tsk.ptraced), \
.ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \
.real_parent = &tsk, \
@@ -13,6 +13,7 @@
#include <linux/kmod.h>
#include <linux/init.h>
#include <linux/elf.h>
+#include <linux/sched.h>
#include <linux/stringify.h>
#include <linux/kobject.h>
#include <linux/moduleparam.h>
@@ -507,7 +508,16 @@ bool is_module_percpu_address(unsigned long addr);
bool is_module_text_address(unsigned long addr);
/* Determine whether a module auto-load operation is permitted. */
-int may_autoload_module(char *kmod_name, int allow_cap);
+int may_autoload_module(struct task_struct *task, char *kmod_name, int allow_cap);
+
+/* Set modules_autoload_mode of current task */
+int task_set_modules_autoload_mode(unsigned long value);
+
+/* Return @task's per-task modules_autoload_mode (stored as a 2-bit field) */
+static inline int task_modules_autoload_mode(struct task_struct *task)
+{
+ return task->modules_autoload_mode;
+}
static inline bool within_module_core(unsigned long addr,
const struct module *mod)
@@ -653,11 +663,23 @@ static inline bool is_livepatch_module(struct module *mod)
#else /* !CONFIG_MODULES... */
-static inline int may_autoload_module(char *kmod_name, int allow_cap)
+static inline int may_autoload_module(struct task_struct *task, char *kmod_name,
+ int allow_cap)
{
return -ENOSYS;
}
+/* !CONFIG_MODULES stubs: there is no per-task mode, so report -ENOSYS. */
+static inline int task_set_modules_autoload_mode(unsigned long value)
+{
+	return -ENOSYS;
+}
+
+static inline int task_modules_autoload_mode(struct task_struct *task)
+{
+	return -ENOSYS;
+}
+
static inline struct module *__module_address(unsigned long addr)
{
return NULL;
@@ -613,6 +613,11 @@ struct task_struct {
struct restart_block restart_block;
+#ifdef CONFIG_MODULES
+ /* per-task modules auto-load mode */
+ unsigned modules_autoload_mode:2;
+#endif
+
pid_t pid;
pid_t tgid;
@@ -197,4 +197,12 @@ struct prctl_mm_map {
# define PR_CAP_AMBIENT_LOWER 3
# define PR_CAP_AMBIENT_CLEAR_ALL 4
+/*
+ * Set/get the per-task modules auto-load mode (0: no restrictions,
+ * 1: privileged only, 2: disabled); the mode can be raised, never lowered.
+ * See Documentation/prctl/modules_autoload_mode.txt for more details.
+ */
+#define PR_SET_MODULES_AUTOLOAD_MODE 48
+#define PR_GET_MODULES_AUTOLOAD_MODE 49
+
#endif /* _LINUX_PRCTL_H */
@@ -4301,12 +4301,15 @@ EXPORT_SYMBOL_GPL(__module_text_address);
/**
* may_autoload_module - Determine whether a module auto-load operation
* is permitted
+ * @task: The task performing the request
* @kmod_name: The module name
* @allow_cap: if positive, may allow to auto-load the module if this capability
* is set
*
- * Determine whether a module auto-load operation is allowed or not. The check
- * uses the sysctl "modules_autoload_mode" value.
+ * Determine whether a module auto-load operation is allowed or not. First we
+ * check if the task is allowed to perform the module auto-load request, we
+ * check per-task "modules_autoload_mode", if the access is not denied, then
+ * we check the global sysctl "modules_autoload_mode".
*
* This allows to have more control on automatic module loading, and align it
* with explicit load/unload module operations. The kernel contains several
@@ -4323,11 +4326,14 @@ EXPORT_SYMBOL_GPL(__module_text_address);
*
* Returns 0 if the module request is allowed or -EPERM if not.
*/
-int may_autoload_module(char *kmod_name, int allow_cap)
+int may_autoload_module(struct task_struct *task, char *kmod_name, int allow_cap)
{
- if (modules_autoload_mode == MODULES_AUTOLOAD_ALLOWED)
+ unsigned int autoload = max_t(unsigned int, modules_autoload_mode,
+ task->modules_autoload_mode);
+
+ if (autoload == MODULES_AUTOLOAD_ALLOWED)
return 0;
- else if (modules_autoload_mode == MODULES_AUTOLOAD_PRIVILEGED) {
+ else if (autoload == MODULES_AUTOLOAD_PRIVILEGED) {
/* Check CAP_SYS_MODULE then allow_cap if valid */
if (capable(CAP_SYS_MODULE) ||
(allow_cap > 0 && capable(allow_cap)))
@@ -4338,6 +4344,51 @@ int may_autoload_module(char *kmod_name, int allow_cap)
return -EPERM;
}
+/**
+ * task_set_modules_autoload_mode - Set per-task modules auto-load mode
+ * @value: Value to set "modules_autoload_mode" of current task
+ *
+ * Set current task "modules_autoload_mode". The task has to have
+ * CAP_SYS_ADMIN in its namespace or be running with no_new_privs. This
+ * avoids scenarios where unprivileged tasks can affect the behaviour of
+ * privileged children by restricting module features.
+ *
+ * The task's "modules_autoload_mode" may only be increased, never decreased.
+ *
+ * Returns 0 on success, -EINVAL if @value is not valid, -EACCES if the task
+ * has neither CAP_SYS_ADMIN in its namespace nor no_new_privs set, and
+ * finally -EPERM if @value is less strict than the current task's
+ * "modules_autoload_mode".
+ *
+ */
+int task_set_modules_autoload_mode(unsigned long value)
+{
+ if (value > MODULES_AUTOLOAD_DISABLED)
+ return -EINVAL;
+
+ /*
+ * Setting the task's "modules_autoload_mode" requires that the task
+ * has CAP_SYS_ADMIN in its namespace or is running with no_new_privs.
+ * This avoids scenarios where unprivileged tasks can affect the
+ * behaviour of privileged children by restricting module features.
+ */
+ if (!task_no_new_privs(current) &&
+ security_capable_noaudit(current_cred(), current_user_ns(),
+ CAP_SYS_ADMIN) != 0)
+ return -EACCES;
+
+ /*
+ * The "modules_autoload_mode" may only be increased, never decreased,
+ * ensuring that once applied, processes can never relax their settings.
+ */
+ if (current->modules_autoload_mode > value)
+ return -EPERM;
+ else if (current->modules_autoload_mode < value)
+ current->modules_autoload_mode = value;
+
+ return 0;
+}
+
/* Don't grab lock, we're oopsing. */
void print_modules(void)
{
@@ -886,6 +886,36 @@ static int cap_prctl_drop(unsigned long cap)
return commit_creds(new);
}
+/*
+ * prctl(PR_SET_MODULES_AUTOLOAD_MODE) backend: arg2 is the requested mode,
+ * arg3..arg5 are rejected with -EINVAL unless they are all zero.
+ * Returns 0 on success, -ve on error.
+ */
+static int pr_set_modules_autoload_mode(unsigned long arg2, unsigned long arg3,
+					unsigned long arg4, unsigned long arg5)
+{
+	unsigned long reserved = arg3 | arg4 | arg5;
+
+	return reserved ? -EINVAL :
+			  task_set_modules_autoload_mode(arg2);
+}
+
+/*
+ * Implement PR_GET_MODULES_AUTOLOAD_MODE; arg2..arg5 must all be zero.
+ *
+ * Return current task "modules_autoload_mode", -ve on error.
+ */
+static inline int pr_get_modules_autoload_mode(unsigned long arg2,
+						unsigned long arg3,
+						unsigned long arg4,
+						unsigned long arg5)
+{
+	if (arg2 || arg3 || arg4 || arg5)
+		return -EINVAL;
+
+	return task_modules_autoload_mode(current);
+}
+
/**
* cap_task_prctl - Implement process control functions for this security module
* @option: The process control function requested
@@ -1016,6 +1046,12 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
return commit_creds(new);
}
+ case PR_SET_MODULES_AUTOLOAD_MODE:
+ return pr_set_modules_autoload_mode(arg2, arg3, arg4, arg5);
+
+ case PR_GET_MODULES_AUTOLOAD_MODE:
+ return pr_get_modules_autoload_mode(arg2, arg3, arg4, arg5);
+
default:
/* No functionality available - continue with default */
return -ENOSYS;
@@ -1083,7 +1119,7 @@ int cap_kernel_module_request(char *kmod_name, int allow_cap)
int ret;
char comm[sizeof(current->comm)];
- ret = may_autoload_module(kmod_name, allow_cap);
+ ret = may_autoload_module(current, kmod_name, allow_cap);
if (ret < 0)
pr_notice_ratelimited(
"module: automatic module loading of %.64s by \"%s\"[%d] was denied\n",