@@ -195,6 +195,7 @@ read the file /proc/PID/status:
CapBnd: ffffffffffffffff
NoNewPrivs: 0
Seccomp: 0
+ ModulesAutoloadMode: 0
voluntary_ctxt_switches: 0
nonvoluntary_ctxt_switches: 1
@@ -269,6 +270,8 @@ Table 1-2: Contents of the status files (as of 4.8)
CapBnd bitmap of capabilities bounding set
NoNewPrivs no_new_privs, like prctl(PR_GET_NO_NEW_PRIV, ...)
Seccomp seccomp mode, like prctl(PR_GET_SECCOMP, ...)
+ ModulesAutoloadMode modules auto-load mode, like
+ prctl(PR_GET_MODULES_AUTOLOAD_MODE, ...)
Cpus_allowed mask of CPUs on which this process may run
Cpus_allowed_list Same as previous, but in "list format"
Mems_allowed mask of memory nodes allowed to this process
@@ -17,6 +17,7 @@ place where this information is gathered.
:maxdepth: 2
no_new_privs
+ modules_autoload_mode
seccomp_filter
unshare
new file mode 100644
@@ -0,0 +1,116 @@
+======================================
+Per-task module auto-load restrictions
+======================================
+
+
+Introduction
+============
+
+Usually, a request for a kernel feature that is implemented by a module
+that is not yet loaded may trigger the automatic module loading mechanism.
+This transparently satisfies userspace and makes numerous other features
+available as they are needed. In this case an implicit kernel module
+load operation happens.
+
+In most cases, loading or unloading a kernel module is an explicit
+operation for which programs are required to have the ``CAP_SYS_MODULE``
+capability. However, with implicit module loading, no capabilities are
+required, or only ``CAP_NET_ADMIN`` in rare cases where the module has the
+'netdev-%s' alias. Historically this was always the case, as automatic
+module loading is one of the most important and transparent operations
+of Linux: users expect that their programs just work. Yet recent cases
+showed that this can be abused by unprivileged users or attackers to load
+modules that were not updated, or modules that contain bugs and
+vulnerabilities.
+
+Currently most Linux code is in the form of modules; hence, being able to
+control automatic module loading is in some cases as important as the
+operation itself, especially in contexts where Linux is used in
+different appliances.
+
+Restricting automatic module loading allows administrators to have the
+appropriate time to update or deny module autoloading in advance. In a
+container or sandbox world where apps can be moved from one context to
+another, the ability to restrict some containers or apps from loading extra
+kernel modules will prevent exposing some kernel interfaces that may not
+receive the same care as some other parts of the core. The DCCP vulnerability
+CVE-2017-6074, which can be triggered by unprivileged users, and CVE-2017-7184
+in the XFRM framework are some real examples where users or programs are
+able to expose such kernel interfaces and escape their sandbox.
+
+The per-task ``modules_autoload_mode`` allows restricting automatic module
+loading per task, preventing the kernel from exposing more of its
+interface. This is particularly useful for containers and sandboxes, as
+noted above: they are prevented from affecting the rest of the system,
+while the system's functionality is unaffected, since automatic module
+loading is still available to other tasks.
+
+
+Usage
+=====
+
+When the kernel is compiled with modules support ``CONFIG_MODULES``, then:
+
+``PR_SET_MODULES_AUTOLOAD_MODE``:
+ Set the current task ``modules_autoload_mode``. When a module
+ auto-load request is triggered by current task, then the
+ operation has first to satisfy the per-task access mode before
+ attempting to implicitly load the module. As an example,
+ automatic loading of modules that contain bugs or vulnerabilities
+ can be restricted and unprivileged users can no longer abuse such
+ interfaces. Once set, this setting is inherited across ``fork(2)``,
+ ``clone(2)`` and ``execve(2)``.
+
+ Prior to use, the task must call ``prctl(PR_SET_NO_NEW_PRIVS, 1)``
+ or run with ``CAP_SYS_ADMIN`` privileges in its namespace. If
+ neither is true, ``-EACCES`` will be returned. This requirement
+ ensures that unprivileged programs cannot affect the behaviour of,
+ or surprise, privileged children.
+
+ Usage:
+ ``prctl(PR_SET_MODULES_AUTOLOAD_MODE, mode, 0, 0, 0);``
+
+ The 'mode' argument supports the following values:
+ 0 There are no restrictions, usually the default unless set
+ by parent.
+ 1 The task must have ``CAP_SYS_MODULE`` to be able to trigger a
+ module auto-load operation, or ``CAP_NET_ADMIN`` for modules
+ with a 'netdev-%s' alias.
+ 2 Automatic module loading is disabled for the current task.
+
+ The mode may only be increased, never decreased, thus ensuring
+ that once applied, processes can never relax their setting.
+
+
+ Returned values:
+ 0 On success.
+ ``-EINVAL`` If 'mode' is not valid, or the operation is not
+ supported.
+ ``-EACCES`` If task neither has ``CAP_SYS_ADMIN`` in its namespace
+ nor is running with ``no_new_privs``.
+ ``-EPERM`` If 'mode' is less strict than current task
+ ``modules_autoload_mode``.
+
+
+ Note that even if the per-task ``modules_autoload_mode`` allows
+ auto-loading the corresponding modules, automatic module loading
+ may still fail due to the global sysctl ``modules_autoload_mode``.
+ The default mode of ``modules_autoload_mode`` is to always allow
+ automatic module loading. For more details, please see
+ Documentation/sysctl/kernel.txt, section "modules_autoload_mode".
+
+
+ When a request to a kernel module is denied, the module name with the
+ corresponding process name and its pid are logged. Administrators can
+ use such information to explicitly load the appropriate modules.
+
+
+``PR_GET_MODULES_AUTOLOAD_MODE``:
+ Return the current task ``modules_autoload_mode``.
+
+ Usage:
+ ``prctl(PR_GET_MODULES_AUTOLOAD_MODE, 0, 0, 0, 0);``
+
+ Returned values:
+ mode The task's ``modules_autoload_mode``
+ ``-ENOSYS`` If the kernel was compiled without ``CONFIG_MODULES``.
@@ -90,6 +90,7 @@
#include <linux/string_helpers.h>
#include <linux/user_namespace.h>
#include <linux/fs_struct.h>
+#include <linux/module.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -343,10 +344,15 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
static inline void task_seccomp(struct seq_file *m, struct task_struct *p)
{
+ int autoload = task_modules_autoload_mode(p);
+
seq_put_decimal_ull(m, "NoNewPrivs:\t", task_no_new_privs(p));
#ifdef CONFIG_SECCOMP
seq_put_decimal_ull(m, "\nSeccomp:\t", p->seccomp.mode);
#endif
+ if (autoload != -ENOSYS) /* -ENOSYS is what the stub accessor returns */
+ seq_put_decimal_ull(m, "\nModulesAutoloadMode:\t", autoload);
+
seq_putc(m, '\n');
}
@@ -153,6 +153,13 @@ extern struct cred init_cred;
# define INIT_CGROUP_SCHED(tsk)
#endif
+#ifdef CONFIG_MODULES
+# define INIT_MODULES_AUTOLOAD_MODE(tsk) \
+ .modules_autoload_mode = 0, /* 0: no per-task restriction */
+#else /* !CONFIG_MODULES: task_struct has no modules_autoload_mode field */
+# define INIT_MODULES_AUTOLOAD_MODE(tsk)
+#endif /* CONFIG_MODULES */
+
#ifdef CONFIG_PERF_EVENTS
# define INIT_PERF_EVENTS(tsk) \
.perf_event_mutex = \
@@ -250,6 +257,7 @@ extern struct cred init_cred;
.tasks = LIST_HEAD_INIT(tsk.tasks), \
INIT_PUSHABLE_TASKS(tsk) \
INIT_CGROUP_SCHED(tsk) \
+ INIT_MODULES_AUTOLOAD_MODE(tsk) \
.ptraced = LIST_HEAD_INIT(tsk.ptraced), \
.ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \
.real_parent = &tsk, \
@@ -13,6 +13,7 @@
#include <linux/kmod.h>
#include <linux/init.h>
#include <linux/elf.h>
+#include <linux/sched.h>
#include <linux/stringify.h>
#include <linux/kobject.h>
#include <linux/moduleparam.h>
@@ -510,6 +511,15 @@ bool is_module_text_address(unsigned long addr);
int may_autoload_module(char *kmod_name, int required_cap,
const char *kmod_prefix);
+/* Set 'modules_autoload_mode' of current task */
+int task_set_modules_autoload_mode(unsigned long value);
+
+/* Return the per-task 'modules_autoload_mode' value stored in @task */
+static inline int task_modules_autoload_mode(struct task_struct *task)
+{
+ return task->modules_autoload_mode;
+}
+
static inline bool within_module_core(unsigned long addr,
const struct module *mod)
{
@@ -662,6 +672,16 @@ static inline int may_autoload_module(char *kmod_name, int required_cap,
return -ENOSYS;
}
+static inline int task_set_modules_autoload_mode(unsigned long value)
+{
+ return -ENOSYS; /* stub: @value is ignored, the mode cannot be set */
+}
+
+static inline int task_modules_autoload_mode(struct task_struct *task)
+{
+ return -ENOSYS; /* stub: task_seccomp() skips the field on -ENOSYS */
+}
+
static inline struct module *__module_address(unsigned long addr)
{
return NULL;
@@ -658,6 +658,11 @@ struct task_struct {
struct restart_block restart_block;
+#ifdef CONFIG_MODULES
+ /* per-task modules auto-load mode */
+ unsigned modules_autoload_mode:2;
+#endif
+
pid_t pid;
pid_t tgid;
@@ -211,4 +211,12 @@ struct prctl_mm_map {
#define PR_SET_PDEATHSIG_PROC 48
#define PR_GET_PDEATHSIG_PROC 49
+/*
+ * Control the per-task modules auto-load mode
+ *
+ * See Documentation/userspace-api/modules_autoload_mode.rst for more details.
+ */
+#define PR_SET_MODULES_AUTOLOAD_MODE 50
+#define PR_GET_MODULES_AUTOLOAD_MODE 51
+
#endif /* _LINUX_PRCTL_H */
@@ -4345,6 +4345,7 @@ EXPORT_SYMBOL_GPL(__module_text_address);
/**
* may_autoload_module - Determine whether a module auto-load operation
* is permitted
+ *
* @kmod_name: The module name
* @required_cap: if positive, may allow to auto-load the module if this
* capability is set
@@ -4362,47 +4363,51 @@ EXPORT_SYMBOL_GPL(__module_text_address);
* loading.
*
* However even if the caller has the required capability, the operation can
- * still be denied due to the global "modules_autoload_mode" sysctl mode. Unless
- * set by enduser, the operation is always allowed which is the default.
+ * still be denied due to the per-task "modules_autoload_mode" mode and the
+ * global "modules_autoload_mode" sysctl one. Unless set by enduser, the
+ * operation is always allowed which is the default.
*
* The permission check is performed in this order:
- * 1) If the global sysctl "modules_autoload_mode" is set to 'disabled', then
- * operation is denied.
+ * 1) We calculate the stricter of the two modes:
+ * per-task 'modules_autoload_mode' and global sysctl 'modules_autoload_mode'
+ *
+ * We follow up with the result mode as "modules_autoload_mode":
*
- * 2) If the global sysctl "modules_autoload_mode" is set to 'privileged', then:
+ * 2) If "modules_autoload_mode" is set to 'disabled', then operation is denied.
*
- * 2.1) If "@required_cap" is positive and "@kmod_prefix" is set, then
+ * 3) If "modules_autoload_mode" is set to 'privileged', then:
+ *
+ * 3.1) If "@required_cap" is positive and "@kmod_prefix" is set, then
* if the caller has the capability, the operation is allowed.
*
- * 2.2) If "@required_cap" is positive and "@kmod_prefix" is NULL, then we
+ * 3.2) If "@required_cap" is positive and "@kmod_prefix" is NULL, then we
* fallback to check if caller has CAP_SYS_MODULE, if so, operation is
* allowed.
*
- * 2.3) If caller passes "@required_cap" as a negative then we fallback to
+ * 3.3) If caller passes "@required_cap" as a negative then we fallback to
* check if caller has CAP_SYS_MODULE, if so, operation is allowed.
*
* We require capabilities to autoload modules here, and CAP_SYS_MODULE here is
* the default.
*
- * 2.4) Otherwise operation is denied.
+ * 3.4) Otherwise operation is denied.
*
- * 3) If the global sysctl "modules_autoload_mode" is set to 'allowed' which is
- * the default, then:
+ * 4) If "modules_autoload_mode" is set to 'allowed' which is the default, then:
*
- * 3.1) If "@required_cap" is positive and "@kmod_prefix" is set, we check if
+ * 4.1) If "@required_cap" is positive and "@kmod_prefix" is set, we check if
* caller has the capability, if so, operation is allowed.
* In this case the calling subsystem requires the capability to be set before
* allowing modules autoload operations and we have to honor that.
*
- * 3.2) If "@required_cap" is positive and "@kmod_prefix" is NULL, then we
+ * 4.2) If "@required_cap" is positive and "@kmod_prefix" is NULL, then we
* fallback to check if caller has CAP_SYS_MODULE, if so, operation is
* allowed.
*
- * 3.3) If caller passes "@required_cap" as a negative then operation is
+ * 4.3) If caller passes "@required_cap" as a negative then operation is
* allowed. This is the most common case as it is used now by
* request_module() function.
*
- * 3.4) Otherwise operation is denied.
+ * 4.4) Otherwise operation is denied.
*
* Returns 0 if the module request is allowed or -EPERM if not.
*/
@@ -4410,7 +4415,8 @@ int may_autoload_module(char *kmod_name, int required_cap,
const char *kmod_prefix)
{
int module_require_cap = CAP_SYS_MODULE;
- unsigned int autoload = modules_autoload_mode;
+ unsigned int autoload = max_t(unsigned int, modules_autoload_mode,
+ current->modules_autoload_mode);
/* Short-cut for most use cases where kmod auto-loading is allowed */
if (autoload == MODULES_AUTOLOAD_ALLOWED && required_cap < 0)
@@ -4442,6 +4448,51 @@ int may_autoload_module(char *kmod_name, int required_cap,
return -EPERM;
}
+/**
+ * task_set_modules_autoload_mode - Set per-task modules auto-load mode
+ * @value: New "modules_autoload_mode" for the current task; values above
+ *         MODULES_AUTOLOAD_DISABLED are rejected
+ *
+ * Set current task "modules_autoload_mode". The task has to have
+ * CAP_SYS_ADMIN in its namespace or be running with no_new_privs. This
+ * avoids scenarios where unprivileged tasks can affect the behaviour of
+ * privileged children by restricting module or kernel features.
+ *
+ * The task's "modules_autoload_mode" may only be increased, never decreased;
+ * setting the value that is already in effect succeeds without any change.
+ *
+ * Return: 0 on success, -EINVAL if @value is not valid, -EACCES if the task
+ * neither has CAP_SYS_ADMIN in its namespace nor runs with no_new_privs, and
+ * -EPERM if @value is less strict than the current "modules_autoload_mode".
+ */
+int task_set_modules_autoload_mode(unsigned long value)
+{
+ if (value > MODULES_AUTOLOAD_DISABLED)
+ return -EINVAL;
+
+ /*
+ * Setting the task "modules_autoload_mode" requires that the task has
+ * CAP_SYS_ADMIN in its namespace or is running with no_new_privs.
+ * This avoids scenarios where unprivileged tasks can affect the
+ * behaviour of privileged children by restricting module features.
+ */
+ if (!task_no_new_privs(current) &&
+ security_capable_noaudit(current_cred(), current_user_ns(),
+ CAP_SYS_ADMIN) != 0)
+ return -EACCES;
+
+ /*
+ * The "modules_autoload_mode" may only be increased, never decreased,
+ * ensuring that once applied, processes can never relax their settings.
+ */
+ if (current->modules_autoload_mode > value)
+ return -EPERM;
+ else if (current->modules_autoload_mode < value)
+ current->modules_autoload_mode = value;
+
+ return 0;
+}
+
/* Don't grab lock, we're oopsing. */
void print_modules(void)
{
@@ -1157,6 +1157,36 @@ static int cap_prctl_drop(unsigned long cap)
return commit_creds(new);
}
+/*
+ * Implement PR_SET_MODULES_AUTOLOAD_MODE: arg2 is the requested mode and
+ * arg3..arg5 must be zero, otherwise -EINVAL is returned.
+ * Returns 0 on success, -ve on error (see task_set_modules_autoload_mode()).
+ */
+static int pr_set_modules_autoload_mode(unsigned long arg2, unsigned long arg3,
+ unsigned long arg4, unsigned long arg5)
+{
+ if (arg3 || arg4 || arg5)
+ return -EINVAL;
+
+ return task_set_modules_autoload_mode(arg2);
+}
+
+/*
+ * Implement PR_GET_MODULES_AUTOLOAD_MODE: all of arg2..arg5 must be zero,
+ * otherwise -EINVAL is returned.
+ * Return current task "modules_autoload_mode", -ve on error.
+ */
+static inline int pr_get_modules_autoload_mode(unsigned long arg2,
+ unsigned long arg3,
+ unsigned long arg4,
+ unsigned long arg5)
+{
+ if (arg2 || arg3 || arg4 || arg5)
+ return -EINVAL;
+
+ return task_modules_autoload_mode(current);
+}
+
/**
* cap_task_prctl - Implement process control functions for this security module
* @option: The process control function requested
@@ -1287,6 +1317,12 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
return commit_creds(new);
}
+ case PR_SET_MODULES_AUTOLOAD_MODE:
+ return pr_set_modules_autoload_mode(arg2, arg3, arg4, arg5);
+
+ case PR_GET_MODULES_AUTOLOAD_MODE:
+ return pr_get_modules_autoload_mode(arg2, arg3, arg4, arg5);
+
default:
/* No functionality available - continue with default */
return -ENOSYS;