@@ -192,6 +192,9 @@ struct net {
/* Move to a better place when the config guard is removed. */
struct mutex rtnl_mutex;
#endif
+#if IS_ENABLED(CONFIG_SECURITY_SELINUX)
+ struct sock *selnl;
+#endif
} __randomize_layout;
#include <linux/seq_file_net.h>
@@ -19,8 +19,6 @@
#include "security.h"
-static struct sock *selnl __ro_after_init;
-
static int selnl_msglen(int msgtype)
{
int ret = 0;
@@ -66,6 +64,7 @@ static void selnl_add_payload(struct nlmsghdr *nlh, int len, int msgtype, void *
static void selnl_notify(int msgtype, void *data)
{
+ struct sock *selnl = current->nsproxy->net_ns->selnl;
int len;
sk_buff_data_t tmp;
struct sk_buff *skb;
@@ -105,16 +104,36 @@ void selnl_notify_policyload(u32 seqno)
selnl_notify(SELNL_MSG_POLICYLOAD, &seqno);
}
-static int __init selnl_init(void)
+static int __net_init selnl_net_init(struct net *net)
{
+ struct sock *sk;
struct netlink_kernel_cfg cfg = {
.groups = SELNLGRP_MAX,
.flags = NL_CFG_F_NONROOT_RECV,
};
- selnl = netlink_kernel_create(&init_net, NETLINK_SELINUX, &cfg);
- if (selnl == NULL)
- panic("SELinux: Cannot create netlink socket.");
+ sk = netlink_kernel_create(net, NETLINK_SELINUX, &cfg);
+ if (!sk)
+ return -ENOMEM;
+ net->selnl = sk;
+ return 0;
+}
+
+static void __net_exit selnl_net_exit(struct net *net)
+{
+ netlink_kernel_release(net->selnl);
+ net->selnl = NULL;
+}
+
+static struct pernet_operations selnl_net_ops = {
+ .init = selnl_net_init,
+ .exit = selnl_net_exit,
+};
+
+static int __init selnl_init(void)
+{
+ if (register_pernet_subsys(&selnl_net_ops))
+ panic("Could not register selinux netlink operations\n");
return 0;
}
The selinux netlink socket is used to notify userspace of changes to the enforcing mode and of policy reloads. At present, these notifications are always sent to the initial network namespace. In order to support multiple selinux namespaces, each with its own enforcing mode and policy, we need to create and use a separate selinux netlink socket for each network namespace. Without this change, a policy reload in a child selinux namespace causes a notification to be sent to processes in the init namespace with a sequence number that may be higher than the policy sequence number for that namespace. As a result, userspace AVC instances in the init namespace will then end up rejecting any further access vector results from their own security server instance due to the policy sequence number appearing to regress, which in turn causes all subsequent uncached access checks to fail. Similarly, without this change, changing enforcing mode in the child selinux namespace triggers a notification to all userspace AVC instances in the init namespace, causing them to switch their enforcing modes. This change does alter SELinux behavior, since previously reloading policy or changing enforcing mode in a non-init network namespace would trigger a notification to processes in the init network namespace. However, this behavior is not being relied upon by existing userspace as far as I can tell and is arguably wrong regardless. This change presumes that one will always unshare the network namespace when unsharing a new selinux namespace (the reverse is not required). Otherwise, the same inconsistencies could arise between the notifications and the relevant policy. At present, nothing enforces this guarantee at the kernel level; it is left up to userspace (e.g. container runtimes). It is an open question as to whether this is a good idea or whether unsharing of the selinux namespace should automatically unshare the network namespace. 
However, keeping them separate is consistent with the current handling of the mount namespace, which should also be unshared so that a private selinuxfs mount can be created. Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com> --- include/net/net_namespace.h | 3 +++ security/selinux/netlink.c | 31 +++++++++++++++++++++++++------ 2 files changed, 28 insertions(+), 6 deletions(-)