diff mbox

[V6,05/10] audit: log creation and deletion of namespace instances

Message ID 11270b0b1afd0a25b108915673e1e1b38dfeeafa.1429252659.git.rgb@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Richard Guy Briggs April 17, 2015, 7:35 a.m. UTC
Log the creation and deletion of namespace instances in all 6 types of
namespaces.

Twelve new audit message types have been introduced:
AUDIT_NS_INIT_MNT       1330    /* Record mount namespace instance creation */
AUDIT_NS_INIT_UTS       1331    /* Record UTS namespace instance creation */
AUDIT_NS_INIT_IPC       1332    /* Record IPC namespace instance creation */
AUDIT_NS_INIT_USER      1333    /* Record USER namespace instance creation */
AUDIT_NS_INIT_PID       1334    /* Record PID namespace instance creation */
AUDIT_NS_INIT_NET       1335    /* Record NET namespace instance creation */
AUDIT_NS_DEL_MNT        1336    /* Record mount namespace instance deletion */
AUDIT_NS_DEL_UTS        1337    /* Record UTS namespace instance deletion */
AUDIT_NS_DEL_IPC        1338    /* Record IPC namespace instance deletion */
AUDIT_NS_DEL_USER       1339    /* Record USER namespace instance deletion */
AUDIT_NS_DEL_PID        1340    /* Record PID namespace instance deletion */
AUDIT_NS_DEL_NET        1341    /* Record NET namespace instance deletion */

As suggested by Eric Paris, there are 12 message types, one for each of
creation and deletion, one for each type of namespace so that text searches are
easier in conjunction with the AUDIT_NS_INFO message type, being able to search
for all records such as "netns=4 " and to avoid fields disappearing per message
type to make ausearch more efficient.

A typical startup would look roughly like:

	type=AUDIT_NS_INIT_UTS msg=audit(1408577534.868:5): pid=1 uid=0 auid=4294967295 ses=4294967295 subj=kernel dev=00:03 old_utsns=(none) utsns=-2 res=1
	type=AUDIT_NS_INIT_USER msg=audit(1408577534.868:6): pid=1 uid=0 auid=4294967295 ses=4294967295 subj=kernel dev=00:03 old_userns=(none) userns=-3 res=1
	type=AUDIT_NS_INIT_PID msg=audit(1408577534.868:7): pid=1 uid=0 auid=4294967295 ses=4294967295 subj=kernel dev=00:03 old_pidns=(none) pidns=-4 res=1
	type=AUDIT_NS_INIT_MNT msg=audit(1408577534.868:8): pid=1 uid=0 auid=4294967295 ses=4294967295 subj=kernel dev=00:03 old_mntns=(none) mntns=0 res=1
	type=AUDIT_NS_INIT_IPC msg=audit(1408577534.868:9): pid=1 uid=0 auid=4294967295 ses=4294967295 subj=kernel dev=00:03 old_ipcns=(none) ipcns=-1 res=1
	type=AUDIT_NS_INIT_NET msg=audit(1408577533.500:10): pid=1 uid=0 auid=4294967295 ses=4294967295 subj=kernel dev=00:03 old_netns=(none) netns=2 res=1

And a CLONE action would result in:
	type=AUDIT_NS_INIT_NET msg=audit(1408577535.306:81): pid=481 uid=0 auid=4294967295 ses=4294967295 subj=system_u:system_r:init_t:s0 dev=00:03 old_netns=2 netns=3 res=1

While deleting a namespace would result in:
	type=AUDIT_NS_DEL_MNT msg=audit(1408577552.221:85): pid=481 uid=0 auid=4294967295 ses=4294967295 subj=system_u:system_r:init_t:s0 dev=00:03 mntns=4 res=1

If not "(none)", old_XXXns lists the namespace from which it was cloned.

Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
---
 fs/namespace.c             |   13 +++++++++
 include/linux/audit.h      |    8 +++++
 include/uapi/linux/audit.h |   12 ++++++++
 ipc/namespace.c            |   12 ++++++++
 kernel/audit.c             |   64 ++++++++++++++++++++++++++++++++++++++++++++
 kernel/pid_namespace.c     |   13 +++++++++
 kernel/user_namespace.c    |   13 +++++++++
 kernel/utsname.c           |   12 ++++++++
 net/core/net_namespace.c   |   12 ++++++++
 9 files changed, 159 insertions(+), 0 deletions(-)

Comments

Steve Grubb May 5, 2015, 2:22 p.m. UTC | #1
Hello,

I think there needs to be some more discussion around this. It seems like this 
is not exactly recording things that are useful for audit.

On Friday, April 17, 2015 03:35:52 AM Richard Guy Briggs wrote:
> Log the creation and deletion of namespace instances in all 6 types of
> namespaces.
> 
> Twelve new audit message types have been introduced:
> AUDIT_NS_INIT_MNT       1330    /* Record mount namespace instance creation
> */ AUDIT_NS_INIT_UTS       1331    /* Record UTS namespace instance
> creation */ AUDIT_NS_INIT_IPC       1332    /* Record IPC namespace
> instance creation */ AUDIT_NS_INIT_USER      1333    /* Record USER
> namespace instance creation */ AUDIT_NS_INIT_PID       1334    /* Record
> PID namespace instance creation */ AUDIT_NS_INIT_NET       1335    /*
> Record NET namespace instance creation */ AUDIT_NS_DEL_MNT        1336   
> /* Record mount namespace instance deletion */ AUDIT_NS_DEL_UTS        1337
>    /* Record UTS namespace instance deletion */ AUDIT_NS_DEL_IPC       
> 1338    /* Record IPC namespace instance deletion */ AUDIT_NS_DEL_USER     
>  1339    /* Record USER namespace instance deletion */ AUDIT_NS_DEL_PID    
>    1340    /* Record PID namespace instance deletion */ AUDIT_NS_DEL_NET   
>     1341    /* Record NET namespace instance deletion */

The requirements for auditing of containers should be derived from VPP. In it, 
it asks for selectable auditing, selective audit, and selective audit review. 
What this means is that we need the container and all its children to have one 
identifier that is inserted into all the events that are associated with the 
container.

With this, it's possible to do a search for all events related to a container. 
It's possible to exclude events from a container. It's possible to not get any 
events.

The requirements also call out for the identification of the subject. This 
means that the event should be bound to a syscall such as clone, setns, or 
unshare.

Also, any user space events originating inside the container need to have the 
container ID added to the user space event - just like auid and session id.

Recording each instance of a name space is giving me something that I cannot 
use to do queries required by the security target. Given these events, how do 
I locate a web server event where it accesses a watched file? That 
authentication failed? That an update within the container failed?

The requirements are that we have to log the creation, suspension, migration, 
and termination of a container. The requirements are not on the individual 
name space.

Maybe I'm missing how these events give me that. But I'd like to hear how I 
would be able to meet requirements with these 12 events.

-Steve

 
> As suggested by Eric Paris, there are 12 message types, one for each of
> creation and deletion, one for each type of namespace so that text searches
> are easier in conjunction with the AUDIT_NS_INFO message type, being able
> to search for all records such as "netns=4 " and to avoid fields
> disappearing per message type to make ausearch more efficient.
> 
> A typical startup would look roughly like:
> 
> 	type=AUDIT_NS_INIT_UTS msg=audit(1408577534.868:5): pid=1 uid=0
> auid=4294967295 ses=4294967295 subj=kernel dev=00:03 old_utsns=(none)
> utsns=-2 res=1 type=AUDIT_NS_INIT_USER msg=audit(1408577534.868:6): pid=1
> uid=0 auid=4294967295 ses=4294967295 subj=kernel dev=00:03
> old_userns=(none) userns=-3 res=1 type=AUDIT_NS_INIT_PID
> msg=audit(1408577534.868:7): pid=1 uid=0 auid=4294967295 ses=4294967295
> subj=kernel dev=00:03 old_pidns=(none) pidns=-4 res=1
> type=AUDIT_NS_INIT_MNT msg=audit(1408577534.868:8): pid=1 uid=0
> auid=4294967295 ses=4294967295 subj=kernel dev=00:03 old_mntns=(none)
> mntns=0 res=1 type=AUDIT_NS_INIT_IPC msg=audit(1408577534.868:9): pid=1
> uid=0 auid=4294967295 ses=4294967295 subj=kernel dev=00:03 old_ipcns=(none)
> ipcns=-1 res=1 type=AUDIT_NS_INIT_NET msg=audit(1408577533.500:10): pid=1
> uid=0 auid=4294967295 ses=4294967295 subj=kernel dev=00:03 old_netns=(none)
> netns=2 res=1
> 
> And a CLONE action would result in:
> 	type=type=AUDIT_NS_INIT_NET msg=audit(1408577535.306:81): pid=481 uid=0
> auid=4294967295 ses=4294967295 subj=system_u:system_r:init_t:s0 dev=00:03
> old_netns=2 netns=3 res=1
> 
> While deleting a namespace would result in:
> 	type=type=AUDIT_NS_DEL_MNT msg=audit(1408577552.221:85): pid=481 uid=0
> auid=4294967295 ses=4294967295 subj=system_u:system_r:init_t:s0 dev=00:03
> mntns=4 res=1
> 
> If not "(none)", old_XXXns lists the namespace from which it was cloned.
> 
> Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
> ---
>  fs/namespace.c             |   13 +++++++++
>  include/linux/audit.h      |    8 +++++
>  include/uapi/linux/audit.h |   12 ++++++++
>  ipc/namespace.c            |   12 ++++++++
>  kernel/audit.c             |   64
> ++++++++++++++++++++++++++++++++++++++++++++ kernel/pid_namespace.c     |  
> 13 +++++++++
>  kernel/user_namespace.c    |   13 +++++++++
>  kernel/utsname.c           |   12 ++++++++
>  net/core/net_namespace.c   |   12 ++++++++
>  9 files changed, 159 insertions(+), 0 deletions(-)
> 
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 182bc41..7b62543 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -24,6 +24,7 @@
>  #include <linux/proc_ns.h>
>  #include <linux/magic.h>
>  #include <linux/bootmem.h>
> +#include <linux/audit.h>
>  #include "pnode.h"
>  #include "internal.h"
> 
> @@ -2459,6 +2460,7 @@ dput_out:
> 
>  static void free_mnt_ns(struct mnt_namespace *ns)
>  {
> +	audit_log_ns_del(AUDIT_NS_DEL_MNT, ns->proc_inum);
>  	proc_free_inum(ns->proc_inum);
>  	put_user_ns(ns->user_ns);
>  	kfree(ns);
> @@ -2518,6 +2520,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags,
> struct mnt_namespace *ns, new_ns = alloc_mnt_ns(user_ns);
>  	if (IS_ERR(new_ns))
>  		return new_ns;
> +	audit_log_ns_init(AUDIT_NS_INIT_MNT, ns->proc_inum, new_ns->proc_inum);
> 
>  	namespace_lock();
>  	/* First pass: copy the tree topology */
> @@ -2830,6 +2833,16 @@ static void __init init_mount_tree(void)
>  	set_fs_root(current->fs, &root);
>  }
> 
> +/* log the ID of init mnt namespace after audit service starts */
> +static int __init mnt_ns_init_log(void)
> +{
> +	struct mnt_namespace *init_mnt_ns = init_task.nsproxy->mnt_ns;
> +
> +	audit_log_ns_init(AUDIT_NS_INIT_MNT, 0, init_mnt_ns->proc_inum);
> +	return 0;
> +}
> +late_initcall(mnt_ns_init_log);
> +
>  void __init mnt_init(void)
>  {
>  	unsigned u;
> diff --git a/include/linux/audit.h b/include/linux/audit.h
> index 71698ec..b28dfb0 100644
> --- a/include/linux/audit.h
> +++ b/include/linux/audit.h
> @@ -484,6 +484,9 @@ extern void		    audit_log_ns_info(struct 
task_struct
> *tsk); static inline void	    audit_log_ns_info(struct task_struct *tsk) {
> }
>  #endif
> +extern void		    audit_log_ns_init(int type, unsigned int old_inum,
> +					      unsigned int inum);
> +extern void		    audit_log_ns_del(int type, unsigned int inum);
> 
>  extern int		    audit_update_lsm_rules(void);
> 
> @@ -542,6 +545,11 @@ static inline void audit_log_task_info(struct
> audit_buffer *ab, { }
>  static inline void audit_log_ns_info(struct task_struct *tsk)
>  { }
> +static inline int audit_log_ns_init(int type, unsigned int old_inum,
> +				    unsigned int inum)
> +{ }
> +static inline int audit_log_ns_del(int type, unsigned int inum)
> +{ }
>  #define audit_enabled 0
>  #endif /* CONFIG_AUDIT */
>  static inline void audit_log_string(struct audit_buffer *ab, const char
> *buf) diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
> index 1ffb151..487cad6 100644
> --- a/include/uapi/linux/audit.h
> +++ b/include/uapi/linux/audit.h
> @@ -111,6 +111,18 @@
>  #define AUDIT_PROCTITLE		1327	/* Proctitle emit event */
>  #define AUDIT_FEATURE_CHANGE	1328	/* audit log listing feature changes 
*/
>  #define AUDIT_NS_INFO		1329	/* Record process namespace IDs */
> +#define AUDIT_NS_INIT_MNT	1330	/* Record mount namespace instance 
creation
> */ +#define AUDIT_NS_INIT_UTS	1331	/* Record UTS namespace instance
> creation */ +#define AUDIT_NS_INIT_IPC	1332	/* Record IPC namespace
> instance creation */ +#define AUDIT_NS_INIT_USER	1333	/* Record USER
> namespace instance creation */ +#define AUDIT_NS_INIT_PID	1334	/* Record
> PID namespace instance creation */ +#define AUDIT_NS_INIT_NET	1335	/*
> Record NET namespace instance creation */ +#define AUDIT_NS_DEL_MNT	1336	
/*
> Record mount namespace instance deletion */ +#define
> AUDIT_NS_DEL_UTS	1337	/* Record UTS namespace instance deletion */ 
+#define
> AUDIT_NS_DEL_IPC	1338	/* Record IPC namespace instance deletion */ 
+#define
> AUDIT_NS_DEL_USER	1339	/* Record USER namespace instance deletion */
> +#define AUDIT_NS_DEL_PID	1340	/* Record PID namespace instance 
deletion */
> +#define AUDIT_NS_DEL_NET	1341	/* Record NET namespace instance deletion 
*/
> 
>  #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
>  #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
> diff --git a/ipc/namespace.c b/ipc/namespace.c
> index 59451c1..73727ce 100644
> --- a/ipc/namespace.c
> +++ b/ipc/namespace.c
> @@ -13,6 +13,7 @@
>  #include <linux/mount.h>
>  #include <linux/user_namespace.h>
>  #include <linux/proc_ns.h>
> +#include <linux/audit.h>
> 
>  #include "util.h"
> 
> @@ -41,6 +42,8 @@ static struct ipc_namespace *create_ipc_ns(struct
> user_namespace *user_ns, }
>  	atomic_inc(&nr_ipc_ns);
> 
> +	audit_log_ns_init(AUDIT_NS_INIT_IPC, old_ns->proc_inum, ns->proc_inum);
> +
>  	sem_init_ns(ns);
>  	msg_init_ns(ns);
>  	shm_init_ns(ns);
> @@ -119,6 +122,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
>  	 */
>  	ipcns_notify(IPCNS_REMOVED);
>  	put_user_ns(ns->user_ns);
> +	audit_log_ns_del(AUDIT_NS_DEL_IPC, ns->proc_inum);
>  	proc_free_inum(ns->proc_inum);
>  	kfree(ns);
>  }
> @@ -197,3 +201,11 @@ const struct proc_ns_operations ipcns_operations = {
>  	.install	= ipcns_install,
>  	.inum		= ipcns_inum,
>  };
> +
> +/* log the ID of init IPC namespace after audit service starts */
> +static int __init ipc_namespaces_init(void)
> +{
> +	audit_log_ns_init(AUDIT_NS_INIT_IPC, 0, init_ipc_ns.proc_inum);
> +	return 0;
> +}
> +late_initcall(ipc_namespaces_init);
> diff --git a/kernel/audit.c b/kernel/audit.c
> index 63f32f4..e6230c4 100644
> --- a/kernel/audit.c
> +++ b/kernel/audit.c
> @@ -1978,6 +1978,70 @@ out:
>  	kfree(name);
>  }
> 
> +#ifdef CONFIG_NAMESPACES
> +static char *ns_name[] = {
> +	"mnt",
> +	"uts",
> +	"ipc",
> +	"user",
> +	"pid",
> +	"net",
> +};
> +
> +/**
> + * audit_log_ns_init - report a namespace instance creation
> + * @type: type of audit namespace instance created message
> + * @old_inum: the ID number of the cloned namespace instance
> + * @inum: the ID number of the new namespace instance
> + */
> +void  audit_log_ns_init(int type, unsigned int old_inum, unsigned int inum)
> +{
> +	struct audit_buffer *ab;
> +	char *audit_ns_name = ns_name[type - AUDIT_NS_INIT_MNT];
> +	struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt;
> +	struct super_block *sb = mnt->mnt_sb;
> +	char old_ns[16];
> +
> +	if (type < AUDIT_NS_INIT_MNT || type > AUDIT_NS_INIT_NET) {
> +		WARN(1, "audit_log_ns_init: type:%d out of range", type);
> +		return;
> +	}
> +	if (!old_inum)
> +		sprintf(old_ns, "(none)");
> +	else
> +		sprintf(old_ns, "%d", old_inum - PROC_DYNAMIC_FIRST);
> +	audit_log_common_recv_msg(&ab, type);
> +	audit_log_format(ab, " dev=%02x:%02x old_%sns=%s %sns=%d res=1",
> +			 MAJOR(sb->s_dev), MINOR(sb->s_dev),
> +			 audit_ns_name, old_ns,
> +			 audit_ns_name, inum - PROC_DYNAMIC_FIRST);
> +	audit_log_end(ab);
> +}
> +
> +/**
> + * audit_log_ns_del - report a namespace instance deleted
> + * @type: type of audit namespace instance deleted message
> + * @inum: the ID number of the namespace instance
> + */
> +void audit_log_ns_del(int type, unsigned int inum)
> +{
> +	struct audit_buffer *ab;
> +	char *audit_ns_name = ns_name[type - AUDIT_NS_DEL_MNT];
> +	struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt;
> +	struct super_block *sb = mnt->mnt_sb;
> +
> +	if (type < AUDIT_NS_DEL_MNT || type > AUDIT_NS_DEL_NET) {
> +		WARN(1, "audit_log_ns_del: type:%d out of range", type);
> +		return;
> +	}
> +	audit_log_common_recv_msg(&ab, type);
> +	audit_log_format(ab, " dev=%02x:%02x %sns=%d res=1",
> +			 MAJOR(sb->s_dev), MINOR(sb->s_dev), audit_ns_name,
> +			 inum - PROC_DYNAMIC_FIRST);
> +	audit_log_end(ab);
> +}
> +#endif /* CONFIG_NAMESPACES */
> +
>  /**
>   * audit_log_end - end one audit record
>   * @ab: the audit_buffer
> diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
> index db95d8e..d28fd14 100644
> --- a/kernel/pid_namespace.c
> +++ b/kernel/pid_namespace.c
> @@ -18,6 +18,7 @@
>  #include <linux/proc_ns.h>
>  #include <linux/reboot.h>
>  #include <linux/export.h>
> +#include <linux/audit.h>
> 
>  struct pid_cache {
>  	int nr_ids;
> @@ -109,6 +110,9 @@ static struct pid_namespace *create_pid_namespace(struct
> user_namespace *user_ns if (err)
>  		goto out_free_map;
> 
> +	audit_log_ns_init(AUDIT_NS_INIT_PID, parent_pid_ns->proc_inum,
> +			  ns->proc_inum);
> +
>  	kref_init(&ns->kref);
>  	ns->level = level;
>  	ns->parent = get_pid_ns(parent_pid_ns);
> @@ -142,6 +146,7 @@ static void destroy_pid_namespace(struct pid_namespace
> *ns) {
>  	int i;
> 
> +	audit_log_ns_del(AUDIT_NS_DEL_PID, ns->proc_inum);
>  	proc_free_inum(ns->proc_inum);
>  	for (i = 0; i < PIDMAP_ENTRIES; i++)
>  		kfree(ns->pidmap[i].page);
> @@ -388,3 +393,11 @@ static __init int pid_namespaces_init(void)
>  }
> 
>  __initcall(pid_namespaces_init);
> +
> +/* log the ID of init PID namespace after audit service starts */
> +static __init int pid_namespaces_late_init(void)
> +{
> +	audit_log_ns_init(AUDIT_NS_INIT_PID, 0, init_pid_ns.proc_inum);
> +	return 0;
> +}
> +late_initcall(pid_namespaces_late_init);
> diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> index fcc0256..89c2517 100644
> --- a/kernel/user_namespace.c
> +++ b/kernel/user_namespace.c
> @@ -22,6 +22,7 @@
>  #include <linux/ctype.h>
>  #include <linux/projid.h>
>  #include <linux/fs_struct.h>
> +#include <linux/audit.h>
> 
>  static struct kmem_cache *user_ns_cachep __read_mostly;
> 
> @@ -92,6 +93,9 @@ int create_user_ns(struct cred *new)
>  		return ret;
>  	}
> 
> +	audit_log_ns_init(AUDIT_NS_INIT_USER, parent_ns->proc_inum,
> +			  ns->proc_inum);
> +
>  	atomic_set(&ns->count, 1);
>  	/* Leave the new->user_ns reference with the new user namespace. */
>  	ns->parent = parent_ns;
> @@ -136,6 +140,7 @@ void free_user_ns(struct user_namespace *ns)
>  #ifdef CONFIG_PERSISTENT_KEYRINGS
>  		key_put(ns->persistent_keyring_register);
>  #endif
> +		audit_log_ns_del(AUDIT_NS_DEL_USER, ns->proc_inum);
>  		proc_free_inum(ns->proc_inum);
>  		kmem_cache_free(user_ns_cachep, ns);
>  		ns = parent;
> @@ -909,3 +914,11 @@ static __init int user_namespaces_init(void)
>  	return 0;
>  }
>  subsys_initcall(user_namespaces_init);
> +
> +/* log the ID of init user namespace after audit service starts */
> +static __init int user_namespaces_late_init(void)
> +{
> +	audit_log_ns_init(AUDIT_NS_INIT_USER, 0, init_user_ns.proc_inum);
> +	return 0;
> +}
> +late_initcall(user_namespaces_late_init);
> diff --git a/kernel/utsname.c b/kernel/utsname.c
> index fd39312..fa21e8d 100644
> --- a/kernel/utsname.c
> +++ b/kernel/utsname.c
> @@ -16,6 +16,7 @@
>  #include <linux/slab.h>
>  #include <linux/user_namespace.h>
>  #include <linux/proc_ns.h>
> +#include <linux/audit.h>
> 
>  static struct uts_namespace *create_uts_ns(void)
>  {
> @@ -48,6 +49,8 @@ static struct uts_namespace *clone_uts_ns(struct
> user_namespace *user_ns, return ERR_PTR(err);
>  	}
> 
> +	audit_log_ns_init(AUDIT_NS_INIT_UTS, old_ns->proc_inum, ns->proc_inum);
> +
>  	down_read(&uts_sem);
>  	memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
>  	ns->user_ns = get_user_ns(user_ns);
> @@ -84,6 +87,7 @@ void free_uts_ns(struct kref *kref)
> 
>  	ns = container_of(kref, struct uts_namespace, kref);
>  	put_user_ns(ns->user_ns);
> +	audit_log_ns_del(AUDIT_NS_DEL_UTS, ns->proc_inum);
>  	proc_free_inum(ns->proc_inum);
>  	kfree(ns);
>  }
> @@ -138,3 +142,11 @@ const struct proc_ns_operations utsns_operations = {
>  	.install	= utsns_install,
>  	.inum		= utsns_inum,
>  };
> +
> +/* log the ID of init UTS namespace after audit service starts */
> +static int __init uts_namespaces_init(void)
> +{
> +	audit_log_ns_init(AUDIT_NS_INIT_UTS, 0, init_uts_ns.proc_inum);
> +	return 0;
> +}
> +late_initcall(uts_namespaces_init);
> diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> index 85b6269..562eb85 100644
> --- a/net/core/net_namespace.c
> +++ b/net/core/net_namespace.c
> @@ -17,6 +17,7 @@
>  #include <linux/user_namespace.h>
>  #include <net/net_namespace.h>
>  #include <net/netns/generic.h>
> +#include <linux/audit.h>
> 
>  /*
>   *	Our network namespace constructor/destructor lists
> @@ -253,6 +254,8 @@ struct net *copy_net_ns(unsigned long flags,
>  	mutex_lock(&net_mutex);
>  	rv = setup_net(net, user_ns);
>  	if (rv == 0) {
> +		audit_log_ns_init(AUDIT_NS_INIT_NET, old_net->proc_inum,
> +				  net->proc_inum);
>  		rtnl_lock();
>  		list_add_tail_rcu(&net->list, &net_namespace_list);
>  		rtnl_unlock();
> @@ -389,6 +392,7 @@ static __net_init int net_ns_net_init(struct net *net)
> 
>  static __net_exit void net_ns_net_exit(struct net *net)
>  {
> +	audit_log_ns_del(AUDIT_NS_DEL_NET, net->proc_inum);
>  	proc_free_inum(net->proc_inum);
>  }
> 
> @@ -435,6 +439,14 @@ static int __init net_ns_init(void)
> 
>  pure_initcall(net_ns_init);
> 
> +/* log the ID of init_net namespace after audit service starts */
> +static int __init net_ns_init_log(void)
> +{
> +	audit_log_ns_init(AUDIT_NS_INIT_NET, 0, init_net.proc_inum);
> +	return 0;
> +}
> +late_initcall(net_ns_init_log);
> +
>  #ifdef CONFIG_NET_NS
>  static int __register_pernet_operations(struct list_head *list,
>  					struct pernet_operations *ops)

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
'arozansk@redhat.com' May 5, 2015, 2:31 p.m. UTC | #2
Hi Steve,
On Tue, May 05, 2015 at 10:22:32AM -0400, Steve Grubb wrote:
> The requirements for auditing of containers should be derived from VPP. In it, 
> it asks for selectable auditing, selective audit, and selective audit review. 
> What this means is that we need the container and all its children to have one 
> identifier that is inserted into all the events that are associated with the 
> container.
> 
> With this, its possible to do a search for all events related to a container. 
> Its possible to exclude events from a container. Its possible to not get any 
> events.
> 
> The requirements also call out for the identification of the subject. This 
> means that the event should be bound to a syscall such as clone, setns, or 
> unshare.
> 
> Also, any user space events originating inside the container needs to have the 
> container ID added to the user space event - just like auid and session id.
> 
> Recording each instance of a name space is giving me something that I cannot 
> use to do queries required by the security target. Given these events, how do 
> I locate a web server event where it accesses a watched file? That 
> authentication failed? That an update within the container failed?
> 
> The requirements are that we have to log the creation, suspension, migration, 
> and termination of a container. The requirements are not on the individual 
> name space.
> 
> Maybe I'm missing how these events give me that. But I'd like to hear how I 
> would be able to meet requirements with these 12 events.

What about cases where you don't use lxc or libvirt to create namespaces?
It's easier if the logging is done per namespace; in case the namespaces
are created by a container manager, it can generate a new event notifying
that it created a container named "foo" with these namespaces: x, y, z, w,
and from that you can piece together everything that happened. Userspace
tools can change to adapt to using namespaces and the idea of a container
to make it easier to look up events, instead of relying on a number
that might not be there (think of someone using unshare, ip netns, ...).
Having the concept of a "container" in kernel space was discussed in the
past and it's not going to happen, so userspace should deal with
it.
Eric W. Biederman May 5, 2015, 2:56 p.m. UTC | #3
Steve Grubb <sgrubb@redhat.com> writes:

> The requirements for auditing of containers should be derived from VPP. In it, 
> it asks for selectable auditing, selective audit, and selective audit review. 
> What this means is that we need the container and all its children to have one 
> identifier that is inserted into all the events that are associated with the 
> container.

That is technically impossible.  Nested containers exist.

That is, when container G is nested in container F, which is in turn
nested in container E, which is in turn nested in container D, which is in
turn nested in container C, which is in turn nested in container B, which
is nested in container A, there is no one label you can put on audit
messages from container G which is the ``correct'' one.

Or are you proposing that something in container G have labels
A B C D E F G included on every audit message?   That introduces enough
complexity in generating and parsing the messages I wouldn't trust those
messages as the least bug in generation and parsing would be a security
issue.

What in the world is VPP?  It sounds like some non-public thing.
Certainly it has never been a part of the public container discussion
and as such it appears to be completely ridiculous to bring up in a
public discussion.

Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Richard Guy Briggs May 12, 2015, 7:57 p.m. UTC | #4
On 15/05/05, Steve Grubb wrote:
> Hello,
> 
> I think there needs to be some more discussion around this. It seems like this 
> is not exactly recording things that are useful for audit.

It seems to me that either audit has to assemble that information, or
the kernel has to do so.  The kernel doesn't know about containers
(yet?).

> On Friday, April 17, 2015 03:35:52 AM Richard Guy Briggs wrote:
> > Log the creation and deletion of namespace instances in all 6 types of
> > namespaces.
> > 
> > Twelve new audit message types have been introduced:
> > AUDIT_NS_INIT_MNT       1330    /* Record mount namespace instance creation
> > */ AUDIT_NS_INIT_UTS       1331    /* Record UTS namespace instance
> > creation */ AUDIT_NS_INIT_IPC       1332    /* Record IPC namespace
> > instance creation */ AUDIT_NS_INIT_USER      1333    /* Record USER
> > namespace instance creation */ AUDIT_NS_INIT_PID       1334    /* Record
> > PID namespace instance creation */ AUDIT_NS_INIT_NET       1335    /*
> > Record NET namespace instance creation */ AUDIT_NS_DEL_MNT        1336   
> > /* Record mount namespace instance deletion */ AUDIT_NS_DEL_UTS        1337
> >    /* Record UTS namespace instance deletion */ AUDIT_NS_DEL_IPC       
> > 1338    /* Record IPC namespace instance deletion */ AUDIT_NS_DEL_USER     
> >  1339    /* Record USER namespace instance deletion */ AUDIT_NS_DEL_PID    
> >    1340    /* Record PID namespace instance deletion */ AUDIT_NS_DEL_NET   
> >     1341    /* Record NET namespace instance deletion */
> 
> The requirements for auditing of containers should be derived from VPP. In it, 
> it asks for selectable auditing, selective audit, and selective audit review. 
> What this means is that we need the container and all its children to have one 
> identifier that is inserted into all the events that are associated with the 
> container.

Is that requirement for the records that are sent from the kernel, or
for the records stored by auditd, or by another facility that delivers
those records to a final consumer?

> With this, its possible to do a search for all events related to a container. 
> Its possible to exclude events from a container. Its possible to not get any 
> events.
> 
> The requirements also call out for the identification of the subject. This 
> means that the event should be bound to a syscall such as clone, setns, or 
> unshare.

Is it useful to have a reference of the init namespace set from which
all others are spawned?

If it isn't bound, I assume the subject should be added to the message
format?  I'm thinking of messages without an audit_context such as audit
user messages (such as AUDIT_NS_INFO and AUDIT_VIRT_CONTROL).

For now, we should not need to log namespaces with AUDIT_FEATURE_CHANGE
or AUDIT_CONFIG_CHANGE messages since only initial user namespace with
initial pid namespace has permission to do so.  This will need to be
addressed by having non-init config changes be limited to that container
or set of namespaces and possibly its children.  The other possibility
is to add the subject to the stand-alone message.

> Also, any user space events originating inside the container needs to have the 
> container ID added to the user space event - just like auid and session id.

This sounds like every task needs to record a container ID since that
information is otherwise unknown by the kernel except by what might be
provided by an audit user message such as AUDIT_VIRT_CONTROL or possibly
the new AUDIT_NS_INFO request.  It could be stored in struct task_struct
or in struct audit_context.  I don't have a suggestion on how to get
that information securely into the kernel.

> Recording each instance of a name space is giving me something that I cannot 
> use to do queries required by the security target. Given these events, how do 
> I locate a web server event where it accesses a watched file? That 
> authentication failed? That an update within the container failed?
> 
> The requirements are that we have to log the creation, suspension, migration, 
> and termination of a container. The requirements are not on the individual 
> name space.

Ok.  Do we have a robust definition of a container?  Where is that
definition managed?  If it is a userspace concept, then I think either
userspace should be assembling this information, or providing that
information to the entity that will be expected to know about and
provide it.

> Maybe I'm missing how these events give me that. But I'd like to hear how I 
> would be able to meet requirements with these 12 events.

Adding the infrastructure to give each of those 12 events an audit
context to be able to give meaningful subject fields in audit records
appears to require adding a struct task_struct argument to calls to
copy_mnt_ns(), copy_utsname(), copy_ipcs(), copy_pid_ns(),
copy_net_ns(), create_user_ns() unless I use current.  I think we must
use current since the userns is created before the spawned process is
mature or has an audit context in the case of clone.

Either that, or I have mis-understood and I should be stashing this
namespace ID information in an audit_aux_data structure or a more
permanent part of struct audit_context to be printed when required on
syscall exit.  I'm trying to think through if it is needed in any
non-syscall audit messages.

Another RFC patch set coming...

> -Steve
>  
> > As suggested by Eric Paris, there are 12 message types, one for each of
> > creation and deletion, one for each type of namespace so that text searches
> > are easier in conjunction with the AUDIT_NS_INFO message type, being able
> > to search for all records such as "netns=4 " and to avoid fields
> > disappearing per message type to make ausearch more efficient.
> > 
> > A typical startup would look roughly like:
> > 
> > 	type=AUDIT_NS_INIT_UTS msg=audit(1408577534.868:5): pid=1 uid=0
> > auid=4294967295 ses=4294967295 subj=kernel dev=00:03 old_utsns=(none)
> > utsns=-2 res=1 type=AUDIT_NS_INIT_USER msg=audit(1408577534.868:6): pid=1
> > uid=0 auid=4294967295 ses=4294967295 subj=kernel dev=00:03
> > old_userns=(none) userns=-3 res=1 type=AUDIT_NS_INIT_PID
> > msg=audit(1408577534.868:7): pid=1 uid=0 auid=4294967295 ses=4294967295
> > subj=kernel dev=00:03 old_pidns=(none) pidns=-4 res=1
> > type=AUDIT_NS_INIT_MNT msg=audit(1408577534.868:8): pid=1 uid=0
> > auid=4294967295 ses=4294967295 subj=kernel dev=00:03 old_mntns=(none)
> > mntns=0 res=1 type=AUDIT_NS_INIT_IPC msg=audit(1408577534.868:9): pid=1
> > uid=0 auid=4294967295 ses=4294967295 subj=kernel dev=00:03 old_ipcns=(none)
> > ipcns=-1 res=1 type=AUDIT_NS_INIT_NET msg=audit(1408577533.500:10): pid=1
> > uid=0 auid=4294967295 ses=4294967295 subj=kernel dev=00:03 old_netns=(none)
> > netns=2 res=1
> > 
> > And a CLONE action would result in:
> > 	type=type=AUDIT_NS_INIT_NET msg=audit(1408577535.306:81): pid=481 uid=0
> > auid=4294967295 ses=4294967295 subj=system_u:system_r:init_t:s0 dev=00:03
> > old_netns=2 netns=3 res=1
> > 
> > While deleting a namespace would result in:
> > 	type=type=AUDIT_NS_DEL_MNT msg=audit(1408577552.221:85): pid=481 uid=0
> > auid=4294967295 ses=4294967295 subj=system_u:system_r:init_t:s0 dev=00:03
> > mntns=4 res=1
> > 
> > If not "(none)", old_XXXns lists the namespace from which it was cloned.
> > 
> > Signed-off-by: Richard Guy Briggs <rgb@redhat.com>
> > ---
> >  fs/namespace.c             |   13 +++++++++
> >  include/linux/audit.h      |    8 +++++
> >  include/uapi/linux/audit.h |   12 ++++++++
> >  ipc/namespace.c            |   12 ++++++++
> >  kernel/audit.c             |   64
> > ++++++++++++++++++++++++++++++++++++++++++++ kernel/pid_namespace.c     |  
> > 13 +++++++++
> >  kernel/user_namespace.c    |   13 +++++++++
> >  kernel/utsname.c           |   12 ++++++++
> >  net/core/net_namespace.c   |   12 ++++++++
> >  9 files changed, 159 insertions(+), 0 deletions(-)
> > 
> > diff --git a/fs/namespace.c b/fs/namespace.c
> > index 182bc41..7b62543 100644
> > --- a/fs/namespace.c
> > +++ b/fs/namespace.c
> > @@ -24,6 +24,7 @@
> >  #include <linux/proc_ns.h>
> >  #include <linux/magic.h>
> >  #include <linux/bootmem.h>
> > +#include <linux/audit.h>
> >  #include "pnode.h"
> >  #include "internal.h"
> > 
> > @@ -2459,6 +2460,7 @@ dput_out:
> > 
> >  static void free_mnt_ns(struct mnt_namespace *ns)
> >  {
> > +	audit_log_ns_del(AUDIT_NS_DEL_MNT, ns->proc_inum);
> >  	proc_free_inum(ns->proc_inum);
> >  	put_user_ns(ns->user_ns);
> >  	kfree(ns);
> > @@ -2518,6 +2520,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags,
> > struct mnt_namespace *ns, new_ns = alloc_mnt_ns(user_ns);
> >  	if (IS_ERR(new_ns))
> >  		return new_ns;
> > +	audit_log_ns_init(AUDIT_NS_INIT_MNT, ns->proc_inum, new_ns->proc_inum);
> > 
> >  	namespace_lock();
> >  	/* First pass: copy the tree topology */
> > @@ -2830,6 +2833,16 @@ static void __init init_mount_tree(void)
> >  	set_fs_root(current->fs, &root);
> >  }
> > 
> > +/* log the ID of init mnt namespace after audit service starts */
> > +static int __init mnt_ns_init_log(void)
> > +{
> > +	struct mnt_namespace *init_mnt_ns = init_task.nsproxy->mnt_ns;
> > +
> > +	audit_log_ns_init(AUDIT_NS_INIT_MNT, 0, init_mnt_ns->proc_inum);
> > +	return 0;
> > +}
> > +late_initcall(mnt_ns_init_log);
> > +
> >  void __init mnt_init(void)
> >  {
> >  	unsigned u;
> > diff --git a/include/linux/audit.h b/include/linux/audit.h
> > index 71698ec..b28dfb0 100644
> > --- a/include/linux/audit.h
> > +++ b/include/linux/audit.h
> > @@ -484,6 +484,9 @@ extern void		    audit_log_ns_info(struct 
> task_struct
> > *tsk); static inline void	    audit_log_ns_info(struct task_struct *tsk) {
> > }
> >  #endif
> > +extern void		    audit_log_ns_init(int type, unsigned int old_inum,
> > +					      unsigned int inum);
> > +extern void		    audit_log_ns_del(int type, unsigned int inum);
> > 
> >  extern int		    audit_update_lsm_rules(void);
> > 
> > @@ -542,6 +545,11 @@ static inline void audit_log_task_info(struct
> > audit_buffer *ab, { }
> >  static inline void audit_log_ns_info(struct task_struct *tsk)
> >  { }
> > +static inline int audit_log_ns_init(int type, unsigned int old_inum,
> > +				    unsigned int inum)
> > +{ }
> > +static inline int audit_log_ns_del(int type, unsigned int inum)
> > +{ }
> >  #define audit_enabled 0
> >  #endif /* CONFIG_AUDIT */
> >  static inline void audit_log_string(struct audit_buffer *ab, const char
> > *buf) diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
> > index 1ffb151..487cad6 100644
> > --- a/include/uapi/linux/audit.h
> > +++ b/include/uapi/linux/audit.h
> > @@ -111,6 +111,18 @@
> >  #define AUDIT_PROCTITLE		1327	/* Proctitle emit event */
> >  #define AUDIT_FEATURE_CHANGE	1328	/* audit log listing feature changes 
> */
> >  #define AUDIT_NS_INFO		1329	/* Record process namespace IDs */
> > +#define AUDIT_NS_INIT_MNT	1330	/* Record mount namespace instance 
> creation
> > */ +#define AUDIT_NS_INIT_UTS	1331	/* Record UTS namespace instance
> > creation */ +#define AUDIT_NS_INIT_IPC	1332	/* Record IPC namespace
> > instance creation */ +#define AUDIT_NS_INIT_USER	1333	/* Record USER
> > namespace instance creation */ +#define AUDIT_NS_INIT_PID	1334	/* Record
> > PID namespace instance creation */ +#define AUDIT_NS_INIT_NET	1335	/*
> > Record NET namespace instance creation */ +#define AUDIT_NS_DEL_MNT	1336	
> /*
> > Record mount namespace instance deletion */ +#define
> > AUDIT_NS_DEL_UTS	1337	/* Record UTS namespace instance deletion */ 
> +#define
> > AUDIT_NS_DEL_IPC	1338	/* Record IPC namespace instance deletion */ 
> +#define
> > AUDIT_NS_DEL_USER	1339	/* Record USER namespace instance deletion */
> > +#define AUDIT_NS_DEL_PID	1340	/* Record PID namespace instance 
> deletion */
> > +#define AUDIT_NS_DEL_NET	1341	/* Record NET namespace instance deletion 
> */
> > 
> >  #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
> >  #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
> > diff --git a/ipc/namespace.c b/ipc/namespace.c
> > index 59451c1..73727ce 100644
> > --- a/ipc/namespace.c
> > +++ b/ipc/namespace.c
> > @@ -13,6 +13,7 @@
> >  #include <linux/mount.h>
> >  #include <linux/user_namespace.h>
> >  #include <linux/proc_ns.h>
> > +#include <linux/audit.h>
> > 
> >  #include "util.h"
> > 
> > @@ -41,6 +42,8 @@ static struct ipc_namespace *create_ipc_ns(struct
> > user_namespace *user_ns, }
> >  	atomic_inc(&nr_ipc_ns);
> > 
> > +	audit_log_ns_init(AUDIT_NS_INIT_IPC, old_ns->proc_inum, ns->proc_inum);
> > +
> >  	sem_init_ns(ns);
> >  	msg_init_ns(ns);
> >  	shm_init_ns(ns);
> > @@ -119,6 +122,7 @@ static void free_ipc_ns(struct ipc_namespace *ns)
> >  	 */
> >  	ipcns_notify(IPCNS_REMOVED);
> >  	put_user_ns(ns->user_ns);
> > +	audit_log_ns_del(AUDIT_NS_DEL_IPC, ns->proc_inum);
> >  	proc_free_inum(ns->proc_inum);
> >  	kfree(ns);
> >  }
> > @@ -197,3 +201,11 @@ const struct proc_ns_operations ipcns_operations = {
> >  	.install	= ipcns_install,
> >  	.inum		= ipcns_inum,
> >  };
> > +
> > +/* log the ID of init IPC namespace after audit service starts */
> > +static int __init ipc_namespaces_init(void)
> > +{
> > +	audit_log_ns_init(AUDIT_NS_INIT_IPC, 0, init_ipc_ns.proc_inum);
> > +	return 0;
> > +}
> > +late_initcall(ipc_namespaces_init);
> > diff --git a/kernel/audit.c b/kernel/audit.c
> > index 63f32f4..e6230c4 100644
> > --- a/kernel/audit.c
> > +++ b/kernel/audit.c
> > @@ -1978,6 +1978,70 @@ out:
> >  	kfree(name);
> >  }
> > 
> > +#ifdef CONFIG_NAMESPACES
> > +static char *ns_name[] = {
> > +	"mnt",
> > +	"uts",
> > +	"ipc",
> > +	"user",
> > +	"pid",
> > +	"net",
> > +};
> > +
> > +/**
> > + * audit_log_ns_init - report a namespace instance creation
> > + * @type: type of audit namespace instance created message
> > + * @old_inum: the ID number of the cloned namespace instance
> > + * @inum: the ID number of the new namespace instance
> > + */
> > +void  audit_log_ns_init(int type, unsigned int old_inum, unsigned int inum)
> > +{
> > +	struct audit_buffer *ab;
> > +	char *audit_ns_name = ns_name[type - AUDIT_NS_INIT_MNT];
> > +	struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt;
> > +	struct super_block *sb = mnt->mnt_sb;
> > +	char old_ns[16];
> > +
> > +	if (type < AUDIT_NS_INIT_MNT || type > AUDIT_NS_INIT_NET) {
> > +		WARN(1, "audit_log_ns_init: type:%d out of range", type);
> > +		return;
> > +	}
> > +	if (!old_inum)
> > +		sprintf(old_ns, "(none)");
> > +	else
> > +		sprintf(old_ns, "%d", old_inum - PROC_DYNAMIC_FIRST);
> > +	audit_log_common_recv_msg(&ab, type);
> > +	audit_log_format(ab, " dev=%02x:%02x old_%sns=%s %sns=%d res=1",
> > +			 MAJOR(sb->s_dev), MINOR(sb->s_dev),
> > +			 audit_ns_name, old_ns,
> > +			 audit_ns_name, inum - PROC_DYNAMIC_FIRST);
> > +	audit_log_end(ab);
> > +}
> > +
> > +/**
> > + * audit_log_ns_del - report a namespace instance deleted
> > + * @type: type of audit namespace instance deleted message
> > + * @inum: the ID number of the namespace instance
> > + */
> > +void audit_log_ns_del(int type, unsigned int inum)
> > +{
> > +	struct audit_buffer *ab;
> > +	char *audit_ns_name = ns_name[type - AUDIT_NS_DEL_MNT];
> > +	struct vfsmount *mnt = task_active_pid_ns(current)->proc_mnt;
> > +	struct super_block *sb = mnt->mnt_sb;
> > +
> > +	if (type < AUDIT_NS_DEL_MNT || type > AUDIT_NS_DEL_NET) {
> > +		WARN(1, "audit_log_ns_del: type:%d out of range", type);
> > +		return;
> > +	}
> > +	audit_log_common_recv_msg(&ab, type);
> > +	audit_log_format(ab, " dev=%02x:%02x %sns=%d res=1",
> > +			 MAJOR(sb->s_dev), MINOR(sb->s_dev), audit_ns_name,
> > +			 inum - PROC_DYNAMIC_FIRST);
> > +	audit_log_end(ab);
> > +}
> > +#endif /* CONFIG_NAMESPACES */
> > +
> >  /**
> >   * audit_log_end - end one audit record
> >   * @ab: the audit_buffer
> > diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
> > index db95d8e..d28fd14 100644
> > --- a/kernel/pid_namespace.c
> > +++ b/kernel/pid_namespace.c
> > @@ -18,6 +18,7 @@
> >  #include <linux/proc_ns.h>
> >  #include <linux/reboot.h>
> >  #include <linux/export.h>
> > +#include <linux/audit.h>
> > 
> >  struct pid_cache {
> >  	int nr_ids;
> > @@ -109,6 +110,9 @@ static struct pid_namespace *create_pid_namespace(struct
> > user_namespace *user_ns if (err)
> >  		goto out_free_map;
> > 
> > +	audit_log_ns_init(AUDIT_NS_INIT_PID, parent_pid_ns->proc_inum,
> > +			  ns->proc_inum);
> > +
> >  	kref_init(&ns->kref);
> >  	ns->level = level;
> >  	ns->parent = get_pid_ns(parent_pid_ns);
> > @@ -142,6 +146,7 @@ static void destroy_pid_namespace(struct pid_namespace
> > *ns) {
> >  	int i;
> > 
> > +	audit_log_ns_del(AUDIT_NS_DEL_PID, ns->proc_inum);
> >  	proc_free_inum(ns->proc_inum);
> >  	for (i = 0; i < PIDMAP_ENTRIES; i++)
> >  		kfree(ns->pidmap[i].page);
> > @@ -388,3 +393,11 @@ static __init int pid_namespaces_init(void)
> >  }
> > 
> >  __initcall(pid_namespaces_init);
> > +
> > +/* log the ID of init PID namespace after audit service starts */
> > +static __init int pid_namespaces_late_init(void)
> > +{
> > +	audit_log_ns_init(AUDIT_NS_INIT_PID, 0, init_pid_ns.proc_inum);
> > +	return 0;
> > +}
> > +late_initcall(pid_namespaces_late_init);
> > diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
> > index fcc0256..89c2517 100644
> > --- a/kernel/user_namespace.c
> > +++ b/kernel/user_namespace.c
> > @@ -22,6 +22,7 @@
> >  #include <linux/ctype.h>
> >  #include <linux/projid.h>
> >  #include <linux/fs_struct.h>
> > +#include <linux/audit.h>
> > 
> >  static struct kmem_cache *user_ns_cachep __read_mostly;
> > 
> > @@ -92,6 +93,9 @@ int create_user_ns(struct cred *new)
> >  		return ret;
> >  	}
> > 
> > +	audit_log_ns_init(AUDIT_NS_INIT_USER, parent_ns->proc_inum,
> > +			  ns->proc_inum);
> > +
> >  	atomic_set(&ns->count, 1);
> >  	/* Leave the new->user_ns reference with the new user namespace. */
> >  	ns->parent = parent_ns;
> > @@ -136,6 +140,7 @@ void free_user_ns(struct user_namespace *ns)
> >  #ifdef CONFIG_PERSISTENT_KEYRINGS
> >  		key_put(ns->persistent_keyring_register);
> >  #endif
> > +		audit_log_ns_del(AUDIT_NS_DEL_USER, ns->proc_inum);
> >  		proc_free_inum(ns->proc_inum);
> >  		kmem_cache_free(user_ns_cachep, ns);
> >  		ns = parent;
> > @@ -909,3 +914,11 @@ static __init int user_namespaces_init(void)
> >  	return 0;
> >  }
> >  subsys_initcall(user_namespaces_init);
> > +
> > +/* log the ID of init user namespace after audit service starts */
> > +static __init int user_namespaces_late_init(void)
> > +{
> > +	audit_log_ns_init(AUDIT_NS_INIT_USER, 0, init_user_ns.proc_inum);
> > +	return 0;
> > +}
> > +late_initcall(user_namespaces_late_init);
> > diff --git a/kernel/utsname.c b/kernel/utsname.c
> > index fd39312..fa21e8d 100644
> > --- a/kernel/utsname.c
> > +++ b/kernel/utsname.c
> > @@ -16,6 +16,7 @@
> >  #include <linux/slab.h>
> >  #include <linux/user_namespace.h>
> >  #include <linux/proc_ns.h>
> > +#include <linux/audit.h>
> > 
> >  static struct uts_namespace *create_uts_ns(void)
> >  {
> > @@ -48,6 +49,8 @@ static struct uts_namespace *clone_uts_ns(struct
> > user_namespace *user_ns, return ERR_PTR(err);
> >  	}
> > 
> > +	audit_log_ns_init(AUDIT_NS_INIT_UTS, old_ns->proc_inum, ns->proc_inum);
> > +
> >  	down_read(&uts_sem);
> >  	memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
> >  	ns->user_ns = get_user_ns(user_ns);
> > @@ -84,6 +87,7 @@ void free_uts_ns(struct kref *kref)
> > 
> >  	ns = container_of(kref, struct uts_namespace, kref);
> >  	put_user_ns(ns->user_ns);
> > +	audit_log_ns_del(AUDIT_NS_DEL_UTS, ns->proc_inum);
> >  	proc_free_inum(ns->proc_inum);
> >  	kfree(ns);
> >  }
> > @@ -138,3 +142,11 @@ const struct proc_ns_operations utsns_operations = {
> >  	.install	= utsns_install,
> >  	.inum		= utsns_inum,
> >  };
> > +
> > +/* log the ID of init UTS namespace after audit service starts */
> > +static int __init uts_namespaces_init(void)
> > +{
> > +	audit_log_ns_init(AUDIT_NS_INIT_UTS, 0, init_uts_ns.proc_inum);
> > +	return 0;
> > +}
> > +late_initcall(uts_namespaces_init);
> > diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
> > index 85b6269..562eb85 100644
> > --- a/net/core/net_namespace.c
> > +++ b/net/core/net_namespace.c
> > @@ -17,6 +17,7 @@
> >  #include <linux/user_namespace.h>
> >  #include <net/net_namespace.h>
> >  #include <net/netns/generic.h>
> > +#include <linux/audit.h>
> > 
> >  /*
> >   *	Our network namespace constructor/destructor lists
> > @@ -253,6 +254,8 @@ struct net *copy_net_ns(unsigned long flags,
> >  	mutex_lock(&net_mutex);
> >  	rv = setup_net(net, user_ns);
> >  	if (rv == 0) {
> > +		audit_log_ns_init(AUDIT_NS_INIT_NET, old_net->proc_inum,
> > +				  net->proc_inum);
> >  		rtnl_lock();
> >  		list_add_tail_rcu(&net->list, &net_namespace_list);
> >  		rtnl_unlock();
> > @@ -389,6 +392,7 @@ static __net_init int net_ns_net_init(struct net *net)
> > 
> >  static __net_exit void net_ns_net_exit(struct net *net)
> >  {
> > +	audit_log_ns_del(AUDIT_NS_DEL_NET, net->proc_inum);
> >  	proc_free_inum(net->proc_inum);
> >  }
> > 
> > @@ -435,6 +439,14 @@ static int __init net_ns_init(void)
> > 
> >  pure_initcall(net_ns_init);
> > 
> > +/* log the ID of init_net namespace after audit service starts */
> > +static int __init net_ns_init_log(void)
> > +{
> > +	audit_log_ns_init(AUDIT_NS_INIT_NET, 0, init_net.proc_inum);
> > +	return 0;
> > +}
> > +late_initcall(net_ns_init_log);
> > +
> >  #ifdef CONFIG_NET_NS
> >  static int __register_pernet_operations(struct list_head *list,
> >  					struct pernet_operations *ops)
> 

- RGB

--
Richard Guy Briggs <rbriggs@redhat.com>
Senior Software Engineer, Kernel Security, AMER ENG Base Operating Systems, Red Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635, Alt: +1.613.693.0684x3545
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Grubb May 14, 2015, 2:57 p.m. UTC | #5
On Tuesday, May 12, 2015 03:57:59 PM Richard Guy Briggs wrote:
> On 15/05/05, Steve Grubb wrote:
> > I think there needs to be some more discussion around this. It seems like
> > this is not exactly recording things that are useful for audit.
> 
> It seems to me that either audit has to assemble that information, or
> the kernel has to do so.  The kernel doesn't know about containers
> (yet?).

Auditing is something that has a lot of requirements imposed on it by security 
standards. There was no requirement to have an auid until audit came along and 
said that uid is not good enough to know who is issuing commands because of su 
or sudo. There was no requirement for sessionid until we had to track each 
action back to a login so we could see if the login came from the expected 
place. 

What I am saying is we have the same situation. Audit needs to track a 
container and we need an ID. The information that is being logged is not 
useful for auditing. Maybe someone wants that info in syslog, but I doubt it. 
The audit trail's purpose is to allow a security officer to reconstruct the 
events to determine what happened during some security incident.

What they would want to know is what resources were assigned; if two 
containers shared a resource, what resource and container was it shared with; 
if two containers can communicate, we need to see or control information flow 
when necessary; and we need to see termination and release of resources.

Also, if the host OS cannot make sense of the information being logged because 
the pid maps to another process name, or a uid maps to another user, or a file 
access maps to something not in the host's, then we need the container to do 
its own auditing and resolve these mappings and optionally pass these to an 
aggregation server.

Nothing else makes sense.


> > On Friday, April 17, 2015 03:35:52 AM Richard Guy Briggs wrote:
> > > Log the creation and deletion of namespace instances in all 6 types of
> > > namespaces.
> > > 
> > > Twelve new audit message types have been introduced:
> > > AUDIT_NS_INIT_MNT       1330    /* Record mount namespace instance
> > > creation
> > > */ AUDIT_NS_INIT_UTS       1331    /* Record UTS namespace instance
> > > creation */ AUDIT_NS_INIT_IPC       1332    /* Record IPC namespace
> > > instance creation */ AUDIT_NS_INIT_USER      1333    /* Record USER
> > > namespace instance creation */ AUDIT_NS_INIT_PID       1334    /* Record
> > > PID namespace instance creation */ AUDIT_NS_INIT_NET       1335    /*
> > > Record NET namespace instance creation */ AUDIT_NS_DEL_MNT        1336
> > > /* Record mount namespace instance deletion */ AUDIT_NS_DEL_UTS       
> > > 1337
> > > 
> > >    /* Record UTS namespace instance deletion */ AUDIT_NS_DEL_IPC
> > > 
> > > 1338    /* Record IPC namespace instance deletion */ AUDIT_NS_DEL_USER
> > > 
> > >  1339    /* Record USER namespace instance deletion */ AUDIT_NS_DEL_PID
> > >  
> > >    1340    /* Record PID namespace instance deletion */ AUDIT_NS_DEL_NET
> > >    
> > >     1341    /* Record NET namespace instance deletion */
> > 
> > The requirements for auditing of containers should be derived from VPP. In
> > it, it asks for selectable auditing, selective audit, and selective audit
> > review. What this means is that we need the container and all its
> > children to have one identifier that is inserted into all the events that
> > are associated with the container.
> 
> Is that requirement for the records that are sent from the kernel, or
> for the records stored by auditd, or by another facility that delivers
> those records to a final consumer?

A little of both. Selective audit means that you can set rules to include or 
exclude an event. This is done in the kernel. Selectable review means that the 
user space tools need to be able to skip past records not of interest to a 
specific line of inquiry. Also, logging everything and letting user space work 
it out later is also not a solution because the needle is harder to find in a 
larger haystack. Or, the logs may rotate and it's gone forever because the 
partition is filled. 

 
> > With this, its possible to do a search for all events related to a
> > container. Its possible to exclude events from a container. Its possible
> > to not get any events.
> > 
> > The requirements also call out for the identification of the subject. This
> > means that the event should be bound to a syscall such as clone, setns, or
> > unshare.
> 
> Is it useful to have a reference of the init namespace set from which
> all others are spawned?

For things directly observable by the init name space, yes.

> If it isn't bound, I assume the subject should be added to the message
> format?  I'm thinking of messages without an audit_context such as audit
> user messages (such as AUDIT_NS_INFO and AUDIT_VIRT_CONTROL).

Making these events auxiliary records to a syscall is all that is needed. The 
same way that PATH is added to an open event. If someone wants to have 
container/namespace events, they add a rule on clone(2).


> For now, we should not need to log namespaces with AUDIT_FEATURE_CHANGE
> or AUDIT_CONFIG_CHANGE messages since only initial user namespace with
> initial pid namespace has permission to do so.  This will need to be
> addressed by having non-init config changes be limited to that container
> or set of namespaces and possibly its children.  The other possibility
> is to add the subject to the stand-alone message.
> 
> > Also, any user space events originating inside the container needs to have
> > the container ID added to the user space event - just like auid and
> > session id.
>
> This sounds like every task needs to record a container ID since that
> information is otherwise unknown by the kernel except by what might be
> provided by an audit user message such as AUDIT_VIRT_CONTROL or possibly
> the new AUDIT_NS_INFO request.

Right. The same as we record auid and ses on every event. We'll need a 
container ID logged with everything. -1 for unset, meaning init namespace.


> It could be stored in struct task_struct or in struct audit_context.  I
> don't have a suggestion on how to get that information securely into the
> kernel.

That is where I'd suggest. It's for audit subsystem needs.
 

> > Recording each instance of a name space is giving me something that I
> > cannot use to do queries required by the security target. Given these
> > events, how do I locate a web server event where it accesses a watched
> > file? That authentication failed? That an update within the container
> > failed?
> > 
> > The requirements are that we have to log the creation, suspension,
> > migration, and termination of a container. The requirements are not on
> > the individual name space.
> 
> Ok.  Do we have a robust definition of a container? 

We call the combination of name spaces, cgroups, and seccomp rules a 
container.

> Where is that definition managed?

In the thing that invokes a container.

> If it is a userspace concept, then I think either userspace should be
> assembling this information, or providing that information to the entity
> that will be expected to know about and provide it.

Well, uid is a userspace concept, too. But we record an auid and keep it 
immutable so that we can check enforcement of system security policy which is 
also a user space concept. These things need to be collected to a place that 
can be associated with events as needed. That place is the kernel.


> > Maybe I'm missing how these events give me that. But I'd like to hear how
> > I
> > would be able to meet requirements with these 12 events.
> 
> Adding the infrastructure to give each of those 12 events an audit
> context to be able to give meaningful subject fields in audit records
> appears to require adding a struct task_struct argument to calls to
> copy_mnt_ns(), copy_utsname(), copy_ipcs(), copy_pid_ns(),
> copy_net_ns(), create_user_ns() unless I use current.  I think we must
> use current since the userns is created before the spawned process is
> mature or has an audit context in the case of clone.

I think you are heading down the wrong path. We can tell from syscall flags 
what is being done. Try this:

## Optional - log container creation
-a always,exit -F arch=b32 -S clone -F a0&0x7C020000 -F key=container-create
-a always,exit -F arch=b64 -S clone -F a0&0x7C020000 -F key=container-create

## Optional - watch for containers that may change their configuration
-a always,exit -F arch=b32 -S unshare,setns -F key=container-config
-a always,exit -F arch=b64 -S unshare,setns -F key=container-config

Then muck with containers, then use ausearch --start recent -k container -i. I 
think you'll see that we know a bit about what's happening. What's needed is 
the breadcrumb trail to tie future events back to the container so that we can 
check for violations of host security policy.

> Either that, or I have mis-understood and I should be stashing this
> namespace ID information in an audit_aux_data structure or a more
> permanent part of struct audit_context to be printed when required on
> syscall exit.  I'm trying to think through if it is needed in any
> non-syscall audit messages.

I think this is what is required. But we also have the issue where an event's 
meaning can't be determined outside of a container. (For example, login, 
account creation, password change, uid change, file access, etc.) So, I think 
auditing needs to be local to the container for enrichment and ultimately 
forwarded to an aggregating server.

-Steve

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman May 14, 2015, 3:42 p.m. UTC | #6
Steve Grubb <sgrubb@redhat.com> writes:

> On Tuesday, May 12, 2015 03:57:59 PM Richard Guy Briggs wrote:
>> On 15/05/05, Steve Grubb wrote:
>> > I think there needs to be some more discussion around this. It seems like
>> > this is not exactly recording things that are useful for audit.
>> 
>> It seems to me that either audit has to assemble that information, or
>> the kernel has to do so.  The kernel doesn't know about containers
>> (yet?).
>
> Auditing is something that has a lot of requirements imposed on it by security 
> standards. There was no requirement to have an auid until audit came along and 
> said that uid is not good enough to know who is issuing commands because of su 
> or sudo. There was no requirement for sessionid until we had to track each 
> action back to a login so we could see if the login came from the expected 
> place. 

Stop right there.

You want a global identifier in a realm where only relative identifiers
exist, and make sense.

I am sorry that isn't going to happen. EVER.

Square peg, round hole.  It doesn't work, it doesn't make sense, and
most especially it doesn't allow anyone to reconstruct anything, because
it does not make sense and does not match what the kernel is doing.

Container IDs do not, and will not exist.  There is probably something
reasonable in your request but until you stop talking that nonsense I
can't see it.

Global IDs take us into the namespace of namespaces problem and that
isn't going to happen.  I have already bent as far in this direction as
I can go.  Further, namespace creation is not a privileged event which
makes the request for a container ID make even less sense.  With
anyone able to create whatever they want it will not be an identifier
that makes any sense to someone reading an audit log.

Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Grubb May 14, 2015, 4:21 p.m. UTC | #7
On Thursday, May 14, 2015 10:42:38 AM Eric W. Biederman wrote:
> Steve Grubb <sgrubb@redhat.com> writes:
> > On Tuesday, May 12, 2015 03:57:59 PM Richard Guy Briggs wrote:
> >> On 15/05/05, Steve Grubb wrote:
> >> > I think there needs to be some more discussion around this. It seems
> >> > like
> >> > this is not exactly recording things that are useful for audit.
> >> 
> >> It seems to me that either audit has to assemble that information, or
> >> the kernel has to do so.  The kernel doesn't know about containers
> >> (yet?).
> > 
> > Auditing is something that has a lot of requirements imposed on it by
> > security standards. There was no requirement to have an auid until audit
> > came along and said that uid is not good enough to know who is issuing
> > commands because of su or sudo. There was no requirement for sessionid
> > until we had to track each action back to a login so we could see if the
> > login came from the expected place.
> 
> Stop right there.
> 
> You want a global identifier in a realm where only relative identifiers
> exist, and make sense.

Global to a name space for me is I guess relative for you. The ID is needed to 
tie events together to check for violations of the security policy of the 
container/namespace invoking child container/namespace.

As a concrete example, suppose a container is to have its own /etc/shadow. If 
for some reason the container used the host's copy, then that would point to a 
misconfiguration or perhaps indicate an escape from the container.

I would imagine that the next layer down has its own set of global identifiers 
so that it can verify enforcement of its own security assumptions. This does 
not need to be global to the system from top to 9 layers down. Each layer 
needs to have a way of locating events common to a child container instance.


> I am sorry that isn't going to happen. EVER.

Then I'd suggest we either scrap this set of patches and forget auditing of 
containers. (This would have the effect of disallowing them in a lot of 
environments because violations of security policy can't be detected.)

Or someone please explain how what is proposed to be logged allows the tying 
together of events. Or even supports the requirements I stated in my last 
email. 

-Steve

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Moore May 14, 2015, 7:19 p.m. UTC | #8
On Thursday, May 14, 2015 10:57:14 AM Steve Grubb wrote:
> On Tuesday, May 12, 2015 03:57:59 PM Richard Guy Briggs wrote:
> > On 15/05/05, Steve Grubb wrote:
> > > I think there needs to be some more discussion around this. It seems
> > > like this is not exactly recording things that are useful for audit.
> > 
> > It seems to me that either audit has to assemble that information, or
> > the kernel has to do so.  The kernel doesn't know about containers
> > (yet?).
> 
> Auditing is something that has a lot of requirements imposed on it by
> security standards. There was no requirement to have an auid until audit
> came along and said that uid is not good enough to know who is issuing
> commands because of su or sudo. There was no requirement for sessionid
> until we had to track each action back to a login so we could see if the
> login came from the expected place.
> 
> What I am saying is we have the same situation. Audit needs to track a
> container and we need an ID. The information that is being logged is not
> useful for auditing. Maybe someone wants that info in syslog, but I doubt
> it. The audit trail's purpose is to allow a security officer to reconstruct
> the events to determine what happened during some security incident.

As Eric, and others, have stated, the container concept is a userspace idea, 
not a kernel idea; the kernel only knows, and cares about, namespaces.  This 
is unlikely to change.

However, as Steve points out, there is precedent for the kernel to record 
userspace tokens for the sake of audit.  Personally I'm not a big fan of this 
in general, but I do recognize that it does satisfy a legitimate need.  Think 
of things like auid and the sessionid as necessary evils; audit is already 
chock full of evilness, so I doubt one more will doom us all to hell.

Moving forward, I'd like to see the following:

* Record the creation/removal/mgmt of the individual namespaces as Richard's 
patchset currently does.  However, I'd suggest using an explicit namespace 
value for the init namespace instead of the "unset" value in the V6 patchset 
(my apologies if you've already changed this Richard, I haven't looked at V7 
yet).

* Create a container ID token (unsigned 32-bit integer?), similar to 
auid/sessionid, that is set by userspace and carried by the kernel to be used 
in audit records.  I'd like to see some discussion on how we manage this, e.g. 
how do we handle container ID inheritance, how do we handle nested containers 
(setting the containerid when it is already set), do we care if multiple 
different containers share the same namespace config, etc.?

* When userspace sets the container ID, emit a new audit record with the 
associated namespace tokens and the container ID.

* Look at our existing audit records to determine which records should have 
namespace and container ID tokens added.  We may only want to add the 
additional fields in the case where the namespace/container ID tokens are not 
the init namespace.

Can we all live with this?  If not, please suggest some alternate ideas; 
simply shouting "IT'S ALL CRAP!" isn't helpful for anyone ... it may be true, 
but it doesn't help us solve the problem ;)
Richard Guy Briggs May 15, 2015, 12:48 a.m. UTC | #9
On 15/05/14, Steve Grubb wrote:
> On Tuesday, May 12, 2015 03:57:59 PM Richard Guy Briggs wrote:
> > On 15/05/05, Steve Grubb wrote:
> > > I think there needs to be some more discussion around this. It seems like
> > > this is not exactly recording things that are useful for audit.
> > 
> > It seems to me that either audit has to assemble that information, or
> > the kernel has to do so.  The kernel doesn't know about containers
> > (yet?).
> 
> Auditing is something that has a lot of requirements imposed on it by security 
> standards. There was no requirement to have an auid until audit came along and 
> said that uid is not good enough to know who is issuing commands because of su 
> or sudo. There was no requirement for sessionid until we had to track each 
> action back to a login so we could see if the login came from the expected 
> place. 
> 
> What I am saying is we have the same situation. Audit needs to track a 
> container and we need an ID. The information that is being logged is not 
> useful for auditing. Maybe someone wants that info in syslog, but I doubt it. 
> The audit trail's purpose is to allow a security officer to reconstruct the 
> events to determine what happened during some security incident.

I agree the information being logged is not yet useful, but it is a
component of what would be.  I wasn't ever thinking about syslog...  It
is this trail that I was trying to help create.

> What they would want to know is what resources were assigned; if two 
> containers shared a resource, what resource and container was it shared with; 
> if two containers can communicate, we need to see or control information flow 
> when necessary; and we need to see termination and release of resources.

So, namespaces are a big part of this.  I understand how they are
spawned and potentially shared.  I have a more vague idea about how
cgroups contribute to this concept of a container.  So far, I have very
little idea how seccomp contributes, but I assume that it will also need
to be part of this tracking.

> Also, if the host OS cannot make sense of the information being logged because 
> the pid maps to another process name, or a uid maps to another user, or a file 
> access maps to something not in the host's, then we need the container to do 
> its own auditing and resolve these mappings and optionally pass these to an 
> aggregation server.

I'm open to both being possible.

> Nothing else makes sense.
> 
> > > On Friday, April 17, 2015 03:35:52 AM Richard Guy Briggs wrote:
> > > > Log the creation and deletion of namespace instances in all 6 types of
> > > > namespaces.
> > > > 
> > > > Twelve new audit message types have been introduced:
> > > > AUDIT_NS_INIT_MNT       1330    /* Record mount namespace instance creation */
> > > > AUDIT_NS_INIT_UTS       1331    /* Record UTS namespace instance creation */
> > > > AUDIT_NS_INIT_IPC       1332    /* Record IPC namespace instance creation */
> > > > AUDIT_NS_INIT_USER      1333    /* Record USER namespace instance creation */
> > > > AUDIT_NS_INIT_PID       1334    /* Record PID namespace instance creation */
> > > > AUDIT_NS_INIT_NET       1335    /* Record NET namespace instance creation */
> > > > AUDIT_NS_DEL_MNT        1336    /* Record mount namespace instance deletion */
> > > > AUDIT_NS_DEL_UTS        1337    /* Record UTS namespace instance deletion */
> > > > AUDIT_NS_DEL_IPC        1338    /* Record IPC namespace instance deletion */
> > > > AUDIT_NS_DEL_USER       1339    /* Record USER namespace instance deletion */
> > > > AUDIT_NS_DEL_PID        1340    /* Record PID namespace instance deletion */
> > > > AUDIT_NS_DEL_NET        1341    /* Record NET namespace instance deletion */
> > > 
> > > The requirements for auditing of containers should be derived from VPP. In
> > > it, it asks for selectable auditing, selective audit, and selective audit
> > > review. What this means is that we need the container and all its
> > > children to have one identifier that is inserted into all the events that
> > > are associated with the container.
> > 
> > Is that requirement for the records that are sent from the kernel, or
> > for the records stored by auditd, or by another facility that delivers
> > those records to a final consumer?
> 
> A little of both. Selective audit means that you can set rules to include or 
> exclude an event. This is done in the kernel. Selectable review means that the 
> user space tools need to be able to skip past records not of interest to a 
> specific line of inquiry. Also, logging everything and letting user space work 
> it out later is also not a solution because the needle is harder to find in a 
> larger haystack. Or, the logs may rotate and it's gone forever because the 
> partition is filled. 

I agree it needs to be a balance of flexibility and efficiency.

> > > With this, it's possible to do a search for all events related to a
> > > container. It's possible to exclude events from a container. It's possible
> > > to not get any events.
> > > 
> > > The requirements also call out for the identification of the subject. This
> > > means that the event should be bound to a syscall such as clone, setns, or
> > > unshare.
> > 
> > Is it useful to have a reference of the init namespace set from which
> > all others are spawned?
> 
> For things directly observable by the init name space, yes.

Ok, so we'll need to have a way to document that initial state on boot
before any other processes start, preferably in one clear brief record.

> > If it isn't bound, I assume the subject should be added to the message
> > format?  I'm thinking of messages without an audit_context such as audit
> > user messages (such as AUDIT_NS_INFO and AUDIT_VIRT_CONTROL).
> 
> Making these events auxiliary records to a syscall is all that is needed. The 
> same way that PATH is added to an open event. If someone wants to have 
> container/namespace events, they add a rule on clone(2).

This doesn't make sense.  The point of this type of record is to have a
way for a userspace container manager (which maybe should have a new CAP
type) to tie the creation of namespaces to a specific container name or
ID.  It might even contain cgroup and/or seccomp info.

> > For now, we should not need to log namespaces with AUDIT_FEATURE_CHANGE
> > or AUDIT_CONFIG_CHANGE messages since only initial user namespace with
> > initial pid namespace has permission to do so.  This will need to be
> > addressed by having non-init config changes be limited to that container
> > or set of namespaces and possibly its children.  The other possibility
> > is to add the subject to the stand-alone message.
> > 
> > > Also, any user space events originating inside the container needs to have
> > > the container ID added to the user space event - just like auid and
> > > session id.
> >
> > This sounds like every task needs to record a container ID since that
> > information is otherwise unknown by the kernel except by what might be
> > provided by an audit user message such as AUDIT_VIRT_CONTROL or possibly
> > the new AUDIT_NS_INFO request.
> 
> Right. The same as we record auid and ses on every event. We'll need a 
> container ID logged with everything. -1 for unset, meaning init namespace.

Ok, that might remove the need for the reply I just wrote above.

> > It could be stored in struct task_struct or in struct audit_context.  I
> > don't have a suggestion on how to get that information securely into the
> > kernel.
> 
> That is where I'd suggest. It's for audit subsystem needs.

struct audit_context would be my choice.

> > > Recording each instance of a name space is giving me something that I
> > > cannot use to do queries required by the security target. Given these
> > > events, how do I locate a web server event where it accesses a watched
> > > file? That authentication failed? That an update within the container
> > > failed?
> > > 
> > > The requirements are that we have to log the creation, suspension,
> > > migration, and termination of a container. The requirements are not on
> > > the individual name space.
> > 
> > Ok.  Do we have a robust definition of a container? 
> 
> We call the combination of name spaces, cgroups, and seccomp rules a 
> container.

Can you detail what information is required from each?

> > Where is that definition managed?
> 
> In the thing that invokes a container.

I was looking for a reference to a standards document rather than an
application...

> > If it is a userspace concept, then I think either userspace should be
> > assembling this information, or providing that information to the entity
> > that will be expected to know about and provide it.
> 
> Well, uid is a userspace concept, too. But we record an auid and keep it 
> immutable so that we can check enforcement of system security policy which is 
> also a user space concept. These things need to be collected to a place that 
> can be associated with events as needed. That place is the kernel.

I am fine with putting that in the kernel if that is what makes most
sense.

> > > Maybe I'm missing how these events give me that. But I'd like to
> > > hear how I would be able to meet requirements with these 12
> > > events.
> > 
> > Adding the infrastructure to give each of those 12 events an audit
> > context to be able to give meaningful subject fields in audit records
> > appears to require adding a struct task_struct argument to calls to
> > copy_mnt_ns(), copy_utsname(), copy_ipcs(), copy_pid_ns(),
> > copy_net_ns(), create_user_ns() unless I use current.  I think we must
> > use current since the userns is created before the spawned process is
> > mature or has an audit context in the case of clone.
> 
> I think you are heading down the wrong path.

That's why I started questioning it...

> We can tell from syscall flags what is being done. Try this:
> 
> ## Optional - log container creation
> -a always,exit -F arch=b32 -S clone -F a0&0x7C020000 -F key=container-create
> -a always,exit -F arch=b64 -S clone -F a0&0x7C020000 -F key=container-create
> 
> ## Optional - watch for containers that may change their configuration
> -a always,exit -F arch=b32 -S unshare,setns -F key=container-config
> -a always,exit -F arch=b64 -S unshare,setns -F key=container-config
> 
> Then muck with containers, then use ausearch --start recent -k container -i. I 
> think you'll see that we know a bit about what's happening. What's needed is 
> the breadcrumb trail to tie future events back to the container so that we can 
> check for violations of host security policy.

Agreed.

> > Either that, or I have mis-understood and I should be stashing this
> > namespace ID information in an audit_aux_data structure or a more
> > permanent part of struct audit_context to be printed when required on
> > syscall exit.  I'm trying to think through if it is needed in any
> > non-syscall audit messages.
> 
> I think this is what is required. But we also have the issue where an event's 
> meaning can't be determined outside of a container. (For example, login, 
> account creation, password change, uid change, file access, etc.) So, I think 
> auditing needs to be local to the container for enrichment and ultimately 
> forwarded to an aggregating server.

There are some events that will mean more to different layers...
They should be determined by the rules in each auditd jurisdiction,
potentially one per user namespace.

> -Steve

- RGB

--
Richard Guy Briggs <rbriggs@redhat.com>
Senior Software Engineer, Kernel Security, AMER ENG Base Operating Systems, Red Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635, Alt: +1.613.693.0684x3545
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman May 15, 2015, 1:31 a.m. UTC | #10
Paul Moore <pmoore@redhat.com> writes:
> As Eric, and others, have stated, the container concept is a userspace idea, 
> not a kernel idea; the kernel only knows, and cares about, namespaces.  This 
> is unlikely to change.
>
> However, as Steve points out, there is precedent for the kernel to record 
> userspace tokens for the sake of audit.  Personally I'm not a big fan of this 
> in general, but I do recognize that it does satisfy a legitimate need.  Think 
> of things like auid and the sessionid as necessary evils; audit is already 
> chock full of evilness I doubt one more will doom us all to hell.
>
> Moving forward, I'd like to see the following:

> * Create a container ID token (unsigned 32-bit integer?), similar to 
> auid/sessionid, that is set by userspace and carried by the kernel to be used 
> in audit records.  I'd like to see some discussion on how we manage this, e.g. 
> how do we handle container ID inheritance, how do we handle nested containers 
> (setting the containerid when it is already set), do we care if multiple 
> different containers share the same namespace config, etc.?


> Can we all live with this?  If not, please suggest some alternate ideas; 
> simply shouting "IT'S ALL CRAP!" isn't helpful for anyone ... it may be true, 
> but it doesn't help us solve the problem ;)

Without stopping and defining what someone means by container I think it
is pretty much nonsense.

Should every vsftp connection get a container?  Every chrome tab?

At some of the connections per second numbers I have seen we might
exhaust a 32bit number in an hour or two.  Will any of that make sense
to someone reading the audit logs?

Without considering that container creation is an unprivileged
operation I think it is pretty much nonsense.  Do I get to say I am any
container I want?  That would seem to invalidate the concept of
userspace setting a container id.

How does any of this interact with setns?  AKA entering a container?

I will go as far as looking at patches.  If someone comes up with
a mission statement about what they are actually trying to achieve and a
mechanism that actually achieves that, and that allows for containers to
nest we can talk about doing something like that.

But for right now I just hear proposals for things that make no sense
and can not possibly work.  Not least because it will require modifying
every program that creates a container and who knows how many of them
there are.  Especially since you don't need to be root.  Modifying
/usr/bin/unshare seems a little far out to me.

Eric




--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Richard Guy Briggs May 15, 2015, 2:03 a.m. UTC | #11
On 15/05/14, Eric W. Biederman wrote:
> Steve Grubb <sgrubb@redhat.com> writes:
> > On Tuesday, May 12, 2015 03:57:59 PM Richard Guy Briggs wrote:
> >> On 15/05/05, Steve Grubb wrote:
> >> > I think there needs to be some more discussion around this. It seems like
> >> > this is not exactly recording things that are useful for audit.
> >> 
> >> It seems to me that either audit has to assemble that information, or
> >> the kernel has to do so.  The kernel doesn't know about containers
> >> (yet?).
> >
> > Auditing is something that has a lot of requirements imposed on it by security 
> > standards. There was no requirement to have an auid until audit came along and 
> > said that uid is not good enough to know who is issuing commands because of su 
> > or sudo. There was no requirement for sessionid until we had to track each 
> > action back to a login so we could see if the login came from the expected 
> > place. 
> 
> Stop right there.
> 
> You want a global identifier in a realm where only relative identifiers
> exist, and make sense.

I am assuming he wants an identifier unique per container on one kernel
and what happens on other kernels is a matter for a management
application to take care of.  This kernel doesn't have to deal with it
other than taking information from a container management application.

> I am sorry that isn't going to happen. EVER.
> 
> Square peg, round hole.  It doesn't work, it doesn't make sense, and
> most especially it doesn't allow anyone to reconstruct anything, because
> it does not make sense and does not match what the kernel is doing.
> 
> Container IDs do not, and will not exist.  There is probably something
> reasonable in your request but until you stop talking that nonsense I
> can't see it.

I didn't see anything in any of what Steve said that suggested it was to
be unique beyond that one kernel.

> Global IDs take us into the namespace of namespaces problem and that
> isn't going to happen.  I have already bent as far in this direction as
> I can go.  Further, namespace creation is not a privileged event, which
> makes the request for a container ID make even less sense.  With
> anyone able to create whatever they want, it will not be an identifier
> that makes any sense to someone reading an audit log.

Again, I assume this is up to a container management application that
will manage its pool of container hosts and an audit aggregator.

You keep raising an objection about the unworkability of a "namespace of
namespaces".  Just so we are all on the same page here, can you explain
exactly what you mean with "namespace of namespaces"?

> Eric

- RGB

--
Richard Guy Briggs <rbriggs@redhat.com>
Senior Software Engineer, Kernel Security, AMER ENG Base Operating Systems, Red Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635, Alt: +1.613.693.0684x3545
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Richard Guy Briggs May 15, 2015, 2:11 a.m. UTC | #12
On 15/05/14, Oren Laadan wrote:
> On Thu, May 14, 2015 at 8:48 PM, Richard Guy Briggs <rgb@redhat.com> wrote:
> 
> >
> > > > > Recording each instance of a name space is giving me something that I
> > > > > cannot use to do queries required by the security target. Given these
> > > > > events, how do I locate a web server event where it accesses a
> > watched
> > > > > file? That authentication failed? That an update within the container
> > > > > failed?
> > > > >
> > > > > The requirements are that we have to log the creation, suspension,
> > > > > migration, and termination of a container. The requirements are not
> > on
> > > > > the individual name space.
> > > >
> > > > Ok.  Do we have a robust definition of a container?
> > >
> > > We call the combination of name spaces, cgroups, and seccomp rules a
> > > container.
> >
> > Can you detail what information is required from each?
> >
> > > > Where is that definition managed?
> > >
> > > In the thing that invokes a container.
> >
> > I was looking for a reference to a standards document rather than an
> > application...
> >
> >
> [focusing on "containers id" - snipped the rest away]
> 
> I am unfamiliar with the audit subsystem, but work with namespaces in other
> contexts. Perhaps the term "container" is overloaded here. The definition
> suggested by Steve in this thread makes sense to me: "a combination of
> namespaces". I imagine people may want to audit subsets of namespaces.

I assume it would be a bit more than that, including cgroup and seccomp info.

> For namespaces, can use a string like "A:B:C:D:E:F" as an identifier for a
> particular combination, where A-F are respective namespaces identifiers.
> (Can be taken for example from /proc/PID/ns/{mnt,uts,ipc,user,pid,net}).
>  That will even be grep-able to locate records related to a particular
> subset
> of namespaces. So a "container" in the classic meaning would have all A-F
> unique and different from the init process, but processes separated only by
> e.g. mnt-ns and net-ns will differ from the init process in  A and F.
> 
> (If a string is a no go, then perhaps combine the IDs in a unique way into a
> super ID).

I'd be fine with either, even including the nsfs deviceID.

> Oren.

- RGB

--
Richard Guy Briggs <rbriggs@redhat.com>
Senior Software Engineer, Kernel Security, AMER ENG Base Operating Systems, Red Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635, Alt: +1.613.693.0684x3545
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Richard Guy Briggs May 15, 2015, 2:25 a.m. UTC | #13
On 15/05/14, Eric W. Biederman wrote:
> Paul Moore <pmoore@redhat.com> writes:
> > As Eric, and others, have stated, the container concept is a userspace idea, 
> > not a kernel idea; the kernel only knows, and cares about, namespaces.  This 
> > is unlikely to change.
> >
> > However, as Steve points out, there is precedent for the kernel to record 
> > userspace tokens for the sake of audit.  Personally I'm not a big fan of this 
> > in general, but I do recognize that it does satisfy a legitimate need.  Think 
> > of things like auid and the sessionid as necessary evils; audit is already 
> > chock full of evilness I doubt one more will doom us all to hell.
> >
> > Moving forward, I'd like to see the following:
> 
> > * Create a container ID token (unsigned 32-bit integer?), similar to 
> > auid/sessionid, that is set by userspace and carried by the kernel to be used 
> > in audit records.  I'd like to see some discussion on how we manage this, e.g. 
> > how do we handle container ID inheritance, how do we handle nested containers 
> > (setting the containerid when it is already set), do we care if multiple 
> > different containers share the same namespace config, etc.?
> 
> 
> > Can we all live with this?  If not, please suggest some alternate ideas; 
> > simply shouting "IT'S ALL CRAP!" isn't helpful for anyone ... it may be true, 
> > but it doesn't help us solve the problem ;)
> 
> Without stopping and defining what someone means by container I think it
> is pretty much nonsense.

Not complete, but this is why I'm asking for a standards document...

> Should every vsftp connection get a container?  Every chrome tab?
> 
> At some of the connections per second numbers I have seen we might
> exhaust a 32bit number in an hour or two.  Will any of that make sense
> to someone reading the audit logs?

So making it 64bits buys us some time, but sure...  I think your
definition of a container may be a bit more liberal than what we're
trying to understand...

> Without considering that container creation is an unprivileged
> operation I think it is pretty much nonsense.  Do I get to say I am any
> container I want?  That would seem to invalidate the concept of
> userspace setting a container id.

Ok, my impression was that we're dealing with a privileged application
as I alluded with the need to create a new CAP_AUDIT_CONTAINER_ID or
something...

> How does any of this interact with setns?  AKA entering a container?

You mean entering another namespace that might all be part of one
container?  Or an application attempting to enter the namespace of
another container?

> I will go as far as looking at patches.  If someone comes up with
> a mission statement about what they are actually trying to achieve and a
> mechanism that actually achieves that, and that allows for containers to
> nest we can talk about doing something like that.

I don't pretend these patches are anywhere near finished or ready for
upstream.

> But for right now I just hear proposals for things that make no sense
> and can not possibly work.  Not least because it will require modifying
> every program that creates a container and who knows how many of them
> there are.  Especially since you don't need to be root.  Modifying
> /usr/bin/unshare seems a little far out to me.

My understanding is that just spawning or changing namespace doesn't
imply spawning or changing containers.  I also don't necessarily assume
that creating a container is an atomic operation, though that concept
might make some sense to understand or predict the boundaries of
actions...

> Eric

- RGB

--
Richard Guy Briggs <rbriggs@redhat.com>
Senior Software Engineer, Kernel Security, AMER ENG Base Operating Systems, Red Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635, Alt: +1.613.693.0684x3545
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Richard Guy Briggs May 15, 2015, 2:32 a.m. UTC | #14
On 15/05/14, Paul Moore wrote:
> On Thursday, May 14, 2015 10:57:14 AM Steve Grubb wrote:
> > On Tuesday, May 12, 2015 03:57:59 PM Richard Guy Briggs wrote:
> > > On 15/05/05, Steve Grubb wrote:
> > > > I think there needs to be some more discussion around this. It seems
> > > > like this is not exactly recording things that are useful for audit.
> > > 
> > > It seems to me that either audit has to assemble that information, or
> > > the kernel has to do so.  The kernel doesn't know about containers
> > > (yet?).
> > 
> > Auditing is something that has a lot of requirements imposed on it by
> > security standards. There was no requirement to have an auid until audit
> > came along and said that uid is not good enough to know who is issuing
> > commands because of su or sudo. There was no requirement for sessionid
> > until we had to track each action back to a login so we could see if the
> > login came from the expected place.
> > 
> > What I am saying is we have the same situation. Audit needs to track a
> > container and we need an ID. The information that is being logged is not
> > useful for auditing. Maybe someone wants that info in syslog, but I doubt
> > it. The audit trail's purpose is to allow a security officer to reconstruct
> > the events to determine what happened during some security incident.
> 
> As Eric, and others, have stated, the container concept is a userspace idea, 
> not a kernel idea; the kernel only knows, and cares about, namespaces.  This 
> is unlikely to change.
> 
> However, as Steve points out, there is precedent for the kernel to record 
> userspace tokens for the sake of audit.  Personally I'm not a big fan of this 
> in general, but I do recognize that it does satisfy a legitimate need.  Think 
> of things like auid and the sessionid as necessary evils; audit is already 
> chock full of evilness I doubt one more will doom us all to hell.
> 
> Moving forward, I'd like to see the following:
> 
> * Record the creation/removal/mgmt of the individual namespaces as Richard's 
> patchset currently does.  However, I'd suggest using an explicit namespace 
> value for the init namespace instead of the "unset" value in the V6 patchset 
> (my apologies if you've already changed this Richard, I haven't looked at V7 
> yet).

The "unset" (none) value is only there before the first namespaces have
been created.  After that, any new ones are created relative to the init
namespace of that type.

> * Create a container ID token (unsigned 32-bit integer?), similar to 
> auid/sessionid, that is set by userspace and carried by the kernel to be used 
> in audit records.  I'd like to see some discussion on how we manage this, e.g. 
> how do we handle container ID inheritance, how do we handle nested containers 
> (setting the containerid when it is already set), do we care if multiple 
> different containers share the same namespace config, etc.?

(Addressed in another reply.)  Nested will need some careful thought...

> * When userspace sets the container ID, emit a new audit record with the 
> associated namespace tokens and the container ID.

That was the goal of AUDIT_VIRT_CONTROL or AUDIT_NS_INFO messages from
userspace into the kernel.

> * Look at our existing audit records to determine which records should have 
> namespace and container ID tokens added.  We may only want to add the 
> additional fields in the case where the namespace/container ID tokens are not 
> the init namespace.

If we have a record that ties a set of namespace IDs with a container
ID, then I expect we only need to list the containerID along with auid
and sessionID.

> Can we all live with this?  If not, please suggest some alternate ideas; 
> simply shouting "IT'S ALL CRAP!" isn't helpful for anyone ... it may be true, 
> but it doesn't help us solve the problem ;)

Thanks Paul.

> paul moore

- RGB

--
Richard Guy Briggs <rbriggs@redhat.com>
Senior Software Engineer, Kernel Security, AMER ENG Base Operating Systems, Red Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635, Alt: +1.613.693.0684x3545
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andy Lutomirski May 15, 2015, 6:23 a.m. UTC | #15
On Thu, May 14, 2015 at 7:32 PM, Richard Guy Briggs <rgb@redhat.com> wrote:
> On 15/05/14, Paul Moore wrote:
>> * Look at our existing audit records to determine which records should have
>> namespace and container ID tokens added.  We may only want to add the
>> additional fields in the case where the namespace/container ID tokens are not
>> the init namespace.
>
> If we have a record that ties a set of namespace IDs with a container
> ID, then I expect we only need to list the containerID along with auid
> and sessionID.

The problem here is that the kernel has no concept of a "container", and I
don't think it makes any sense to add one just for audit.  "Container" is a
marketing term used by some userspace tools.

I can imagine that audit could benefit from a concept of a
namespace *path* that understands nesting (e.g. root/2/5/1 or
something along those lines).  Mapping these to "containers" belongs
in userspace, I think.

--Andy
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Grubb May 15, 2015, 12:38 p.m. UTC | #16
On Thursday, May 14, 2015 11:23:09 PM Andy Lutomirski wrote:
> On Thu, May 14, 2015 at 7:32 PM, Richard Guy Briggs <rgb@redhat.com> wrote:
> > On 15/05/14, Paul Moore wrote:
> >> * Look at our existing audit records to determine which records should
> >> have
> >> namespace and container ID tokens added.  We may only want to add the
> >> additional fields in the case where the namespace/container ID tokens are
> >> not the init namespace.
> > 
> > If we have a record that ties a set of namespace IDs with a container
> > ID, then I expect we only need to list the containerID along with auid
> > and sessionID.
> 
> The problem here is that the kernel has no concept of a "container", and I
> don't think it makes any sense to add one just for audit.  "Container" is a
> marketing term used by some userspace tools.

No, it's a real thing just like a login. Does the kernel have any concept of a 
login? Yet it happens. And it causes us to generate events describing who, 
where from, role, success, and time of day. :-)


> I can imagine that both audit could benefit from a concept of a
> namespace *path* that understands nesting (e.g. root/2/5/1 or
> something along those lines).  Mapping these to "containers" belongs
> in userspace, I think.

I don't doubt that just as user space sequences the actions that are a login. 
I just need the kernel to do some book keeping and associate the necessary 
attributes in the event record to be able to reconstruct what is actually 
happening.

-Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Steve Grubb May 15, 2015, 1:17 p.m. UTC | #17
On Thursday, May 14, 2015 08:31:45 PM Eric W. Biederman wrote:
> Paul Moore <pmoore@redhat.com> writes:
> > As Eric, and others, have stated, the container concept is a userspace
> > idea, not a kernel idea; the kernel only knows, and cares about,
> > namespaces.  This is unlikely to change.
> > 
> > However, as Steve points out, there is precedence for the kernel to record
> > userspace tokens for the sake of audit.  Personally I'm not a big fan of
> > this in general, but I do recognize that it does satisfy a legitimate
> > need.  Think of things like auid and the sessionid as necessary evils;
> > audit is already chock full of evilness I doubt one more will doom us all
> > to hell.
> > 
> > Moving forward, I'd like to see the following:
> > 
> > * Create a container ID token (unsigned 32-bit integer?), similar to
> > auid/sessionid, that is set by userspace and carried by the kernel to be
> > used in audit records.  I'd like to see some discussion on how we manage
> > this, e.g. how do handle container ID inheritance, how do we handle
> > nested containers (setting the containerid when it is already set), do we
> > care if multiple different containers share the same namespace config,
> > etc.?
> > 
> > Can we all live with this?  If not, please suggest some alternate ideas;
> > simply shouting "IT'S ALL CRAP!" isn't helpful for anyone ... it may be
> > true, but it doesn't help us solve the problem ;)
> 
> Without stopping and defining what someone means by container I think it
> is pretty much nonsense.

Maybe this is what's hanging everyone up? It's easy to get lost when your view 
is down at the syscall level and what is happening in the kernel. Starting a 
container is akin to the idea of login. Not every call to setresuid is a 
login. It could be a setuid program starting or a daemon dropping privileges. 
The idea of a container is a higher level concept than starting a name space. 
I think comparing a login with a container is a useful analogy because both 
are higher level concepts but employ low level ideas. A login is a collection 
of chdir, setuid, setgid, allocating a tty, associating the first 3 file 
descriptors, setting a process group, and starting a specific executable. None 
of these low level concepts is special by itself.

A container is what we need auditing events around not creation of namespaces. 
If we want creation of namespaces, we can audit the clone/unshare/setns 
syscalls. The container is when a managing program such as docker, lxc, or 
sometimes systemd creates a special operating environment for the express 
purpose of running programs disassociated in some way from the parent 
namespaces, cgroups, and security assumptions. It's this orchestration, just as 
sshd orchestrates a login, that makes it different.


> Should every vsftp connection get a container every?  Every chrome tab?

No. Also, note that not every program that grants a user session constitutes a 
login.


> At some of the connections per second numbers I have seen we might
> exhaust a 32bit number in an hour or two.  Will any of that make sense
> to someone reading the audit logs?

I would agree if we were auditing creation of name spaces. But going back to 
the concept of login, these could occur at a high rate. This is a bruteforce 
login attack. We put countermeasures in place to prevent it. But it is 
possible for the session id to wrap. But in our case, things like lxc or 
docker don't start hundreds of these a minute.


> Without considerning that container creation is an unprivileged
> operation I think it is pretty much nonsense.  Do I get to say I am any
> container I want?  That would seem to invalidate the concept of
> userspace setting a container id.

It would need to be a privileged operation just as setuid is.

 
> How does any of this interact with setns?  AKA entering a container?

We have to audit this. For the moment, auditing the setns syscall may be 
enough. I'd have to look at the lifecycle of the application that's doing this 
to determine if we need more.

 
> I will go as far as looking at patches.  If someone comes up with
> a mission statement about what they are actually trying to achieve and a
> mechanism that actually achieves that, and that allows for containers to
> nest we can talk about doing something like that.

Auditing wouldn't impose any restrictions on this. We just need a way to 
observe actions within and associate them as needed to investigate violations 
of security policy.
 
> But for right now I just hear proposals for things that make no sense
> and can not possibly work.  Not least because it will require modifying
> every program that creates a container and who knows how many of them
> there are.

We only care about a couple programs doing the orchestration. They will need 
to have the right support added to them. I'm hoping the analogy of a login 
helps demonstrate what we are after.

-Steve
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Andy Lutomirski May 15, 2015, 1:17 p.m. UTC | #18
On May 15, 2015 9:38 PM, "Steve Grubb" <sgrubb@redhat.com> wrote:
>
> On Thursday, May 14, 2015 11:23:09 PM Andy Lutomirski wrote:
> > On Thu, May 14, 2015 at 7:32 PM, Richard Guy Briggs <rgb@redhat.com> wrote:
> > > On 15/05/14, Paul Moore wrote:
> > >> * Look at our existing audit records to determine which records should
> > >> have
> > >> namespace and container ID tokens added.  We may only want to add the
> > >> additional fields in the case where the namespace/container ID tokens are
> > >> not the init namespace.
> > >
> > > If we have a record that ties a set of namespace IDs with a container
> > > ID, then I expect we only need to list the containerID along with auid
> > > and sessionID.
> >
> > The problem here is that the kernel has no concept of a "container", and I
> > don't think it makes any sense to add one just for audit.  "Container" is a
> > marketing term used by some userspace tools.
>
> No, its a real thing just like a login. Does the kernel have any concept of a
> login? Yet it happens. And it causes us to generate events describing who,
> where from, role, success, and time of day. :-)
>

I really hope those records come from userspace, not the kernel.  I
also wonder what happens when a user logs in and types "sudo agetty
/dev/ttyS0 115200".  If a user does that and then someone logs in on
/dev/ttyS0, which login are they?

>
> > I can imagine that both audit could benefit from a concept of a
> > namespace *path* that understands nesting (e.g. root/2/5/1 or
> > something along those lines).  Mapping these to "containers" belongs
> > in userspace, I think.
>
> I don't doubt that just as user space sequences the actions that are a login.
> I just need the kernel to do some book keeping and associate the necessary
> attributes in the event record to be able to reconstruct what is actually
> happening.

A precondition for that is having those records have some
correspondence to what is actually happening.  Since the kernel has no
concept of a container, and since the same kernel mechanisms could be
used for things that are probably not whatever the Common Criteria
rules think a container is, this could be quite difficult to define in
a meaningful manner.

Hence my suggestion to add only minimal support in the kernel and to
do this in userspace.

--Andy
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Daniel Walsh May 15, 2015, 1:19 p.m. UTC | #19
On 05/14/2015 10:11 PM, Richard Guy Briggs wrote:
> On 15/05/14, Oren Laadan wrote:
>> On Thu, May 14, 2015 at 8:48 PM, Richard Guy Briggs <rgb@redhat.com> wrote:
>>
>>>>>> Recording each instance of a name space is giving me something that I
>>>>>> cannot use to do queries required by the security target. Given these
>>>>>> events, how do I locate a web server event where it accesses a
>>> watched
>>>>>> file? That authentication failed? That an update within the container
>>>>>> failed?
>>>>>>
>>>>>> The requirements are that we have to log the creation, suspension,
>>>>>> migration, and termination of a container. The requirements are not
>>> on
>>>>>> the individual name space.
>>>>> Ok.  Do we have a robust definition of a container?
>>>> We call the combination of name spaces, cgroups, and seccomp rules a
>>>> container.
>>> Can you detail what information is required from each?
>>>
>>>>> Where is that definition managed?
>>>> In the thing that invokes a container.
>>> I was looking for a reference to a standards document rather than an
>>> application...
>>>
>>>
>> [focusing on "containers id" - snipped the rest away]
>>
>> I am unfamiliar with the audit subsystem, but work with namespaces in other
>> contexts. Perhaps the term "container" is overloaded here. The definition
>> suggested by Steve in this thread makes sense to me: "a combination of
>> namespaces". I imagine people may want to audit subsets of namespaces.
> I assume it would be a bit more than that, including cgroup and seccomp info.
I don't see why seccomp, versus other security mechanisms, comes into this.
I'm not really sure about cgroups.  That stuff would all be associated with
the process.  I would guess you could look at the process that modified these
for logging, but that should happen at the time they get changed, not be
recorded for every process.
>> For namespaces, can use a string like "A:B:C:D:E:F" as an identifier for a
>> particular combination, where A-F are respective namespaces identifiers.
>> (Can be taken for example from /proc/PID/ns/{mnt,uts,ipc,user,pid,net}).
>>  That will even be grep-able to locate records related to a particular
>> subset
>> of namespaces. So a "container" in the classic meaning would have all A-F
>> unique and different from the init process, but processes separated only by
>> e.g. mnt-ns and net-ns will differ from the init process in  A and F.
>>
>> (If a string is a no go, then perhaps combine the IDs in a unique way into a
>> super ID).
> I'd be fine with either, even including the nsfs deviceID.
>
>> Oren.
> - RGB
>
> --
> Richard Guy Briggs <rbriggs@redhat.com>
> Senior Software Engineer, Kernel Security, AMER ENG Base Operating Systems, Red Hat
> Remote, Ottawa, Canada
> Voice: +1.647.777.2635, Internal: (81) 32635, Alt: +1.613.693.0684x3545
>
> --
> Linux-audit mailing list
> Linux-audit@redhat.com
> https://www.redhat.com/mailman/listinfo/linux-audit

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman May 15, 2015, 2:51 p.m. UTC | #20
Steve Grubb <sgrubb@redhat.com> writes:

> On Thursday, May 14, 2015 08:31:45 PM Eric W. Biederman wrote:
>> Paul Moore <pmoore@redhat.com> writes:
>> > As Eric, and others, have stated, the container concept is a userspace
>> > idea, not a kernel idea; the kernel only knows, and cares about,
>> > namespaces.  This is unlikely to change.
>> > 
>> > However, as Steve points out, there is precedence for the kernel to record
>> > userspace tokens for the sake of audit.  Personally I'm not a big fan of
>> > this in general, but I do recognize that it does satisfy a legitimate
>> > need.  Think of things like auid and the sessionid as necessary evils;
>> > audit is already chock full of evilness I doubt one more will doom us all
>> > to hell.
>> > 
>> > Moving forward, I'd like to see the following:
>> > 
>> > * Create a container ID token (unsigned 32-bit integer?), similar to
>> > auid/sessionid, that is set by userspace and carried by the kernel to be
>> > used in audit records.  I'd like to see some discussion on how we manage
>> > this, e.g. how do handle container ID inheritance, how do we handle
>> > nested containers (setting the containerid when it is already set), do we
>> > care if multiple different containers share the same namespace config,
>> > etc.?
>> > 
>> > Can we all live with this?  If not, please suggest some alternate ideas;
>> > simply shouting "IT'S ALL CRAP!" isn't helpful for anyone ... it may be
>> > true, but it doesn't help us solve the problem ;)
>> 
>> Without stopping and defining what someone means by container I think it
>> is pretty much nonsense.
>
> Maybe this is what's hanging everyone up? Its easy to get lost when your view 
> is down at the syscall level and what is happening in the kernel. Starting a 
> container is akin to the idea of login. Not every call to setresuid is a 
> login. It could be a setuid program starting or a daemon dropping privileges. 
> The idea of a container is a higher level concept that starting a name space. 
> I think comparing a login with a container is a useful analogy because both 
> are higher level concepts but employ low level ideas. A login is a collection 
> of chdir, setuid, setgid, allocating a tty, associating the first 3 file 
> descriptors, setting a process group, and starting a specific executable. All 
> these low level concepts each by itself is not special.

Except login and setresuid are privileged operation.

CREATING A CONTAINER IS NOT A PRIVILEGED OPERATION.
Your analogy fails rather badly with respect to that fact.

> A container is what we need auditing events around not creation of namespaces. 
> If we want creation of namespaces, we can audit the clone/unshare/setns 
> syscalls. The container is when a managing program such as docker, lxc, or 
> sometimes systemd creates a special operating environment for the express 
> purpose of running programs disassociated in some way from the parent 
> namespaces, cgroups, and security assumptions. Its this orchestration, just as 
> sshd orchestrates a login, that makes it different.

What do you define as a container?  From what I can tell we share
a similar understanding of the term, and running lxc is not a
privileged operation.  Running sandstorm.io is not a privileged
operation.

>> Should every vsftp connection get a container every?  Every chrome tab?
>
> No. Also, note that not every program that grants a user session constitutes a 
> login.

>> At some of the connections per second numbers I have seen we might
>> exhaust a 32bit number in an hour or two.  Will any of that make sense
>> to someone reading the audit logs?
>
> I would agree if we were auditing creation of name spaces. But going back to 
> the concept of login, these could occur at a high rate. This is a bruteforce 
> login attack. We put countermeasures in place to prevent it. But it is 
> possible for the session id to wrap. But in our case, things like lxc or 
> docker don't start hundreds of these a minute.

Except there are reasonable situations where container creation does
happen at fast rates.  Outside of a container per network connection
(which is likely to happen at some point) I have seen builds fire up
more containers than I can count as part of automated testing.

>> Without considerning that container creation is an unprivileged
>> operation I think it is pretty much nonsense.  Do I get to say I am any
>> container I want?  That would seem to invalidate the concept of
>> userspace setting a container id.
>
> It would need to be a privileged operation just as setuid is.

CONTAINER CREATION IS NOT A PRIVILEGED OPERATION.

That is today.  That is talking about lxc.

CONTAINER CREATION IS NOT A PRIVILEGED OPERATION.

And ultimately we don't want it to be, as if you can safely create a
container without privilege your system is safer.

>> How does any of this interact with setns?  AKA entering a container?
>
> We have to audit this. For the moment, auditing the setns syscall may be 
> enough. I'd have to look at the lifecycle of the application that's doing this 
> to determine if we need more.

Frequently it will be sysadmins for some arbitrary reason calling
nsenter or a similar program that is more aware of their favorite
container flavor.

>> I will go as far as looking at patches.  If someone comes up with
>> a mission statement about what they are actually trying to achieve and a
>> mechanism that actually achieves that, and that allows for containers to
>> nest we can talk about doing something like that.
>
> Auditing wouldn't impose any restrictions on this. We just need a way to 
> observe actions within and associate them as needed to investigate violations 
> of security policy.

*Rolls eyes*  But the rest of the container tool kit in the kernel will
impose limitations on those identifiers.

>> But for right now I just hear proposals for things that make no sense
>> and can not possibly work.  Not least because it will require modifying
>> every program that creates a container and who knows how many of them
>> there are.
>
> We only care about a couple programs doing the orchestration. They will need 
> to have the right support added to them. I'm hoping the analogy of a login 
> helps demonstrate what we are after.

All I see is that (a) you have not defined what you see a container as
(b) you have failed to acknowledge I can create a container without
privilege (which breaks your analogy with login).

But I think I am with Andy.  If you only care about privileged events
and privileged containers, it is unlikely you need to do anything in the
kernel and you can perform whatever logging you see fit in your
privileged userspace applications.

Of course in the long run I don't see what good that will do you as I
expect increasingly there will not need to be any special permissions to
create containers.

Eric

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Moore May 15, 2015, 8:26 p.m. UTC | #21
On Thursday, May 14, 2015 08:48:55 PM Richard Guy Briggs wrote:
> On 15/05/14, Steve Grubb wrote:
> > What they would want to know is what resources were assigned; if two
> > containers shared a resource, what resource and container was it shared
> > with; if two containers can communicate, we need to see or control
> > information flow when necessary; and we need to see termination and
> > release of resources.
>
> So, namespaces are a big part of this.  I understand how they are
> spawned and potentially shared.  I have a more vague idea about how
> cgroups contribute to this concept of a container.  So far, I have very
> little idea how seccomp contributes, but I assume that it will also need
> to be part of this tracking.

It doesn't, really.  We shouldn't worry about seccomp from a 
namespace/container auditing perspective.  The normal seccomp auditing should 
be sufficient for namespaces/containers.
Paul Moore May 15, 2015, 8:42 p.m. UTC | #22
On Thursday, May 14, 2015 09:10:56 PM Oren Laadan wrote:
> [focusing on "containers id" - snipped the rest away]
> 
> I am unfamiliar with the audit subsystem, but work with namespaces in other
> contexts. Perhaps the term "container" is overloaded here. The definition
> suggested by Steve in this thread makes sense to me: "a combination of
> namespaces". I imagine people may want to audit subsets of namespaces.
> 
> For namespaces, can use a string like "A:B:C:D:E:F" as an identifier for a
> particular combination, where A-F are respective namespaces identifiers.
> (Can be taken for example from /proc/PID/ns/{mnt,uts,ipc,user,pid,net}).
>  That will even be grep-able to locate records related to a particular
> subset
> of namespaces. So a "container" in the classic meaning would have all A-F
> unique and different from the init process, but processes separated only by
> e.g. mnt-ns and net-ns will differ from the init process in  A and F.
> 
> (If a string is a no go, then perhaps combine the IDs in a unique way into a
> super ID).

As has been mentioned in every other email in this thread, the kernel has no 
concept of a container, it is a userspace idea and trying to generate a 
meaningful value in the kernel is a mistake in my opinion.  My current opinion 
is that we allow userspace to set a container ID token as it sees fit and the 
kernel will just use the value provided by userspace.
Paul Moore May 15, 2015, 9:01 p.m. UTC | #23
On Thursday, May 14, 2015 08:31:45 PM Eric W. Biederman wrote:
> Paul Moore <pmoore@redhat.com> writes:
> > As Eric, and others, have stated, the container concept is a userspace
> > idea, not a kernel idea; the kernel only knows, and cares about,
> > namespaces.  This is unlikely to change.
> > 
> > However, as Steve points out, there is precedence for the kernel to record
> > userspace tokens for the sake of audit.  Personally I'm not a big fan of
> > this in general, but I do recognize that it does satisfy a legitimate
> > need.  Think of things like auid and the sessionid as necessary evils;
> > audit is already chock full of evilness I doubt one more will doom us all
> > to hell.
> > 
> > Moving forward, I'd like to see the following:
> > 
> > * Create a container ID token (unsigned 32-bit integer?), similar to
> > auid/sessionid, that is set by userspace and carried by the kernel to be
> > used in audit records.  I'd like to see some discussion on how we manage
> > this, e.g. how do handle container ID inheritance, how do we handle
> > nested containers (setting the containerid when it is already set), do we
> > care if multiple different containers share the same namespace config,
> > etc.?
> > 
> > 
> > Can we all live with this?  If not, please suggest some alternate ideas;
> > simply shouting "IT'S ALL CRAP!" isn't helpful for anyone ... it may be
> > true, but it doesn't help us solve the problem ;)
> 
> Without stopping and defining what someone means by container I think it
> is pretty much nonsense.

For what it is worth, I doubt we will ever arrive at a consistent definition 
of a container.  This is one of the reasons why I don't think we want the 
kernel generating a container ID token, although I understand the real world 
desire to have the kernel report such information back in the audit logs.

> Should every vsftp connection get a container every?  Every chrome tab?

That's up to the individual system.  I would argue that's a pretty silly 
configuration, but one person's silliness is another's best practice.  It's a 
mad, mad world.

> At some of the connections per second numbers I have seen we might
> exhaust a 32bit number in an hour or two.  Will any of that make sense
> to someone reading the audit logs?

If someone is going to spawn each process in a container then they will need 
to live with the fallout of that decision.

Also, if folks think 32-bits is too small, we can always do 64-bits, but I 
don't think that was the point you were trying to make (I could be wrong).

> Without considerning that container creation is an unprivileged
> operation I think it is pretty much nonsense.  Do I get to say I am any
> container I want?  That would seem to invalidate the concept of
> userspace setting a container id.
>
> How does any of this interact with setns?  AKA entering a container?

As I said in my email, I think we need some discussion around this; I don't 
pretend to think we have this sorted at this point.  I just want to make sure 
we're working towards some common ground instead of shouting the same stuff 
back and forth at each other.

> I will go as far as looking at patches.  If someone comes up with
> a mission statement about what they are actually trying to achieve and a
> mechanism that actually achieves that, and that allows for containers to
> nest we can talk about doing something like that.

I think Steve has posted some requirements that Richard is trying to satisfy 
with these patches; we've also heard from at least one person who is looking 
at how to deploy this in the Real World.  Perhaps in the next round of patches 
Richard can list the requirements in the 0/X patch and describe how they are 
satisfied in the patchset.

Beyond that, and ignoring for a moment the whole "a container is not a 
*thing*" argument, can I assume that the auditing of nested "containers" are 
your main remaining concern at this point?

> But for right now I just hear proposals for things that make no sense
> and can not possibly work.  Not least because it will require modifying
> every program that creates a container and who knows how many of them
> there are.  Especially since you don't need to be root.  Modifying
> /usr/bin/unshare seems a little far out to me.

I think it is very reasonable that there will be some container infrastructure 
tools which would handle this, we're already seeing this happening now; asking 
for minor changes to these infrastructure applications to support container 
auditing doesn't seem like a significant ask to me.  Also, to be perfectly 
clear, if the applications aren't updated it isn't as if they will fail to 
work, it is just that they won't be able to take advantage of the new 
container auditing capabilities.  That seems reasonable to me.
Paul Moore May 15, 2015, 9:05 p.m. UTC | #24
On Thursday, May 14, 2015 11:23:09 PM Andy Lutomirski wrote:
> On Thu, May 14, 2015 at 7:32 PM, Richard Guy Briggs <rgb@redhat.com> wrote:
> > On 15/05/14, Paul Moore wrote:
> >> * Look at our existing audit records to determine which records should
> >> have
> >> namespace and container ID tokens added.  We may only want to add the
> >> additional fields in the case where the namespace/container ID tokens are
> >> not the init namespace.
> > 
> > If we have a record that ties a set of namespace IDs with a container
> > ID, then I expect we only need to list the containerID along with auid
> > and sessionID.
> 
> The problem here is that the kernel has no concept of a "container", and I
> don't think it makes any sense to add one just for audit.  "Container" is a
> marketing term used by some userspace tools.
> 
> I can imagine that both audit could benefit from a concept of a
> namespace *path* that understands nesting (e.g. root/2/5/1 or
> something along those lines).  Mapping these to "containers" belongs
> in userspace, I think.

It might be helpful to climb up a few levels in this thread ...

I think we all agree that containers are not a kernel construct.  I further 
believe that the kernel has no business generating container IDs, those should 
come from userspace and will likely be different depending on how you define 
"container".  However, what is less clear to me at this point is how the 
kernel should handle the setting, reporting, and general management of this 
container ID token.
Daniel Walsh May 16, 2015, 9:46 a.m. UTC | #25
On 05/15/2015 05:05 PM, Paul Moore wrote:
> On Thursday, May 14, 2015 11:23:09 PM Andy Lutomirski wrote:
>> On Thu, May 14, 2015 at 7:32 PM, Richard Guy Briggs <rgb@redhat.com> wrote:
>>> On 15/05/14, Paul Moore wrote:
>>>> * Look at our existing audit records to determine which records should
>>>> have
>>>> namespace and container ID tokens added.  We may only want to add the
>>>> additional fields in the case where the namespace/container ID tokens are
>>>> not the init namespace.
>>> If we have a record that ties a set of namespace IDs with a container
>>> ID, then I expect we only need to list the containerID along with auid
>>> and sessionID.
>> The problem here is that the kernel has no concept of a "container", and I
>> don't think it makes any sense to add one just for audit.  "Container" is a
>> marketing term used by some userspace tools.
>>
>> I can imagine that both audit could benefit from a concept of a
>> namespace *path* that understands nesting (e.g. root/2/5/1 or
>> something along those lines).  Mapping these to "containers" belongs
>> in userspace, I think.
> It might be helpful to climb up a few levels in this thread ...
>
> I think we all agree that containers are not a kernel construct.  I further 
> believe that the kernel has no business generating container IDs, those should 
> come from userspace and will likely be different depending on how you define 
> "container".  However, what is less clear to me at this point is how the 
> kernel should handle the setting, reporting, and general management of this 
> container ID token.
>
Wouldn't the easiest thing be to just add a containerid to the
process context, like auid?  Then
make it a privileged operation to set it.  Then tools that care about
auditing, like docker, can set the ID
and remove the capability from their subprocesses if they care.  All
processes adopt their parent process's containerid.
Now containers can be audited and, as long as userspace is written
correctly, nested containers can either override the containerid or not,
depending on what the audit rules are.

No special handling inside of namespaces.
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Moore May 16, 2015, 12:16 p.m. UTC | #26
On Sat, May 16, 2015 at 5:46 AM, Daniel J Walsh <dwalsh@redhat.com> wrote:
> On 05/15/2015 05:05 PM, Paul Moore wrote:
>> On Thursday, May 14, 2015 11:23:09 PM Andy Lutomirski wrote:
>>> On Thu, May 14, 2015 at 7:32 PM, Richard Guy Briggs <rgb@redhat.com> wrote:
>>>> On 15/05/14, Paul Moore wrote:
>>>>> * Look at our existing audit records to determine which records should
>>>>> have
>>>>> namespace and container ID tokens added.  We may only want to add the
>>>>> additional fields in the case where the namespace/container ID tokens are
>>>>> not the init namespace.
>>>> If we have a record that ties a set of namespace IDs with a container
>>>> ID, then I expect we only need to list the containerID along with auid
>>>> and sessionID.
>>> The problem here is that the kernel has no concept of a "container", and I
>>> don't think it makes any sense to add one just for audit.  "Container" is a
>>> marketing term used by some userspace tools.
>>>
>>> I can imagine that both audit could benefit from a concept of a
>>> namespace *path* that understands nesting (e.g. root/2/5/1 or
>>> something along those lines).  Mapping these to "containers" belongs
>>> in userspace, I think.
>> It might be helpful to climb up a few levels in this thread ...
>>
>> I think we all agree that containers are not a kernel construct.  I further
>> believe that the kernel has no business generating container IDs, those should
>> come from userspace and will likely be different depending on how you define
>> "container".  However, what is less clear to me at this point is how the
>> kernel should handle the setting, reporting, and general management of this
>> container ID token.
>>
> Wouldn't the easiest thing be to just treat add a containerid to the
> process context like auid.

I believe so.  At least that was the point I was trying to get across
when I first jumped into this thread.

> Then make it a privileged operation to set it.  Then tools that care about
> auditing like docker can set the ID
> and remove the Capability from it sub processes if it cares.  All
> processes adopt parent processes containerid.
> Now containers can be audited and as long as userspace is written
> correctly nested containers can either override the containerid or not
> depending on what the audit rules are.

This part I'm still less certain on.  I agree that setting the
container ID should be privileged in some sense, but the kernel
shouldn't *require* privilege to create a new container (however the
user chooses to define it).  Simply requiring privilege to set the
container ID and failing silently may be sufficient.
Eric W. Biederman May 16, 2015, 2:46 p.m. UTC | #27
Paul Moore <paul@paul-moore.com> writes:

> On Sat, May 16, 2015 at 5:46 AM, Daniel J Walsh <dwalsh@redhat.com> wrote:
>> On 05/15/2015 05:05 PM, Paul Moore wrote:
>>> On Thursday, May 14, 2015 11:23:09 PM Andy Lutomirski wrote:
>>>> On Thu, May 14, 2015 at 7:32 PM, Richard Guy Briggs <rgb@redhat.com> wrote:
>>>>> On 15/05/14, Paul Moore wrote:
>>>>>> * Look at our existing audit records to determine which records should
>>>>>> have
>>>>>> namespace and container ID tokens added.  We may only want to add the
>>>>>> additional fields in the case where the namespace/container ID tokens are
>>>>>> not the init namespace.
>>>>> If we have a record that ties a set of namespace IDs with a container
>>>>> ID, then I expect we only need to list the containerID along with auid
>>>>> and sessionID.
>>>> The problem here is that the kernel has no concept of a "container", and I
>>>> don't think it makes any sense to add one just for audit.  "Container" is a
>>>> marketing term used by some userspace tools.
>>>>
>>>> I can imagine that both audit could benefit from a concept of a
>>>> namespace *path* that understands nesting (e.g. root/2/5/1 or
>>>> something along those lines).  Mapping these to "containers" belongs
>>>> in userspace, I think.
>>> It might be helpful to climb up a few levels in this thread ...
>>>
>>> I think we all agree that containers are not a kernel construct.  I further
>>> believe that the kernel has no business generating container IDs, those should
>>> come from userspace and will likely be different depending on how you define
>>> "container".  However, what is less clear to me at this point is how the
>>> kernel should handle the setting, reporting, and general management of this
>>> container ID token.
>>>
>> Wouldn't the easiest thing be to just treat add a containerid to the
>> process context like auid.
>
> I believe so.  At least that was the point I was trying to get across
> when I first jumped into this thread.

It sounds nice but containers are not just a per process construct.
Sometimes you might know a namespace but not which process instigated
the action on that namespace.

>> Then make it a privileged operation to set it.  Then tools that care about
>> auditing like docker can set the ID
>> and remove the Capability from it sub processes if it cares.  All
>> processes adopt parent processes containerid.
>> Now containers can be audited and as long as userspace is written
>> correctly nested containers can either override the containerid or not
>> depending on what the audit rules are.
>
> This part I'm still less certain on.  I agree that setting the
> container ID should be privileged in some sense, but the kernel
> shouldn't *require* privilege to create a new container (however the
> user chooses to define it).  Simply requiring privilege to set the
> container ID and failing silently may be sufficient.

My hope is as things mature fewer and fewer container things will need
any special privilege to create.

I think it needs to start with a clear definition of what is wanted and
then working backwards through which messages in which contexts you want
to have your magic bits.

Eric

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Moore May 16, 2015, 10:49 p.m. UTC | #28
On Sat, May 16, 2015 at 10:46 AM, Eric W. Biederman
<ebiederm@xmission.com> wrote:
> Paul Moore <paul@paul-moore.com> writes:
>> On Sat, May 16, 2015 at 5:46 AM, Daniel J Walsh <dwalsh@redhat.com> wrote:
>>> On 05/15/2015 05:05 PM, Paul Moore wrote:
>>>> On Thursday, May 14, 2015 11:23:09 PM Andy Lutomirski wrote:
>>>>> On Thu, May 14, 2015 at 7:32 PM, Richard Guy Briggs <rgb@redhat.com> wrote:
>>>>>> On 15/05/14, Paul Moore wrote:
>>>>>>> * Look at our existing audit records to determine which records should
>>>>>>> have
>>>>>>> namespace and container ID tokens added.  We may only want to add the
>>>>>>> additional fields in the case where the namespace/container ID tokens are
>>>>>>> not the init namespace.
>>>>>> If we have a record that ties a set of namespace IDs with a container
>>>>>> ID, then I expect we only need to list the containerID along with auid
>>>>>> and sessionID.
>>>>> The problem here is that the kernel has no concept of a "container", and I
>>>>> don't think it makes any sense to add one just for audit.  "Container" is a
>>>>> marketing term used by some userspace tools.
>>>>>
>>>>> I can imagine that both audit could benefit from a concept of a
>>>>> namespace *path* that understands nesting (e.g. root/2/5/1 or
>>>>> something along those lines).  Mapping these to "containers" belongs
>>>>> in userspace, I think.
>>>> It might be helpful to climb up a few levels in this thread ...
>>>>
>>>> I think we all agree that containers are not a kernel construct.  I further
>>>> believe that the kernel has no business generating container IDs, those should
>>>> come from userspace and will likely be different depending on how you define
>>>> "container".  However, what is less clear to me at this point is how the
>>>> kernel should handle the setting, reporting, and general management of this
>>>> container ID token.
>>>>
>>> Wouldn't the easiest thing be to just treat add a containerid to the
>>> process context like auid.
>>
>> I believe so.  At least that was the point I was trying to get across
>> when I first jumped into this thread.
>
> It sounds nice but containers are not just a per process construct.
> Sometimes you might know anamespace but not which process instigated
> action to happen on that namespace.

From an auditing perspective I'm not sure we will ever hit those
cases; did you have a particular example in mind?
Richard Guy Briggs May 19, 2015, 1:09 p.m. UTC | #29
On 15/05/16, Paul Moore wrote:
> On Sat, May 16, 2015 at 10:46 AM, Eric W. Biederman
> <ebiederm@xmission.com> wrote:
> > Paul Moore <paul@paul-moore.com> writes:
> >> On Sat, May 16, 2015 at 5:46 AM, Daniel J Walsh <dwalsh@redhat.com> wrote:
> >>> On 05/15/2015 05:05 PM, Paul Moore wrote:
> >>>> On Thursday, May 14, 2015 11:23:09 PM Andy Lutomirski wrote:
> >>>>> On Thu, May 14, 2015 at 7:32 PM, Richard Guy Briggs <rgb@redhat.com> wrote:
> >>>>>> On 15/05/14, Paul Moore wrote:
> >>>>>>> * Look at our existing audit records to determine which records should
> >>>>>>> have
> >>>>>>> namespace and container ID tokens added.  We may only want to add the
> >>>>>>> additional fields in the case where the namespace/container ID tokens are
> >>>>>>> not the init namespace.
> >>>>>> If we have a record that ties a set of namespace IDs with a container
> >>>>>> ID, then I expect we only need to list the containerID along with auid
> >>>>>> and sessionID.
> >>>>> The problem here is that the kernel has no concept of a "container", and I
> >>>>> don't think it makes any sense to add one just for audit.  "Container" is a
> >>>>> marketing term used by some userspace tools.
> >>>>>
> >>>>> I can imagine that both audit could benefit from a concept of a
> >>>>> namespace *path* that understands nesting (e.g. root/2/5/1 or
> >>>>> something along those lines).  Mapping these to "containers" belongs
> >>>>> in userspace, I think.
> >>>> It might be helpful to climb up a few levels in this thread ...
> >>>>
> >>>> I think we all agree that containers are not a kernel construct.  I further
> >>>> believe that the kernel has no business generating container IDs, those should
> >>>> come from userspace and will likely be different depending on how you define
> >>>> "container".  However, what is less clear to me at this point is how the
> >>>> kernel should handle the setting, reporting, and general management of this
> >>>> container ID token.
> >>>>
> >>> Wouldn't the easiest thing be to just treat add a containerid to the
> >>> process context like auid.
> >>
> >> I believe so.  At least that was the point I was trying to get across
> >> when I first jumped into this thread.
> >
> > It sounds nice but containers are not just a per process construct.
> > Sometimes you might know anamespace but not which process instigated
> > action to happen on that namespace.
> 
> From an auditing perspective I'm not sure we will ever hit those
> cases; did you have a particular example in mind?

The example that immediately came to mind when I first read Eric's
comment was a packet coming in off a network in a particular network
namespace.  That could narrow it down to a subset of containers based on
which network namespace it inhabits, but since it isn't associated with
a particular task yet (other than a kernel thread) it will not be
possible to select the precise nsproxy, let alone the container.

> paul moore

- RGB

--
Richard Guy Briggs <rbriggs@redhat.com>
Senior Software Engineer, Kernel Security, AMER ENG Base Operating Systems, Red Hat
Remote, Ottawa, Canada
Voice: +1.647.777.2635, Internal: (81) 32635, Alt: +1.613.693.0684x3545
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Moore May 19, 2015, 2:27 p.m. UTC | #30
On Tue, May 19, 2015 at 9:09 AM, Richard Guy Briggs <rgb@redhat.com> wrote:
> On 15/05/16, Paul Moore wrote:
>> On Sat, May 16, 2015 at 10:46 AM, Eric W. Biederman wrote:
>> > It sounds nice but containers are not just a per process construct.
>> > Sometimes you might know anamespace but not which process instigated
>> > action to happen on that namespace.
>>
>> From an auditing perspective I'm not sure we will ever hit those
>> cases; did you have a particular example in mind?
>
> The example that immediately came to mind when I first read Eric's
> comment was a packet coming in off a network in a particular network
> namespace.  That could narrow it down to a subset of containers based on
> which network namespace it inhabits, but since it isn't associated with
> a particular task yet (other than a kernel thread) it will not be
> possible to select the precise nsproxy, let alone the container.

Thanks, I was stuck thinking about syscall based auditing and forgot
about the various LSM based audit records.  Of all people you would
think I would remember per-packet audit records ;)

Anyway, in this case I think including the namespace ID is sufficient,
largely because the container userspace doesn't have access to the
packet at this point.  In order to actually receive the data the
container's userspace will need to issue a syscall where we can
include the container ID.  An overly zealous security officer who
wants to trace all the kernel level audit events, like the one you
describe, can match up the namespace to a container in post-processing
if needed.
diff mbox

Patch

diff --git a/fs/namespace.c b/fs/namespace.c
index 182bc41..7b62543 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -24,6 +24,7 @@ 
 #include <linux/proc_ns.h>
 #include <linux/magic.h>
 #include <linux/bootmem.h>
+#include <linux/audit.h>
 #include "pnode.h"
 #include "internal.h"
 
@@ -2459,6 +2460,7 @@  dput_out:
 
 static void free_mnt_ns(struct mnt_namespace *ns)
 {
+	audit_log_ns_del(AUDIT_NS_DEL_MNT, ns->proc_inum);
 	proc_free_inum(ns->proc_inum);
 	put_user_ns(ns->user_ns);
 	kfree(ns);
@@ -2518,6 +2520,7 @@  struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 	new_ns = alloc_mnt_ns(user_ns);
 	if (IS_ERR(new_ns))
 		return new_ns;
+	audit_log_ns_init(AUDIT_NS_INIT_MNT, ns->proc_inum, new_ns->proc_inum);
 
 	namespace_lock();
 	/* First pass: copy the tree topology */
@@ -2830,6 +2833,16 @@  static void __init init_mount_tree(void)
 	set_fs_root(current->fs, &root);
 }
 
+/* log the ID of init mnt namespace after audit service starts */
+static int __init mnt_ns_init_log(void)
+{
+	struct mnt_namespace *init_mnt_ns = init_task.nsproxy->mnt_ns;
+
+	audit_log_ns_init(AUDIT_NS_INIT_MNT, 0, init_mnt_ns->proc_inum);
+	return 0;
+}
+late_initcall(mnt_ns_init_log);
+
 void __init mnt_init(void)
 {
 	unsigned u;
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 71698ec..b28dfb0 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -484,6 +484,9 @@  extern void		    audit_log_ns_info(struct task_struct *tsk);
 static inline void	    audit_log_ns_info(struct task_struct *tsk)
 { }
 #endif
+extern void		    audit_log_ns_init(int type, unsigned int old_inum,
+					      unsigned int inum);
+extern void		    audit_log_ns_del(int type, unsigned int inum);
 
 extern int		    audit_update_lsm_rules(void);
 
@@ -542,6 +545,12 @@  static inline void audit_log_task_info(struct audit_buffer *ab,
 { }
 static inline void audit_log_ns_info(struct task_struct *tsk)
 { }
+/* No-op stubs; the return type is void to match the extern prototypes. */
+static inline void audit_log_ns_init(int type, unsigned int old_inum,
+				     unsigned int inum)
+{ }
+static inline void audit_log_ns_del(int type, unsigned int inum)
+{ }
 #define audit_enabled 0
 #endif /* CONFIG_AUDIT */
 static inline void audit_log_string(struct audit_buffer *ab, const char *buf)
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 1ffb151..487cad6 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -111,6 +111,18 @@ 
 #define AUDIT_PROCTITLE		1327	/* Proctitle emit event */
 #define AUDIT_FEATURE_CHANGE	1328	/* audit log listing feature changes */
 #define AUDIT_NS_INFO		1329	/* Record process namespace IDs */
+#define AUDIT_NS_INIT_MNT	1330	/* Record mount namespace instance creation */
+#define AUDIT_NS_INIT_UTS	1331	/* Record UTS namespace instance creation */
+#define AUDIT_NS_INIT_IPC	1332	/* Record IPC namespace instance creation */
+#define AUDIT_NS_INIT_USER	1333	/* Record USER namespace instance creation */
+#define AUDIT_NS_INIT_PID	1334	/* Record PID namespace instance creation */
+#define AUDIT_NS_INIT_NET	1335	/* Record NET namespace instance creation */
+#define AUDIT_NS_DEL_MNT	1336	/* Record mount namespace instance deletion */
+#define AUDIT_NS_DEL_UTS	1337	/* Record UTS namespace instance deletion */
+#define AUDIT_NS_DEL_IPC	1338	/* Record IPC namespace instance deletion */
+#define AUDIT_NS_DEL_USER	1339	/* Record USER namespace instance deletion */
+#define AUDIT_NS_DEL_PID	1340	/* Record PID namespace instance deletion */
+#define AUDIT_NS_DEL_NET	1341	/* Record NET namespace instance deletion */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 59451c1..73727ce 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -13,6 +13,7 @@ 
 #include <linux/mount.h>
 #include <linux/user_namespace.h>
 #include <linux/proc_ns.h>
+#include <linux/audit.h>
 
 #include "util.h"
 
@@ -41,6 +42,8 @@  static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
 	}
 	atomic_inc(&nr_ipc_ns);
 
+	audit_log_ns_init(AUDIT_NS_INIT_IPC, old_ns->proc_inum, ns->proc_inum);
+
 	sem_init_ns(ns);
 	msg_init_ns(ns);
 	shm_init_ns(ns);
@@ -119,6 +122,7 @@  static void free_ipc_ns(struct ipc_namespace *ns)
 	 */
 	ipcns_notify(IPCNS_REMOVED);
 	put_user_ns(ns->user_ns);
+	audit_log_ns_del(AUDIT_NS_DEL_IPC, ns->proc_inum);
 	proc_free_inum(ns->proc_inum);
 	kfree(ns);
 }
@@ -197,3 +201,11 @@  const struct proc_ns_operations ipcns_operations = {
 	.install	= ipcns_install,
 	.inum		= ipcns_inum,
 };
+
+/* log the ID of init IPC namespace after audit service starts */
+static int __init ipc_namespaces_init(void)
+{
+	audit_log_ns_init(AUDIT_NS_INIT_IPC, 0, init_ipc_ns.proc_inum);
+	return 0;
+}
+late_initcall(ipc_namespaces_init);
diff --git a/kernel/audit.c b/kernel/audit.c
index 63f32f4..e6230c4 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1978,6 +1978,94 @@  out:
 	kfree(name);
 }
 
+#ifdef CONFIG_NAMESPACES
+/*
+ * Namespace name used to build the "<name>ns=" record fields, indexed by the
+ * offset of the message type from AUDIT_NS_INIT_MNT or AUDIT_NS_DEL_MNT.
+ * The order must match the AUDIT_NS_INIT_* and AUDIT_NS_DEL_* definitions.
+ */
+static const char * const ns_name[] = {
+	"mnt",
+	"uts",
+	"ipc",
+	"user",
+	"pid",
+	"net",
+};
+
+/**
+ * audit_log_ns_init - report a namespace instance creation
+ * @type: type of audit namespace instance created message
+ *	  (AUDIT_NS_INIT_MNT through AUDIT_NS_INIT_NET)
+ * @old_inum: the ID number of the cloned namespace instance, or 0 when
+ *	      there is none (logged as "(none)")
+ * @inum: the ID number of the new namespace instance
+ *
+ * An out-of-range @type triggers a WARN and no record is emitted.
+ */
+void audit_log_ns_init(int type, unsigned int old_inum, unsigned int inum)
+{
+	struct audit_buffer *ab;
+	const char *audit_ns_name;
+	struct vfsmount *mnt;
+	struct super_block *sb;
+	char old_ns[16];
+
+	/* Validate @type before it is used as an index into ns_name[]. */
+	if (type < AUDIT_NS_INIT_MNT || type > AUDIT_NS_INIT_NET) {
+		WARN(1, "audit_log_ns_init: type:%d out of range", type);
+		return;
+	}
+	audit_ns_name = ns_name[type - AUDIT_NS_INIT_MNT];
+	mnt = task_active_pid_ns(current)->proc_mnt;
+	sb = mnt->mnt_sb;
+
+	/* IDs are logged relative to PROC_DYNAMIC_FIRST, as signed values. */
+	if (!old_inum)
+		snprintf(old_ns, sizeof(old_ns), "(none)");
+	else
+		snprintf(old_ns, sizeof(old_ns), "%d",
+			 (int)(old_inum - PROC_DYNAMIC_FIRST));
+	audit_log_common_recv_msg(&ab, type);
+	audit_log_format(ab, " dev=%02x:%02x old_%sns=%s %sns=%d res=1",
+			 MAJOR(sb->s_dev), MINOR(sb->s_dev),
+			 audit_ns_name, old_ns,
+			 audit_ns_name, (int)(inum - PROC_DYNAMIC_FIRST));
+	audit_log_end(ab);
+}
+
+/**
+ * audit_log_ns_del - report a namespace instance deleted
+ * @type: type of audit namespace instance deleted message
+ *	  (AUDIT_NS_DEL_MNT through AUDIT_NS_DEL_NET)
+ * @inum: the ID number of the namespace instance
+ *
+ * An out-of-range @type triggers a WARN and no record is emitted.
+ */
+void audit_log_ns_del(int type, unsigned int inum)
+{
+	struct audit_buffer *ab;
+	const char *audit_ns_name;
+	struct vfsmount *mnt;
+	struct super_block *sb;
+
+	/* Validate @type before it is used as an index into ns_name[]. */
+	if (type < AUDIT_NS_DEL_MNT || type > AUDIT_NS_DEL_NET) {
+		WARN(1, "audit_log_ns_del: type:%d out of range", type);
+		return;
+	}
+	audit_ns_name = ns_name[type - AUDIT_NS_DEL_MNT];
+	mnt = task_active_pid_ns(current)->proc_mnt;
+	sb = mnt->mnt_sb;
+
+	audit_log_common_recv_msg(&ab, type);
+	audit_log_format(ab, " dev=%02x:%02x %sns=%d res=1",
+			 MAJOR(sb->s_dev), MINOR(sb->s_dev), audit_ns_name,
+			 (int)(inum - PROC_DYNAMIC_FIRST));
+	audit_log_end(ab);
+}
+#endif /* CONFIG_NAMESPACES */
+
 /**
  * audit_log_end - end one audit record
  * @ab: the audit_buffer
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index db95d8e..d28fd14 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -18,6 +18,7 @@ 
 #include <linux/proc_ns.h>
 #include <linux/reboot.h>
 #include <linux/export.h>
+#include <linux/audit.h>
 
 struct pid_cache {
 	int nr_ids;
@@ -109,6 +110,9 @@  static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
 	if (err)
 		goto out_free_map;
 
+	audit_log_ns_init(AUDIT_NS_INIT_PID, parent_pid_ns->proc_inum,
+			  ns->proc_inum);
+
 	kref_init(&ns->kref);
 	ns->level = level;
 	ns->parent = get_pid_ns(parent_pid_ns);
@@ -142,6 +146,7 @@  static void destroy_pid_namespace(struct pid_namespace *ns)
 {
 	int i;
 
+	audit_log_ns_del(AUDIT_NS_DEL_PID, ns->proc_inum);
 	proc_free_inum(ns->proc_inum);
 	for (i = 0; i < PIDMAP_ENTRIES; i++)
 		kfree(ns->pidmap[i].page);
@@ -388,3 +393,11 @@  static __init int pid_namespaces_init(void)
 }
 
 __initcall(pid_namespaces_init);
+
+/* log the ID of init PID namespace after audit service starts */
+static __init int pid_namespaces_late_init(void)
+{
+	audit_log_ns_init(AUDIT_NS_INIT_PID, 0, init_pid_ns.proc_inum);
+	return 0;
+}
+late_initcall(pid_namespaces_late_init);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index fcc0256..89c2517 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -22,6 +22,7 @@ 
 #include <linux/ctype.h>
 #include <linux/projid.h>
 #include <linux/fs_struct.h>
+#include <linux/audit.h>
 
 static struct kmem_cache *user_ns_cachep __read_mostly;
 
@@ -92,6 +93,9 @@  int create_user_ns(struct cred *new)
 		return ret;
 	}
 
+	audit_log_ns_init(AUDIT_NS_INIT_USER, parent_ns->proc_inum,
+			  ns->proc_inum);
+
 	atomic_set(&ns->count, 1);
 	/* Leave the new->user_ns reference with the new user namespace. */
 	ns->parent = parent_ns;
@@ -136,6 +140,7 @@  void free_user_ns(struct user_namespace *ns)
 #ifdef CONFIG_PERSISTENT_KEYRINGS
 		key_put(ns->persistent_keyring_register);
 #endif
+		audit_log_ns_del(AUDIT_NS_DEL_USER, ns->proc_inum);
 		proc_free_inum(ns->proc_inum);
 		kmem_cache_free(user_ns_cachep, ns);
 		ns = parent;
@@ -909,3 +914,11 @@  static __init int user_namespaces_init(void)
 	return 0;
 }
 subsys_initcall(user_namespaces_init);
+
+/* log the ID of init user namespace after audit service starts */
+static __init int user_namespaces_late_init(void)
+{
+	audit_log_ns_init(AUDIT_NS_INIT_USER, 0, init_user_ns.proc_inum);
+	return 0;
+}
+late_initcall(user_namespaces_late_init);
diff --git a/kernel/utsname.c b/kernel/utsname.c
index fd39312..fa21e8d 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -16,6 +16,7 @@ 
 #include <linux/slab.h>
 #include <linux/user_namespace.h>
 #include <linux/proc_ns.h>
+#include <linux/audit.h>
 
 static struct uts_namespace *create_uts_ns(void)
 {
@@ -48,6 +49,8 @@  static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
 		return ERR_PTR(err);
 	}
 
+	audit_log_ns_init(AUDIT_NS_INIT_UTS, old_ns->proc_inum, ns->proc_inum);
+
 	down_read(&uts_sem);
 	memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
 	ns->user_ns = get_user_ns(user_ns);
@@ -84,6 +87,7 @@  void free_uts_ns(struct kref *kref)
 
 	ns = container_of(kref, struct uts_namespace, kref);
 	put_user_ns(ns->user_ns);
+	audit_log_ns_del(AUDIT_NS_DEL_UTS, ns->proc_inum);
 	proc_free_inum(ns->proc_inum);
 	kfree(ns);
 }
@@ -138,3 +142,11 @@  const struct proc_ns_operations utsns_operations = {
 	.install	= utsns_install,
 	.inum		= utsns_inum,
 };
+
+/* log the ID of init UTS namespace after audit service starts */
+static int __init uts_namespaces_init(void)
+{
+	audit_log_ns_init(AUDIT_NS_INIT_UTS, 0, init_uts_ns.proc_inum);
+	return 0;
+}
+late_initcall(uts_namespaces_init);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 85b6269..562eb85 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -17,6 +17,7 @@ 
 #include <linux/user_namespace.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
+#include <linux/audit.h>
 
 /*
  *	Our network namespace constructor/destructor lists
@@ -253,6 +254,8 @@  struct net *copy_net_ns(unsigned long flags,
 	mutex_lock(&net_mutex);
 	rv = setup_net(net, user_ns);
 	if (rv == 0) {
+		audit_log_ns_init(AUDIT_NS_INIT_NET, old_net->proc_inum,
+				  net->proc_inum);
 		rtnl_lock();
 		list_add_tail_rcu(&net->list, &net_namespace_list);
 		rtnl_unlock();
@@ -389,6 +392,7 @@  static __net_init int net_ns_net_init(struct net *net)
 
 static __net_exit void net_ns_net_exit(struct net *net)
 {
+	audit_log_ns_del(AUDIT_NS_DEL_NET, net->proc_inum);
 	proc_free_inum(net->proc_inum);
 }
 
@@ -435,6 +439,14 @@  static int __init net_ns_init(void)
 
 pure_initcall(net_ns_init);
 
+/* log the ID of init_net namespace after audit service starts */
+static int __init net_ns_init_log(void)
+{
+	audit_log_ns_init(AUDIT_NS_INIT_NET, 0, init_net.proc_inum);
+	return 0;
+}
+late_initcall(net_ns_init_log);
+
 #ifdef CONFIG_NET_NS
 static int __register_pernet_operations(struct list_head *list,
 					struct pernet_operations *ops)