new file mode 100644
@@ -0,0 +1,13 @@
+What: Audit Container Identifier
+Date: 2020-??
+KernelVersion: 5.10?
+Contact: linux-audit@redhat.com
+Format: u64
+Users: auditd, libaudit, audit-testsuite, podman(?), container orchestrators
+Description:
+ The /proc/$pid/audit_containerid pseudofile is written
+ to set and read to get the audit container identifier of
+ process $pid. The accessor must have CAP_AUDIT_CONTROL
+ or have its own /proc/$pid/capcontainerid set to write
+ or read.
+
@@ -1244,7 +1244,7 @@ static const struct file_operations proc_oom_score_adj_operations = {
};
#ifdef CONFIG_AUDIT
-#define TMPBUFLEN 11
+#define TMPBUFLEN 21
static ssize_t proc_loginuid_read(struct file * file, char __user * buf,
size_t count, loff_t *ppos)
{
@@ -1331,6 +1331,58 @@ static const struct file_operations proc_sessionid_operations = {
.read = proc_sessionid_read,
.llseek = generic_file_llseek,
};
+
+/*
+ * Read handler for the audit_containerid procfs file.
+ *
+ * Resolves the task behind the proc inode, lets audit_get_contid_proc()
+ * format that task's container identifier into a stack buffer and then
+ * copies the requested window of the text out to user space.
+ *
+ * Returns the result of simple_read_from_buffer(), -ESRCH when the task
+ * cannot be resolved, or the negative value handed back by
+ * audit_get_contid_proc().
+ */
+static ssize_t proc_contid_read(struct file *file, char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	char text[TMPBUFLEN];
+	struct task_struct *tsk;
+	ssize_t len;
+
+	tsk = get_proc_task(file_inode(file));
+	if (!tsk)
+		return -ESRCH;
+
+	len = audit_get_contid_proc(text, TMPBUFLEN, tsk);
+	/* The task reference is only needed while formatting. */
+	put_task_struct(tsk);
+
+	if (len >= 0)
+		return simple_read_from_buffer(buf, count, ppos, text, len);
+	return len;
+}
+
+/*
+ * Write handler for the audit_containerid procfs file.
+ *
+ * Parses a base-10 u64 from the user buffer and passes it to
+ * audit_set_contid() for the task behind the proc inode.  Writes are
+ * only accepted at file offset 0.
+ *
+ * Returns @count on success, -ESRCH when the task cannot be resolved,
+ * -EINVAL for a non-zero offset, or the negative error reported by
+ * kstrtou64_from_user() / audit_set_contid().
+ */
+static ssize_t proc_contid_write(struct file *file, const char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	struct task_struct *tsk = get_proc_task(file_inode(file));
+	u64 value;
+	int err;
+
+	if (!tsk)
+		return -ESRCH;
+
+	/* No partial writes. */
+	if (*ppos != 0) {
+		err = -EINVAL;
+		goto drop_task;
+	}
+
+	err = kstrtou64_from_user(buf, count, 10, &value);
+	if (err < 0)
+		goto drop_task;
+
+	err = audit_set_contid(tsk, value);
+
+drop_task:
+	/* Single exit point releasing the reference from get_proc_task(). */
+	put_task_struct(tsk);
+	if (err < 0)
+		return err;
+	return count;
+}
+
+/* File operations wired to the "audit_containerid" procfs entries. */
+static const struct file_operations proc_contid_operations = {
+	.llseek		= generic_file_llseek,
+	.read		= proc_contid_read,
+	.write		= proc_contid_write,
+};
#endif
#ifdef CONFIG_FAULT_INJECTION
@@ -3233,6 +3285,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_AUDIT
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
+ REG("audit_containerid", S_IWUSR|S_IRUSR, proc_contid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
@@ -3575,6 +3628,7 @@ static const struct pid_entry tid_base_stuff[] = {
#ifdef CONFIG_AUDIT
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
+ REG("audit_containerid", S_IWUSR|S_IRUSR, proc_contid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
@@ -204,6 +204,11 @@ extern kuid_t audit_get_loginuid(struct task_struct *tsk);
extern unsigned int audit_get_sessionid(struct task_struct *tsk);
extern void audit_set_sessionid_iouring(unsigned int sessionid);
+/*
+ * Format @task's audit container identifier as decimal text into @tmpbuf
+ * (buffer size @TMPBUFLEN).  Returns the number of characters written or
+ * -EPERM when the caller lacks CAP_AUDIT_CONTROL.
+ */
+extern int audit_get_contid_proc(char *tmpbuf, int TMPBUFLEN,
+ struct task_struct *task);
+
+/*
+ * Assign audit container identifier @contid to @tsk.  Returns 0 on
+ * success or a negative errno.
+ */
+extern int audit_set_contid(struct task_struct *tsk, u64 contid);
+
extern u32 audit_enabled;
extern int audit_signal_info(int sig, struct task_struct *t);
@@ -71,6 +71,7 @@
#define AUDIT_TTY_SET 1017 /* Set TTY auditing status */
#define AUDIT_SET_FEATURE 1018 /* Turn an audit feature on or off */
#define AUDIT_GET_FEATURE 1019 /* Get which features are enabled */
+#define AUDIT_CONTAINER_OP 1020 /* Define the container id and info */
#define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages mostly uninteresting to kernel */
#define AUDIT_USER_AVC 1107 /* We filter this differently */
@@ -495,6 +496,7 @@ struct audit_tty_status {
#define AUDIT_UID_UNSET (unsigned int)-1
#define AUDIT_SID_UNSET ((unsigned int)-1)
+#define AUDIT_CID_UNSET ((u64)-1)
/* audit_rule_data supports filter rules with both integer and string
* fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
@@ -144,6 +144,15 @@ static atomic_t audit_backlog_wait_time_actual = ATOMIC_INIT(0);
/* Hash for inode-based rules */
struct list_head audit_inode_hash[AUDIT_INODE_BUCKETS];
+/* Number of contid hash buckets.  audit_hash_contid() masks with
+ * (AUDIT_CONTID_BUCKETS-1), so this has to remain a power of two.
+ */
+#define AUDIT_CONTID_BUCKETS 32
+/* Hash for contid object lists */
+static struct list_head audit_contid_hash[AUDIT_CONTID_BUCKETS];
+/* Lock all additions to and deletions from the contid hash lists, and
+ * the assignment of container objects to tasks.  There should be no
+ * need for interaction with tasklist_lock.
+ * NOTE(review): audit_set_contid() takes this lock nested inside
+ * read_lock(&tasklist_lock) and task_lock() - confirm the intended
+ * lock ordering.
+ */
+static DEFINE_SPINLOCK(_audit_contobj_list_lock);
+
static struct kmem_cache *audit_buffer_cache;
/* queue msgs to send via kauditd_task */
@@ -208,9 +217,18 @@ struct audit_reply {
struct sk_buff *skb;
};
+/*
+ * One audit container object.  Objects are created by audit_set_contid(),
+ * shared between tasks via the refcount and freed by _audit_contobj_put()
+ * once the last reference is dropped.
+ */
+struct audit_contobj {
+ struct list_head list;	/* link in an audit_contid_hash[] bucket */
+ u64 id;			/* audit container identifier */
+ struct task_struct *owner;	/* task that created the object; a task
+				 * reference is held until the object is freed
+				 */
+ refcount_t refcount;		/* number of holders of this object */
+ struct rcu_head rcu;		/* used by kfree_rcu() on release */
+};
+
struct audit_task_info {
kuid_t loginuid;
unsigned int sessionid;
+ struct audit_contobj *cont;
#ifdef CONFIG_AUDITSYSCALL
struct audit_context *ctx;
#endif
@@ -261,6 +279,15 @@ inline void audit_set_sessionid_iouring(unsigned int sessionid)
info->sessionid = sessionid;
}
+/*
+ * Look up the audit container identifier recorded for @tsk.
+ *
+ * Returns AUDIT_CID_UNSET when the task has no audit_task_info or no
+ * container object attached, otherwise the id of that object.
+ */
+static inline u64 audit_get_contid(struct task_struct *tsk)
+{
+	struct audit_task_info *ati = tsk->audit;
+
+	if (ati && ati->cont)
+		return ati->cont->id;
+	return AUDIT_CID_UNSET;
+}
+
inline struct audit_context *_audit_context(struct task_struct *tsk)
{
struct audit_task_info *info = tsk->audit;
@@ -276,6 +303,39 @@ struct audit_context *audit_context(void)
}
EXPORT_SYMBOL(audit_context);
+/*
+ * Take an additional reference on @cont.  A NULL @cont is passed through
+ * untouched so callers can chain the result without checking first.
+ */
+static struct audit_contobj *_audit_contobj_get(struct audit_contobj *cont)
+{
+	if (!cont)
+		return NULL;
+
+	refcount_inc(&cont->refcount);
+	return cont;
+}
+
+/*
+ * Take a reference on the container object attached to @tsk.
+ *
+ * Returns the referenced object, or NULL when the task has no
+ * audit_task_info or no container object.
+ */
+static struct audit_contobj *_audit_contobj_get_bytask(struct task_struct *tsk)
+{
+	struct audit_task_info *ati = tsk->audit;
+
+	return ati ? _audit_contobj_get(ati->cont) : NULL;
+}
+
+/*
+ * Drop one reference on @cont (NULL is ignored).
+ *
+ * When the last reference goes away the owner task reference is released,
+ * the object is unlinked from its hash list and it is handed to
+ * kfree_rcu() for freeing.
+ *
+ * _audit_contobj_list_lock must be held by caller.
+ */
+static void _audit_contobj_put(struct audit_contobj *cont)
+{
+	if (!cont || !refcount_dec_and_test(&cont->refcount))
+		return;
+
+	put_task_struct(cont->owner);
+	list_del_rcu(&cont->list);
+	kfree_rcu(cont, rcu);
+}
+
+/* Map @contid to its bucket index in audit_contid_hash[]. */
+static inline int audit_hash_contid(u64 contid)
+{
+	const u64 bucket_mask = AUDIT_CONTID_BUCKETS - 1;
+
+	return contid & bucket_mask;
+}
+
static void audit_alloc_task(struct task_struct *tsk)
{
struct audit_task_info *info = tsk->audit;
@@ -289,6 +349,9 @@ static void audit_alloc_task(struct task_struct *tsk)
}
info->loginuid = audit_get_loginuid(current);
info->sessionid = audit_get_sessionid(current);
+ rcu_read_lock();
+ info->cont = _audit_contobj_get_bytask(current);
+ rcu_read_unlock();
tsk->audit = info;
}
@@ -343,6 +406,9 @@ void audit_free(struct task_struct *tsk)
struct audit_task_info *info = tsk->audit;
audit_free_syscall(tsk);
+ spin_lock(&_audit_contobj_list_lock);
+ _audit_contobj_put(info->cont);
+ spin_unlock(&_audit_contobj_list_lock);
/* Freeing the audit_task_info struct must be performed after
* audit_log_exit() due to need for loginuid and sessionid.
*/
@@ -1795,6 +1861,9 @@ static int __init audit_init(void)
for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
INIT_LIST_HEAD(&audit_inode_hash[i]);
+ for (i = 0; i < AUDIT_CONTID_BUCKETS; i++)
+ INIT_LIST_HEAD(&audit_contid_hash[i]);
+
mutex_init(&audit_cmd_mutex.lock);
audit_cmd_mutex.owner = NULL;
@@ -2509,6 +2578,147 @@ int audit_signal_info(int sig, struct task_struct *t)
return audit_signal_info_syscall(t);
}
+/**
+ * audit_set_contid - set a task's audit container identifier
+ * @tsk: target task
+ * @contid: contid value
+ *
+ * Returns 0 on success or a negative errno:
+ *   -ENOPROTOOPT  @tsk has no audit_task_info (audit disabled)
+ *   -EINVAL       @contid is AUDIT_CID_UNSET
+ *   -EPERM        the caller lacks CAP_AUDIT_CONTROL
+ *   -EBUSY        @tsk has children or is not single-threaded
+ *   -EEXIST       @tsk already has a contid
+ *   -ENOTUNIQ     @contid is already in use and current is not its owner
+ *   -ENOMEM       a new container object could not be allocated
+ *
+ * If the original container owner goes away, no task injection is
+ * possible to an existing container.
+ *
+ * When auditing is enabled an AUDIT_CONTAINER_OP record is emitted for
+ * every attempt that gets past the audit_task_info check, successful
+ * or not.
+ *
+ * Called (set) from fs/proc/base.c::proc_contid_write().
+ */
+int audit_set_contid(struct task_struct *tsk, u64 contid)
+{
+	int rc = 0;
+	struct audit_buffer *ab;
+	struct audit_contobj *oldcont = NULL;
+	struct audit_contobj *cont = NULL, *newcont = NULL;
+	int h;
+	struct audit_task_info *info = tsk->audit;
+
+	/*
+	 * Can't set if audit disabled.  No lock has been taken at this
+	 * point, so there is nothing to unlock before returning.
+	 */
+	if (!info)
+		return -ENOPROTOOPT;
+
+	read_lock(&tasklist_lock);
+	task_lock(tsk);
+	if (contid == AUDIT_CID_UNSET) {
+		/* Don't allow the contid to be unset */
+		rc = -EINVAL;
+	} else if (!capable(CAP_AUDIT_CONTROL)) {
+		/* if we don't have caps, reject */
+		rc = -EPERM;
+	} else if (!list_empty(&tsk->children) ||
+		   !(thread_group_leader(tsk) && thread_group_empty(tsk))) {
+		/* if task has children or is not single-threaded, deny */
+		rc = -EBUSY;
+	} else if (info->cont) {
+		/* if contid is already set, deny */
+		rc = -EEXIST;
+	}
+	rcu_read_lock();
+	/* Temporary reference so the old id can be logged after unlocking. */
+	oldcont = _audit_contobj_get_bytask(tsk);
+	if (rc)
+		goto error;
+
+	h = audit_hash_contid(contid);
+	spin_lock(&_audit_contobj_list_lock);
+	list_for_each_entry_rcu(cont, &audit_contid_hash[h], list) {
+		if (cont->id == contid) {
+			/* task injection to existing container */
+			if (current == cont->owner) {
+				_audit_contobj_get(cont);
+				newcont = cont;
+			} else {
+				rc = -ENOTUNIQ;
+				spin_unlock(&_audit_contobj_list_lock);
+				goto error;
+			}
+			break;
+		}
+	}
+	if (!newcont) {
+		/* Spinlock held: the allocation must not sleep. */
+		newcont = kmalloc(sizeof(*newcont), GFP_ATOMIC);
+		if (newcont) {
+			INIT_LIST_HEAD(&newcont->list);
+			newcont->id = contid;
+			newcont->owner = get_task_struct(current);
+			refcount_set(&newcont->refcount, 1);
+			list_add_rcu(&newcont->list,
+				     &audit_contid_hash[h]);
+		} else {
+			rc = -ENOMEM;
+			spin_unlock(&_audit_contobj_list_lock);
+			goto error;
+		}
+	}
+	info->cont = newcont;
+	/* Release the reference the task itself held on its old object. */
+	_audit_contobj_put(oldcont);
+	spin_unlock(&_audit_contobj_list_lock);
+error:
+	rcu_read_unlock();
+	task_unlock(tsk);
+	read_unlock(&tasklist_lock);
+
+	if (!audit_enabled)
+		goto out_put;
+
+	ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_CONTAINER_OP);
+	if (!ab)
+		goto out_put;
+
+	audit_log_format(ab,
+			 "op=set opid=%d contid=%llu old-contid=%llu",
+			 task_tgid_nr(tsk), contid,
+			 oldcont ? oldcont->id : AUDIT_CID_UNSET);
+	audit_log_end(ab);
+out_put:
+	/*
+	 * Drop the temporary reference on every exit path, including the
+	 * ones that skip logging, so the old object is never leaked.
+	 */
+	spin_lock(&_audit_contobj_list_lock);
+	_audit_contobj_put(oldcont);
+	spin_unlock(&_audit_contobj_list_lock);
+	return rc;
+}
+
+/*
+ * Format the audit container identifier of @tsk as an unsigned decimal
+ * string into @tmpbuf, which is @TMPBUFLEN bytes large.
+ *
+ * Returns the number of characters scnprintf() wrote, or -EPERM when the
+ * caller lacks CAP_AUDIT_CONTROL.
+ */
+int audit_get_contid_proc(char *tmpbuf, int TMPBUFLEN,
+			  struct task_struct *tsk)
+{
+	/* if we don't have caps, reject */
+	if (!capable(CAP_AUDIT_CONTROL))
+		return -EPERM;
+
+	return scnprintf(tmpbuf, TMPBUFLEN, "%llu", audit_get_contid(tsk));
+}
+
+/*
+ * Emit an AUDIT_CONTAINER_OP "op=drop" record for the container object
+ * of the current task, provided its reference count is no higher than 2
+ * once a temporary reference has been taken here.
+ * NOTE(review): presumably a count of 2 means this task's own reference
+ * plus the temporary one, i.e. current is the last user - confirm.
+ *
+ * Does nothing when current has no container object.
+ */
+void audit_log_container_drop(void)
+{
+	struct audit_contobj *obj;
+	struct audit_buffer *ab;
+
+	rcu_read_lock();
+	obj = _audit_contobj_get_bytask(current);
+	rcu_read_unlock();
+	if (!obj)
+		return;
+
+	if (refcount_read(&obj->refcount) <= 2) {
+		ab = audit_log_start(audit_context(), GFP_KERNEL,
+				     AUDIT_CONTAINER_OP);
+		if (ab) {
+			audit_log_format(ab, "op=drop opid=%d contid=-1 old-contid=%llu",
+					 task_tgid_nr(current), obj->id);
+			audit_log_end(ab);
+		}
+	}
+
+	/* Release the temporary reference taken above. */
+	spin_lock(&_audit_contobj_list_lock);
+	_audit_contobj_put(obj);
+	spin_unlock(&_audit_contobj_list_lock);
+}
+
/**
* audit_log_end - end one audit record
* @ab: the audit_buffer
@@ -210,6 +210,8 @@ static inline int audit_hash_ino(u32 ino)
return (ino & (AUDIT_INODE_BUCKETS-1));
}
+extern void audit_log_container_drop(void);
+
/* Indicates that audit should log the full pathname. */
#define AUDIT_NAME_FULL -1
@@ -1596,6 +1596,8 @@ static void audit_log_exit(void)
audit_log_proctitle();
+ audit_log_container_drop();
+
/* Send end of event record to help user space know we are finished */
ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE);
if (ab)