Yama: fix double-spinlock and user access in atomic context
diff mbox

Message ID 1463868720-7706-1-git-send-email-jann@thejh.net
State New
Headers show

Commit Message

Jann Horn May 21, 2016, 10:12 p.m. UTC
Commit 8a56038c2aef ("Yama: consolidate error reporting") causes lockups
when someone hits a Yama denial. Call chain:

process_vm_readv -> process_vm_rw -> process_vm_rw_core -> mm_access
-> ptrace_may_access
task_lock(...) is taken
__ptrace_may_access -> security_ptrace_access_check
-> yama_ptrace_access_check -> report_access -> kstrdup_quotable_cmdline
-> get_cmdline -> access_process_vm -> get_task_mm
task_lock(...) is taken again

task_lock(p) just calls spin_lock(&p->alloc_lock), so at this point,
spin_lock() is called on a lock that is already held by the current
process.

Also: Since the alloc_lock is a spinlock, sleeping inside
security_ptrace_access_check hooks is probably not allowed at all? So it's
not even possible to print the cmdline from in there because that might
involve paging in userspace memory.

It would be tempting to rewrite ptrace_may_access() to drop the alloc_lock
before calling the LSM, but even then, ptrace_may_access() itself might be
called from various contexts in which you're not allowed to sleep; for
example, as far as I understand, to be able to hold a reference to another
task, usually an RCU read lock will be taken (see e.g. kcmp() and
get_robust_list()), so that also prohibits sleeping. (And using e.g. FUSE,
a user can cause pagefault handling to take arbitrary amounts of time -
see https://bugs.chromium.org/p/project-zero/issues/detail?id=808.)

Therefore, AFAIK, in order to print the name of a process below
security_ptrace_access_check(), you'd have to either grab a reference to
the mm_struct and defer the access violation reporting or just use the
"comm" value that's stored in kernelspace and accessible without big
complications. (Or you could try to use some kind of atomic remote VM
access that fails if the memory isn't paged in, similar to
copy_from_user_inatomic(), and if necessary fall back to comm, but
that'd be kind of ugly because the comm/cmdline choice would look
pretty random to the user.)

Fix it by deferring reporting of the access violation until current
exits kernelspace the next time.

Fixes: 8a56038c2aef ("Yama: consolidate error reporting")
Signed-off-by: Jann Horn <jann@thejh.net>
---
 security/yama/yama_lsm.c | 64 ++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 59 insertions(+), 5 deletions(-)

Comments

Kees Cook May 21, 2016, 10:16 p.m. UTC | #1
On Sat, May 21, 2016 at 3:12 PM, Jann Horn <jann@thejh.net> wrote:
> Commit 8a56038c2aef ("Yama: consolidate error reporting") causes lockups
> when someone hits a Yama denial. Call chain:
>
> process_vm_readv -> process_vm_rw -> process_vm_rw_core -> mm_access
> -> ptrace_may_access
> task_lock(...) is taken
> __ptrace_may_access -> security_ptrace_access_check
> -> yama_ptrace_access_check -> report_access -> kstrdup_quotable_cmdline
> -> get_cmdline -> access_process_vm -> get_task_mm
> task_lock(...) is taken again
>
> task_lock(p) just calls spin_lock(&p->alloc_lock), so at this point,
> spin_lock() is called on a lock that is already held by the current
> process.
>
> Also: Since the alloc_lock is a spinlock, sleeping inside
> security_ptrace_access_check hooks is probably not allowed at all? So it's
> not even possible to print the cmdline from in there because that might
> involve paging in userspace memory.
>
> It would be tempting to rewrite ptrace_may_access() to drop the alloc_lock
> before calling the LSM, but even then, ptrace_may_access() itself might be
> called from various contexts in which you're not allowed to sleep; for
> example, as far as I understand, to be able to hold a reference to another
> task, usually an RCU read lock will be taken (see e.g. kcmp() and
> get_robust_list()), so that also prohibits sleeping. (And using e.g. FUSE,
> a user can cause pagefault handling to take arbitrary amounts of time -
> see https://bugs.chromium.org/p/project-zero/issues/detail?id=808.)
>
> Therefore, AFAIK, in order to print the name of a process below
> security_ptrace_access_check(), you'd have to either grab a reference to
> the mm_struct and defer the access violation reporting or just use the
> "comm" value that's stored in kernelspace and accessible without big
> complications. (Or you could try to use some kind of atomic remote VM
> access that fails if the memory isn't paged in, similar to
> copy_from_user_inatomic(), and if necessary fall back to comm, but
> that'd be kind of ugly because the comm/cmdline choice would look
> pretty random to the user.)
>
> Fix it by deferring reporting of the access violation until current
> exits kernelspace the next time.
>
> Fixes: 8a56038c2aef ("Yama: consolidate error reporting")
> Signed-off-by: Jann Horn <jann@thejh.net>

Ah-ha! Thanks! James, can you please take this?

Acked-by: Kees Cook <keescook@chromium.org>

-Kees

> ---
>  security/yama/yama_lsm.c | 64 ++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 59 insertions(+), 5 deletions(-)
>
> diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
> index 9b756b1..9d55ade 100644
> --- a/security/yama/yama_lsm.c
> +++ b/security/yama/yama_lsm.c
> @@ -19,6 +19,9 @@
>  #include <linux/ratelimit.h>
>  #include <linux/workqueue.h>
>  #include <linux/string_helpers.h>
> +#include <linux/task_work.h>
> +#include <linux/sched.h>
> +#include <linux/spinlock.h>
>
>  #define YAMA_SCOPE_DISABLED    0
>  #define YAMA_SCOPE_RELATIONAL  1
> @@ -42,20 +45,71 @@ static DEFINE_SPINLOCK(ptracer_relations_lock);
>  static void yama_relation_cleanup(struct work_struct *work);
>  static DECLARE_WORK(yama_relation_work, yama_relation_cleanup);
>
> -static void report_access(const char *access, struct task_struct *target,
> -                         struct task_struct *agent)
> +struct access_report_info {
> +       struct callback_head work;
> +       const char *access;
> +       struct task_struct *target;
> +       struct task_struct *agent;
> +};
> +
> +static void __report_access(struct callback_head *work)
>  {
> +       struct access_report_info *info =
> +               container_of(work, struct access_report_info, work);
>         char *target_cmd, *agent_cmd;
>
> -       target_cmd = kstrdup_quotable_cmdline(target, GFP_ATOMIC);
> -       agent_cmd = kstrdup_quotable_cmdline(agent, GFP_ATOMIC);
> +       target_cmd = kstrdup_quotable_cmdline(info->target, GFP_ATOMIC);
> +       agent_cmd = kstrdup_quotable_cmdline(info->agent, GFP_ATOMIC);
>
>         pr_notice_ratelimited(
>                 "ptrace %s of \"%s\"[%d] was attempted by \"%s\"[%d]\n",
> -               access, target_cmd, target->pid, agent_cmd, agent->pid);
> +               info->access, target_cmd, info->target->pid, agent_cmd,
> +               info->agent->pid);
>
>         kfree(agent_cmd);
>         kfree(target_cmd);
> +
> +       put_task_struct(info->agent);
> +       put_task_struct(info->target);
> +       kfree(info);
> +}
> +
> +/* defers execution because cmdline access can sleep */
> +static void report_access(const char *access, struct task_struct *target,
> +                               struct task_struct *agent)
> +{
> +       struct access_report_info *info;
> +       char agent_comm[sizeof(agent->comm)];
> +
> +       assert_spin_locked(&target->alloc_lock); /* for agent->comm */
> +
> +       if (current->flags & PF_KTHREAD) {
> +               /* I don't think kthreads call task_work_run() before exiting.
> +                * Imagine angry ranting about procfs here.
> +                */
> +               pr_notice_ratelimited(
> +                   "ptrace %s of \"%s\"[%d] was attempted by \"%s\"[%d]\n",
> +                   access, target->comm, target->pid,
> +                   get_task_comm(agent_comm, agent), agent->pid);
> +               return;
> +       }
> +
> +       info = kmalloc(sizeof(*info), GFP_ATOMIC);
> +       if (!info)
> +               return;
> +       init_task_work(&info->work, __report_access);
> +       get_task_struct(target);
> +       get_task_struct(agent);
> +       info->access = access;
> +       info->target = target;
> +       info->agent = agent;
> +       if (task_work_add(current, &info->work, true) == 0)
> +               return; /* success */
> +
> +       WARN(1, "report_access called from exiting task");
> +       put_task_struct(target);
> +       put_task_struct(agent);
> +       kfree(info);
>  }
>
>  /**
> --
> 2.1.4
>
Jann Horn May 22, 2016, 3:34 a.m. UTC | #2
On Sat, May 21, 2016 at 03:16:48PM -0700, Kees Cook wrote:
> On Sat, May 21, 2016 at 3:12 PM, Jann Horn <jann@thejh.net> wrote:
> > Commit 8a56038c2aef ("Yama: consolidate error reporting") causes lockups
> > when someone hits a Yama denial. Call chain:
> >
> > process_vm_readv -> process_vm_rw -> process_vm_rw_core -> mm_access
> > -> ptrace_may_access
> > task_lock(...) is taken
> > __ptrace_may_access -> security_ptrace_access_check
> > -> yama_ptrace_access_check -> report_access -> kstrdup_quotable_cmdline
> > -> get_cmdline -> access_process_vm -> get_task_mm
> > task_lock(...) is taken again
> >
> > task_lock(p) just calls spin_lock(&p->alloc_lock), so at this point,
> > spin_lock() is called on a lock that is already held by the current
> > process.
> >
> > Also: Since the alloc_lock is a spinlock, sleeping inside
> > security_ptrace_access_check hooks is probably not allowed at all? So it's
> > not even possible to print the cmdline from in there because that might
> > involve paging in userspace memory.
> >
> > It would be tempting to rewrite ptrace_may_access() to drop the alloc_lock
> > before calling the LSM, but even then, ptrace_may_access() itself might be
> > called from various contexts in which you're not allowed to sleep; for
> > example, as far as I understand, to be able to hold a reference to another
> > task, usually an RCU read lock will be taken (see e.g. kcmp() and
> > get_robust_list()), so that also prohibits sleeping. (And using e.g. FUSE,
> > a user can cause pagefault handling to take arbitrary amounts of time -
> > see https://bugs.chromium.org/p/project-zero/issues/detail?id=808.)
> >
> > Therefore, AFAIK, in order to print the name of a process below
> > security_ptrace_access_check(), you'd have to either grab a reference to
> > the mm_struct and defer the access violation reporting or just use the
> > "comm" value that's stored in kernelspace and accessible without big
> > complications. (Or you could try to use some kind of atomic remote VM
> > access that fails if the memory isn't paged in, similar to
> > copy_from_user_inatomic(), and if necessary fall back to comm, but
> > that'd be kind of ugly because the comm/cmdline choice would look
> > pretty random to the user.)
> >
> > Fix it by deferring reporting of the access violation until current
> > exits kernelspace the next time.
> >
> > Fixes: 8a56038c2aef ("Yama: consolidate error reporting")
> > Signed-off-by: Jann Horn <jann@thejh.net>
> 
> Ah-ha! Thanks! James, can you please take this?
> 
> Acked-by: Kees Cook <keescook@chromium.org>

Actually... whoops, please don't merge this. I just remembered that the
yama_ptrace_traceme() case doesn't take the task_lock, and I only tested
the yama_ptrace_access_check() one.

Patch
diff mbox

diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index 9b756b1..9d55ade 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -19,6 +19,9 @@ 
 #include <linux/ratelimit.h>
 #include <linux/workqueue.h>
 #include <linux/string_helpers.h>
+#include <linux/task_work.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
 
 #define YAMA_SCOPE_DISABLED	0
 #define YAMA_SCOPE_RELATIONAL	1
@@ -42,20 +45,71 @@  static DEFINE_SPINLOCK(ptracer_relations_lock);
 static void yama_relation_cleanup(struct work_struct *work);
 static DECLARE_WORK(yama_relation_work, yama_relation_cleanup);
 
-static void report_access(const char *access, struct task_struct *target,
-			  struct task_struct *agent)
+struct access_report_info {
+	struct callback_head work;
+	const char *access;
+	struct task_struct *target;
+	struct task_struct *agent;
+};
+
+static void __report_access(struct callback_head *work)
 {
+	struct access_report_info *info =
+		container_of(work, struct access_report_info, work);
 	char *target_cmd, *agent_cmd;
 
-	target_cmd = kstrdup_quotable_cmdline(target, GFP_ATOMIC);
-	agent_cmd = kstrdup_quotable_cmdline(agent, GFP_ATOMIC);
+	target_cmd = kstrdup_quotable_cmdline(info->target, GFP_ATOMIC);
+	agent_cmd = kstrdup_quotable_cmdline(info->agent, GFP_ATOMIC);
 
 	pr_notice_ratelimited(
 		"ptrace %s of \"%s\"[%d] was attempted by \"%s\"[%d]\n",
-		access, target_cmd, target->pid, agent_cmd, agent->pid);
+		info->access, target_cmd, info->target->pid, agent_cmd,
+		info->agent->pid);
 
 	kfree(agent_cmd);
 	kfree(target_cmd);
+
+	put_task_struct(info->agent);
+	put_task_struct(info->target);
+	kfree(info);
+}
+
+/* defers execution because cmdline access can sleep */
+static void report_access(const char *access, struct task_struct *target,
+				struct task_struct *agent)
+{
+	struct access_report_info *info;
+	char agent_comm[sizeof(agent->comm)];
+
+	assert_spin_locked(&target->alloc_lock); /* for agent->comm */
+
+	if (current->flags & PF_KTHREAD) {
+		/* I don't think kthreads call task_work_run() before exiting.
+		 * Imagine angry ranting about procfs here.
+		 */
+		pr_notice_ratelimited(
+		    "ptrace %s of \"%s\"[%d] was attempted by \"%s\"[%d]\n",
+		    access, target->comm, target->pid,
+		    get_task_comm(agent_comm, agent), agent->pid);
+		return;
+	}
+
+	info = kmalloc(sizeof(*info), GFP_ATOMIC);
+	if (!info)
+		return;
+	init_task_work(&info->work, __report_access);
+	get_task_struct(target);
+	get_task_struct(agent);
+	info->access = access;
+	info->target = target;
+	info->agent = agent;
+	if (task_work_add(current, &info->work, true) == 0)
+		return; /* success */
+
+	WARN(1, "report_access called from exiting task");
+	put_task_struct(target);
+	put_task_struct(agent);
+	kfree(info);
 }
 
 /**