diff mbox

[05/22] audit: Fix sleep in atomic

Message ID 20161222091538.28702-6-jack@suse.cz (mailing list archive)
State New, archived
Headers show

Commit Message

Jan Kara Dec. 22, 2016, 9:15 a.m. UTC
Audit tree code was happily adding new notification marks while holding
spinlocks. Since fsnotify_add_mark() acquires group->mark_mutex this can
lead to sleeping while holding a spinlock, deadlocks due to lock
inversion, and probably other fun. Fix the problem by acquiring
group->mark_mutex earlier.

CC: Paul Moore <paul@paul-moore.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 kernel/audit_tree.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

Comments

Paul Moore Dec. 22, 2016, 11:18 p.m. UTC | #1
On Thu, Dec 22, 2016 at 4:15 AM, Jan Kara <jack@suse.cz> wrote:
> Audit tree code was happily adding new notification marks while holding
> spinlocks. Since fsnotify_add_mark() acquires group->mark_mutex this can
> lead to sleeping while holding a spinlock, deadlocks due to lock
> inversion, and probably other fun. Fix the problem by acquiring
> group->mark_mutex earlier.
>
> CC: Paul Moore <paul@paul-moore.com>
> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
>  kernel/audit_tree.c | 13 +++++++++++--
>  1 file changed, 11 insertions(+), 2 deletions(-)

[SIDE NOTE: this patch explains your comments and my earlier concern
about the locked/unlocked variants of fsnotify_add_mark() in
untag_chunk()]

Ouch.  Thanks for catching this ... what is your goal with these
patches, are you targeting this as a fix during the v4.10-rcX cycle?
If not, any objections if I pull this patch into the audit tree and
send this to Linus during the v4.10-rcX cycle (assuming it passes
testing, yadda yadda)?

> diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
> index f3130eb0a4bd..156b6a93f4fc 100644
> --- a/kernel/audit_tree.c
> +++ b/kernel/audit_tree.c
> @@ -231,6 +231,7 @@ static void untag_chunk(struct node *p)
>         if (size)
>                 new = alloc_chunk(size);
>
> +       mutex_lock(&entry->group->mark_mutex);
>         spin_lock(&entry->lock);
>         if (chunk->dead || !entry->inode) {
>                 spin_unlock(&entry->lock);
> @@ -258,7 +259,8 @@ static void untag_chunk(struct node *p)
>         if (!new)
>                 goto Fallback;
>
> -       if (fsnotify_add_mark(&new->mark, entry->group, entry->inode, NULL, 1)) {
> +       if (fsnotify_add_mark_locked(&new->mark, entry->group, entry->inode,
> +                                    NULL, 1)) {
>                 fsnotify_put_mark(&new->mark);
>                 goto Fallback;
>         }
> @@ -309,6 +311,7 @@ static void untag_chunk(struct node *p)
>         spin_unlock(&hash_lock);
>         spin_unlock(&entry->lock);
>  out:
> +       mutex_unlock(&entry->group->mark_mutex);
>         fsnotify_put_mark(entry);
>         spin_lock(&hash_lock);
>  }
> @@ -385,17 +388,21 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
>
>         chunk_entry = &chunk->mark;
>
> +       mutex_lock(&old_entry->group->mark_mutex);
>         spin_lock(&old_entry->lock);
>         if (!old_entry->inode) {
>                 /* old_entry is being shot, lets just lie */
>                 spin_unlock(&old_entry->lock);
> +               mutex_unlock(&old_entry->group->mark_mutex);
>                 fsnotify_put_mark(old_entry);
>                 free_chunk(chunk);
>                 return -ENOENT;
>         }
>
> -       if (fsnotify_add_mark(chunk_entry, old_entry->group, old_entry->inode, NULL, 1)) {
> +       if (fsnotify_add_mark_locked(chunk_entry, old_entry->group,
> +                                    old_entry->inode, NULL, 1)) {
>                 spin_unlock(&old_entry->lock);
> +               mutex_unlock(&old_entry->group->mark_mutex);
>                 fsnotify_put_mark(chunk_entry);
>                 fsnotify_put_mark(old_entry);
>                 return -ENOSPC;
> @@ -411,6 +418,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
>                 chunk->dead = 1;
>                 spin_unlock(&chunk_entry->lock);
>                 spin_unlock(&old_entry->lock);
> +               mutex_unlock(&old_entry->group->mark_mutex);
>
>                 fsnotify_destroy_mark(chunk_entry, audit_tree_group);
>
> @@ -443,6 +451,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
>         spin_unlock(&hash_lock);
>         spin_unlock(&chunk_entry->lock);
>         spin_unlock(&old_entry->lock);
> +       mutex_unlock(&old_entry->group->mark_mutex);
>         fsnotify_destroy_mark(old_entry, audit_tree_group);
>         fsnotify_put_mark(chunk_entry); /* drop initial reference */
>         fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
> --
> 2.10.2
>
Jan Kara Dec. 23, 2016, 1:24 p.m. UTC | #2
On Thu 22-12-16 18:18:36, Paul Moore wrote:
> On Thu, Dec 22, 2016 at 4:15 AM, Jan Kara <jack@suse.cz> wrote:
> > Audit tree code was happily adding new notification marks while holding
> > spinlocks. Since fsnotify_add_mark() acquires group->mark_mutex this can
> > lead to sleeping while holding a spinlock, deadlocks due to lock
> > inversion, and probably other fun. Fix the problem by acquiring
> > group->mark_mutex earlier.
> >
> > CC: Paul Moore <paul@paul-moore.com>
> > Signed-off-by: Jan Kara <jack@suse.cz>
> > ---
> >  kernel/audit_tree.c | 13 +++++++++++--
> >  1 file changed, 11 insertions(+), 2 deletions(-)
> 
> [SIDE NOTE: this patch explains your comments and my earlier concern
> about the locked/unlocked variants of fsnotify_add_mark() in
> untag_chunk()]
> 
> Ouch.  Thanks for catching this ... what is your goal with these
> patches, are you targeting this as a fix during the v4.10-rcX cycle?
> If not, any objections if I pull this patch into the audit tree and
> send this to Linus during the v4.10-rcX cycle (assuming it passes
> testing, yadda yadda)?

Sure, go ahead. I plan these patches for the next merge window. So I can
rebase the series once you merge audit fixes...

								Honza
> 
> 
> > diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
> > index f3130eb0a4bd..156b6a93f4fc 100644
> > --- a/kernel/audit_tree.c
> > +++ b/kernel/audit_tree.c
> > @@ -231,6 +231,7 @@ static void untag_chunk(struct node *p)
> >         if (size)
> >                 new = alloc_chunk(size);
> >
> > +       mutex_lock(&entry->group->mark_mutex);
> >         spin_lock(&entry->lock);
> >         if (chunk->dead || !entry->inode) {
> >                 spin_unlock(&entry->lock);
> > @@ -258,7 +259,8 @@ static void untag_chunk(struct node *p)
> >         if (!new)
> >                 goto Fallback;
> >
> > -       if (fsnotify_add_mark(&new->mark, entry->group, entry->inode, NULL, 1)) {
> > +       if (fsnotify_add_mark_locked(&new->mark, entry->group, entry->inode,
> > +                                    NULL, 1)) {
> >                 fsnotify_put_mark(&new->mark);
> >                 goto Fallback;
> >         }
> > @@ -309,6 +311,7 @@ static void untag_chunk(struct node *p)
> >         spin_unlock(&hash_lock);
> >         spin_unlock(&entry->lock);
> >  out:
> > +       mutex_unlock(&entry->group->mark_mutex);
> >         fsnotify_put_mark(entry);
> >         spin_lock(&hash_lock);
> >  }
> > @@ -385,17 +388,21 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
> >
> >         chunk_entry = &chunk->mark;
> >
> > +       mutex_lock(&old_entry->group->mark_mutex);
> >         spin_lock(&old_entry->lock);
> >         if (!old_entry->inode) {
> >                 /* old_entry is being shot, lets just lie */
> >                 spin_unlock(&old_entry->lock);
> > +               mutex_unlock(&old_entry->group->mark_mutex);
> >                 fsnotify_put_mark(old_entry);
> >                 free_chunk(chunk);
> >                 return -ENOENT;
> >         }
> >
> > -       if (fsnotify_add_mark(chunk_entry, old_entry->group, old_entry->inode, NULL, 1)) {
> > +       if (fsnotify_add_mark_locked(chunk_entry, old_entry->group,
> > +                                    old_entry->inode, NULL, 1)) {
> >                 spin_unlock(&old_entry->lock);
> > +               mutex_unlock(&old_entry->group->mark_mutex);
> >                 fsnotify_put_mark(chunk_entry);
> >                 fsnotify_put_mark(old_entry);
> >                 return -ENOSPC;
> > @@ -411,6 +418,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
> >                 chunk->dead = 1;
> >                 spin_unlock(&chunk_entry->lock);
> >                 spin_unlock(&old_entry->lock);
> > +               mutex_unlock(&old_entry->group->mark_mutex);
> >
> >                 fsnotify_destroy_mark(chunk_entry, audit_tree_group);
> >
> > @@ -443,6 +451,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
> >         spin_unlock(&hash_lock);
> >         spin_unlock(&chunk_entry->lock);
> >         spin_unlock(&old_entry->lock);
> > +       mutex_unlock(&old_entry->group->mark_mutex);
> >         fsnotify_destroy_mark(old_entry, audit_tree_group);
> >         fsnotify_put_mark(chunk_entry); /* drop initial reference */
> >         fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
> > --
> > 2.10.2
> >
> 
> 
> 
> -- 
> paul moore
> www.paul-moore.com
Paul Moore Dec. 23, 2016, 2:17 p.m. UTC | #3
On Fri, Dec 23, 2016 at 8:24 AM, Jan Kara <jack@suse.cz> wrote:
> On Thu 22-12-16 18:18:36, Paul Moore wrote:
>> On Thu, Dec 22, 2016 at 4:15 AM, Jan Kara <jack@suse.cz> wrote:
>> > Audit tree code was happily adding new notification marks while holding
>> > spinlocks. Since fsnotify_add_mark() acquires group->mark_mutex this can
>> > lead to sleeping while holding a spinlock, deadlocks due to lock
>> > inversion, and probably other fun. Fix the problem by acquiring
>> > group->mark_mutex earlier.
>> >
>> > CC: Paul Moore <paul@paul-moore.com>
>> > Signed-off-by: Jan Kara <jack@suse.cz>
>> > ---
>> >  kernel/audit_tree.c | 13 +++++++++++--
>> >  1 file changed, 11 insertions(+), 2 deletions(-)
>>
>> [SIDE NOTE: this patch explains your comments and my earlier concern
>> about the locked/unlocked variants of fsnotify_add_mark() in
>> untag_chunk()]
>>
>> Ouch.  Thanks for catching this ... what is your goal with these
>> patches, are you targeting this as a fix during the v4.10-rcX cycle?
>> If not, any objections if I pull this patch into the audit tree and
>> send this to Linus during the v4.10-rcX cycle (assuming it passes
>> testing, yadda yadda)?
>
> Sure, go ahead. I plan these patches for the next merge window. So I can
> rebase the series once you merge audit fixes...

Okay, great.  I'll merge this patch into the audit/stable-4.10 branch
for Linus, but there will likely be some delays due to
holidays/vacation on my end.

Thanks again for your help fixing this, I really appreciate it.

>> > diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
>> > index f3130eb0a4bd..156b6a93f4fc 100644
>> > --- a/kernel/audit_tree.c
>> > +++ b/kernel/audit_tree.c
>> > @@ -231,6 +231,7 @@ static void untag_chunk(struct node *p)
>> >         if (size)
>> >                 new = alloc_chunk(size);
>> >
>> > +       mutex_lock(&entry->group->mark_mutex);
>> >         spin_lock(&entry->lock);
>> >         if (chunk->dead || !entry->inode) {
>> >                 spin_unlock(&entry->lock);
>> > @@ -258,7 +259,8 @@ static void untag_chunk(struct node *p)
>> >         if (!new)
>> >                 goto Fallback;
>> >
>> > -       if (fsnotify_add_mark(&new->mark, entry->group, entry->inode, NULL, 1)) {
>> > +       if (fsnotify_add_mark_locked(&new->mark, entry->group, entry->inode,
>> > +                                    NULL, 1)) {
>> >                 fsnotify_put_mark(&new->mark);
>> >                 goto Fallback;
>> >         }
>> > @@ -309,6 +311,7 @@ static void untag_chunk(struct node *p)
>> >         spin_unlock(&hash_lock);
>> >         spin_unlock(&entry->lock);
>> >  out:
>> > +       mutex_unlock(&entry->group->mark_mutex);
>> >         fsnotify_put_mark(entry);
>> >         spin_lock(&hash_lock);
>> >  }
>> > @@ -385,17 +388,21 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
>> >
>> >         chunk_entry = &chunk->mark;
>> >
>> > +       mutex_lock(&old_entry->group->mark_mutex);
>> >         spin_lock(&old_entry->lock);
>> >         if (!old_entry->inode) {
>> >                 /* old_entry is being shot, lets just lie */
>> >                 spin_unlock(&old_entry->lock);
>> > +               mutex_unlock(&old_entry->group->mark_mutex);
>> >                 fsnotify_put_mark(old_entry);
>> >                 free_chunk(chunk);
>> >                 return -ENOENT;
>> >         }
>> >
>> > -       if (fsnotify_add_mark(chunk_entry, old_entry->group, old_entry->inode, NULL, 1)) {
>> > +       if (fsnotify_add_mark_locked(chunk_entry, old_entry->group,
>> > +                                    old_entry->inode, NULL, 1)) {
>> >                 spin_unlock(&old_entry->lock);
>> > +               mutex_unlock(&old_entry->group->mark_mutex);
>> >                 fsnotify_put_mark(chunk_entry);
>> >                 fsnotify_put_mark(old_entry);
>> >                 return -ENOSPC;
>> > @@ -411,6 +418,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
>> >                 chunk->dead = 1;
>> >                 spin_unlock(&chunk_entry->lock);
>> >                 spin_unlock(&old_entry->lock);
>> > +               mutex_unlock(&old_entry->group->mark_mutex);
>> >
>> >                 fsnotify_destroy_mark(chunk_entry, audit_tree_group);
>> >
>> > @@ -443,6 +451,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
>> >         spin_unlock(&hash_lock);
>> >         spin_unlock(&chunk_entry->lock);
>> >         spin_unlock(&old_entry->lock);
>> > +       mutex_unlock(&old_entry->group->mark_mutex);
>> >         fsnotify_destroy_mark(old_entry, audit_tree_group);
>> >         fsnotify_put_mark(chunk_entry); /* drop initial reference */
>> >         fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
>> > --
>> > 2.10.2
>> >
Paul Moore Dec. 26, 2016, 4:33 p.m. UTC | #4
On Fri, Dec 23, 2016 at 9:17 AM, Paul Moore <paul@paul-moore.com> wrote:
> On Fri, Dec 23, 2016 at 8:24 AM, Jan Kara <jack@suse.cz> wrote:
>> On Thu 22-12-16 18:18:36, Paul Moore wrote:
>>> On Thu, Dec 22, 2016 at 4:15 AM, Jan Kara <jack@suse.cz> wrote:
>>> > Audit tree code was happily adding new notification marks while holding
>>> > spinlocks. Since fsnotify_add_mark() acquires group->mark_mutex this can
>>> > lead to sleeping while holding a spinlock, deadlocks due to lock
>>> > inversion, and probably other fun. Fix the problem by acquiring
>>> > group->mark_mutex earlier.
>>> >
>>> > CC: Paul Moore <paul@paul-moore.com>
>>> > Signed-off-by: Jan Kara <jack@suse.cz>
>>> > ---
>>> >  kernel/audit_tree.c | 13 +++++++++++--
>>> >  1 file changed, 11 insertions(+), 2 deletions(-)
>>>
>>> [SIDE NOTE: this patch explains your comments and my earlier concern
>>> about the locked/unlocked variants of fsnotify_add_mark() in
>>> untag_chunk()]
>>>
>>> Ouch.  Thanks for catching this ... what is your goal with these
>>> patches, are you targeting this as a fix during the v4.10-rcX cycle?
>>> If not, any objections if I pull this patch into the audit tree and
>>> send this to Linus during the v4.10-rcX cycle (assuming it passes
>>> testing, yadda yadda)?
>>
>> Sure, go ahead. I plan these patches for the next merge window. So I can
>> rebase the series once you merge audit fixes...
>
> Okay, great.  I'll merge this patch in the audit/stable-4.10 branch
> for Linus but there will likely be some delays due to
> holidays/vacation on my end.
>
> Thanks again for your help fixing this, I really appreciate it.

I merged this patch, as well as the "Remove fsnotify_duplicate_mark()"
patch (to make things cleaner when merging this patch), and did a quick
test using the audit-testsuite ... the run hung on the "file_create"
tests.  Unfortunately, I'm traveling right now for the holidays and
will not likely have a chance to debug this much further until after
the new year, but I thought I would mention it in case you have some
time to look into this failure.

For reference, here is the audit-testsuite again:

* https://github.com/linux-audit/audit-testsuite

... and if you have a Fedora test system, here is the Rawhide kernel I
used to test (it is basically my kernel-secnext test kernel with those
two patches mentioned above added on top):

* https://copr.fedorainfracloud.org/coprs/pcmoore/kernel-testing/build/492386
diff mbox

Patch

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index f3130eb0a4bd..156b6a93f4fc 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -231,6 +231,7 @@  static void untag_chunk(struct node *p)
 	if (size)
 		new = alloc_chunk(size);
 
+	mutex_lock(&entry->group->mark_mutex);
 	spin_lock(&entry->lock);
 	if (chunk->dead || !entry->inode) {
 		spin_unlock(&entry->lock);
@@ -258,7 +259,8 @@  static void untag_chunk(struct node *p)
 	if (!new)
 		goto Fallback;
 
-	if (fsnotify_add_mark(&new->mark, entry->group, entry->inode, NULL, 1)) {
+	if (fsnotify_add_mark_locked(&new->mark, entry->group, entry->inode,
+				     NULL, 1)) {
 		fsnotify_put_mark(&new->mark);
 		goto Fallback;
 	}
@@ -309,6 +311,7 @@  static void untag_chunk(struct node *p)
 	spin_unlock(&hash_lock);
 	spin_unlock(&entry->lock);
 out:
+	mutex_unlock(&entry->group->mark_mutex);
 	fsnotify_put_mark(entry);
 	spin_lock(&hash_lock);
 }
@@ -385,17 +388,21 @@  static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 
 	chunk_entry = &chunk->mark;
 
+	mutex_lock(&old_entry->group->mark_mutex);
 	spin_lock(&old_entry->lock);
 	if (!old_entry->inode) {
 		/* old_entry is being shot, lets just lie */
 		spin_unlock(&old_entry->lock);
+		mutex_unlock(&old_entry->group->mark_mutex);
 		fsnotify_put_mark(old_entry);
 		free_chunk(chunk);
 		return -ENOENT;
 	}
 
-	if (fsnotify_add_mark(chunk_entry, old_entry->group, old_entry->inode, NULL, 1)) {
+	if (fsnotify_add_mark_locked(chunk_entry, old_entry->group,
+				     old_entry->inode, NULL, 1)) {
 		spin_unlock(&old_entry->lock);
+		mutex_unlock(&old_entry->group->mark_mutex);
 		fsnotify_put_mark(chunk_entry);
 		fsnotify_put_mark(old_entry);
 		return -ENOSPC;
@@ -411,6 +418,7 @@  static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 		chunk->dead = 1;
 		spin_unlock(&chunk_entry->lock);
 		spin_unlock(&old_entry->lock);
+		mutex_unlock(&old_entry->group->mark_mutex);
 
 		fsnotify_destroy_mark(chunk_entry, audit_tree_group);
 
@@ -443,6 +451,7 @@  static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	spin_unlock(&hash_lock);
 	spin_unlock(&chunk_entry->lock);
 	spin_unlock(&old_entry->lock);
+	mutex_unlock(&old_entry->group->mark_mutex);
 	fsnotify_destroy_mark(old_entry, audit_tree_group);
 	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
 	fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */