[10/10] audit: Replace chunk attached to mark instead of replacing mark
diff mbox

Message ID 20180710100217.12866-11-jack@suse.cz
State New
Headers show

Commit Message

Jan Kara July 10, 2018, 10:02 a.m. UTC
Audit tree code currently associates new fsnotify mark with each new
chunk. As chunk attached to an inode is replaced when new tag is added /
removed, we also need to remove old fsnotify mark and add a new one on
such occasion.  This is cumbersome and makes locking rules somewhat
difficult to follow.

Fix these problems by allocating fsnotify mark independently of chunk
and keeping it all the time while there is some chunk attached to an
inode. Also add documentation about the locking rules so that things are
easier to follow.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 kernel/audit_tree.c | 163 +++++++++++++++++++++++++++-------------------------
 1 file changed, 85 insertions(+), 78 deletions(-)

Comments

Amir Goldstein July 11, 2018, 2:17 p.m. UTC | #1
On Tue, Jul 10, 2018 at 1:02 PM, Jan Kara <jack@suse.cz> wrote:
> Audit tree code currently associates new fsnotify mark with each new
> chunk. As chunk attached to an inode is replaced when new tag is added /
> removed, we also need to remove old fsnotify mark and add a new one on
> such occasion.  This is cumbersome and makes locking rules somewhat
> difficult to follow.
>
> Fix these problems by allocating fsnotify mark independently of chunk
> and keeping it all the time while there is some chunk attached to an
> inode. Also add documentation about the locking rules so that things are
> easier to follow.
>
> Signed-off-by: Jan Kara <jack@suse.cz>
> ---

Well, if there are bugs here I can't find them, but they sure have a lot of
places to hide...

Cheers,
Amir.
Paul Moore July 27, 2018, 4:47 a.m. UTC | #2
On Tue, Jul 10, 2018 at 6:02 AM Jan Kara <jack@suse.cz> wrote:
> Audit tree code currently associates new fsnotify mark with each new
> chunk. As chunk attached to an inode is replaced when new tag is added /
> removed, we also need to remove old fsnotify mark and add a new one on
> such occasion.  This is cumbersome and makes locking rules somewhat
> difficult to follow.
>
> Fix these problems by allocating fsnotify mark independently of chunk
> and keeping it all the time while there is some chunk attached to an
> inode. Also add documentation about the locking rules so that things are
> easier to follow.
>
> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
>  kernel/audit_tree.c | 163 +++++++++++++++++++++++++++-------------------------
>  1 file changed, 85 insertions(+), 78 deletions(-)

This is a really nice improvement, thanks!

> diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
> index aec9b27a20ff..40f61de77dd0 100644
> --- a/kernel/audit_tree.c
> +++ b/kernel/audit_tree.c
> @@ -272,6 +273,20 @@ static struct audit_chunk *find_chunk(struct node *p)
>         return container_of(p, struct audit_chunk, owners[0]);
>  }
>
> +static void replace_mark_chunk(struct fsnotify_mark *entry,
> +                              struct audit_chunk *chunk)
> +{
> +       struct audit_chunk *old;
> +
> +       assert_spin_locked(&hash_lock);
> +       old = AUDIT_M(entry)->chunk;
> +       AUDIT_M(entry)->chunk = chunk;
> +       if (chunk)
> +               chunk->mark = entry;
> +       if (old)
> +               old->mark = NULL;

Is it necessary that we check to see if chunk and old are non-NULL?
It seems like we would always want to set chunk->mark to entry and set
old->mark to NULL, yes?

> @@ -321,29 +341,31 @@ static void untag_chunk(struct node *p)
>
>         mutex_lock(&entry->group->mark_mutex);
>         /*
> -        * mark_mutex protects mark from getting detached and thus also from
> -        * mark->connector->obj getting NULL.
> +        * mark_mutex protects mark stabilizes chunk attached to the mark so we
> +        * can check whether it didn't change while we've dropped hash_lock.

I think your new text could use some revision, the "protects mark
stabilizes chunk" is odd.

>          */
> -       if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
> +       if (!(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
> +           AUDIT_M(entry)->chunk != chunk) {
>                 mutex_unlock(&entry->group->mark_mutex);
>                 if (new)
> -                       fsnotify_put_mark(new->mark);
> +                       kfree(new);

Since we are just calling kfree() now we can do away with the "if (new)" check.

--
paul moore
www.paul-moore.com
Jan Kara Sept. 4, 2018, 2:11 p.m. UTC | #3
On Fri 27-07-18 00:47:42, Paul Moore wrote:
> On Tue, Jul 10, 2018 at 6:02 AM Jan Kara <jack@suse.cz> wrote:
> > diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
> > index aec9b27a20ff..40f61de77dd0 100644
> > --- a/kernel/audit_tree.c
> > +++ b/kernel/audit_tree.c
> > @@ -272,6 +273,20 @@ static struct audit_chunk *find_chunk(struct node *p)
> >         return container_of(p, struct audit_chunk, owners[0]);
> >  }
> >
> > +static void replace_mark_chunk(struct fsnotify_mark *entry,
> > +                              struct audit_chunk *chunk)
> > +{
> > +       struct audit_chunk *old;
> > +
> > +       assert_spin_locked(&hash_lock);
> > +       old = AUDIT_M(entry)->chunk;
> > +       AUDIT_M(entry)->chunk = chunk;
> > +       if (chunk)
> > +               chunk->mark = entry;
> > +       if (old)
> > +               old->mark = NULL;
> 
> Is it necessary that we check to see if chunk and old are non-NULL?
> It seems like we would always want to set chunk->mark to entry and set
> old->mark to NULL, yes?

Both checks are needed - 'old' can be NULL if we use replace_mark_chunk()
to attach first chunk to mark. 'chunk' can be NULL if we use
replace_mark_chunk() to detach mark from current chunk when destroying it.

> > @@ -321,29 +341,31 @@ static void untag_chunk(struct node *p)
> >
> >         mutex_lock(&entry->group->mark_mutex);
> >         /*
> > -        * mark_mutex protects mark from getting detached and thus also from
> > -        * mark->connector->obj getting NULL.
> > +        * mark_mutex protects mark stabilizes chunk attached to the mark so we
> > +        * can check whether it didn't change while we've dropped hash_lock.
> 
> I think your new text could use some revision, the "protects mark
> stabilizes chunk" is odd.

Yup, I'll fix that.

> >          */
> > -       if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
> > +       if (!(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
> > +           AUDIT_M(entry)->chunk != chunk) {
> >                 mutex_unlock(&entry->group->mark_mutex);
> >                 if (new)
> > -                       fsnotify_put_mark(new->mark);
> > +                       kfree(new);
> 
> Since we are just calling kfree() now we can do away with the "if (new)"
> check.

Right, I'll do that.

								Honza

Patch
diff mbox

diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index aec9b27a20ff..40f61de77dd0 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -27,7 +27,6 @@  struct audit_chunk {
 	unsigned long key;
 	struct fsnotify_mark *mark;
 	struct list_head trees;		/* with root here */
-	int dead;
 	int count;
 	atomic_long_t refs;
 	struct rcu_head head;
@@ -48,8 +47,15 @@  static LIST_HEAD(prune_list);
 static struct task_struct *prune_thread;
 
 /*
- * One struct chunk is attached to each inode of interest.
- * We replace struct chunk on tagging/untagging.
+ * One struct chunk is attached to each inode of interest through
+ * audit_tree_mark (fsnotify mark). We replace struct chunk on tagging /
+ * untagging, the mark is stable as long as there is chunk attached. The
+ * association between mark and chunk is protected by hash_lock and
+ * audit_tree_group->mark_mutex. Thus as long as we hold
+ * audit_tree_group->mark_mutex and check that the mark is alive by
+ * FSNOTIFY_MARK_FLAG_ATTACHED flag check, we are sure the mark points to
+ * the current chunk.
+ *
  * Rules have pointer to struct audit_tree.
  * Rules have struct list_head rlist forming a list of rules over
  * the same tree.
@@ -68,8 +74,12 @@  static struct task_struct *prune_thread;
  * tree is refcounted; one reference for "some rules on rules_list refer to
  * it", one for each chunk with pointer to it.
  *
- * chunk is refcounted by embedded fsnotify_mark + .refs (non-zero refcount
- * of watch contributes 1 to .refs).
+ * chunk is refcounted by embedded .refs. Mark associated with the chunk holds
+ * one chunk reference. This reference is dropped either when a mark is going
+ * to be freed (corresponding inode goes away) or when chunk attached to the
+ * mark gets replaced. This reference must be dropped using
+ * audit_mark_put_chunk() to make sure the reference is dropped only after RCU
+ * grace period as it protects RCU readers of the hash table.
  *
  * node.index allows to get from node.list to containing chunk.
  * MSB of that sucker is stolen to mark taggings that we might have to
@@ -155,8 +165,6 @@  static inline struct audit_tree_mark *AUDIT_M(struct fsnotify_mark *entry)
 
 static void audit_tree_destroy_watch(struct fsnotify_mark *entry)
 {
-	struct audit_chunk *chunk = AUDIT_M(entry)->chunk;
-	audit_mark_put_chunk(chunk);
 	kmem_cache_free(audit_tree_mark_cachep, entry);
 }
 
@@ -183,13 +191,6 @@  static struct audit_chunk *alloc_chunk(int count)
 	if (!chunk)
 		return NULL;
 
-	chunk->mark = alloc_fsnotify_mark();
-	if (!chunk->mark) {
-		kfree(chunk);
-		return NULL;
-	}
-	AUDIT_M(chunk->mark)->chunk = chunk;
-
 	INIT_LIST_HEAD(&chunk->hash);
 	INIT_LIST_HEAD(&chunk->trees);
 	chunk->count = count;
@@ -272,6 +273,20 @@  static struct audit_chunk *find_chunk(struct node *p)
 	return container_of(p, struct audit_chunk, owners[0]);
 }
 
+static void replace_mark_chunk(struct fsnotify_mark *entry,
+			       struct audit_chunk *chunk)
+{
+	struct audit_chunk *old;
+
+	assert_spin_locked(&hash_lock);
+	old = AUDIT_M(entry)->chunk;
+	AUDIT_M(entry)->chunk = chunk;
+	if (chunk)
+		chunk->mark = entry;
+	if (old)
+		old->mark = NULL;
+}
+
 static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
 			  struct node *skip)
 {
@@ -295,6 +310,7 @@  static void replace_chunk(struct audit_chunk *new, struct audit_chunk *old,
 		get_tree(owner);
 		list_replace_init(&old->owners[j].list, &new->owners[i].list);
 	}
+	replace_mark_chunk(old->mark, new);
 	/*
 	 * Make sure chunk is fully initialized before making it visible in the
 	 * hash. Pairs with a data dependency barrier in READ_ONCE() in
@@ -312,6 +328,10 @@  static void untag_chunk(struct node *p)
 	struct audit_tree *owner;
 	int size = chunk->count - 1;
 
+	/* Racing with audit_tree_freeing_mark()? */
+	if (!entry)
+		return;
+
 	fsnotify_get_mark(entry);
 
 	spin_unlock(&hash_lock);
@@ -321,29 +341,31 @@  static void untag_chunk(struct node *p)
 
 	mutex_lock(&entry->group->mark_mutex);
 	/*
-	 * mark_mutex protects mark from getting detached and thus also from
-	 * mark->connector->obj getting NULL.
+	 * mark_mutex stabilizes chunk attached to the mark so we can check
+	 * whether it didn't change while we've dropped hash_lock.
 	 */
-	if (chunk->dead || !(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
+	if (!(entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED) ||
+	    AUDIT_M(entry)->chunk != chunk) {
 		mutex_unlock(&entry->group->mark_mutex);
 		if (new)
-			fsnotify_put_mark(new->mark);
+			kfree(new);
 		goto out;
 	}
 
 	owner = p->owner;
 
 	if (!size) {
-		chunk->dead = 1;
 		spin_lock(&hash_lock);
 		list_del_init(&chunk->trees);
 		if (owner->root == chunk)
 			owner->root = NULL;
 		list_del_init(&p->list);
 		list_del_rcu(&chunk->hash);
+		replace_mark_chunk(entry, NULL);
 		spin_unlock(&hash_lock);
 		fsnotify_detach_mark(entry);
 		mutex_unlock(&entry->group->mark_mutex);
+		audit_mark_put_chunk(chunk);
 		fsnotify_free_mark(entry);
 		goto out;
 	}
@@ -351,13 +373,6 @@  static void untag_chunk(struct node *p)
 	if (!new)
 		goto Fallback;
 
-	if (fsnotify_add_mark_locked(new->mark, entry->connector->obj,
-				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
-		fsnotify_put_mark(new->mark);
-		goto Fallback;
-	}
-
-	chunk->dead = 1;
 	spin_lock(&hash_lock);
 	if (owner->root == chunk) {
 		list_del_init(&owner->same_root);
@@ -370,10 +385,8 @@  static void untag_chunk(struct node *p)
 	 */
 	replace_chunk(new, chunk, p);
 	spin_unlock(&hash_lock);
-	fsnotify_detach_mark(entry);
 	mutex_unlock(&entry->group->mark_mutex);
-	fsnotify_free_mark(entry);
-	fsnotify_put_mark(new->mark);	/* drop initial reference */
+	audit_mark_put_chunk(chunk);
 	goto out;
 
 Fallback:
@@ -404,23 +417,31 @@  static int create_chunk(struct inode *inode, struct audit_tree *tree)
 		return -ENOMEM;
 	}
 
-	entry = chunk->mark;
+	entry = alloc_fsnotify_mark();
+	if (!entry) {
+		mutex_unlock(&audit_tree_group->mark_mutex);
+		kfree(chunk);
+		return -ENOMEM;
+	}
+
 	if (fsnotify_add_inode_mark_locked(entry, inode, 0)) {
 		mutex_unlock(&audit_tree_group->mark_mutex);
 		fsnotify_put_mark(entry);
+		kfree(chunk);
 		return -ENOSPC;
 	}
 
 	spin_lock(&hash_lock);
 	if (tree->goner) {
 		spin_unlock(&hash_lock);
-		chunk->dead = 1;
 		fsnotify_detach_mark(entry);
 		mutex_unlock(&audit_tree_group->mark_mutex);
 		fsnotify_free_mark(entry);
 		fsnotify_put_mark(entry);
+		kfree(chunk);
 		return 0;
 	}
+	replace_mark_chunk(entry, chunk);
 	chunk->owners[0].index = (1U << 31);
 	chunk->owners[0].owner = tree;
 	get_tree(tree);
@@ -437,33 +458,41 @@  static int create_chunk(struct inode *inode, struct audit_tree *tree)
 	insert_hash(chunk);
 	spin_unlock(&hash_lock);
 	mutex_unlock(&audit_tree_group->mark_mutex);
-	fsnotify_put_mark(entry);	/* drop initial reference */
+	/*
+	 * Drop our initial reference. When the mark we point to is getting
+	 * freed, we get a notification through the ->freeing_mark callback and
+	 * clean up the chunk pointing to this mark.
+	 */
+	fsnotify_put_mark(entry);
 	return 0;
 }
 
 /* the first tagged inode becomes root of tree */
 static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 {
-	struct fsnotify_mark *old_entry, *chunk_entry;
+	struct fsnotify_mark *entry;
 	struct audit_chunk *chunk, *old;
 	struct node *p;
 	int n;
 
 	mutex_lock(&audit_tree_group->mark_mutex);
-	old_entry = fsnotify_find_mark(&inode->i_fsnotify_marks,
-				       audit_tree_group);
-	if (!old_entry)
+	entry = fsnotify_find_mark(&inode->i_fsnotify_marks, audit_tree_group);
+	if (!entry)
 		return create_chunk(inode, tree);
 
-	old = AUDIT_M(old_entry)->chunk;
-
+	/*
+	 * Found mark is guaranteed to be attached and mark_mutex protects mark
+	 * from getting detached and thus it makes sure there is chunk attached
+	 * to the mark.
+	 */
 	/* are we already there? */
 	spin_lock(&hash_lock);
+	old = AUDIT_M(entry)->chunk;
 	for (n = 0; n < old->count; n++) {
 		if (old->owners[n].owner == tree) {
 			spin_unlock(&hash_lock);
 			mutex_unlock(&audit_tree_group->mark_mutex);
-			fsnotify_put_mark(old_entry);
+			fsnotify_put_mark(entry);
 			return 0;
 		}
 	}
@@ -472,41 +501,16 @@  static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	chunk = alloc_chunk(old->count + 1);
 	if (!chunk) {
 		mutex_unlock(&audit_tree_group->mark_mutex);
-		fsnotify_put_mark(old_entry);
+		fsnotify_put_mark(entry);
 		return -ENOMEM;
 	}
 
-	chunk_entry = chunk->mark;
-
-	/*
-	 * mark_mutex protects mark from getting detached and thus also from
-	 * mark->connector->obj getting NULL.
-	 */
-	if (!(old_entry->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) {
-		/* old_entry is being shot, lets just lie */
-		mutex_unlock(&audit_tree_group->mark_mutex);
-		fsnotify_put_mark(old_entry);
-		fsnotify_put_mark(chunk->mark);
-		return -ENOENT;
-	}
-
-	if (fsnotify_add_mark_locked(chunk_entry, old_entry->connector->obj,
-				     FSNOTIFY_OBJ_TYPE_INODE, 1)) {
-		mutex_unlock(&audit_tree_group->mark_mutex);
-		fsnotify_put_mark(chunk_entry);
-		fsnotify_put_mark(old_entry);
-		return -ENOSPC;
-	}
-
 	spin_lock(&hash_lock);
 	if (tree->goner) {
 		spin_unlock(&hash_lock);
-		chunk->dead = 1;
-		fsnotify_detach_mark(chunk_entry);
 		mutex_unlock(&audit_tree_group->mark_mutex);
-		fsnotify_free_mark(chunk_entry);
-		fsnotify_put_mark(chunk_entry);
-		fsnotify_put_mark(old_entry);
+		fsnotify_put_mark(entry);
+		kfree(chunk);
 		return 0;
 	}
 	p = &chunk->owners[chunk->count - 1];
@@ -514,7 +518,6 @@  static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	p->owner = tree;
 	get_tree(tree);
 	list_add(&p->list, &tree->chunks);
-	old->dead = 1;
 	if (!tree->root) {
 		tree->root = chunk;
 		list_add(&tree->same_root, &chunk->trees);
@@ -525,11 +528,10 @@  static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	 */
 	replace_chunk(chunk, old, NULL);
 	spin_unlock(&hash_lock);
-	fsnotify_detach_mark(old_entry);
 	mutex_unlock(&audit_tree_group->mark_mutex);
-	fsnotify_free_mark(old_entry);
-	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
-	fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
+	fsnotify_put_mark(entry); /* pairs with fsnotify_find_mark() */
+	audit_mark_put_chunk(old);
+
 	return 0;
 }
 
@@ -996,10 +998,6 @@  static void evict_chunk(struct audit_chunk *chunk)
 	int need_prune = 0;
 	int n;
 
-	if (chunk->dead)
-		return;
-
-	chunk->dead = 1;
 	mutex_lock(&audit_filter_mutex);
 	spin_lock(&hash_lock);
 	while (!list_empty(&chunk->trees)) {
@@ -1038,9 +1036,18 @@  static int audit_tree_handle_event(struct fsnotify_group *group,
 
 static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify_group *group)
 {
-	struct audit_chunk *chunk = AUDIT_M(entry)->chunk;
+	struct audit_chunk *chunk;
 
-	evict_chunk(chunk);
+	mutex_lock(&entry->group->mark_mutex);
+	spin_lock(&hash_lock);
+	chunk = AUDIT_M(entry)->chunk;
+	replace_mark_chunk(entry, NULL);
+	spin_unlock(&hash_lock);
+	mutex_unlock(&entry->group->mark_mutex);
+	if (chunk) {
+		evict_chunk(chunk);
+		audit_mark_put_chunk(chunk);
+	}
 
 	/*
 	 * We are guaranteed to have at least one reference to the mark from