Message ID | 20170328161332.29736-21-jack@suse.cz (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Tue, Mar 28, 2017 at 06:13:19PM +0200, Jan Kara wrote: > Instead of removing mark from object list from fsnotify_detach_mark(), > remove the mark when last reference to the mark is dropped. This will > allow fanotify to wait for userspace response to event without having to > hold onto fsnotify_mark_srcu. > > To avoid pinning inodes by elevated refcount (and thus e.g. delaying > file deletion) while someone holds mark reference, we detach connector > from the object also from fsnotify_destroy_marks() and not only after > removing last mark from the list as it was now. > > Reviewed-by: Amir Goldstein <amir73il@gmail.com> > Signed-off-by: Jan Kara <jack@suse.cz> > --- > fs/notify/mark.c | 185 ++++++++++++++++++++++++--------------- > include/linux/fsnotify_backend.h | 4 +- > kernel/audit_tree.c | 24 ++--- > 3 files changed, 121 insertions(+), 92 deletions(-) > > diff --git a/fs/notify/mark.c b/fs/notify/mark.c > index 2bd9cdb094e1..7eef1824a76c 100644 > --- a/fs/notify/mark.c > +++ b/fs/notify/mark.c > @@ -49,7 +49,13 @@ > * > * A list of notification marks relating to inode / mnt is contained in > * fsnotify_mark_connector. That structure is alive as long as there are any > - * marks in the list and is also protected by fsnotify_mark_srcu. > + * marks in the list and is also protected by fsnotify_mark_srcu. A mark gets > + * detached from fsnotify_mark_connector when last reference to the mark is > + * dropped. Thus having mark reference is enough to protect mark->connector > + * pointer and to make sure fsnotify_mark_connector cannot disappear. Also > + * because we remove mark from g_list before dropping mark reference associated > + * with that, any mark found through g_list is guaranteed to have > + * mark->connector set until we drop group->mark_mutex. > * > * LIFETIME: > * Inode marks survive between when they are added to an inode and when their > @@ -103,26 +109,16 @@ void fsnotify_get_mark(struct fsnotify_mark *mark) > atomic_inc(&mark->refcnt); > } > > -void fsnotify_put_mark(struct fsnotify_mark *mark) > -{ > - if (atomic_dec_and_test(&mark->refcnt)) { > - spin_lock(&destroy_lock); > - list_add(&mark->g_list, &destroy_list); > - spin_unlock(&destroy_lock); > - queue_delayed_work(system_unbound_wq, &reaper_work, > - FSNOTIFY_REAPER_DELAY); > - } > -} > - > static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) > { > u32 new_mask = 0; > struct fsnotify_mark *mark; > > assert_spin_locked(&conn->lock); > - hlist_for_each_entry(mark, &conn->list, obj_list) > - new_mask |= mark->mask; > - > + hlist_for_each_entry(mark, &conn->list, obj_list) { > + if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) > + new_mask |= mark->mask; > + } > if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) > conn->inode->i_fsnotify_mask = new_mask; > else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) > @@ -131,8 +127,9 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) > > /* > * Calculate mask of events for a list of marks. The caller must make sure > - * connector cannot disappear under us (usually by holding a mark->lock or > - * mark->group->mark_mutex for a mark on this list). > + * connector and connector->inode cannot disappear under us. Callers achieve > + * this by holding a mark->lock or mark->group->mark_mutex for a mark on this > + * list. > */ > void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) > { > @@ -164,30 +161,59 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) > } > } > > -static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark) > +static struct inode *fsnotify_detach_connector_from_object( > + struct fsnotify_mark_connector *conn) > +{ > + struct inode *inode = NULL; > + > + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { > + inode = conn->inode; > + rcu_assign_pointer(inode->i_fsnotify_marks, NULL); > + inode->i_fsnotify_mask = 0; > + conn->inode = NULL; > + conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; > + } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { > + rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks, > + NULL); > + real_mount(conn->mnt)->mnt_fsnotify_mask = 0; > + conn->mnt = NULL; > + conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; > + } > + > + return inode; > +} Could this helper been added in the previous patch where the code was introduced? > + > +static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) > +{ > + if (mark->group) > + fsnotify_put_group(mark->group); > + mark->free_mark(mark); > +} > + > +void fsnotify_put_mark(struct fsnotify_mark *mark) > { > struct fsnotify_mark_connector *conn; > struct inode *inode = NULL; > bool free_conn = false; > > + /* Catch marks that were actually never attached to object */ > + if (!mark->connector) { > + if (atomic_dec_and_test(&mark->refcnt)) > + fsnotify_final_mark_destroy(mark); > + return; > + } > + > + /* > + * We have to be careful so that traversals of obj_list under lock can > + * safely grab mark reference. > + */ > + if (!atomic_dec_and_lock(&mark->refcnt, &mark->connector->lock)) > + return; > + > conn = mark->connector; > - spin_lock(&conn->lock); > hlist_del_init_rcu(&mark->obj_list); > if (hlist_empty(&conn->list)) { > - if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { > - inode = conn->inode; > - rcu_assign_pointer(inode->i_fsnotify_marks, NULL); > - inode->i_fsnotify_mask = 0; > - conn->inode = NULL; > - conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; > - } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { > - rcu_assign_pointer( > - real_mount(conn->mnt)->mnt_fsnotify_marks, > - NULL); > - real_mount(conn->mnt)->mnt_fsnotify_mask = 0; > - conn->mnt = NULL; > - conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; > - } > + inode = fsnotify_detach_connector_from_object(conn); > free_conn = true; > } else { > __fsnotify_recalc_mask(conn); > @@ -195,6 +221,9 @@ static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark) > mark->connector = NULL; > spin_unlock(&conn->lock); > > + if (inode) > + iput(inode); > + iput() checks for non-NULL inode. > if (free_conn) { > spin_lock(&destroy_lock); > conn->destroy_next = connector_destroy_list; > @@ -202,20 +231,31 @@ static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark) > spin_unlock(&destroy_lock); > queue_work(system_unbound_wq, &connector_reaper_work); > } > - > - return inode; > + /* > + * Note that we didn't update flags telling whether inode cares about > + * what's happening with children. We update these flags from > + * __fsnotify_parent() lazily when next event happens on one of our > + * children. > + */ > + spin_lock(&destroy_lock); > + list_add(&mark->g_list, &destroy_list); > + spin_unlock(&destroy_lock); > + queue_delayed_work(system_unbound_wq, &reaper_work, > + FSNOTIFY_REAPER_DELAY); > } > > /* > - * Remove mark from inode / vfsmount list, group list, drop inode reference > - * if we got one. > + * Mark mark as detached, remove it from group list. Mark still stays in object > + * list until its last reference is dropped. Note that we rely on mark being > + * removed from group list before corresponding reference to it is dropped. In > + * particular we rely on mark->connector being valid while we hold > + * group->mark_mutex if we found the mark through g_list. > * > * Must be called with group->mark_mutex held. The caller must either hold > * reference to the mark or be protected by fsnotify_mark_srcu. > */ > void fsnotify_detach_mark(struct fsnotify_mark *mark) > { > - struct inode *inode = NULL; > struct fsnotify_group *group = mark->group; > > WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); > @@ -224,31 +264,15 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark) > !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); > > spin_lock(&mark->lock); > - > /* something else already called this function on this mark */ > if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { > spin_unlock(&mark->lock); > return; > } > - > mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED; > - > - inode = fsnotify_detach_from_object(mark); > - > - /* > - * Note that we didn't update flags telling whether inode cares about > - * what's happening with children. We update these flags from > - * __fsnotify_parent() lazily when next event happens on one of our > - * children. > - */ > - > list_del_init(&mark->g_list); > - > spin_unlock(&mark->lock); > > - if (inode) > - iput(inode); > - > atomic_dec(&group->num_marks); > > /* Drop mark reference acquired in fsnotify_add_mark_locked() */ > @@ -448,7 +472,9 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, > hlist_for_each_entry(lmark, &conn->list, obj_list) { > last = lmark; > > - if ((lmark->group == mark->group) && !allow_dups) { > + if ((lmark->group == mark->group) && > + (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) && > + !allow_dups) { > err = -EEXIST; > goto out_err; > } > @@ -499,7 +525,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, > mark->group = group; > list_add(&mark->g_list, &group->marks_list); > atomic_inc(&group->num_marks); > - fsnotify_get_mark(mark); /* for i_list and g_list */ > + fsnotify_get_mark(mark); /* for g_list */ > spin_unlock(&mark->lock); > > ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups); > @@ -547,7 +573,8 @@ struct fsnotify_mark *fsnotify_find_mark( > return NULL; > > hlist_for_each_entry(mark, &conn->list, obj_list) { > - if (mark->group == group) { > + if (mark->group == group && > + (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { > fsnotify_get_mark(mark); > spin_unlock(&conn->lock); > return mark; > @@ -627,23 +654,39 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group) > void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp) > { > struct fsnotify_mark_connector *conn; > - struct fsnotify_mark *mark; > + struct fsnotify_mark *mark, *old_mark = NULL; > + struct inode *inode; > > - while ((conn = fsnotify_grab_connector(connp))) { > - /* > - * We have to be careful since we can race with e.g. > - * fsnotify_clear_marks_by_group() and once we drop the list > - * lock, mark can get removed from the obj_list and destroyed. > - * But we are holding mark reference so mark cannot be freed > - * and calling fsnotify_destroy_mark() more than once is fine. > - */ > - mark = hlist_entry(conn->list.first, struct fsnotify_mark, > - obj_list); > + conn = fsnotify_grab_connector(connp); > + if (!conn) > + return; > + /* > + * We have to be careful since we can race with e.g. > + * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the > + * list can get modified. However we are holding mark reference and > + * thus our mark cannot be removed from obj_list so we can continue > + * iteration after regaining conn->lock. > + */ > + hlist_for_each_entry(mark, &conn->list, obj_list) { > fsnotify_get_mark(mark); > spin_unlock(&conn->lock); > + if (old_mark) > + fsnotify_put_mark(old_mark); > + old_mark = mark; > fsnotify_destroy_mark(mark, mark->group); > - fsnotify_put_mark(mark); > + spin_lock(&conn->lock); > } > + /* > + * Detach list from object now so that we don't pin inode until all > + * mark references get dropped. It would lead to strange results such > + * as delaying inode deletion or blocking unmount. > + */ > + inode = fsnotify_detach_connector_from_object(conn); > + spin_unlock(&conn->lock); > + if (old_mark) > + fsnotify_put_mark(old_mark); > + if (inode) > + iput(inode); > } > > /* > @@ -676,9 +719,7 @@ void fsnotify_mark_destroy_list(void) > > list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { > list_del_init(&mark->g_list); > - if (mark->group) > - fsnotify_put_group(mark->group); > - mark->free_mark(mark); > + fsnotify_final_mark_destroy(mark); > } > } > > diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h > index 84d71b6f75f6..a483614b25d0 100644 > --- a/include/linux/fsnotify_backend.h > +++ b/include/linux/fsnotify_backend.h > @@ -245,9 +245,9 @@ struct fsnotify_mark { > struct list_head g_list; > /* Protects inode / mnt pointers, flags, masks */ > spinlock_t lock; > - /* List of marks for inode / vfsmount [connector->lock] */ > + /* List of marks for inode / vfsmount [connector->lock, mark ref] */ > struct hlist_node obj_list; > - /* Head of list of marks for an object [mark->lock, group->mark_mutex] */ > + /* Head of list of marks for an object [mark ref] */ > struct fsnotify_mark_connector *connector; > /* Events types to ignore [mark->lock, group->mark_mutex] */ > __u32 ignored_mask; > diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c > index c0e494fd8eca..152400e8d077 100644 > --- a/kernel/audit_tree.c > +++ b/kernel/audit_tree.c > @@ -172,27 +172,15 @@ static unsigned long inode_to_key(const struct inode *inode) > /* > * Function to return search key in our hash from chunk. Key 0 is special and > * should never be present in the hash. > - * > - * Must be called with chunk->mark.lock held to protect from connector > - * becoming NULL. > */ > -static unsigned long __chunk_to_key(struct audit_chunk *chunk) > +static unsigned long chunk_to_key(struct audit_chunk *chunk) > { > - if (!chunk->mark.connector) > + /* We have a reference to the mark so it should be attached. */ > + if (WARN_ON_ONCE(!chunk->mark.connector)) > return 0; Hmm, lifetime of mark previously: - created but not attached (connector is NULL, no FSNOTIFY_MARK_FLAG_ATTACHED) - attached (connector is non-NULL, FSNOTIFY_MARK_FLAG_ATTACHED) - detached (connector is NULL, no FSNOTIFY_MARK_FLAG_ATTACHED) The created, and attached states remain the same but detached now changed to: - detached (connector is non-NULL, no FSNOTIFY_MARK_FLAG_ATTACHED) Considering that, the warning seems right but the comment above not so. Maybe something like: /* We have a reference to the mark and it has been attached so we should have a valid connector. */ > return (unsigned long)chunk->mark.connector->inode; > } > > -static unsigned long chunk_to_key(struct audit_chunk *chunk) > -{ > - unsigned long key; > - > - spin_lock(&chunk->mark.lock); > - key = __chunk_to_key(chunk); > - spin_unlock(&chunk->mark.lock); > - return key; > -} > - > static inline struct list_head *chunk_hash(unsigned long key) > { > unsigned long n = key / L1_CACHE_BYTES; > @@ -202,7 +190,7 @@ static inline struct list_head *chunk_hash(unsigned long key) > /* hash_lock & entry->lock is held by caller */ > static void insert_hash(struct audit_chunk *chunk) > { > - unsigned long key = __chunk_to_key(chunk); > + unsigned long key = chunk_to_key(chunk); > struct list_head *list; > > if (!key) > @@ -263,7 +251,7 @@ static void untag_chunk(struct node *p) > > mutex_lock(&entry->group->mark_mutex); > spin_lock(&entry->lock); > - if (chunk->dead || !entry->connector) { > + if (chunk->dead || !entry->connector || !entry->connector->inode) { So should we be testing FSNOTIFY_MARK_FLAG_ATTACHED instead? Without understanding what audit is trying to do, that would be a lot more logical. Or maybe there is a reason this is correct, it just needs an explanation. > spin_unlock(&entry->lock); > mutex_unlock(&entry->group->mark_mutex); > if (new) > @@ -423,7 +411,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) > > mutex_lock(&old_entry->group->mark_mutex); > spin_lock(&old_entry->lock); > - if (!old_entry->connector) { > + if (!old_entry->connector || !old_entry->connector->inode) { > /* old_entry is being shot, lets just lie */ > spin_unlock(&old_entry->lock); > mutex_unlock(&old_entry->group->mark_mutex); > -- > 2.10.2 >
On Fri 31-03-17 17:42:17, Miklos Szeredi wrote: > > On Tue, Mar 28, 2017 at 06:13:19PM +0200, Jan Kara wrote: > > Instead of removing mark from object list from fsnotify_detach_mark(), > > remove the mark when last reference to the mark is dropped. This will > > allow fanotify to wait for userspace response to event without having to > > hold onto fsnotify_mark_srcu. > > > > To avoid pinning inodes by elevated refcount (and thus e.g. delaying > > file deletion) while someone holds mark reference, we detach connector > > from the object also from fsnotify_destroy_marks() and not only after > > removing last mark from the list as it was now. > > > > Reviewed-by: Amir Goldstein <amir73il@gmail.com> > > Signed-off-by: Jan Kara <jack@suse.cz> ... > > -static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark) > > +static struct inode *fsnotify_detach_connector_from_object( > > + struct fsnotify_mark_connector *conn) > > +{ > > + struct inode *inode = NULL; > > + > > + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { > > + inode = conn->inode; > > + rcu_assign_pointer(inode->i_fsnotify_marks, NULL); > > + inode->i_fsnotify_mask = 0; > > + conn->inode = NULL; > > + conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; > > + } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { > > + rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks, > > + NULL); > > + real_mount(conn->mnt)->mnt_fsnotify_mask = 0; > > + conn->mnt = NULL; > > + conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; > > + } > > + > > + return inode; > > +} > > Could this helper been added in the previous patch where the code was > introduced? Yeah, possibly. I'll do that. > > @@ -195,6 +221,9 @@ static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark) > > mark->connector = NULL; > > spin_unlock(&conn->lock); > > > > + if (inode) > > + iput(inode); > > + > > iput() checks for non-NULL inode. OK. > > diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c > > index c0e494fd8eca..152400e8d077 100644 > > --- a/kernel/audit_tree.c > > +++ b/kernel/audit_tree.c > > @@ -172,27 +172,15 @@ static unsigned long inode_to_key(const struct inode *inode) > > /* > > * Function to return search key in our hash from chunk. Key 0 is special and > > * should never be present in the hash. > > - * > > - * Must be called with chunk->mark.lock held to protect from connector > > - * becoming NULL. > > */ > > -static unsigned long __chunk_to_key(struct audit_chunk *chunk) > > +static unsigned long chunk_to_key(struct audit_chunk *chunk) > > { > > - if (!chunk->mark.connector) > > + /* We have a reference to the mark so it should be attached. */ > > + if (WARN_ON_ONCE(!chunk->mark.connector)) > > return 0; > > Hmm, lifetime of mark previously: > > - created but not attached (connector is NULL, no FSNOTIFY_MARK_FLAG_ATTACHED) > - attached (connector is non-NULL, FSNOTIFY_MARK_FLAG_ATTACHED) > - detached (connector is NULL, no FSNOTIFY_MARK_FLAG_ATTACHED) > > The created, and attached states remain the same but detached now changed to: > > - detached (connector is non-NULL, no FSNOTIFY_MARK_FLAG_ATTACHED) > > Considering that, the warning seems right but the comment above not so. > Maybe something like: > > /* We have a reference to the mark and it has been attached so we should > have a valid connector. */ Good point. The mark can be attached to group (that is what FSNOTIFY_MARK_FLAG_ATTACHED reflects) and to connector which is what I was speaking about in my comment. And I tend to mix these two in comments which is confusing. I'll clarify. > > @@ -202,7 +190,7 @@ static inline struct list_head *chunk_hash(unsigned long key) > > /* hash_lock & entry->lock is held by caller */ > > static void insert_hash(struct audit_chunk *chunk) > > { > > - unsigned long key = __chunk_to_key(chunk); > > + unsigned long key = chunk_to_key(chunk); > > struct list_head *list; > > > > if (!key) > > @@ -263,7 +251,7 @@ static void untag_chunk(struct node *p) > > > > mutex_lock(&entry->group->mark_mutex); > > spin_lock(&entry->lock); > > - if (chunk->dead || !entry->connector) { > > + if (chunk->dead || !entry->connector || !entry->connector->inode) { > > So should we be testing FSNOTIFY_MARK_FLAG_ATTACHED instead? Without > understanding what audit is trying to do, that would be a lot more > logical. Or maybe there is a reason this is correct, it just needs an > explanation. That's an interesting idea. AFAIU checking FSNOTIFY_MARK_FLAG_ATTACHED should be good and doing so would somewhat simplify the patches as well. I'll try to do that. Honza
On Mon 03-04-17 12:04:06, Jan Kara wrote: > On Fri 31-03-17 17:42:17, Miklos Szeredi wrote: > > > @@ -202,7 +190,7 @@ static inline struct list_head *chunk_hash(unsigned long key) > > > /* hash_lock & entry->lock is held by caller */ > > > static void insert_hash(struct audit_chunk *chunk) > > > { > > > - unsigned long key = __chunk_to_key(chunk); > > > + unsigned long key = chunk_to_key(chunk); > > > struct list_head *list; > > > > > > if (!key) > > > @@ -263,7 +251,7 @@ static void untag_chunk(struct node *p) > > > > > > mutex_lock(&entry->group->mark_mutex); > > > spin_lock(&entry->lock); > > > - if (chunk->dead || !entry->connector) { > > > + if (chunk->dead || !entry->connector || !entry->connector->inode) { > > > > So should we be testing FSNOTIFY_MARK_FLAG_ATTACHED instead? Without > > understanding what audit is trying to do, that would be a lot more > > logical. Or maybe there is a reason this is correct, it just needs an > > explanation. > > That's an interesting idea. AFAIU checking FSNOTIFY_MARK_FLAG_ATTACHED > should be good and doing so would somewhat simplify the patches as well. > I'll try to do that. OK, I did this and also realized that this actually fixes a possible NULL pointer dereference bug introduced by my series (entry->connector->inode can become NULL if 'entry' is already detached but inode is still valid at the time of the check). So I also added a comment about this. Honza
diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 2bd9cdb094e1..7eef1824a76c 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -49,7 +49,13 @@ * * A list of notification marks relating to inode / mnt is contained in * fsnotify_mark_connector. That structure is alive as long as there are any - * marks in the list and is also protected by fsnotify_mark_srcu. + * marks in the list and is also protected by fsnotify_mark_srcu. A mark gets + * detached from fsnotify_mark_connector when last reference to the mark is + * dropped. Thus having mark reference is enough to protect mark->connector + * pointer and to make sure fsnotify_mark_connector cannot disappear. Also + * because we remove mark from g_list before dropping mark reference associated + * with that, any mark found through g_list is guaranteed to have + * mark->connector set until we drop group->mark_mutex. * * LIFETIME: * Inode marks survive between when they are added to an inode and when their @@ -103,26 +109,16 @@ void fsnotify_get_mark(struct fsnotify_mark *mark) atomic_inc(&mark->refcnt); } -void fsnotify_put_mark(struct fsnotify_mark *mark) -{ - if (atomic_dec_and_test(&mark->refcnt)) { - spin_lock(&destroy_lock); - list_add(&mark->g_list, &destroy_list); - spin_unlock(&destroy_lock); - queue_delayed_work(system_unbound_wq, &reaper_work, - FSNOTIFY_REAPER_DELAY); - } -} - static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { u32 new_mask = 0; struct fsnotify_mark *mark; assert_spin_locked(&conn->lock); - hlist_for_each_entry(mark, &conn->list, obj_list) - new_mask |= mark->mask; - + hlist_for_each_entry(mark, &conn->list, obj_list) { + if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) + new_mask |= mark->mask; + } if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) conn->inode->i_fsnotify_mask = new_mask; else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) @@ -131,8 +127,9 @@ static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) /* * Calculate mask of events for a list of marks. The caller must make sure - * connector cannot disappear under us (usually by holding a mark->lock or - * mark->group->mark_mutex for a mark on this list). + * connector and connector->inode cannot disappear under us. Callers achieve + * this by holding a mark->lock or mark->group->mark_mutex for a mark on this + * list. */ void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { @@ -164,30 +161,59 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) } } -static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark) +static struct inode *fsnotify_detach_connector_from_object( + struct fsnotify_mark_connector *conn) +{ + struct inode *inode = NULL; + + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { + inode = conn->inode; + rcu_assign_pointer(inode->i_fsnotify_marks, NULL); + inode->i_fsnotify_mask = 0; + conn->inode = NULL; + conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; + } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + rcu_assign_pointer(real_mount(conn->mnt)->mnt_fsnotify_marks, + NULL); + real_mount(conn->mnt)->mnt_fsnotify_mask = 0; + conn->mnt = NULL; + conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; + } + + return inode; +} + +static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) +{ + if (mark->group) + fsnotify_put_group(mark->group); + mark->free_mark(mark); +} + +void fsnotify_put_mark(struct fsnotify_mark *mark) { struct fsnotify_mark_connector *conn; struct inode *inode = NULL; bool free_conn = false; + /* Catch marks that were actually never attached to object */ + if (!mark->connector) { + if (atomic_dec_and_test(&mark->refcnt)) + fsnotify_final_mark_destroy(mark); + return; + } + + /* + * We have to be careful so that traversals of obj_list under lock can + * safely grab mark reference. + */ + if (!atomic_dec_and_lock(&mark->refcnt, &mark->connector->lock)) + return; + conn = mark->connector; - spin_lock(&conn->lock); hlist_del_init_rcu(&mark->obj_list); if (hlist_empty(&conn->list)) { - if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { - inode = conn->inode; - rcu_assign_pointer(inode->i_fsnotify_marks, NULL); - inode->i_fsnotify_mask = 0; - conn->inode = NULL; - conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; - } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { - rcu_assign_pointer( - real_mount(conn->mnt)->mnt_fsnotify_marks, - NULL); - real_mount(conn->mnt)->mnt_fsnotify_mask = 0; - conn->mnt = NULL; - conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; - } + inode = fsnotify_detach_connector_from_object(conn); free_conn = true; } else { __fsnotify_recalc_mask(conn); @@ -195,6 +221,9 @@ static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark) mark->connector = NULL; spin_unlock(&conn->lock); + if (inode) + iput(inode); + if (free_conn) { spin_lock(&destroy_lock); conn->destroy_next = connector_destroy_list; @@ -202,20 +231,31 @@ static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark) spin_unlock(&destroy_lock); queue_work(system_unbound_wq, &connector_reaper_work); } - - return inode; + /* + * Note that we didn't update flags telling whether inode cares about + * what's happening with children. We update these flags from + * __fsnotify_parent() lazily when next event happens on one of our + * children. + */ + spin_lock(&destroy_lock); + list_add(&mark->g_list, &destroy_list); + spin_unlock(&destroy_lock); + queue_delayed_work(system_unbound_wq, &reaper_work, + FSNOTIFY_REAPER_DELAY); } /* - * Remove mark from inode / vfsmount list, group list, drop inode reference - * if we got one. + * Mark mark as detached, remove it from group list. Mark still stays in object + * list until its last reference is dropped. Note that we rely on mark being + * removed from group list before corresponding reference to it is dropped. In + * particular we rely on mark->connector being valid while we hold + * group->mark_mutex if we found the mark through g_list. * * Must be called with group->mark_mutex held. The caller must either hold * reference to the mark or be protected by fsnotify_mark_srcu. */ void fsnotify_detach_mark(struct fsnotify_mark *mark) { - struct inode *inode = NULL; struct fsnotify_group *group = mark->group; WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); @@ -224,31 +264,15 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark) !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); spin_lock(&mark->lock); - /* something else already called this function on this mark */ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { spin_unlock(&mark->lock); return; } - mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED; - - inode = fsnotify_detach_from_object(mark); - - /* - * Note that we didn't update flags telling whether inode cares about - * what's happening with children. We update these flags from - * __fsnotify_parent() lazily when next event happens on one of our - * children. - */ - list_del_init(&mark->g_list); - spin_unlock(&mark->lock); - if (inode) - iput(inode); - atomic_dec(&group->num_marks); /* Drop mark reference acquired in fsnotify_add_mark_locked() */ @@ -448,7 +472,9 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, hlist_for_each_entry(lmark, &conn->list, obj_list) { last = lmark; - if ((lmark->group == mark->group) && !allow_dups) { + if ((lmark->group == mark->group) && + (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) && + !allow_dups) { err = -EEXIST; goto out_err; } @@ -499,7 +525,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, mark->group = group; list_add(&mark->g_list, &group->marks_list); atomic_inc(&group->num_marks); - fsnotify_get_mark(mark); /* for i_list and g_list */ + fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups); @@ -547,7 +573,8 @@ struct fsnotify_mark *fsnotify_find_mark( return NULL; hlist_for_each_entry(mark, &conn->list, obj_list) { - if (mark->group == group) { + if (mark->group == group && + (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { fsnotify_get_mark(mark); spin_unlock(&conn->lock); return mark; @@ -627,23 +654,39 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group) void fsnotify_destroy_marks(struct fsnotify_mark_connector __rcu **connp) { struct fsnotify_mark_connector *conn; - struct fsnotify_mark *mark; + struct fsnotify_mark *mark, *old_mark = NULL; + struct inode *inode; - while ((conn = fsnotify_grab_connector(connp))) { - /* - * We have to be careful since we can race with e.g. - * fsnotify_clear_marks_by_group() and once we drop the list - * lock, mark can get removed from the obj_list and destroyed. - * But we are holding mark reference so mark cannot be freed - * and calling fsnotify_destroy_mark() more than once is fine. - */ - mark = hlist_entry(conn->list.first, struct fsnotify_mark, - obj_list); + conn = fsnotify_grab_connector(connp); + if (!conn) + return; + /* + * We have to be careful since we can race with e.g. + * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the + * list can get modified. However we are holding mark reference and + * thus our mark cannot be removed from obj_list so we can continue + * iteration after regaining conn->lock. + */ + hlist_for_each_entry(mark, &conn->list, obj_list) { fsnotify_get_mark(mark); spin_unlock(&conn->lock); + if (old_mark) + fsnotify_put_mark(old_mark); + old_mark = mark; fsnotify_destroy_mark(mark, mark->group); - fsnotify_put_mark(mark); + spin_lock(&conn->lock); } + /* + * Detach list from object now so that we don't pin inode until all + * mark references get dropped. It would lead to strange results such + * as delaying inode deletion or blocking unmount. + */ + inode = fsnotify_detach_connector_from_object(conn); + spin_unlock(&conn->lock); + if (old_mark) + fsnotify_put_mark(old_mark); + if (inode) + iput(inode); } /* @@ -676,9 +719,7 @@ void fsnotify_mark_destroy_list(void) list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { list_del_init(&mark->g_list); - if (mark->group) - fsnotify_put_group(mark->group); - mark->free_mark(mark); + fsnotify_final_mark_destroy(mark); } } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 84d71b6f75f6..a483614b25d0 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -245,9 +245,9 @@ struct fsnotify_mark { struct list_head g_list; /* Protects inode / mnt pointers, flags, masks */ spinlock_t lock; - /* List of marks for inode / vfsmount [connector->lock] */ + /* List of marks for inode / vfsmount [connector->lock, mark ref] */ struct hlist_node obj_list; - /* Head of list of marks for an object [mark->lock, group->mark_mutex] */ + /* Head of list of marks for an object [mark ref] */ struct fsnotify_mark_connector *connector; /* Events types to ignore [mark->lock, group->mark_mutex] */ __u32 ignored_mask; diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index c0e494fd8eca..152400e8d077 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -172,27 +172,15 @@ static unsigned long inode_to_key(const struct inode *inode) /* * Function to return search key in our hash from chunk. Key 0 is special and * should never be present in the hash. - * - * Must be called with chunk->mark.lock held to protect from connector - * becoming NULL. */ -static unsigned long __chunk_to_key(struct audit_chunk *chunk) +static unsigned long chunk_to_key(struct audit_chunk *chunk) { - if (!chunk->mark.connector) + /* We have a reference to the mark so it should be attached. */ + if (WARN_ON_ONCE(!chunk->mark.connector)) return 0; return (unsigned long)chunk->mark.connector->inode; } -static unsigned long chunk_to_key(struct audit_chunk *chunk) -{ - unsigned long key; - - spin_lock(&chunk->mark.lock); - key = __chunk_to_key(chunk); - spin_unlock(&chunk->mark.lock); - return key; -} - static inline struct list_head *chunk_hash(unsigned long key) { unsigned long n = key / L1_CACHE_BYTES; @@ -202,7 +190,7 @@ static inline struct list_head *chunk_hash(unsigned long key) /* hash_lock & entry->lock is held by caller */ static void insert_hash(struct audit_chunk *chunk) { - unsigned long key = __chunk_to_key(chunk); + unsigned long key = chunk_to_key(chunk); struct list_head *list; if (!key) @@ -263,7 +251,7 @@ static void untag_chunk(struct node *p) mutex_lock(&entry->group->mark_mutex); spin_lock(&entry->lock); - if (chunk->dead || !entry->connector) { + if (chunk->dead || !entry->connector || !entry->connector->inode) { spin_unlock(&entry->lock); mutex_unlock(&entry->group->mark_mutex); if (new) @@ -423,7 +411,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree) mutex_lock(&old_entry->group->mark_mutex); spin_lock(&old_entry->lock); - if (!old_entry->connector) { + if (!old_entry->connector || !old_entry->connector->inode) { /* old_entry is being shot, lets just lie */ spin_unlock(&old_entry->lock); mutex_unlock(&old_entry->group->mark_mutex);