Message ID | 20170106104401.5894-10-jack@suse.cz (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Fri, Jan 6, 2017 at 12:43 PM, Jan Kara <jack@suse.cz> wrote: > Instead of removing mark from object list from fsnotify_detach_mark(), > remove the mark when last reference to the mark is dropped. This will > allow fanotify to wait for userspace response to event without having to > hold onto fsnotify_mark_srcu. > > Signed-off-by: Jan Kara <jack@suse.cz> Some nits here. and re-echoing of comments from the patch that introduces fsnotify_mark_connector, because IMO, this code makes a good example for these arguments. > --- > fs/notify/mark.c | 198 +++++++++++++++++++++++---------------- > include/linux/fsnotify_backend.h | 4 +- > kernel/audit_tree.c | 20 +--- > 3 files changed, 124 insertions(+), 98 deletions(-) > > diff --git a/fs/notify/mark.c b/fs/notify/mark.c > index 521c4bd9b497..dc94b5df7627 100644 > --- a/fs/notify/mark.c > +++ b/fs/notify/mark.c > @@ -49,7 +49,13 @@ > * > * A list of notification marks relating to inode / mnt is contained in > * fsnotify_mark_connector. That structure is alive as long as there are any marks > - * in the list and is also protected by fsnotify_mark_srcu. > + * in the list and is also protected by fsnotify_mark_srcu. A mark gets > + * detached from fsnotify_mark_connector when last reference to the mark is dropped. > + * Thus having mark reference is enough to protect mark->connector pointer > + * and to make sure fsnotify_mark_connector cannot disappear. Also because we remove > + * mark from g_list before dropping mark reference associated with that, any > + * mark found through g_list is guaranteed to have mark->connector set > + * until we drop group->mark_mutex. > * > * LIFETIME: > * Inode marks survive between when they are added to an inode and when their > @@ -103,17 +109,6 @@ void fsnotify_get_mark(struct fsnotify_mark *mark) > atomic_inc(&mark->refcnt); > } > > -void fsnotify_put_mark(struct fsnotify_mark *mark) > -{ > - if (atomic_dec_and_test(&mark->refcnt)) { > - spin_lock(&destroy_lock); > - list_add(&mark->g_list, &destroy_list); > - spin_unlock(&destroy_lock); > - queue_delayed_work(system_unbound_wq, &reaper_work, > - FSNOTIFY_REAPER_DELAY); > - } > -} > - > static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) > { > u32 new_mask = 0; > @@ -168,55 +163,97 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) > } > } > > -static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark) > +static struct inode *fsnotify_detach_connector_from_object( > + struct fsnotify_mark_connector *conn) > { > - struct fsnotify_mark_connector *conn; > struct inode *inode = NULL; > - bool free_conn = false; > > - conn = mark->connector; > - spin_lock(&conn->lock); > - hlist_del_init_rcu(&mark->obj_list); > - if (hlist_empty(&conn->list)) { > - if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { > - inode = conn->inode; > - inode->i_fsnotify_marks = NULL; > - inode->i_fsnotify_mask = 0; > - conn->inode = NULL; > - conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; > - } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { > - real_mount(conn->mnt)->mnt_fsnotify_marks = NULL; > - real_mount(conn->mnt)->mnt_fsnotify_mask = 0; > - conn->mnt = NULL; > - conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; > - } > - free_conn = true; > - } else > - __fsnotify_recalc_mask(conn); > - mark->connector = NULL; > - spin_unlock(&conn->lock); > + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { > + inode = conn->inode; > + inode->i_fsnotify_marks = NULL; Repeating question about the need to use rcu_assign_pointer here, so you may address it in this patch instead of the patch that introduced the connector struct. > + inode->i_fsnotify_mask = 0; > + conn->inode = NULL; > + conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; > + } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { > + real_mount(conn->mnt)->mnt_fsnotify_marks = NULL; > + real_mount(conn->mnt)->mnt_fsnotify_mask = 0; > + conn->mnt = NULL; > + conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; > + } > + > + return inode; > +} > + > +static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) > +{ > + if (mark->group) > + fsnotify_put_group(mark->group); > + mark->free_mark(mark); > +} > > - if (free_conn) { > +void fsnotify_put_mark(struct fsnotify_mark *mark) > +{ > + /* Catch marks that were actually never attached to object */ > + if (!mark->connector && atomic_dec_and_test(&mark->refcnt)) { > + fsnotify_final_mark_destroy(mark); > + return; > + } > + > + /* > + * We have to be careful so that traversals of obj_list under lock can > + * safely grab mark reference. > + */ > + if (atomic_dec_and_lock(&mark->refcnt, &mark->connector->lock)) { Style nit: maybe if (!atomic_dec_and_lock(...)) return; ? > + struct fsnotify_mark_connector *conn; > + struct inode *inode = NULL; > + bool free_conn = false; > + > + conn = mark->connector; > + hlist_del_init_rcu(&mark->obj_list); > + if (hlist_empty(&conn->list)) { > + inode = fsnotify_detach_connector_from_object(conn); > + free_conn = true; > + } else > + __fsnotify_recalc_mask(conn); > + mark->connector = NULL; > + spin_unlock(&conn->lock); > + > + if (inode) > + iput(inode); > + > + if (free_conn) { > + spin_lock(&destroy_lock); > + conn->destroy_next = connector_destroy_list; > + connector_destroy_list = conn; > + spin_unlock(&destroy_lock); > + queue_work(system_unbound_wq, &connector_reaper_work); > + } > + /* > + * Note that we didn't update flags telling whether inode cares > + * about what's happening with children. We update these flags > + * from __fsnotify_parent() lazily when next event happens on > + * one of our children. > + */ > spin_lock(&destroy_lock); > - conn->destroy_next = connector_destroy_list; > - connector_destroy_list = conn; > + list_add(&mark->g_list, &destroy_list); > spin_unlock(&destroy_lock); > - queue_work(system_unbound_wq, &connector_reaper_work); > + queue_delayed_work(system_unbound_wq, &reaper_work, > + FSNOTIFY_REAPER_DELAY); > } > - > - return inode; > } > > /* > - * Remove mark from inode / vfsmount list, group list, drop inode reference > - * if we got one. > + * Mark mark as dead, remove it from group list. Mark still stays in object Nit: marking mark as "dead" would imply mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE which does not happen here. need to be careful with wording... You probably meant to write "Mark mark as detached...". > + * list until its last reference is dropped. Note that we rely on mark being > + * removed from group list before corresponding reference to it is dropped. In > + * particular we rely on mark->connector being valid while we hold > + * group->mark_mutex if we found the mark through g_list. > * > * Must be called with group->mark_mutex held. The caller must either hold > * reference to the mark or be protected by fsnotify_mark_srcu. > */ > void fsnotify_detach_mark(struct fsnotify_mark *mark) > { > - struct inode *inode = NULL; > struct fsnotify_group *group = mark->group; > > WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); > @@ -225,31 +262,15 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark) > !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); > > spin_lock(&mark->lock); > - > /* something else already called this function on this mark */ > if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { > spin_unlock(&mark->lock); > return; > } > - > mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED; > - > - inode = fsnotify_detach_from_object(mark); > - > - /* > - * Note that we didn't update flags telling whether inode cares about > - * what's happening with children. We update these flags from > - * __fsnotify_parent() lazily when next event happens on one of our > - * children. > - */ > - > list_del_init(&mark->g_list); > - > spin_unlock(&mark->lock); > > - if (inode) > - iput(inode); > - > atomic_dec(&group->num_marks); > > /* Drop mark reference acquired in fsnotify_add_mark_locked() */ > @@ -436,7 +457,9 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, > hlist_for_each_entry(lmark, &conn->list, obj_list) { > last = lmark; > > - if ((lmark->group == mark->group) && !allow_dups) { > + if ((lmark->group == mark->group) && > + (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) && > + !allow_dups) { > err = -EEXIST; > goto out_err; > } > @@ -487,7 +510,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, > mark->group = group; > list_add(&mark->g_list, &group->marks_list); > atomic_inc(&group->num_marks); > - fsnotify_get_mark(mark); /* for i_list and g_list */ > + fsnotify_get_mark(mark); /* for g_list */ > spin_unlock(&mark->lock); > > ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups); > @@ -533,7 +556,8 @@ struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_mark_connector **connp, > if (!conn) > return NULL; > hlist_for_each_entry(mark, &conn->list, obj_list) { > - if (mark->group == group) { > + if (mark->group == group && > + (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { > fsnotify_get_mark(mark); > fsnotify_put_connector(conn); > return mark; > @@ -613,23 +637,39 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group) > void fsnotify_destroy_marks(struct fsnotify_mark_connector **connp) > { > struct fsnotify_mark_connector *conn; > - struct fsnotify_mark *mark; > + struct fsnotify_mark *mark, *old_mark = NULL; > + struct inode *inode; > > - while ((conn = fsnotify_grab_connector(connp))) { > - /* > - * We have to be careful since we can race with e.g. > - * fsnotify_clear_marks_by_group() and once we drop the list > - * lock, mark can get removed from the obj_list and destroyed. > - * But we are holding mark reference so mark cannot be freed > - * and calling fsnotify_destroy_mark() more than once is fine. > - */ > - mark = hlist_entry(conn->list.first, struct fsnotify_mark, > - obj_list); > + conn = fsnotify_grab_connector(connp); > + if (!conn) > + return; > + /* > + * We have to be careful since we can race with e.g. > + * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the > + * list can get modified. However we are holding mark reference and > + * thus our mark cannot be removed from obj_list so we can continue > + * iteration after regaining conn->lock. > + */ > + hlist_for_each_entry(mark, &conn->list, obj_list) { > fsnotify_get_mark(mark); > - fsnotify_put_connector(conn); > + spin_unlock(&conn->lock); > + if (old_mark) > + fsnotify_put_mark(old_mark); > + old_mark = mark; > fsnotify_destroy_mark(mark, mark->group); > - fsnotify_put_mark(mark); > + spin_lock(&conn->lock); > } > + /* > + * Detach list from object now so that we don't pin inode until all > + * mark references get dropped. It would lead to strange results such > + * as delaying inode deletion or blocking unmount. > + */ > + inode = fsnotify_detach_connector_from_object(conn); > + fsnotify_put_connector(conn); In this code, it is extra dissonant that spin_unlock(&conn->lock); is obfuscated by fsnotify_put_connector(conn); since you (rightfully) used spin_unlock() inside the for loop. As I suggested in earlier patch - get rid of fsnotify_put_connector() helper is the easy road. > + if (inode) > + iput(inode); > + if (old_mark) > + fsnotify_put_mark(old_mark); It is not clear to me from the comment above if the order of iput(inode) and fsnotify_put_mark(old_mark) is meaningful or arbitrary, because af far as I understand "until all mark references get dropped" does not refer to "old_mark". If the order is arbitrary, to me it would have looked better to see the snippet missing from within the for loop here: spin_unlock(&conn->lock); if (old_mark) fsnotify_put_mark(old_mark); And iput(inode) as the final chord of this function. But if order is not arbitrary, please elaborate in comment. > } > > /* > @@ -662,9 +702,7 @@ void fsnotify_mark_destroy_list(void) > > list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { > list_del_init(&mark->g_list); > - if (mark->group) > - fsnotify_put_group(mark->group); > - mark->free_mark(mark); > + fsnotify_final_mark_destroy(mark); > } > } > -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Sun 08-01-17 14:10:42, Amir Goldstein wrote: > > +void fsnotify_put_mark(struct fsnotify_mark *mark) > > +{ > > + /* Catch marks that were actually never attached to object */ > > + if (!mark->connector && atomic_dec_and_test(&mark->refcnt)) { > > + fsnotify_final_mark_destroy(mark); > > + return; > > + } > > + > > + /* > > + * We have to be careful so that traversals of obj_list under lock can > > + * safely grab mark reference. > > + */ > > + if (atomic_dec_and_lock(&mark->refcnt, &mark->connector->lock)) { > > Style nit: maybe if (!atomic_dec_and_lock(...)) return; ? OK, done. > > /* > > - * Remove mark from inode / vfsmount list, group list, drop inode reference > > - * if we got one. > > + * Mark mark as dead, remove it from group list. Mark still stays in object > > Nit: marking mark as "dead" would imply > mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE > which does not happen here. need to be careful with wording... > > You probably meant to write "Mark mark as detached...". Yeah, good catch. Thanks. > > @@ -613,23 +637,39 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group) > > void fsnotify_destroy_marks(struct fsnotify_mark_connector **connp) > > { > > struct fsnotify_mark_connector *conn; > > - struct fsnotify_mark *mark; > > + struct fsnotify_mark *mark, *old_mark = NULL; > > + struct inode *inode; > > > > - while ((conn = fsnotify_grab_connector(connp))) { > > - /* > > - * We have to be careful since we can race with e.g. > > - * fsnotify_clear_marks_by_group() and once we drop the list > > - * lock, mark can get removed from the obj_list and destroyed. > > - * But we are holding mark reference so mark cannot be freed > > - * and calling fsnotify_destroy_mark() more than once is fine. > > - */ > > - mark = hlist_entry(conn->list.first, struct fsnotify_mark, > > - obj_list); > > + conn = fsnotify_grab_connector(connp); > > + if (!conn) > > + return; > > + /* > > + * We have to be careful since we can race with e.g. > > + * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the > > + * list can get modified. However we are holding mark reference and > > + * thus our mark cannot be removed from obj_list so we can continue > > + * iteration after regaining conn->lock. > > + */ > > + hlist_for_each_entry(mark, &conn->list, obj_list) { > > fsnotify_get_mark(mark); > > - fsnotify_put_connector(conn); > > + spin_unlock(&conn->lock); > > + if (old_mark) > > + fsnotify_put_mark(old_mark); > > + old_mark = mark; > > fsnotify_destroy_mark(mark, mark->group); > > - fsnotify_put_mark(mark); > > + spin_lock(&conn->lock); > > } > > + /* > > + * Detach list from object now so that we don't pin inode until all > > + * mark references get dropped. It would lead to strange results such > > + * as delaying inode deletion or blocking unmount. > > + */ > > + inode = fsnotify_detach_connector_from_object(conn); > > + fsnotify_put_connector(conn); > > In this code, it is extra dissonant that spin_unlock(&conn->lock); > is obfuscated by fsnotify_put_connector(conn); > since you (rightfully) used spin_unlock() inside the for loop. > As I suggested in earlier patch - get rid of fsnotify_put_connector() > helper is the easy road. Done. > > + if (inode) > > + iput(inode); > > + if (old_mark) > > + fsnotify_put_mark(old_mark); > > It is not clear to me from the comment above if the order of > iput(inode) and fsnotify_put_mark(old_mark) is meaningful > or arbitrary, because af far as I understand "until all mark > references get dropped" does not refer to "old_mark". > > If the order is arbitrary, to me it would have looked better > to see the snippet missing from within the for loop here: > > spin_unlock(&conn->lock); > if (old_mark) > fsnotify_put_mark(old_mark); > > And iput(inode) as the final chord of this function. > > But if order is not arbitrary, please elaborate in comment. The order is arbitrary. Frankly, I can see reasons for any ordering - my original thinking was to have iput() closer to where we get inode pointer. But whatever. Reordered. Honza
diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 521c4bd9b497..dc94b5df7627 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -49,7 +49,13 @@ * * A list of notification marks relating to inode / mnt is contained in * fsnotify_mark_connector. That structure is alive as long as there are any marks - * in the list and is also protected by fsnotify_mark_srcu. + * in the list and is also protected by fsnotify_mark_srcu. A mark gets + * detached from fsnotify_mark_connector when last reference to the mark is dropped. + * Thus having mark reference is enough to protect mark->connector pointer + * and to make sure fsnotify_mark_connector cannot disappear. Also because we remove + * mark from g_list before dropping mark reference associated with that, any + * mark found through g_list is guaranteed to have mark->connector set + * until we drop group->mark_mutex. * * LIFETIME: * Inode marks survive between when they are added to an inode and when their @@ -103,17 +109,6 @@ void fsnotify_get_mark(struct fsnotify_mark *mark) atomic_inc(&mark->refcnt); } -void fsnotify_put_mark(struct fsnotify_mark *mark) -{ - if (atomic_dec_and_test(&mark->refcnt)) { - spin_lock(&destroy_lock); - list_add(&mark->g_list, &destroy_list); - spin_unlock(&destroy_lock); - queue_delayed_work(system_unbound_wq, &reaper_work, - FSNOTIFY_REAPER_DELAY); - } -} - static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) { u32 new_mask = 0; @@ -168,55 +163,97 @@ static void fsnotify_connector_destroy_workfn(struct work_struct *work) } } -static struct inode *fsnotify_detach_from_object(struct fsnotify_mark *mark) +static struct inode *fsnotify_detach_connector_from_object( + struct fsnotify_mark_connector *conn) { - struct fsnotify_mark_connector *conn; struct inode *inode = NULL; - bool free_conn = false; - conn = mark->connector; - spin_lock(&conn->lock); - hlist_del_init_rcu(&mark->obj_list); - if (hlist_empty(&conn->list)) { - if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { - inode = conn->inode; - inode->i_fsnotify_marks = NULL; - inode->i_fsnotify_mask = 0; - conn->inode = NULL; - conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; - } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { - real_mount(conn->mnt)->mnt_fsnotify_marks = NULL; - real_mount(conn->mnt)->mnt_fsnotify_mask = 0; - conn->mnt = NULL; - conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; - } - free_conn = true; - } else - __fsnotify_recalc_mask(conn); - mark->connector = NULL; - spin_unlock(&conn->lock); + if (conn->flags & FSNOTIFY_OBJ_TYPE_INODE) { + inode = conn->inode; + inode->i_fsnotify_marks = NULL; + inode->i_fsnotify_mask = 0; + conn->inode = NULL; + conn->flags &= ~FSNOTIFY_OBJ_TYPE_INODE; + } else if (conn->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + real_mount(conn->mnt)->mnt_fsnotify_marks = NULL; + real_mount(conn->mnt)->mnt_fsnotify_mask = 0; + conn->mnt = NULL; + conn->flags &= ~FSNOTIFY_OBJ_TYPE_VFSMOUNT; + } + + return inode; +} + +static void fsnotify_final_mark_destroy(struct fsnotify_mark *mark) +{ + if (mark->group) + fsnotify_put_group(mark->group); + mark->free_mark(mark); +} - if (free_conn) { +void fsnotify_put_mark(struct fsnotify_mark *mark) +{ + /* Catch marks that were actually never attached to object */ + if (!mark->connector && atomic_dec_and_test(&mark->refcnt)) { + fsnotify_final_mark_destroy(mark); + return; + } + + /* + * We have to be careful so that traversals of obj_list under lock can + * safely grab mark reference. + */ + if (atomic_dec_and_lock(&mark->refcnt, &mark->connector->lock)) { + struct fsnotify_mark_connector *conn; + struct inode *inode = NULL; + bool free_conn = false; + + conn = mark->connector; + hlist_del_init_rcu(&mark->obj_list); + if (hlist_empty(&conn->list)) { + inode = fsnotify_detach_connector_from_object(conn); + free_conn = true; + } else + __fsnotify_recalc_mask(conn); + mark->connector = NULL; + spin_unlock(&conn->lock); + + if (inode) + iput(inode); + + if (free_conn) { + spin_lock(&destroy_lock); + conn->destroy_next = connector_destroy_list; + connector_destroy_list = conn; + spin_unlock(&destroy_lock); + queue_work(system_unbound_wq, &connector_reaper_work); + } + /* + * Note that we didn't update flags telling whether inode cares + * about what's happening with children. We update these flags + * from __fsnotify_parent() lazily when next event happens on + * one of our children. + */ spin_lock(&destroy_lock); - conn->destroy_next = connector_destroy_list; - connector_destroy_list = conn; + list_add(&mark->g_list, &destroy_list); spin_unlock(&destroy_lock); - queue_work(system_unbound_wq, &connector_reaper_work); + queue_delayed_work(system_unbound_wq, &reaper_work, + FSNOTIFY_REAPER_DELAY); } - - return inode; } /* - * Remove mark from inode / vfsmount list, group list, drop inode reference - * if we got one. + * Mark mark as dead, remove it from group list. Mark still stays in object + * list until its last reference is dropped. Note that we rely on mark being + * removed from group list before corresponding reference to it is dropped. In + * particular we rely on mark->connector being valid while we hold + * group->mark_mutex if we found the mark through g_list. * * Must be called with group->mark_mutex held. The caller must either hold * reference to the mark or be protected by fsnotify_mark_srcu. */ void fsnotify_detach_mark(struct fsnotify_mark *mark) { - struct inode *inode = NULL; struct fsnotify_group *group = mark->group; WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex)); @@ -225,31 +262,15 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark) !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)); spin_lock(&mark->lock); - /* something else already called this function on this mark */ if (!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { spin_unlock(&mark->lock); return; } - mark->flags &= ~FSNOTIFY_MARK_FLAG_ATTACHED; - - inode = fsnotify_detach_from_object(mark); - - /* - * Note that we didn't update flags telling whether inode cares about - * what's happening with children. We update these flags from - * __fsnotify_parent() lazily when next event happens on one of our - * children. - */ - list_del_init(&mark->g_list); - spin_unlock(&mark->lock); - if (inode) - iput(inode); - atomic_dec(&group->num_marks); /* Drop mark reference acquired in fsnotify_add_mark_locked() */ @@ -436,7 +457,9 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, hlist_for_each_entry(lmark, &conn->list, obj_list) { last = lmark; - if ((lmark->group == mark->group) && !allow_dups) { + if ((lmark->group == mark->group) && + (lmark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) && + !allow_dups) { err = -EEXIST; goto out_err; } @@ -487,7 +510,7 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark, mark->group = group; list_add(&mark->g_list, &group->marks_list); atomic_inc(&group->num_marks); - fsnotify_get_mark(mark); /* for i_list and g_list */ + fsnotify_get_mark(mark); /* for g_list */ spin_unlock(&mark->lock); ret = fsnotify_add_mark_list(mark, inode, mnt, allow_dups); @@ -533,7 +556,8 @@ struct fsnotify_mark *fsnotify_find_mark(struct fsnotify_mark_connector **connp, if (!conn) return NULL; hlist_for_each_entry(mark, &conn->list, obj_list) { - if (mark->group == group) { + if (mark->group == group && + (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED)) { fsnotify_get_mark(mark); fsnotify_put_connector(conn); return mark; @@ -613,23 +637,39 @@ void fsnotify_detach_group_marks(struct fsnotify_group *group) void fsnotify_destroy_marks(struct fsnotify_mark_connector **connp) { struct fsnotify_mark_connector *conn; - struct fsnotify_mark *mark; + struct fsnotify_mark *mark, *old_mark = NULL; + struct inode *inode; - while ((conn = fsnotify_grab_connector(connp))) { - /* - * We have to be careful since we can race with e.g. - * fsnotify_clear_marks_by_group() and once we drop the list - * lock, mark can get removed from the obj_list and destroyed. - * But we are holding mark reference so mark cannot be freed - * and calling fsnotify_destroy_mark() more than once is fine. - */ - mark = hlist_entry(conn->list.first, struct fsnotify_mark, - obj_list); + conn = fsnotify_grab_connector(connp); + if (!conn) + return; + /* + * We have to be careful since we can race with e.g. + * fsnotify_clear_marks_by_group() and once we drop the conn->lock, the + * list can get modified. However we are holding mark reference and + * thus our mark cannot be removed from obj_list so we can continue + * iteration after regaining conn->lock. + */ + hlist_for_each_entry(mark, &conn->list, obj_list) { fsnotify_get_mark(mark); - fsnotify_put_connector(conn); + spin_unlock(&conn->lock); + if (old_mark) + fsnotify_put_mark(old_mark); + old_mark = mark; fsnotify_destroy_mark(mark, mark->group); - fsnotify_put_mark(mark); + spin_lock(&conn->lock); } + /* + * Detach list from object now so that we don't pin inode until all + * mark references get dropped. It would lead to strange results such + * as delaying inode deletion or blocking unmount. + */ + inode = fsnotify_detach_connector_from_object(conn); + fsnotify_put_connector(conn); + if (inode) + iput(inode); + if (old_mark) + fsnotify_put_mark(old_mark); } /* @@ -662,9 +702,7 @@ void fsnotify_mark_destroy_list(void) list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) { list_del_init(&mark->g_list); - if (mark->group) - fsnotify_put_group(mark->group); - mark->free_mark(mark); + fsnotify_final_mark_destroy(mark); } } diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index f3df68a64ee6..64ce249288ad 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -244,9 +244,9 @@ struct fsnotify_mark { struct list_head g_list; /* Protects inode / mnt pointers, flags, masks */ spinlock_t lock; - /* List of marks for inode / vfsmount [connector->lock] */ + /* List of marks for inode / vfsmount [connector->lock, mark ref] */ struct hlist_node obj_list; - /* Head of list of marks for an object [mark->lock, group->mark_mutex] */ + /* Head of list of marks for an object [mark ref] */ struct fsnotify_mark_connector *connector; /* Events types to ignore [mark->lock, group->mark_mutex] */ __u32 ignored_mask; diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index a878a98f55b3..6962443f2216 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -172,27 +172,15 @@ static unsigned long inode_to_key(const struct inode *inode) /* * Function to return search key in our hash from chunk. Key 0 is special and * should never be present in the hash. - * - * Must be called with chunk->mark.lock held to protect from connector - * becoming NULL. */ -static unsigned long __chunk_to_key(struct audit_chunk *chunk) +static unsigned long chunk_to_key(struct audit_chunk *chunk) { - if (!chunk->mark.connector) + /* We have a reference to the mark so it should be attached. */ + if (WARN_ON_ONCE(!chunk->mark.connector)) return 0; return (unsigned long)chunk->mark.connector->inode; } -static unsigned long chunk_to_key(struct audit_chunk *chunk) -{ - unsigned long key; - - spin_lock(&chunk->mark.lock); - key = __chunk_to_key(chunk); - spin_unlock(&chunk->mark.lock); - return key; -} - static inline struct list_head *chunk_hash(unsigned long key) { unsigned long n = key / L1_CACHE_BYTES; @@ -202,7 +190,7 @@ static inline struct list_head *chunk_hash(unsigned long key) /* hash_lock & entry->lock is held by caller */ static void insert_hash(struct audit_chunk *chunk) { - unsigned long key = __chunk_to_key(chunk); + unsigned long key = chunk_to_key(chunk); struct list_head *list; if (!key)
Instead of removing mark from object list from fsnotify_detach_mark(), remove the mark when last reference to the mark is dropped. This will allow fanotify to wait for userspace response to event without having to hold onto fsnotify_mark_srcu. Signed-off-by: Jan Kara <jack@suse.cz> --- fs/notify/mark.c | 198 +++++++++++++++++++++++---------------- include/linux/fsnotify_backend.h | 4 +- kernel/audit_tree.c | 20 +--- 3 files changed, 124 insertions(+), 98 deletions(-)