diff mbox

[12/22] fsnotify: Move queueing of mark for destruction into fsnotify_put_mark()

Message ID 20161222091538.28702-13-jack@suse.cz (mailing list archive)
State New, archived
Headers show

Commit Message

Jan Kara Dec. 22, 2016, 9:15 a.m. UTC
Currently we queue a mark into the list of marks for destruction in
__fsnotify_free_mark() and keep the last mark reference dangling. After the
worker waits for the SRCU period, it drops the last reference to the mark,
which frees it. This scheme has the disadvantage that if we hold a
reference to a mark and drop and reacquire the SRCU lock, the mark can get
freed immediately, which is slightly inconvenient, and we will need to
avoid this in the future.

Move to a scheme where queueing of a mark into the list of marks for
destruction happens when the last reference to the mark is dropped. Also
drop the reference to the mark held by the group list already when the mark
is removed from that list, instead of dropping it only from the destruction
worker.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/mark.c | 77 ++++++++++++++++++++++----------------------------------
 1 file changed, 30 insertions(+), 47 deletions(-)

Comments

Amir Goldstein Dec. 26, 2016, 2:15 p.m. UTC | #1
On Thu, Dec 22, 2016 at 11:15 AM, Jan Kara <jack@suse.cz> wrote:
> Currently we queue mark into a list of marks for destruction in
> __fsnotify_free_mark() and keep last mark reference dangling. After the
> worker waits for SRCU period, it drops the last reference to the mark
> which frees it. This scheme has the disadvantage that if we hold
> reference to mark and drop and reacquire SRCU lock, the mark can get
> freed immediately which is slightly inconvenient and we will need to
> avoid this in the future.
>
> Move to a scheme where queueing of mark into a list of marks for
> destruction happens when the last reference to the mark is dropped. Also
> drop reference to the mark held by group list already when mark is
> removed from that list instead of dropping it only from the destruction
> worker.
>

The BEFORE section refers to what SRCU protects, which this patch
slightly changes. Can you please add an AFTER section describing what is
protected by SRCU after this patch? IIUC, SRCU protects the mark from
being freed, but it does not protect the mark from being removed from the
group list, so after dropping and reacquiring SRCU with an elevated mark
refcount, the mark can find itself not on any list.
For example in inotify_handle_event() when calling fsnotify_destroy_mark()
for IN_ONESHOT mark.

Please clarify.

> Signed-off-by: Jan Kara <jack@suse.cz>
> ---
>  fs/notify/mark.c | 77 ++++++++++++++++++++++----------------------------------
>  1 file changed, 30 insertions(+), 47 deletions(-)
>
> diff --git a/fs/notify/mark.c b/fs/notify/mark.c
> index 60f5754ce5ed..fee4255e9227 100644
> --- a/fs/notify/mark.c
> +++ b/fs/notify/mark.c
> @@ -105,6 +105,7 @@ static DECLARE_WORK(list_reaper_work, fsnotify_list_destroy_workfn);
>
>  void fsnotify_get_mark(struct fsnotify_mark *mark)
>  {
> +       WARN_ON_ONCE(!atomic_read(&mark->refcnt));
>         atomic_inc(&mark->refcnt);
>  }
>
> @@ -239,26 +240,32 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
>                  * from __fsnotify_parent() lazily when next event happens on
>                  * one of our children.
>                  */
> -               fsnotify_final_mark_destroy(mark);
> +               spin_lock(&destroy_lock);
> +               list_add(&mark->g_list, &destroy_list);
> +               spin_unlock(&destroy_lock);
> +               queue_delayed_work(system_unbound_wq, &reaper_work,
> +                                  FSNOTIFY_REAPER_DELAY);
>         }
>  }
>
>  /*
>   * Mark mark as dead, remove it from group list. Mark still stays in object
> - * list until its last reference is dropped. The reference corresponding to
> - * group list gets dropped after SRCU period ends from
> - * fsnotify_mark_destroy_list(). Note that we rely on mark being removed from
> - * group list before corresponding reference to it is dropped. In particular we
> - * rely on mark->obj_list_head being valid while we hold group->mark_mutex if
> - * we found the mark through g_list.
> + * list until its last reference is dropped.  Note that we rely on mark being
> + * removed from group list before corresponding reference to it is dropped. In
> + * particular we rely on mark->obj_list_head being valid while we hold
> + * group->mark_mutex if we found the mark through g_list.
>   *
> - * Must be called with group->mark_mutex held.
> + * Must be called with group->mark_mutex held. The caller must either hold
> + * reference to the mark or be protected by fsnotify_mark_srcu.
>   */
>  void fsnotify_detach_mark(struct fsnotify_mark *mark)
>  {
>         struct fsnotify_group *group = mark->group;
>
> -       BUG_ON(!mutex_is_locked(&group->mark_mutex));
> +       WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
> +       WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
> +                    atomic_read(&mark->refcnt) < 1 +
> +                       !!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));
>
>         spin_lock(&mark->lock);
>         /* something else already called this function on this mark */
> @@ -271,18 +278,20 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark)
>         spin_unlock(&mark->lock);
>
>         atomic_dec(&group->num_marks);
> +
> +       /* Drop mark reference acquired in fsnotify_add_mark_locked() */
> +       fsnotify_put_mark(mark);
>  }
>
>  /*
> - * Prepare mark for freeing and add it to the list of marks prepared for
> - * freeing. The actual freeing must happen after SRCU period ends and the
> - * caller is responsible for this.
> + * Free fsnotify mark. The mark is actually only marked as being freed.  The
> + * freeing is actually happening only once last reference to the mark is
> + * dropped from a workqueue which first waits for srcu period end.
>   *
> - * The function returns true if the mark was added to the list of marks for
> - * freeing. The function returns false if someone else has already called
> - * __fsnotify_free_mark() for the mark.
> + * Caller must have a reference to the mark or be protected by
> + * fsnotify_mark_srcu.
>   */
> -static bool __fsnotify_free_mark(struct fsnotify_mark *mark)
> +void fsnotify_free_mark(struct fsnotify_mark *mark)
>  {
>         struct fsnotify_group *group = mark->group;
>
> @@ -290,7 +299,7 @@ static bool __fsnotify_free_mark(struct fsnotify_mark *mark)
>         /* something else already called this function on this mark */
>         if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
>                 spin_unlock(&mark->lock);
> -               return false;
> +               return;
>         }
>         mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
>         spin_unlock(&mark->lock);
> @@ -302,25 +311,6 @@ static bool __fsnotify_free_mark(struct fsnotify_mark *mark)
>          */
>         if (group->ops->freeing_mark)
>                 group->ops->freeing_mark(mark, group);
> -
> -       spin_lock(&destroy_lock);
> -       list_add(&mark->g_list, &destroy_list);
> -       spin_unlock(&destroy_lock);
> -
> -       return true;
> -}
> -
> -/*
> - * Free fsnotify mark. The freeing is actually happening from a workqueue which
> - * first waits for srcu period end. Caller must have a reference to the mark
> - * or be protected by fsnotify_mark_srcu.
> - */
> -void fsnotify_free_mark(struct fsnotify_mark *mark)
> -{
> -       if (__fsnotify_free_mark(mark)) {
> -               queue_delayed_work(system_unbound_wq, &reaper_work,
> -                                  FSNOTIFY_REAPER_DELAY);
> -       }
>  }
>
>  void fsnotify_destroy_mark(struct fsnotify_mark *mark,
> @@ -537,20 +527,13 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
>
>         return ret;
>  err:
> -       mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
> +       mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE |
> +                        FSNOTIFY_MARK_FLAG_ATTACHED);
>         list_del_init(&mark->g_list);
> -       fsnotify_put_group(group);

fsnotify_put_group() is removed by this patch here and is not added anywhere
else, nor is any fsnotify_get_group() call removed. Is this a leak, or is it
perhaps fixed later on?

> -       mark->group = NULL;
>         atomic_dec(&group->num_marks);
> -
>         spin_unlock(&mark->lock);
>
> -       spin_lock(&destroy_lock);
> -       list_add(&mark->g_list, &destroy_list);
> -       spin_unlock(&destroy_lock);
> -       queue_delayed_work(system_unbound_wq, &reaper_work,
> -                               FSNOTIFY_REAPER_DELAY);
> -
> +       fsnotify_put_mark(mark);
>         return ret;
>  }
>
> @@ -724,7 +707,7 @@ static void fsnotify_mark_destroy_workfn(struct work_struct *work)
>
>         list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
>                 list_del_init(&mark->g_list);
> -               fsnotify_put_mark(mark);
> +               fsnotify_final_mark_destroy(mark);
>         }
>  }
>
> --
> 2.10.2
>
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Jan Kara Jan. 4, 2017, 10:28 a.m. UTC | #2
On Mon 26-12-16 16:15:23, Amir Goldstein wrote:
> On Thu, Dec 22, 2016 at 11:15 AM, Jan Kara <jack@suse.cz> wrote:
> > Currently we queue mark into a list of marks for destruction in
> > __fsnotify_free_mark() and keep last mark reference dangling. After the
> > worker waits for SRCU period, it drops the last reference to the mark
> > which frees it. This scheme has the disadvantage that if we hold
> > reference to mark and drop and reacquire SRCU lock, the mark can get
> > freed immediately which is slightly inconvenient and we will need to
> > avoid this in the future.
> >
> > Move to a scheme where queueing of mark into a list of marks for
> > destruction happens when the last reference to the mark is dropped. Also
> > drop reference to the mark held by group list already when mark is
> > removed from that list instead of dropping it only from the destruction
> > worker.
> >
> 
> The BEFORE section refers to what SRCU protects, which this patch
> slightly changes. Can you please add to AFTER section, what is protected
> by SRCU after this patch.

Ah, the changelog is a victim of me reshuffling patches. It was written in
a situation where the mark reference did not yet protect against any list
removal. I'll rewrite it. Thanks for noticing.

> IIUC, SRCU protects from freeing the mark,
> but it does not protect from removing mark from group list, so after
> drop and reacquire SRCU with elevated mark refcount, mark can find
> itself not on any list.

You are correct that SRCU only protects the mark from being freed; it also
makes sure the inode / vfsmount list traversal is fine (because _rcu list
primitives are used during list manipulation) even though the mark can be
removed from that list in parallel. It does not give any guarantee for the
group list, as you correctly note. But a mark reference pins the mark in the
inode / vfsmount list (after patch 10), so once we have that reference we
are sure the mark stays in the inode / vfsmount list.

> For example in inotify_handle_event() when calling fsnotify_destroy_mark()
> for IN_ONESHOT mark.

So in that place we don't hold any mark reference ourselves, so the mark can
indeed get removed from all the lists - but we still hold the SRCU lock, so
we are sure the mark cannot get freed and the inode list traversal can still
continue.

> > @@ -537,20 +527,13 @@ int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
> >
> >         return ret;
> >  err:
> > -       mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
> > +       mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE |
> > +                        FSNOTIFY_MARK_FLAG_ATTACHED);
> >         list_del_init(&mark->g_list);
> > -       fsnotify_put_group(group);
> 
> fsnotify_put_group() is removed by this patch here and not added anywhere else
> nor is any fsnotify_get_group() call removed. Is this a leak or maybe
> fixed later on??

Note that I also removed mark->group = NULL below, so fsnotify_put_mark()
and the follow-up mark destruction will take care of properly dropping the
group reference. There's no reason to special-case this in
fsnotify_add_mark_locked()...

> 
> > -       mark->group = NULL;
> >         atomic_dec(&group->num_marks);
> > -
> >         spin_unlock(&mark->lock);
> >
> > -       spin_lock(&destroy_lock);
> > -       list_add(&mark->g_list, &destroy_list);
> > -       spin_unlock(&destroy_lock);
> > -       queue_delayed_work(system_unbound_wq, &reaper_work,
> > -                               FSNOTIFY_REAPER_DELAY);
> > -
> > +       fsnotify_put_mark(mark);
> >         return ret;
> >  }

								Honza
Jan Kara Jan. 4, 2017, 12:22 p.m. UTC | #3
On Wed 04-01-17 11:28:21, Jan Kara wrote:
> On Mon 26-12-16 16:15:23, Amir Goldstein wrote:
> > On Thu, Dec 22, 2016 at 11:15 AM, Jan Kara <jack@suse.cz> wrote:
> > > Currently we queue mark into a list of marks for destruction in
> > > __fsnotify_free_mark() and keep last mark reference dangling. After the
> > > worker waits for SRCU period, it drops the last reference to the mark
> > > which frees it. This scheme has the disadvantage that if we hold
> > > reference to mark and drop and reacquire SRCU lock, the mark can get
> > > freed immediately which is slightly inconvenient and we will need to
> > > avoid this in the future.
> > >
> > > Move to a scheme where queueing of mark into a list of marks for
> > > destruction happens when the last reference to the mark is dropped. Also
> > > drop reference to the mark held by group list already when mark is
> > > removed from that list instead of dropping it only from the destruction
> > > worker.
> > >
> > 
> > The BEFORE section refers to what SRCU protects, which this patch
> > slightly changes. Can you please add to AFTER section, what is protected
> > by SRCU after this patch.
> 
> Ah, the changelog is a victim of me reshuffling patches. It was written in
> a situation when mark reference did not protect from any list removal yet.
> I'll rewrite it. Thanks for noticing.

So in the end I've realized this patch needs to go before patch 10 for the
SRCU guarantees to work - otherwise we could free a mark under the hands of
a process iterating the inode list under SRCU after patch 10 and before
this patch. The changelog is then correct, so I've just reshuffled these
patches.

								Honza
diff mbox

Patch

diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 60f5754ce5ed..fee4255e9227 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -105,6 +105,7 @@  static DECLARE_WORK(list_reaper_work, fsnotify_list_destroy_workfn);
 
 void fsnotify_get_mark(struct fsnotify_mark *mark)
 {
+	WARN_ON_ONCE(!atomic_read(&mark->refcnt));
 	atomic_inc(&mark->refcnt);
 }
 
@@ -239,26 +240,32 @@  void fsnotify_put_mark(struct fsnotify_mark *mark)
 		 * from __fsnotify_parent() lazily when next event happens on
 		 * one of our children.
 		 */
-		fsnotify_final_mark_destroy(mark);
+		spin_lock(&destroy_lock);
+		list_add(&mark->g_list, &destroy_list);
+		spin_unlock(&destroy_lock);
+		queue_delayed_work(system_unbound_wq, &reaper_work,
+				   FSNOTIFY_REAPER_DELAY);
 	}
 }
 
 /*
  * Mark mark as dead, remove it from group list. Mark still stays in object
- * list until its last reference is dropped. The reference corresponding to
- * group list gets dropped after SRCU period ends from
- * fsnotify_mark_destroy_list(). Note that we rely on mark being removed from
- * group list before corresponding reference to it is dropped. In particular we
- * rely on mark->obj_list_head being valid while we hold group->mark_mutex if
- * we found the mark through g_list.
+ * list until its last reference is dropped.  Note that we rely on mark being
+ * removed from group list before corresponding reference to it is dropped. In
+ * particular we rely on mark->obj_list_head being valid while we hold
+ * group->mark_mutex if we found the mark through g_list.
  *
- * Must be called with group->mark_mutex held.
+ * Must be called with group->mark_mutex held. The caller must either hold
+ * reference to the mark or be protected by fsnotify_mark_srcu.
  */
 void fsnotify_detach_mark(struct fsnotify_mark *mark)
 {
 	struct fsnotify_group *group = mark->group;
 
-	BUG_ON(!mutex_is_locked(&group->mark_mutex));
+	WARN_ON_ONCE(!mutex_is_locked(&group->mark_mutex));
+	WARN_ON_ONCE(!srcu_read_lock_held(&fsnotify_mark_srcu) &&
+		     atomic_read(&mark->refcnt) < 1 +
+			!!(mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED));
 
 	spin_lock(&mark->lock);
 	/* something else already called this function on this mark */
@@ -271,18 +278,20 @@  void fsnotify_detach_mark(struct fsnotify_mark *mark)
 	spin_unlock(&mark->lock);
 
 	atomic_dec(&group->num_marks);
+
+	/* Drop mark reference acquired in fsnotify_add_mark_locked() */
+	fsnotify_put_mark(mark);
 }
 
 /*
- * Prepare mark for freeing and add it to the list of marks prepared for
- * freeing. The actual freeing must happen after SRCU period ends and the
- * caller is responsible for this.
+ * Free fsnotify mark. The mark is actually only marked as being freed.  The
+ * freeing is actually happening only once last reference to the mark is
+ * dropped from a workqueue which first waits for srcu period end.
  *
- * The function returns true if the mark was added to the list of marks for
- * freeing. The function returns false if someone else has already called
- * __fsnotify_free_mark() for the mark.
+ * Caller must have a reference to the mark or be protected by
+ * fsnotify_mark_srcu.
  */
-static bool __fsnotify_free_mark(struct fsnotify_mark *mark)
+void fsnotify_free_mark(struct fsnotify_mark *mark)
 {
 	struct fsnotify_group *group = mark->group;
 
@@ -290,7 +299,7 @@  static bool __fsnotify_free_mark(struct fsnotify_mark *mark)
 	/* something else already called this function on this mark */
 	if (!(mark->flags & FSNOTIFY_MARK_FLAG_ALIVE)) {
 		spin_unlock(&mark->lock);
-		return false;
+		return;
 	}
 	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
 	spin_unlock(&mark->lock);
@@ -302,25 +311,6 @@  static bool __fsnotify_free_mark(struct fsnotify_mark *mark)
 	 */
 	if (group->ops->freeing_mark)
 		group->ops->freeing_mark(mark, group);
-
-	spin_lock(&destroy_lock);
-	list_add(&mark->g_list, &destroy_list);
-	spin_unlock(&destroy_lock);
-
-	return true;
-}
-
-/*
- * Free fsnotify mark. The freeing is actually happening from a workqueue which
- * first waits for srcu period end. Caller must have a reference to the mark
- * or be protected by fsnotify_mark_srcu.
- */
-void fsnotify_free_mark(struct fsnotify_mark *mark)
-{
-	if (__fsnotify_free_mark(mark)) {
-		queue_delayed_work(system_unbound_wq, &reaper_work,
-				   FSNOTIFY_REAPER_DELAY);
-	}
 }
 
 void fsnotify_destroy_mark(struct fsnotify_mark *mark,
@@ -537,20 +527,13 @@  int fsnotify_add_mark_locked(struct fsnotify_mark *mark,
 
 	return ret;
 err:
-	mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
+	mark->flags &= ~(FSNOTIFY_MARK_FLAG_ALIVE |
+			 FSNOTIFY_MARK_FLAG_ATTACHED);
 	list_del_init(&mark->g_list);
-	fsnotify_put_group(group);
-	mark->group = NULL;
 	atomic_dec(&group->num_marks);
-
 	spin_unlock(&mark->lock);
 
-	spin_lock(&destroy_lock);
-	list_add(&mark->g_list, &destroy_list);
-	spin_unlock(&destroy_lock);
-	queue_delayed_work(system_unbound_wq, &reaper_work,
-				FSNOTIFY_REAPER_DELAY);
-
+	fsnotify_put_mark(mark);
 	return ret;
 }
 
@@ -724,7 +707,7 @@  static void fsnotify_mark_destroy_workfn(struct work_struct *work)
 
 	list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
 		list_del_init(&mark->g_list);
-		fsnotify_put_mark(mark);
+		fsnotify_final_mark_destroy(mark);
 	}
 }