diff mbox

[1/4] fsnotify: fix pinning of marks and groups

Message ID 1508421517-22678-2-git-send-email-mszeredi@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Miklos Szeredi Oct. 19, 2017, 1:58 p.m. UTC
We may fail to pin one of the marks in fsnotify_prepare_user_wait() when
dropping the srcu read lock, resulting in use after free at the next
iteration.

Solution is to store both marks in iter_info instead of just the one we'll
be sending the event for, as well as pinning the group for both (if they
have the same group: fine, pinnig it twice doesn't hurt).

Also blind increment of group's user_waits is not enough, we could be far
enough in the group's destruction that it isn't taken into account.
Instead we need to check (under lock) that the mark is still attached to
the group after having obtained a ref to the mark.  If not, skip it.

In the process of fixing the above, clean up fsnotify_prepare_user_wait()/
fsnotify_finish_user_wait().

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: 9385a84d7e1f ("fsnotify: Pass fsnotify_iter_info into handle_event handler")
Cc: <stable@vger.kernel.org> # v4.12
---
 fs/notify/fsnotify.c |   6 ++--
 fs/notify/mark.c     | 100 +++++++++++++++++++++++----------------------------
 2 files changed, 48 insertions(+), 58 deletions(-)

Comments

Amir Goldstein Oct. 20, 2017, 11:37 a.m. UTC | #1
On Thu, Oct 19, 2017 at 4:58 PM, Miklos Szeredi <mszeredi@redhat.com> wrote:
> We may fail to pin one of the marks in fsnotify_prepare_user_wait() when
> dropping the srcu read lock, resulting in use after free at the next
> iteration.
>
> Solution is to store both marks in iter_info instead of just the one we'll
> be sending the event for, as well as pinning the group for both (if they
> have the same group: fine, pinnig it twice doesn't hurt).
>
> Also blind increment of group's user_waits is not enough, we could be far
> enough in the group's destruction that it isn't taken into account.
> Instead we need to check (under lock) that the mark is still attached to
> the group after having obtained a ref to the mark.  If not, skip it.
>
> In the process of fixing the above, clean up fsnotify_prepare_user_wait()/
> fsnotify_finish_user_wait().
>

Change looks correct, but it was so hard to follow this patch.
The moving of helpers around created a completely garbled diff
where it is impossible to see the logic changed.

Suggest to separate to 3 patches: create helpers (move them around),
pass the 2 marks in iter_info and fix increment of group's user_waits.

> Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
> Fixes: 9385a84d7e1f ("fsnotify: Pass fsnotify_iter_info into handle_event handler")
> Cc: <stable@vger.kernel.org> # v4.12
> ---
>  fs/notify/fsnotify.c |   6 ++--
>  fs/notify/mark.c     | 100 +++++++++++++++++++++++----------------------------
>  2 files changed, 48 insertions(+), 58 deletions(-)
>
> diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
> index 0c4583b61717..48ec61f4c4d5 100644
> --- a/fs/notify/fsnotify.c
> +++ b/fs/notify/fsnotify.c
> @@ -336,6 +336,9 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
>                         vfsmount_group = vfsmount_mark->group;
>                 }
>
> +               iter_info.inode_mark = inode_mark;
> +               iter_info.vfsmount_mark = vfsmount_mark;
> +
>                 if (inode_group && vfsmount_group) {
>                         int cmp = fsnotify_compare_groups(inode_group,
>                                                           vfsmount_group);
> @@ -348,9 +351,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
>                         }
>                 }
>
> -               iter_info.inode_mark = inode_mark;
> -               iter_info.vfsmount_mark = vfsmount_mark;
> -
>                 ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
>                                     data, data_is, cookie, file_name,
>                                     &iter_info);
> diff --git a/fs/notify/mark.c b/fs/notify/mark.c
> index 9991f8826734..9a7c8dbeeb59 100644
> --- a/fs/notify/mark.c
> +++ b/fs/notify/mark.c
> @@ -109,16 +109,6 @@ void fsnotify_get_mark(struct fsnotify_mark *mark)
>         atomic_inc(&mark->refcnt);
>  }
>
> -/*
> - * Get mark reference when we found the mark via lockless traversal of object
> - * list. Mark can be already removed from the list by now and on its way to be
> - * destroyed once SRCU period ends.
> - */
> -static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
> -{
> -       return atomic_inc_not_zero(&mark->refcnt);
> -}
> -
>  static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
>  {
>         u32 new_mask = 0;
> @@ -256,32 +246,53 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
>                            FSNOTIFY_REAPER_DELAY);
>  }
>
> -bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
> +/*
> + * Get mark reference when we found the mark via lockless traversal of object
> + * list. Mark can be already removed from the list by now and on its way to be
> + * destroyed once SRCU period ends.
> + */
> +static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
>  {
> -       struct fsnotify_group *group;
> -
> -       if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark))
> -               return false;
> -
> -       if (iter_info->inode_mark)
> -               group = iter_info->inode_mark->group;
> -       else
> -               group = iter_info->vfsmount_mark->group;
> +       if (!mark)
> +               return true;
> +
> +       if (atomic_inc_not_zero(&mark->refcnt)) {
> +               spin_lock(&mark->lock);
> +               if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) {
> +                       /* mark is attached, group is still alive then */
> +                       atomic_inc(&mark->group->user_waits);
> +                       spin_unlock(&mark->lock);
> +                       return true;
> +               }
> +               spin_unlock(&mark->lock);
> +               fsnotify_put_mark(mark);
> +       }
> +       return false;
> +}
>
> -       /*
> -        * Since acquisition of mark reference is an atomic op as well, we can
> -        * be sure this inc is seen before any effect of refcount increment.
> -        */
> -       atomic_inc(&group->user_waits);
> +static void fsnotify_put_mark_wait(struct fsnotify_mark *mark)
> +{
> +       if (mark) {
> +               struct fsnotify_group *group = mark->group;
>
> -       if (iter_info->inode_mark) {
> -               /* This can fail if mark is being removed */
> -               if (!fsnotify_get_mark_safe(iter_info->inode_mark))
> -                       goto out_wait;
> +               fsnotify_put_mark(mark);
> +               /*
> +                * We abuse notification_waitq on group shutdown for waiting for all
> +                * marks pinned when waiting for userspace.
> +                */
> +               if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
> +                       wake_up(&group->notification_waitq);
>         }
> -       if (iter_info->vfsmount_mark) {
> -               if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark))
> -                       goto out_inode;
> +}
> +
> +bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
> +{
> +       /* This can fail if mark is being removed */
> +       if (!fsnotify_get_mark_safe(iter_info->inode_mark))
> +               return false;
> +       if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) {
> +               fsnotify_put_mark_wait(iter_info->inode_mark);
> +               return false;
>         }
>
>         /*
> @@ -292,34 +303,13 @@ bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
>         srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);
>
>         return true;
> -out_inode:
> -       if (iter_info->inode_mark)
> -               fsnotify_put_mark(iter_info->inode_mark);
> -out_wait:
> -       if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
> -               wake_up(&group->notification_waitq);
> -       return false;
>  }
>
>  void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
>  {
> -       struct fsnotify_group *group = NULL;
> -
>         iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
> -       if (iter_info->inode_mark) {
> -               group = iter_info->inode_mark->group;
> -               fsnotify_put_mark(iter_info->inode_mark);
> -       }
> -       if (iter_info->vfsmount_mark) {
> -               group = iter_info->vfsmount_mark->group;
> -               fsnotify_put_mark(iter_info->vfsmount_mark);
> -       }
> -       /*
> -        * We abuse notification_waitq on group shutdown for waiting for all
> -        * marks pinned when waiting for userspace.
> -        */
> -       if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
> -               wake_up(&group->notification_waitq);
> +       fsnotify_put_mark_wait(iter_info->inode_mark);
> +       fsnotify_put_mark_wait(iter_info->vfsmount_mark);
>  }
>
>  /*
> --
> 2.5.5
>
Miklos Szeredi Oct. 20, 2017, 11:56 a.m. UTC | #2
On Fri, Oct 20, 2017 at 1:37 PM, Amir Goldstein <amir73il@gmail.com> wrote:
> On Thu, Oct 19, 2017 at 4:58 PM, Miklos Szeredi <mszeredi@redhat.com> wrote:
>> We may fail to pin one of the marks in fsnotify_prepare_user_wait() when
>> dropping the srcu read lock, resulting in use after free at the next
>> iteration.
>>
>> Solution is to store both marks in iter_info instead of just the one we'll
>> be sending the event for, as well as pinning the group for both (if they
>> have the same group: fine, pinnig it twice doesn't hurt).

I was thinking about this one and actually pinning only the single
group for the used mark was correct, since the other, unused mark
won't have its group dereferenced while pinned.

Only there doesn't appear to be a way to make this work without adding
complexity :(


>>
>> Also blind increment of group's user_waits is not enough, we could be far
>> enough in the group's destruction that it isn't taken into account.
>> Instead we need to check (under lock) that the mark is still attached to
>> the group after having obtained a ref to the mark.  If not, skip it.
>>
>> In the process of fixing the above, clean up fsnotify_prepare_user_wait()/
>> fsnotify_finish_user_wait().
>>
>
> Change looks correct, but it was so hard to follow this patch.
> The moving of helpers around created a completely garbled diff
> where it is impossible to see the logic changed.
>
> Suggest to separate to 3 patches: create helpers (move them around),
> pass the 2 marks in iter_info and fix increment of group's user_waits.

Yeah, I'll do that.

Thanks,
Miklos
diff mbox

Patch

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 0c4583b61717..48ec61f4c4d5 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -336,6 +336,9 @@  int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 			vfsmount_group = vfsmount_mark->group;
 		}
 
+		iter_info.inode_mark = inode_mark;
+		iter_info.vfsmount_mark = vfsmount_mark;
+
 		if (inode_group && vfsmount_group) {
 			int cmp = fsnotify_compare_groups(inode_group,
 							  vfsmount_group);
@@ -348,9 +351,6 @@  int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 			}
 		}
 
-		iter_info.inode_mark = inode_mark;
-		iter_info.vfsmount_mark = vfsmount_mark;
-
 		ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask,
 				    data, data_is, cookie, file_name,
 				    &iter_info);
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 9991f8826734..9a7c8dbeeb59 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -109,16 +109,6 @@  void fsnotify_get_mark(struct fsnotify_mark *mark)
 	atomic_inc(&mark->refcnt);
 }
 
-/*
- * Get mark reference when we found the mark via lockless traversal of object
- * list. Mark can be already removed from the list by now and on its way to be
- * destroyed once SRCU period ends.
- */
-static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
-{
-	return atomic_inc_not_zero(&mark->refcnt);
-}
-
 static void __fsnotify_recalc_mask(struct fsnotify_mark_connector *conn)
 {
 	u32 new_mask = 0;
@@ -256,32 +246,53 @@  void fsnotify_put_mark(struct fsnotify_mark *mark)
 			   FSNOTIFY_REAPER_DELAY);
 }
 
-bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
+/*
+ * Get mark reference when we found the mark via lockless traversal of object
+ * list. Mark can be already removed from the list by now and on its way to be
+ * destroyed once SRCU period ends.
+ */
+static bool fsnotify_get_mark_safe(struct fsnotify_mark *mark)
 {
-	struct fsnotify_group *group;
-
-	if (WARN_ON_ONCE(!iter_info->inode_mark && !iter_info->vfsmount_mark))
-		return false;
-
-	if (iter_info->inode_mark)
-		group = iter_info->inode_mark->group;
-	else
-		group = iter_info->vfsmount_mark->group;
+	if (!mark)
+		return true;
+
+	if (atomic_inc_not_zero(&mark->refcnt)) {
+		spin_lock(&mark->lock);
+		if (mark->flags & FSNOTIFY_MARK_FLAG_ATTACHED) {
+			/* mark is attached, group is still alive then */
+			atomic_inc(&mark->group->user_waits);
+			spin_unlock(&mark->lock);
+			return true;
+		}
+		spin_unlock(&mark->lock);
+		fsnotify_put_mark(mark);
+	}
+	return false;
+}
 
-	/*
-	 * Since acquisition of mark reference is an atomic op as well, we can
-	 * be sure this inc is seen before any effect of refcount increment.
-	 */
-	atomic_inc(&group->user_waits);
+static void fsnotify_put_mark_wait(struct fsnotify_mark *mark)
+{
+	if (mark) {
+		struct fsnotify_group *group = mark->group;
 
-	if (iter_info->inode_mark) {
-		/* This can fail if mark is being removed */
-		if (!fsnotify_get_mark_safe(iter_info->inode_mark))
-			goto out_wait;
+		fsnotify_put_mark(mark);
+		/*
+		 * We abuse notification_waitq on group shutdown for waiting for all
+		 * marks pinned when waiting for userspace.
+		 */
+		if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
+			wake_up(&group->notification_waitq);
 	}
-	if (iter_info->vfsmount_mark) {
-		if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark))
-			goto out_inode;
+}
+
+bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
+{
+	/* This can fail if mark is being removed */
+	if (!fsnotify_get_mark_safe(iter_info->inode_mark))
+		return false;
+	if (!fsnotify_get_mark_safe(iter_info->vfsmount_mark)) {
+		fsnotify_put_mark_wait(iter_info->inode_mark);
+		return false;
 	}
 
 	/*
@@ -292,34 +303,13 @@  bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info)
 	srcu_read_unlock(&fsnotify_mark_srcu, iter_info->srcu_idx);
 
 	return true;
-out_inode:
-	if (iter_info->inode_mark)
-		fsnotify_put_mark(iter_info->inode_mark);
-out_wait:
-	if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
-		wake_up(&group->notification_waitq);
-	return false;
 }
 
 void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info)
 {
-	struct fsnotify_group *group = NULL;
-
 	iter_info->srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
-	if (iter_info->inode_mark) {
-		group = iter_info->inode_mark->group;
-		fsnotify_put_mark(iter_info->inode_mark);
-	}
-	if (iter_info->vfsmount_mark) {
-		group = iter_info->vfsmount_mark->group;
-		fsnotify_put_mark(iter_info->vfsmount_mark);
-	}
-	/*
-	 * We abuse notification_waitq on group shutdown for waiting for all
-	 * marks pinned when waiting for userspace.
-	 */
-	if (atomic_dec_and_test(&group->user_waits) && group->shutdown)
-		wake_up(&group->notification_waitq);
+	fsnotify_put_mark_wait(iter_info->inode_mark);
+	fsnotify_put_mark_wait(iter_info->vfsmount_mark);
 }
 
 /*