diff mbox series

[2/2] fanotify: allow "weak" fsid when watching a single filesystem

Message ID 20231118183018.2069899-3-amir73il@gmail.com (mailing list archive)
State New
Headers show
Series Support fanotify FAN_REPORT_FID on all filesystems | expand

Commit Message

Amir Goldstein Nov. 18, 2023, 6:30 p.m. UTC
So far, fanotify returns -ENODEV or -EXDEV when trying to set a mark
on a filesystem with a "weak" fsid, namely, zero fsid (e.g. fuse), or
non-uniform fsid (e.g. btrfs non-root subvol).

When group is watching inodes all from the same filesystem (or subvol),
allow adding inode marks with "weak" fsid, because there is no ambiguity
regarding which filesystem reports the event.

The first mark added to a group determines if this group is single or
multi filesystem, depending on the fsid at the path of the added mark.

If the first mark added has a "strong" fsid, marks with "weak" fsid
cannot be added and vice versa.

If the first mark added has a "weak" fsid, following marks must have
the same "weak" fsid and the same sb as the first mark.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/notify/fanotify/fanotify.c      | 15 +----
 fs/notify/fanotify/fanotify.h      |  6 ++
 fs/notify/fanotify/fanotify_user.c | 97 ++++++++++++++++++++++++------
 include/linux/fsnotify_backend.h   |  1 +
 4 files changed, 90 insertions(+), 29 deletions(-)

Comments

Amir Goldstein Nov. 20, 2023, 7:42 a.m. UTC | #1
On Sat, Nov 18, 2023 at 8:30 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> So far, fanotify returns -ENODEV or -EXDEV when trying to set a mark
> on a filesystem with a "weak" fsid, namely, zero fsid (e.g. fuse), or
> non-uniform fsid (e.g. btrfs non-root subvol).
>
> When group is watching inodes all from the same filesystem (or subvol),
> allow adding inode marks with "weak" fsid, because there is no ambiguity
> regarding which filesystem reports the event.
>
> The first mark added to a group determines if this group is single or
> multi filesystem, depending on the fsid at the path of the added mark.
>
> If the first mark added has a "strong" fsid, marks with "weak" fsid
> cannot be added and vice versa.
>
> If the first mark added has a "weak" fsid, following marks must have
> the same "weak" fsid and the same sb as the first mark.
>
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  fs/notify/fanotify/fanotify.c      | 15 +----
>  fs/notify/fanotify/fanotify.h      |  6 ++
>  fs/notify/fanotify/fanotify_user.c | 97 ++++++++++++++++++++++++------
>  include/linux/fsnotify_backend.h   |  1 +
>  4 files changed, 90 insertions(+), 29 deletions(-)
>
> diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> index aff1ab3c32aa..1e4def21811e 100644
> --- a/fs/notify/fanotify/fanotify.c
> +++ b/fs/notify/fanotify/fanotify.c
> @@ -29,12 +29,6 @@ static unsigned int fanotify_hash_path(const struct path *path)
>                 hash_ptr(path->mnt, FANOTIFY_EVENT_HASH_BITS);
>  }
>
> -static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1,
> -                                      __kernel_fsid_t *fsid2)
> -{
> -       return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1];
> -}
> -
>  static unsigned int fanotify_hash_fsid(__kernel_fsid_t *fsid)
>  {
>         return hash_32(fsid->val[0], FANOTIFY_EVENT_HASH_BITS) ^
> @@ -851,7 +845,8 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
>                 if (!(mark->flags & FSNOTIFY_MARK_FLAG_HAS_FSID))
>                         continue;
>                 fsid = FANOTIFY_MARK(mark)->fsid;
> -               if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
> +               if (!(mark->flags & FSNOTIFY_MARK_FLAG_WEAK_FSID) &&
> +                   WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
>                         continue;
>                 return fsid;
>         }
> @@ -933,12 +928,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
>                         return 0;
>         }
>
> -       if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) {
> +       if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS))
>                 fsid = fanotify_get_fsid(iter_info);
> -               /* Racing with mark destruction or creation? */
> -               if (!fsid.val[0] && !fsid.val[1])
> -                       return 0;
> -       }
>
>         event = fanotify_alloc_event(group, mask, data, data_type, dir,
>                                      file_name, &fsid, match_mask);
> diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
> index 2847fa564298..9c97ae638ac5 100644
> --- a/fs/notify/fanotify/fanotify.h
> +++ b/fs/notify/fanotify/fanotify.h
> @@ -504,6 +504,12 @@ static inline __kernel_fsid_t *fanotify_mark_fsid(struct fsnotify_mark *mark)
>         return &FANOTIFY_MARK(mark)->fsid;
>  }
>
> +static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1,
> +                                      __kernel_fsid_t *fsid2)
> +{
> +       return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1];
> +}
> +
>  static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
>  {
>         unsigned int mflags = 0;
> diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
> index e3d836d4d156..1cc68ea56c17 100644
> --- a/fs/notify/fanotify/fanotify_user.c
> +++ b/fs/notify/fanotify/fanotify_user.c
> @@ -23,7 +23,7 @@
>
>  #include <asm/ioctls.h>
>
> -#include "../../mount.h"
> +#include "../fsnotify.h"
>  #include "../fdinfo.h"
>  #include "fanotify.h"
>
> @@ -1192,11 +1192,68 @@ static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
>         return recalc;
>  }
>
> +struct fan_fsid {
> +       struct super_block *sb;
> +       __kernel_fsid_t id;
> +       bool weak;
> +};
> +
> +static int fanotify_check_mark_fsid(struct fsnotify_group *group,
> +                                   struct fsnotify_mark *mark,
> +                                   struct fan_fsid *fsid)
> +{
> +       struct fsnotify_mark_connector *conn;
> +       struct fsnotify_mark *old;
> +       struct super_block *old_sb = NULL;
> +
> +       *fanotify_mark_fsid(mark) = fsid->id;
> +       mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID;
> +       if (fsid->weak)
> +               mark->flags |= FSNOTIFY_MARK_FLAG_WEAK_FSID;
> +
> +       /* First mark added will determine if group is single or multi fsid */
> +       if (list_empty(&group->marks_list))
> +               return 0;
> +
> +       /* Find sb of an existing mark */
> +       list_for_each_entry(old, &group->marks_list, g_list) {
> +               conn = READ_ONCE(old->connector);
> +               if (!conn)
> +                       continue;
> +               old_sb = fsnotify_connector_sb(conn);
> +               if (old_sb)
> +                       break;
> +       }
> +
> +       /* Only detached marks left? */
> +       if (!old_sb)
> +               return 0;
> +
> +       /* Do not allow mixing of marks with weak and strong fsid */
> +       if ((mark->flags ^ old->flags) & FSNOTIFY_MARK_FLAG_WEAK_FSID)
> +               return -EXDEV;
> +
> +       /* Allow mixing of marks with strong fsid from different fs */
> +       if (!fsid->weak)
> +               return 0;
> +
> +       /* Do not allow mixing marks with weak fsid from different fs */
> +       if (old_sb != fsid->sb)
> +               return -EXDEV;
> +
> +       /* Do not allow mixing marks from different btrfs sub-volumes */
> +       if (!fanotify_fsid_equal(fanotify_mark_fsid(old),
> +                                fanotify_mark_fsid(mark)))
> +               return -EXDEV;
> +
> +       return 0;
> +}
> +
>  static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
>                                                    fsnotify_connp_t *connp,
>                                                    unsigned int obj_type,
>                                                    unsigned int fan_flags,
> -                                                  __kernel_fsid_t *fsid)
> +                                                  struct fan_fsid *fsid)
>  {
>         struct ucounts *ucounts = group->fanotify_data.ucounts;
>         struct fanotify_mark *fan_mark;
> @@ -1225,8 +1282,9 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
>
>         /* Cache fsid of filesystem containing the marked object */
>         if (fsid) {
> -               fan_mark->fsid = *fsid;
> -               mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID;
> +               ret = fanotify_check_mark_fsid(group, mark, fsid);
> +               if (ret)

OOPS, missed fsnotify_put_mark(mark);
better add a goto target out_put_mark as this is the second case now.

> +                       goto out_dec_ucounts;
>         } else {
>                 fan_mark->fsid.val[0] = fan_mark->fsid.val[1] = 0;
>         }
> @@ -1289,7 +1347,7 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
>  static int fanotify_add_mark(struct fsnotify_group *group,
>                              fsnotify_connp_t *connp, unsigned int obj_type,
>                              __u32 mask, unsigned int fan_flags,
> -                            __kernel_fsid_t *fsid)
> +                            struct fan_fsid *fsid)
>  {
>         struct fsnotify_mark *fsn_mark;
>         bool recalc;
> @@ -1337,7 +1395,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
>
>  static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
>                                       struct vfsmount *mnt, __u32 mask,
> -                                     unsigned int flags, __kernel_fsid_t *fsid)
> +                                     unsigned int flags, struct fan_fsid *fsid)
>  {
>         return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
>                                  FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid);
> @@ -1345,7 +1403,7 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
>
>  static int fanotify_add_sb_mark(struct fsnotify_group *group,
>                                 struct super_block *sb, __u32 mask,
> -                               unsigned int flags, __kernel_fsid_t *fsid)
> +                               unsigned int flags, struct fan_fsid *fsid)
>  {
>         return fanotify_add_mark(group, &sb->s_fsnotify_marks,
>                                  FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid);
> @@ -1353,7 +1411,7 @@ static int fanotify_add_sb_mark(struct fsnotify_group *group,
>
>  static int fanotify_add_inode_mark(struct fsnotify_group *group,
>                                    struct inode *inode, __u32 mask,
> -                                  unsigned int flags, __kernel_fsid_t *fsid)
> +                                  unsigned int flags, struct fan_fsid *fsid)
>  {
>         pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
>
> @@ -1564,20 +1622,25 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
>         return fd;
>  }
>
> -static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
> +static int fanotify_test_fsid(struct dentry *dentry, unsigned int flags,
> +                             struct fan_fsid *fsid)
>  {
> +       unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
>         __kernel_fsid_t root_fsid;
>         int err;
>
>         /*
>          * Make sure dentry is not of a filesystem with zero fsid (e.g. fuse).
>          */
> -       err = vfs_get_fsid(dentry, fsid);
> +       err = vfs_get_fsid(dentry, &fsid->id);
>         if (err)
>                 return err;
>
> -       if (!fsid->val[0] && !fsid->val[1])
> -               return -ENODEV;
> +       /* Allow weak fsid when marking inodes */
> +       fsid->sb = dentry->d_sb;
> +       fsid->weak = mark_type == FAN_MARK_INODE;
> +       if (!fsid->id.val[0] && !fsid->id.val[1])
> +               return fsid->weak ? 0 : -ENODEV;
>
>         /*
>          * Make sure dentry is not of a filesystem subvolume (e.g. btrfs)
> @@ -1587,10 +1650,10 @@ static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
>         if (err)
>                 return err;
>
> -       if (root_fsid.val[0] != fsid->val[0] ||
> -           root_fsid.val[1] != fsid->val[1])
> -               return -EXDEV;
> +       if (!fanotify_fsid_equal(&root_fsid, &fsid->id))
> +               return fsid->weak ? 0 : -EXDEV;
>
> +       fsid->weak = false;
>         return 0;
>  }
>
> @@ -1675,7 +1738,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
>         struct fsnotify_group *group;
>         struct fd f;
>         struct path path;
> -       __kernel_fsid_t __fsid, *fsid = NULL;
> +       struct fan_fsid __fsid, *fsid = NULL;
>         u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
>         unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
>         unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
> @@ -1827,7 +1890,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
>         }
>
>         if (fid_mode) {
> -               ret = fanotify_test_fsid(path.dentry, &__fsid);
> +               ret = fanotify_test_fsid(path.dentry, flags, &__fsid);
>                 if (ret)
>                         goto path_put_and_out;
>
> diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
> index a80b525ca653..7f63be5ca0f1 100644
> --- a/include/linux/fsnotify_backend.h
> +++ b/include/linux/fsnotify_backend.h
> @@ -529,6 +529,7 @@ struct fsnotify_mark {
>  #define FSNOTIFY_MARK_FLAG_NO_IREF             0x0200
>  #define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS    0x0400
>  #define FSNOTIFY_MARK_FLAG_HAS_FSID            0x0800
> +#define FSNOTIFY_MARK_FLAG_WEAK_FSID           0x1000
>         unsigned int flags;             /* flags [mark->lock] */
>  };
>
> --
> 2.34.1
>
Christian Brauner Nov. 20, 2023, 3:48 p.m. UTC | #2
> OOPS, missed fsnotify_put_mark(mark);
> better add a goto target out_put_mark as this is the second case now.

I want to point out that going forward we should be able to make use of
scoped cleanup macros. Please see include/linux/cleanup.h. I think we
should start making liberal use of this. I know that Peter Ziljstra is
already doing so for kernel/sched/.
Amir Goldstein Nov. 20, 2023, 4:20 p.m. UTC | #3
On Mon, Nov 20, 2023 at 5:48 PM Christian Brauner <brauner@kernel.org> wrote:
>
> > OOPS, missed fsnotify_put_mark(mark);
> > better add a goto target out_put_mark as this is the second case now.
>
> I want to point out that going forward we should be able to make use of
> scoped cleanup macros. Please see include/linux/cleanup.h. I think we
> should start making liberal use of this. I know that Peter Ziljstra is
> already doing so for kernel/sched/.

Yeh, I like them, but not sure if I would use them in this case.

Thanks,
Amir.
Amir Goldstein Nov. 21, 2023, 1:33 p.m. UTC | #4
On Mon, Nov 20, 2023 at 9:42 AM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Sat, Nov 18, 2023 at 8:30 PM Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > So far, fanotify returns -ENODEV or -EXDEV when trying to set a mark
> > on a filesystem with a "weak" fsid, namely, zero fsid (e.g. fuse), or
> > non-uniform fsid (e.g. btrfs non-root subvol).
> >
> > When group is watching inodes all from the same filesystem (or subvol),
> > allow adding inode marks with "weak" fsid, because there is no ambiguity
> > regarding which filesystem reports the event.
> >
> > The first mark added to a group determines if this group is single or
> > multi filesystem, depending on the fsid at the path of the added mark.
> >
> > If the first mark added has a "strong" fsid, marks with "weak" fsid
> > cannot be added and vice versa.
> >
> > If the first mark added has a "weak" fsid, following marks must have
> > the same "weak" fsid and the same sb as the first mark.
> >
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > ---
> >  fs/notify/fanotify/fanotify.c      | 15 +----
> >  fs/notify/fanotify/fanotify.h      |  6 ++
> >  fs/notify/fanotify/fanotify_user.c | 97 ++++++++++++++++++++++++------
> >  include/linux/fsnotify_backend.h   |  1 +
> >  4 files changed, 90 insertions(+), 29 deletions(-)
> >
> > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> > index aff1ab3c32aa..1e4def21811e 100644
> > --- a/fs/notify/fanotify/fanotify.c
> > +++ b/fs/notify/fanotify/fanotify.c
> > @@ -29,12 +29,6 @@ static unsigned int fanotify_hash_path(const struct path *path)
> >                 hash_ptr(path->mnt, FANOTIFY_EVENT_HASH_BITS);
> >  }
> >
> > -static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1,
> > -                                      __kernel_fsid_t *fsid2)
> > -{
> > -       return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1];
> > -}
> > -
> >  static unsigned int fanotify_hash_fsid(__kernel_fsid_t *fsid)
> >  {
> >         return hash_32(fsid->val[0], FANOTIFY_EVENT_HASH_BITS) ^
> > @@ -851,7 +845,8 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
> >                 if (!(mark->flags & FSNOTIFY_MARK_FLAG_HAS_FSID))
> >                         continue;
> >                 fsid = FANOTIFY_MARK(mark)->fsid;
> > -               if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
> > +               if (!(mark->flags & FSNOTIFY_MARK_FLAG_WEAK_FSID) &&
> > +                   WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
> >                         continue;
> >                 return fsid;
> >         }
> > @@ -933,12 +928,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
> >                         return 0;
> >         }
> >
> > -       if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) {
> > +       if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS))
> >                 fsid = fanotify_get_fsid(iter_info);
> > -               /* Racing with mark destruction or creation? */
> > -               if (!fsid.val[0] && !fsid.val[1])
> > -                       return 0;
> > -       }
> >
> >         event = fanotify_alloc_event(group, mask, data, data_type, dir,
> >                                      file_name, &fsid, match_mask);
> > diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
> > index 2847fa564298..9c97ae638ac5 100644
> > --- a/fs/notify/fanotify/fanotify.h
> > +++ b/fs/notify/fanotify/fanotify.h
> > @@ -504,6 +504,12 @@ static inline __kernel_fsid_t *fanotify_mark_fsid(struct fsnotify_mark *mark)
> >         return &FANOTIFY_MARK(mark)->fsid;
> >  }
> >
> > +static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1,
> > +                                      __kernel_fsid_t *fsid2)
> > +{
> > +       return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1];
> > +}
> > +
> >  static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
> >  {
> >         unsigned int mflags = 0;
> > diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
> > index e3d836d4d156..1cc68ea56c17 100644
> > --- a/fs/notify/fanotify/fanotify_user.c
> > +++ b/fs/notify/fanotify/fanotify_user.c
> > @@ -23,7 +23,7 @@
> >
> >  #include <asm/ioctls.h>
> >
> > -#include "../../mount.h"
> > +#include "../fsnotify.h"
> >  #include "../fdinfo.h"
> >  #include "fanotify.h"
> >
> > @@ -1192,11 +1192,68 @@ static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
> >         return recalc;
> >  }
> >
> > +struct fan_fsid {
> > +       struct super_block *sb;
> > +       __kernel_fsid_t id;
> > +       bool weak;
> > +};
> > +
> > +static int fanotify_check_mark_fsid(struct fsnotify_group *group,
> > +                                   struct fsnotify_mark *mark,
> > +                                   struct fan_fsid *fsid)
> > +{
> > +       struct fsnotify_mark_connector *conn;
> > +       struct fsnotify_mark *old;
> > +       struct super_block *old_sb = NULL;
> > +
> > +       *fanotify_mark_fsid(mark) = fsid->id;
> > +       mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID;
> > +       if (fsid->weak)
> > +               mark->flags |= FSNOTIFY_MARK_FLAG_WEAK_FSID;
> > +
> > +       /* First mark added will determine if group is single or multi fsid */
> > +       if (list_empty(&group->marks_list))
> > +               return 0;
> > +
> > +       /* Find sb of an existing mark */
> > +       list_for_each_entry(old, &group->marks_list, g_list) {
> > +               conn = READ_ONCE(old->connector);
> > +               if (!conn)
> > +                       continue;
> > +               old_sb = fsnotify_connector_sb(conn);
> > +               if (old_sb)
> > +                       break;
> > +       }
> > +
> > +       /* Only detached marks left? */
> > +       if (!old_sb)
> > +               return 0;
> > +
> > +       /* Do not allow mixing of marks with weak and strong fsid */
> > +       if ((mark->flags ^ old->flags) & FSNOTIFY_MARK_FLAG_WEAK_FSID)
> > +               return -EXDEV;
> > +
> > +       /* Allow mixing of marks with strong fsid from different fs */
> > +       if (!fsid->weak)
> > +               return 0;
> > +
> > +       /* Do not allow mixing marks with weak fsid from different fs */
> > +       if (old_sb != fsid->sb)
> > +               return -EXDEV;
> > +
> > +       /* Do not allow mixing marks from different btrfs sub-volumes */
> > +       if (!fanotify_fsid_equal(fanotify_mark_fsid(old),
> > +                                fanotify_mark_fsid(mark)))
> > +               return -EXDEV;
> > +
> > +       return 0;
> > +}
> > +
> >  static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
> >                                                    fsnotify_connp_t *connp,
> >                                                    unsigned int obj_type,
> >                                                    unsigned int fan_flags,
> > -                                                  __kernel_fsid_t *fsid)
> > +                                                  struct fan_fsid *fsid)
> >  {
> >         struct ucounts *ucounts = group->fanotify_data.ucounts;
> >         struct fanotify_mark *fan_mark;
> > @@ -1225,8 +1282,9 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
> >
> >         /* Cache fsid of filesystem containing the marked object */
> >         if (fsid) {
> > -               fan_mark->fsid = *fsid;
> > -               mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID;
> > +               ret = fanotify_check_mark_fsid(group, mark, fsid);
> > +               if (ret)
>
> OOPS, missed fsnotify_put_mark(mark);
> better add a goto target out_put_mark as this is the second case now.

FYI, I pushed the fix to:
https://github.com/amir73il/linux/commits/fanotify_fsid

Let me know if you want me to post v2 for this.

Thanks,
Amir.

>
> > +                       goto out_dec_ucounts;
> >         } else {
> >                 fan_mark->fsid.val[0] = fan_mark->fsid.val[1] = 0;
> >         }
> > @@ -1289,7 +1347,7 @@ static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
> >  static int fanotify_add_mark(struct fsnotify_group *group,
> >                              fsnotify_connp_t *connp, unsigned int obj_type,
> >                              __u32 mask, unsigned int fan_flags,
> > -                            __kernel_fsid_t *fsid)
> > +                            struct fan_fsid *fsid)
> >  {
> >         struct fsnotify_mark *fsn_mark;
> >         bool recalc;
> > @@ -1337,7 +1395,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
> >
> >  static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
> >                                       struct vfsmount *mnt, __u32 mask,
> > -                                     unsigned int flags, __kernel_fsid_t *fsid)
> > +                                     unsigned int flags, struct fan_fsid *fsid)
> >  {
> >         return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
> >                                  FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid);
> > @@ -1345,7 +1403,7 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
> >
> >  static int fanotify_add_sb_mark(struct fsnotify_group *group,
> >                                 struct super_block *sb, __u32 mask,
> > -                               unsigned int flags, __kernel_fsid_t *fsid)
> > +                               unsigned int flags, struct fan_fsid *fsid)
> >  {
> >         return fanotify_add_mark(group, &sb->s_fsnotify_marks,
> >                                  FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid);
> > @@ -1353,7 +1411,7 @@ static int fanotify_add_sb_mark(struct fsnotify_group *group,
> >
> >  static int fanotify_add_inode_mark(struct fsnotify_group *group,
> >                                    struct inode *inode, __u32 mask,
> > -                                  unsigned int flags, __kernel_fsid_t *fsid)
> > +                                  unsigned int flags, struct fan_fsid *fsid)
> >  {
> >         pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
> >
> > @@ -1564,20 +1622,25 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
> >         return fd;
> >  }
> >
> > -static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
> > +static int fanotify_test_fsid(struct dentry *dentry, unsigned int flags,
> > +                             struct fan_fsid *fsid)
> >  {
> > +       unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
> >         __kernel_fsid_t root_fsid;
> >         int err;
> >
> >         /*
> >          * Make sure dentry is not of a filesystem with zero fsid (e.g. fuse).
> >          */
> > -       err = vfs_get_fsid(dentry, fsid);
> > +       err = vfs_get_fsid(dentry, &fsid->id);
> >         if (err)
> >                 return err;
> >
> > -       if (!fsid->val[0] && !fsid->val[1])
> > -               return -ENODEV;
> > +       /* Allow weak fsid when marking inodes */
> > +       fsid->sb = dentry->d_sb;
> > +       fsid->weak = mark_type == FAN_MARK_INODE;
> > +       if (!fsid->id.val[0] && !fsid->id.val[1])
> > +               return fsid->weak ? 0 : -ENODEV;
> >
> >         /*
> >          * Make sure dentry is not of a filesystem subvolume (e.g. btrfs)
> > @@ -1587,10 +1650,10 @@ static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
> >         if (err)
> >                 return err;
> >
> > -       if (root_fsid.val[0] != fsid->val[0] ||
> > -           root_fsid.val[1] != fsid->val[1])
> > -               return -EXDEV;
> > +       if (!fanotify_fsid_equal(&root_fsid, &fsid->id))
> > +               return fsid->weak ? 0 : -EXDEV;
> >
> > +       fsid->weak = false;
> >         return 0;
> >  }
> >
> > @@ -1675,7 +1738,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
> >         struct fsnotify_group *group;
> >         struct fd f;
> >         struct path path;
> > -       __kernel_fsid_t __fsid, *fsid = NULL;
> > +       struct fan_fsid __fsid, *fsid = NULL;
> >         u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
> >         unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
> >         unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
> > @@ -1827,7 +1890,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
> >         }
> >
> >         if (fid_mode) {
> > -               ret = fanotify_test_fsid(path.dentry, &__fsid);
> > +               ret = fanotify_test_fsid(path.dentry, flags, &__fsid);
> >                 if (ret)
> >                         goto path_put_and_out;
> >
> > diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
> > index a80b525ca653..7f63be5ca0f1 100644
> > --- a/include/linux/fsnotify_backend.h
> > +++ b/include/linux/fsnotify_backend.h
> > @@ -529,6 +529,7 @@ struct fsnotify_mark {
> >  #define FSNOTIFY_MARK_FLAG_NO_IREF             0x0200
> >  #define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS    0x0400
> >  #define FSNOTIFY_MARK_FLAG_HAS_FSID            0x0800
> > +#define FSNOTIFY_MARK_FLAG_WEAK_FSID           0x1000
> >         unsigned int flags;             /* flags [mark->lock] */
> >  };
> >
> > --
> > 2.34.1
> >
Amir Goldstein Nov. 30, 2023, 12:42 p.m. UTC | #5
On Tue, Nov 21, 2023 at 3:33 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Mon, Nov 20, 2023 at 9:42 AM Amir Goldstein <amir73il@gmail.com> wrote:
> >
> > On Sat, Nov 18, 2023 at 8:30 PM Amir Goldstein <amir73il@gmail.com> wrote:
> > >
> > > So far, fanotify returns -ENODEV or -EXDEV when trying to set a mark
> > > on a filesystem with a "weak" fsid, namely, zero fsid (e.g. fuse), or
> > > non-uniform fsid (e.g. btrfs non-root subvol).
> > >
> > > When group is watching inodes all from the same filesystem (or subvol),
> > > allow adding inode marks with "weak" fsid, because there is no ambiguity
> > > regarding which filesystem reports the event.
> > >
> > > The first mark added to a group determines if this group is single or
> > > multi filesystem, depending on the fsid at the path of the added mark.
> > >
> > > If the first mark added has a "strong" fsid, marks with "weak" fsid
> > > cannot be added and vice versa.
> > >
> > > If the first mark added has a "weak" fsid, following marks must have
> > > the same "weak" fsid and the same sb as the first mark.
> > >
> > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > ---
> > >  fs/notify/fanotify/fanotify.c      | 15 +----
> > >  fs/notify/fanotify/fanotify.h      |  6 ++
> > >  fs/notify/fanotify/fanotify_user.c | 97 ++++++++++++++++++++++++------
> > >  include/linux/fsnotify_backend.h   |  1 +
> > >  4 files changed, 90 insertions(+), 29 deletions(-)
> > >
> > > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> > > index aff1ab3c32aa..1e4def21811e 100644
> > > --- a/fs/notify/fanotify/fanotify.c
> > > +++ b/fs/notify/fanotify/fanotify.c
> > > @@ -29,12 +29,6 @@ static unsigned int fanotify_hash_path(const struct path *path)
> > >                 hash_ptr(path->mnt, FANOTIFY_EVENT_HASH_BITS);
> > >  }
> > >
> > > -static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1,
> > > -                                      __kernel_fsid_t *fsid2)
> > > -{
> > > -       return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1];
> > > -}
> > > -
> > >  static unsigned int fanotify_hash_fsid(__kernel_fsid_t *fsid)
> > >  {
> > >         return hash_32(fsid->val[0], FANOTIFY_EVENT_HASH_BITS) ^
> > > @@ -851,7 +845,8 @@ static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
> > >                 if (!(mark->flags & FSNOTIFY_MARK_FLAG_HAS_FSID))
> > >                         continue;
> > >                 fsid = FANOTIFY_MARK(mark)->fsid;
> > > -               if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
> > > +               if (!(mark->flags & FSNOTIFY_MARK_FLAG_WEAK_FSID) &&
> > > +                   WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
> > >                         continue;
> > >                 return fsid;
> > >         }
> > > @@ -933,12 +928,8 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
> > >                         return 0;
> > >         }
> > >
> > > -       if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) {
> > > +       if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS))
> > >                 fsid = fanotify_get_fsid(iter_info);
> > > -               /* Racing with mark destruction or creation? */
> > > -               if (!fsid.val[0] && !fsid.val[1])
> > > -                       return 0;
> > > -       }
> > >
> > >         event = fanotify_alloc_event(group, mask, data, data_type, dir,
> > >                                      file_name, &fsid, match_mask);
> > > diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
> > > index 2847fa564298..9c97ae638ac5 100644
> > > --- a/fs/notify/fanotify/fanotify.h
> > > +++ b/fs/notify/fanotify/fanotify.h
> > > @@ -504,6 +504,12 @@ static inline __kernel_fsid_t *fanotify_mark_fsid(struct fsnotify_mark *mark)
> > >         return &FANOTIFY_MARK(mark)->fsid;
> > >  }
> > >
> > > +static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1,
> > > +                                      __kernel_fsid_t *fsid2)
> > > +{
> > > +       return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1];
> > > +}
> > > +
> > >  static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
> > >  {
> > >         unsigned int mflags = 0;
> > > diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
> > > index e3d836d4d156..1cc68ea56c17 100644
> > > --- a/fs/notify/fanotify/fanotify_user.c
> > > +++ b/fs/notify/fanotify/fanotify_user.c
> > > @@ -23,7 +23,7 @@
> > >
> > >  #include <asm/ioctls.h>
> > >
> > > -#include "../../mount.h"
> > > +#include "../fsnotify.h"
> > >  #include "../fdinfo.h"
> > >  #include "fanotify.h"
> > >
> > > @@ -1192,11 +1192,68 @@ static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
> > >         return recalc;
> > >  }
> > >
> > > +struct fan_fsid {
> > > +       struct super_block *sb;
> > > +       __kernel_fsid_t id;
> > > +       bool weak;
> > > +};
> > > +
> > > +static int fanotify_check_mark_fsid(struct fsnotify_group *group,
> > > +                                   struct fsnotify_mark *mark,
> > > +                                   struct fan_fsid *fsid)
> > > +{
> > > +       struct fsnotify_mark_connector *conn;
> > > +       struct fsnotify_mark *old;
> > > +       struct super_block *old_sb = NULL;
> > > +
> > > +       *fanotify_mark_fsid(mark) = fsid->id;
> > > +       mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID;
> > > +       if (fsid->weak)
> > > +               mark->flags |= FSNOTIFY_MARK_FLAG_WEAK_FSID;
> > > +
> > > +       /* First mark added will determine if group is single or multi fsid */
> > > +       if (list_empty(&group->marks_list))
> > > +               return 0;
> > > +
> > > +       /* Find sb of an existing mark */
> > > +       list_for_each_entry(old, &group->marks_list, g_list) {
> > > +               conn = READ_ONCE(old->connector);
> > > +               if (!conn)
> > > +                       continue;
> > > +               old_sb = fsnotify_connector_sb(conn);
> > > +               if (old_sb)
> > > +                       break;
> > > +       }
> > > +
> > > +       /* Only detached marks left? */
> > > +       if (!old_sb)
> > > +               return 0;
> > > +
> > > +       /* Do not allow mixing of marks with weak and strong fsid */
> > > +       if ((mark->flags ^ old->flags) & FSNOTIFY_MARK_FLAG_WEAK_FSID)
> > > +               return -EXDEV;
> > > +
> > > +       /* Allow mixing of marks with strong fsid from different fs */
> > > +       if (!fsid->weak)
> > > +               return 0;
> > > +
> > > +       /* Do not allow mixing marks with weak fsid from different fs */
> > > +       if (old_sb != fsid->sb)
> > > +               return -EXDEV;
> > > +
> > > +       /* Do not allow mixing marks from different btrfs sub-volumes */
> > > +       if (!fanotify_fsid_equal(fanotify_mark_fsid(old),
> > > +                                fanotify_mark_fsid(mark)))
> > > +               return -EXDEV;
> > > +
> > > +       return 0;
> > > +}
> > > +
> > >  static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
> > >                                                    fsnotify_connp_t *connp,
> > >                                                    unsigned int obj_type,
> > >                                                    unsigned int fan_flags,
> > > -                                                  __kernel_fsid_t *fsid)
> > > +                                                  struct fan_fsid *fsid)
> > >  {
> > >         struct ucounts *ucounts = group->fanotify_data.ucounts;
> > >         struct fanotify_mark *fan_mark;
> > > @@ -1225,8 +1282,9 @@ static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
> > >
> > >         /* Cache fsid of filesystem containing the marked object */
> > >         if (fsid) {
> > > -               fan_mark->fsid = *fsid;
> > > -               mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID;
> > > +               ret = fanotify_check_mark_fsid(group, mark, fsid);
> > > +               if (ret)
> >
> > OOPS, missed fsnotify_put_mark(mark);
> > better add a goto target out_put_mark as this is the second case now.
>
> FYI, I pushed the fix to:
> https://github.com/amir73il/linux/commits/fanotify_fsid
>
> Let me know if you want me to post v2 for this.
>

Hi Jan,

Ping.

Reminder, I have LTP tests for testing fs with "weak" fsid [2].
After your LTP patches are merged, I will rebase them.

The way that I tested fs with "weak" fsid is that LTP tests
the ntfs-3g FUSE filesystem with .all_filesystems = 1.

With this work, the tests started to run on FUSE and skip
the test cases of sb/mount marks.

Thanks,
Amir.

[2] https://github.com/amir73il/tlp/commits/fanotify_fsid
Jan Kara Nov. 30, 2023, 3:47 p.m. UTC | #6
On Sat 18-11-23 20:30:18, Amir Goldstein wrote:
> So far, fanotify returns -ENODEV or -EXDEV when trying to set a mark
> on a filesystem with a "weak" fsid, namely, zero fsid (e.g. fuse), or
> non-uniform fsid (e.g. btrfs non-root subvol).
> 
> When group is watching inodes all from the same filesystem (or subvol),
> allow adding inode marks with "weak" fsid, because there is no ambiguity
> regarding which filesystem reports the event.
> 
> The first mark added to a group determines if this group is single or
> multi filesystem, depending on the fsid at the path of the added mark.
> 
> If the first mark added has a "strong" fsid, marks with "weak" fsid
> cannot be added and vice versa.
> 
> If the first mark added has a "weak" fsid, following marks must have
> the same "weak" fsid and the same sb as the first mark.
> 
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>

Looks mostly good to me, some small comments below.

> @@ -1192,11 +1192,68 @@ static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
>  	return recalc;
>  }
>  
> +struct fan_fsid {
> +	struct super_block *sb;
> +	__kernel_fsid_t id;
> +	bool weak;
> +};
> +
> +static int fanotify_check_mark_fsid(struct fsnotify_group *group,
> +				    struct fsnotify_mark *mark,
> +				    struct fan_fsid *fsid)

I'd call this fanotify_set_mark_fsid() because that's what it does and
as part of that it also checks whether it can actually set the fsid...

> @@ -1564,20 +1622,25 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
>  	return fd;
>  }
>  
> -static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
> +static int fanotify_test_fsid(struct dentry *dentry, unsigned int flags,
> +			      struct fan_fsid *fsid)
>  {
> +	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
>  	__kernel_fsid_t root_fsid;
>  	int err;
>  
>  	/*
>  	 * Make sure dentry is not of a filesystem with zero fsid (e.g. fuse).
>  	 */
> -	err = vfs_get_fsid(dentry, fsid);
> +	err = vfs_get_fsid(dentry, &fsid->id);
>  	if (err)
>  		return err;
>  
> -	if (!fsid->val[0] && !fsid->val[1])
> -		return -ENODEV;
> +	/* Allow weak fsid when marking inodes */
> +	fsid->sb = dentry->d_sb;
> +	fsid->weak = mark_type == FAN_MARK_INODE;
> +	if (!fsid->id.val[0] && !fsid->id.val[1])
> +		return fsid->weak ? 0 : -ENODEV;
>  
>  	/*
>  	 * Make sure dentry is not of a filesystem subvolume (e.g. btrfs)
> @@ -1587,10 +1650,10 @@ static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
>  	if (err)
>  		return err;
>  
> -	if (root_fsid.val[0] != fsid->val[0] ||
> -	    root_fsid.val[1] != fsid->val[1])
> -		return -EXDEV;
> +	if (!fanotify_fsid_equal(&root_fsid, &fsid->id))
> +		return fsid->weak ? 0 : -EXDEV;
>  
> +	fsid->weak = false;
>  	return 0;
>  }

So the handling of 'weak' confuses me here as it combines determining
whether fsid is weak with determining whether we accept it or not. Maybe my
brain is just fried... Can we maybe simplify to something like:

	fsid->sb = dentry->d_sb;
	if (!fsid->id.val[0] && !fsid->id.val[1]) {
		fsid->weak = true;
		goto check;
	}

	... fetch root_fsid ...

	if (!fanotify_fsid_equal(&root_fsid, &fsid->id))
		fsid->weak = true;

check:
	/* Allow weak fsids only for inode marks... */
	if (fsid->weak && mark_type != FAN_MARK_INODE)
		return -EXDEV;
	return 0;

This is how I understand the logic from your description but I'm not 100%
sure this is equivalent to your code above ;).

Thanks!

								Honza
Amir Goldstein Nov. 30, 2023, 4:28 p.m. UTC | #7
On Thu, Nov 30, 2023 at 5:47 PM Jan Kara <jack@suse.cz> wrote:
>
> On Sat 18-11-23 20:30:18, Amir Goldstein wrote:
> > So far, fanotify returns -ENODEV or -EXDEV when trying to set a mark
> > on a filesystem with a "weak" fsid, namely, zero fsid (e.g. fuse), or
> > non-uniform fsid (e.g. btrfs non-root subvol).
> >
> > When group is watching inodes all from the same filesystem (or subvol),
> > allow adding inode marks with "weak" fsid, because there is no ambiguity
> > regarding which filesystem reports the event.
> >
> > The first mark added to a group determines if this group is single or
> > multi filesystem, depending on the fsid at the path of the added mark.
> >
> > If the first mark added has a "strong" fsid, marks with "weak" fsid
> > cannot be added and vice versa.
> >
> > If the first mark added has a "weak" fsid, following marks must have
> > the same "weak" fsid and the same sb as the first mark.
> >
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
>
> Looks mostly good to me, some small comments below.
>
> > @@ -1192,11 +1192,68 @@ static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
> >       return recalc;
> >  }
> >
> > +struct fan_fsid {
> > +     struct super_block *sb;
> > +     __kernel_fsid_t id;
> > +     bool weak;
> > +};
> > +
> > +static int fanotify_check_mark_fsid(struct fsnotify_group *group,
> > +                                 struct fsnotify_mark *mark,
> > +                                 struct fan_fsid *fsid)
>
> I'd call this fanotify_set_mark_fsid() because that's what it does and
> as part of that it also checks whether it can actually set the fsid...

ok.

>
> > @@ -1564,20 +1622,25 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
> >       return fd;
> >  }
> >
> > -static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
> > +static int fanotify_test_fsid(struct dentry *dentry, unsigned int flags,
> > +                           struct fan_fsid *fsid)
> >  {
> > +     unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
> >       __kernel_fsid_t root_fsid;
> >       int err;
> >
> >       /*
> >        * Make sure dentry is not of a filesystem with zero fsid (e.g. fuse).
> >        */
> > -     err = vfs_get_fsid(dentry, fsid);
> > +     err = vfs_get_fsid(dentry, &fsid->id);
> >       if (err)
> >               return err;
> >
> > -     if (!fsid->val[0] && !fsid->val[1])
> > -             return -ENODEV;
> > +     /* Allow weak fsid when marking inodes */
> > +     fsid->sb = dentry->d_sb;
> > +     fsid->weak = mark_type == FAN_MARK_INODE;
> > +     if (!fsid->id.val[0] && !fsid->id.val[1])
> > +             return fsid->weak ? 0 : -ENODEV;
> >
> >       /*
> >        * Make sure dentry is not of a filesystem subvolume (e.g. btrfs)
> > @@ -1587,10 +1650,10 @@ static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
> >       if (err)
> >               return err;
> >
> > -     if (root_fsid.val[0] != fsid->val[0] ||
> > -         root_fsid.val[1] != fsid->val[1])
> > -             return -EXDEV;
> > +     if (!fanotify_fsid_equal(&root_fsid, &fsid->id))
> > +             return fsid->weak ? 0 : -EXDEV;
> >
> > +     fsid->weak = false;
> >       return 0;
> >  }
>
> So the handling of 'weak' confuses me here as it combines determining
> whether fsid is weak with determining whether we accept it or not. Maybe my
> brain is just fried... Can we maybe simplify to something like:
>
>         fsid->sb = dentry->d_sb;
>         if (!fsid->id.val[0] && !fsid->id.val[1]) {
>                 fsid->weak = true;
>                 goto check;
>         }
>
>         ... fetch root_fsid ...
>
>         if (!fanotify_fsid_equal(&root_fsid, &fsid->id))
>                 fsid->weak = true;
>
> check:
>         /* Allow weak fsids only for inode marks... */
>         if (fsid->weak && mark_type != FAN_MARK_INODE)
>                 return -EXDEV;
>         return 0;
>
> This is how I understand the logic from your description but I'm not 100%
> sure this is equivalent to your code above ;).

Almost.
The return value for FUSE + sb mark is ENODEV.

The code below should work and be readable.

Thanks,
Amir.

        fsid->sb = dentry->d_sb;
        if (!fsid->id.val[0] && !fsid->id.val[1]) {
                err = -ENODEV;
                goto weak;
        }

... fetch root_fsid ...

        if (!fanotify_fsid_equal(&root_fsid, &fsid->id)) {
                err = -EXDEV;
                goto weak;
        }

        return 0;

weak:
        /* Allow weak fsid when marking inodes */
        fsid->weak = true;
        return (mark_type == FAN_MARK_INODE) ? 0 : err;
}
diff mbox series

Patch

diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index aff1ab3c32aa..1e4def21811e 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -29,12 +29,6 @@  static unsigned int fanotify_hash_path(const struct path *path)
 		hash_ptr(path->mnt, FANOTIFY_EVENT_HASH_BITS);
 }
 
-static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1,
-				       __kernel_fsid_t *fsid2)
-{
-	return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1];
-}
-
 static unsigned int fanotify_hash_fsid(__kernel_fsid_t *fsid)
 {
 	return hash_32(fsid->val[0], FANOTIFY_EVENT_HASH_BITS) ^
@@ -851,7 +845,8 @@  static __kernel_fsid_t fanotify_get_fsid(struct fsnotify_iter_info *iter_info)
 		if (!(mark->flags & FSNOTIFY_MARK_FLAG_HAS_FSID))
 			continue;
 		fsid = FANOTIFY_MARK(mark)->fsid;
-		if (WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
+		if (!(mark->flags & FSNOTIFY_MARK_FLAG_WEAK_FSID) &&
+		    WARN_ON_ONCE(!fsid.val[0] && !fsid.val[1]))
 			continue;
 		return fsid;
 	}
@@ -933,12 +928,8 @@  static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
 			return 0;
 	}
 
-	if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) {
+	if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS))
 		fsid = fanotify_get_fsid(iter_info);
-		/* Racing with mark destruction or creation? */
-		if (!fsid.val[0] && !fsid.val[1])
-			return 0;
-	}
 
 	event = fanotify_alloc_event(group, mask, data, data_type, dir,
 				     file_name, &fsid, match_mask);
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index 2847fa564298..9c97ae638ac5 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -504,6 +504,12 @@  static inline __kernel_fsid_t *fanotify_mark_fsid(struct fsnotify_mark *mark)
 	return &FANOTIFY_MARK(mark)->fsid;
 }
 
+static inline bool fanotify_fsid_equal(__kernel_fsid_t *fsid1,
+				       __kernel_fsid_t *fsid2)
+{
+	return fsid1->val[0] == fsid2->val[0] && fsid1->val[1] == fsid2->val[1];
+}
+
 static inline unsigned int fanotify_mark_user_flags(struct fsnotify_mark *mark)
 {
 	unsigned int mflags = 0;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index e3d836d4d156..1cc68ea56c17 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -23,7 +23,7 @@ 
 
 #include <asm/ioctls.h>
 
-#include "../../mount.h"
+#include "../fsnotify.h"
 #include "../fdinfo.h"
 #include "fanotify.h"
 
@@ -1192,11 +1192,68 @@  static bool fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
 	return recalc;
 }
 
+struct fan_fsid {
+	struct super_block *sb;
+	__kernel_fsid_t id;
+	bool weak;
+};
+
+static int fanotify_check_mark_fsid(struct fsnotify_group *group,
+				    struct fsnotify_mark *mark,
+				    struct fan_fsid *fsid)
+{
+	struct fsnotify_mark_connector *conn;
+	struct fsnotify_mark *old;
+	struct super_block *old_sb = NULL;
+
+	*fanotify_mark_fsid(mark) = fsid->id;
+	mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID;
+	if (fsid->weak)
+		mark->flags |= FSNOTIFY_MARK_FLAG_WEAK_FSID;
+
+	/* First mark added will determine if group is single or multi fsid */
+	if (list_empty(&group->marks_list))
+		return 0;
+
+	/* Find sb of an existing mark */
+	list_for_each_entry(old, &group->marks_list, g_list) {
+		conn = READ_ONCE(old->connector);
+		if (!conn)
+			continue;
+		old_sb = fsnotify_connector_sb(conn);
+		if (old_sb)
+			break;
+	}
+
+	/* Only detached marks left? */
+	if (!old_sb)
+		return 0;
+
+	/* Do not allow mixing of marks with weak and strong fsid */
+	if ((mark->flags ^ old->flags) & FSNOTIFY_MARK_FLAG_WEAK_FSID)
+		return -EXDEV;
+
+	/* Allow mixing of marks with strong fsid from different fs */
+	if (!fsid->weak)
+		return 0;
+
+	/* Do not allow mixing marks with weak fsid from different fs */
+	if (old_sb != fsid->sb)
+		return -EXDEV;
+
+	/* Do not allow mixing marks from different btrfs sub-volumes */
+	if (!fanotify_fsid_equal(fanotify_mark_fsid(old),
+				 fanotify_mark_fsid(mark)))
+		return -EXDEV;
+
+	return 0;
+}
+
 static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
 						   fsnotify_connp_t *connp,
 						   unsigned int obj_type,
 						   unsigned int fan_flags,
-						   __kernel_fsid_t *fsid)
+						   struct fan_fsid *fsid)
 {
 	struct ucounts *ucounts = group->fanotify_data.ucounts;
 	struct fanotify_mark *fan_mark;
@@ -1225,8 +1282,9 @@  static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
 
 	/* Cache fsid of filesystem containing the marked object */
 	if (fsid) {
-		fan_mark->fsid = *fsid;
-		mark->flags |= FSNOTIFY_MARK_FLAG_HAS_FSID;
+		ret = fanotify_check_mark_fsid(group, mark, fsid);
+		if (ret)
+			goto out_dec_ucounts;
 	} else {
 		fan_mark->fsid.val[0] = fan_mark->fsid.val[1] = 0;
 	}
@@ -1289,7 +1347,7 @@  static int fanotify_may_update_existing_mark(struct fsnotify_mark *fsn_mark,
 static int fanotify_add_mark(struct fsnotify_group *group,
 			     fsnotify_connp_t *connp, unsigned int obj_type,
 			     __u32 mask, unsigned int fan_flags,
-			     __kernel_fsid_t *fsid)
+			     struct fan_fsid *fsid)
 {
 	struct fsnotify_mark *fsn_mark;
 	bool recalc;
@@ -1337,7 +1395,7 @@  static int fanotify_add_mark(struct fsnotify_group *group,
 
 static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 				      struct vfsmount *mnt, __u32 mask,
-				      unsigned int flags, __kernel_fsid_t *fsid)
+				      unsigned int flags, struct fan_fsid *fsid)
 {
 	return fanotify_add_mark(group, &real_mount(mnt)->mnt_fsnotify_marks,
 				 FSNOTIFY_OBJ_TYPE_VFSMOUNT, mask, flags, fsid);
@@ -1345,7 +1403,7 @@  static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 
 static int fanotify_add_sb_mark(struct fsnotify_group *group,
 				struct super_block *sb, __u32 mask,
-				unsigned int flags, __kernel_fsid_t *fsid)
+				unsigned int flags, struct fan_fsid *fsid)
 {
 	return fanotify_add_mark(group, &sb->s_fsnotify_marks,
 				 FSNOTIFY_OBJ_TYPE_SB, mask, flags, fsid);
@@ -1353,7 +1411,7 @@  static int fanotify_add_sb_mark(struct fsnotify_group *group,
 
 static int fanotify_add_inode_mark(struct fsnotify_group *group,
 				   struct inode *inode, __u32 mask,
-				   unsigned int flags, __kernel_fsid_t *fsid)
+				   unsigned int flags, struct fan_fsid *fsid)
 {
 	pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
 
@@ -1564,20 +1622,25 @@  SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	return fd;
 }
 
-static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
+static int fanotify_test_fsid(struct dentry *dentry, unsigned int flags,
+			      struct fan_fsid *fsid)
 {
+	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
 	__kernel_fsid_t root_fsid;
 	int err;
 
 	/*
 	 * Make sure dentry is not of a filesystem with zero fsid (e.g. fuse).
 	 */
-	err = vfs_get_fsid(dentry, fsid);
+	err = vfs_get_fsid(dentry, &fsid->id);
 	if (err)
 		return err;
 
-	if (!fsid->val[0] && !fsid->val[1])
-		return -ENODEV;
+	/* Allow weak fsid when marking inodes */
+	fsid->sb = dentry->d_sb;
+	fsid->weak = mark_type == FAN_MARK_INODE;
+	if (!fsid->id.val[0] && !fsid->id.val[1])
+		return fsid->weak ? 0 : -ENODEV;
 
 	/*
 	 * Make sure dentry is not of a filesystem subvolume (e.g. btrfs)
@@ -1587,10 +1650,10 @@  static int fanotify_test_fsid(struct dentry *dentry, __kernel_fsid_t *fsid)
 	if (err)
 		return err;
 
-	if (root_fsid.val[0] != fsid->val[0] ||
-	    root_fsid.val[1] != fsid->val[1])
-		return -EXDEV;
+	if (!fanotify_fsid_equal(&root_fsid, &fsid->id))
+		return fsid->weak ? 0 : -EXDEV;
 
+	fsid->weak = false;
 	return 0;
 }
 
@@ -1675,7 +1738,7 @@  static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	struct fsnotify_group *group;
 	struct fd f;
 	struct path path;
-	__kernel_fsid_t __fsid, *fsid = NULL;
+	struct fan_fsid __fsid, *fsid = NULL;
 	u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
 	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
 	unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
@@ -1827,7 +1890,7 @@  static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	}
 
 	if (fid_mode) {
-		ret = fanotify_test_fsid(path.dentry, &__fsid);
+		ret = fanotify_test_fsid(path.dentry, flags, &__fsid);
 		if (ret)
 			goto path_put_and_out;
 
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index a80b525ca653..7f63be5ca0f1 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -529,6 +529,7 @@  struct fsnotify_mark {
 #define FSNOTIFY_MARK_FLAG_NO_IREF		0x0200
 #define FSNOTIFY_MARK_FLAG_HAS_IGNORE_FLAGS	0x0400
 #define FSNOTIFY_MARK_FLAG_HAS_FSID		0x0800
+#define FSNOTIFY_MARK_FLAG_WEAK_FSID		0x1000
 	unsigned int flags;		/* flags [mark->lock] */
 };