diff mbox series

[07/10] fsnotify: lazy attach fsnotify_sb_info state to sb

Message ID 20240317184154.1200192-8-amir73il@gmail.com (mailing list archive)
State New
Headers show
Series Further reduce overhead of fsnotify permission hooks | expand

Commit Message

Amir Goldstein March 17, 2024, 6:41 p.m. UTC
Define a container struct fsnotify_sb_info to hold per-sb state,
including the reference to sb marks connector.

Allocate the fsnotify_sb_info state before attaching connector to any
object on the sb and free it only when killing sb.

This state is going to be used for storing per priority watched objects
counters.

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/notify/fsnotify.c             | 16 +++++++++++++---
 fs/notify/fsnotify.h             |  9 ++++++++-
 fs/notify/mark.c                 | 32 +++++++++++++++++++++++++++++++-
 include/linux/fs.h               |  8 ++++----
 include/linux/fsnotify_backend.h | 17 +++++++++++++++++
 5 files changed, 73 insertions(+), 9 deletions(-)

Comments

Christian Brauner March 20, 2024, 8:47 a.m. UTC | #1
On Sun, Mar 17, 2024 at 08:41:51PM +0200, Amir Goldstein wrote:
> Define a container struct fsnotify_sb_info to hold per-sb state,
> including the reference to sb marks connector.
> 
> Allocate the fsnotify_sb_info state before attaching connector to any
> object on the sb and free it only when killing sb.
> 
> This state is going to be used for storing per priority watched objects
> counters.
> 
> Suggested-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> ---
>  fs/notify/fsnotify.c             | 16 +++++++++++++---
>  fs/notify/fsnotify.h             |  9 ++++++++-
>  fs/notify/mark.c                 | 32 +++++++++++++++++++++++++++++++-
>  include/linux/fs.h               |  8 ++++----
>  include/linux/fsnotify_backend.h | 17 +++++++++++++++++
>  5 files changed, 73 insertions(+), 9 deletions(-)
> 
> diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
> index 503e7c75e777..fb3f36bc6ea9 100644
> --- a/fs/notify/fsnotify.c
> +++ b/fs/notify/fsnotify.c
> @@ -89,11 +89,18 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
>  
>  void fsnotify_sb_delete(struct super_block *sb)
>  {
> +	struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
> +
> +	/* Were any marks ever added to any object on this sb? */
> +	if (!sbinfo)
> +		return;
> +
>  	fsnotify_unmount_inodes(sb);
>  	fsnotify_clear_marks_by_sb(sb);
>  	/* Wait for outstanding object references from connectors */
>  	wait_var_event(fsnotify_sb_watched_objects(sb),
>  		       !atomic_long_read(fsnotify_sb_watched_objects(sb)));
> +	kfree(sbinfo);
>  }
>  
>  /*
> @@ -489,6 +496,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
>  {
>  	const struct path *path = fsnotify_data_path(data, data_type);
>  	struct super_block *sb = fsnotify_data_sb(data, data_type);
> +	struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
>  	struct fsnotify_iter_info iter_info = {};
>  	struct mount *mnt = NULL;
>  	struct inode *inode2 = NULL;
> @@ -525,7 +533,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
>  	 * SRCU because we have no references to any objects and do not
>  	 * need SRCU to keep them "alive".
>  	 */
> -	if (!sb->s_fsnotify_marks &&
> +	if ((!sbinfo || !sbinfo->sb_marks) &&
>  	    (!mnt || !mnt->mnt_fsnotify_marks) &&
>  	    (!inode || !inode->i_fsnotify_marks) &&
>  	    (!inode2 || !inode2->i_fsnotify_marks))
> @@ -552,8 +560,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
>  
>  	iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
>  
> -	iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
> -		fsnotify_first_mark(&sb->s_fsnotify_marks);
> +	if (sbinfo) {
> +		iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
> +			fsnotify_first_mark(&sbinfo->sb_marks);
> +	}
>  	if (mnt) {
>  		iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] =
>  			fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
> diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
> index 8b73ad45cc71..378f9ec6d64b 100644
> --- a/fs/notify/fsnotify.h
> +++ b/fs/notify/fsnotify.h
> @@ -53,6 +53,13 @@ static inline struct super_block *fsnotify_connector_sb(
>  	return fsnotify_object_sb(conn->obj, conn->type);
>  }
>  
> +static inline fsnotify_connp_t *fsnotify_sb_marks(struct super_block *sb)
> +{
> +	struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
> +
> +	return sbinfo ? &sbinfo->sb_marks : NULL;
> +}
> +
>  /* destroy all events sitting in this groups notification queue */
>  extern void fsnotify_flush_notify(struct fsnotify_group *group);
>  
> @@ -78,7 +85,7 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
>  /* run the list of all marks associated with sb and destroy them */
>  static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
>  {
> -	fsnotify_destroy_marks(&sb->s_fsnotify_marks);
> +	fsnotify_destroy_marks(fsnotify_sb_marks(sb));
>  }
>  
>  /*
> diff --git a/fs/notify/mark.c b/fs/notify/mark.c
> index 0b703f9e6344..db053e0e218d 100644
> --- a/fs/notify/mark.c
> +++ b/fs/notify/mark.c
> @@ -105,7 +105,7 @@ static fsnotify_connp_t *fsnotify_object_connp(void *obj, int obj_type)
>  	case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
>  		return &real_mount(obj)->mnt_fsnotify_marks;
>  	case FSNOTIFY_OBJ_TYPE_SB:
> -		return &((struct super_block *)obj)->s_fsnotify_marks;
> +		return fsnotify_sb_marks(obj);
>  	default:
>  		return NULL;
>  	}
> @@ -568,6 +568,26 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
>  	return -1;
>  }
>  
> +static int fsnotify_attach_info_to_sb(struct super_block *sb)
> +{
> +	struct fsnotify_sb_info *sbinfo;
> +
> +	/* sb info is freed on fsnotify_sb_delete() */
> +	sbinfo = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
> +	if (!sbinfo)
> +		return -ENOMEM;
> +
> +	/*
> +	 * cmpxchg() provides the barrier so that callers of fsnotify_sb_info()
> +	 * will observe an initialized structure
> +	 */
> +	if (cmpxchg(&sb->s_fsnotify_info, NULL, sbinfo)) {
> +		/* Someone else created sbinfo for us */
> +		kfree(sbinfo);
> +	}

Alternatively, you could consider using wait_var_event() to let
concurrent attachers wait for s_fsnotify_info to be initialized using a
sentinel value to indicate that the caller should wait. But not sure if
it's worth it.
Amir Goldstein March 20, 2024, 9:37 a.m. UTC | #2
On Wed, Mar 20, 2024 at 10:47 AM Christian Brauner <brauner@kernel.org> wrote:
>
> On Sun, Mar 17, 2024 at 08:41:51PM +0200, Amir Goldstein wrote:
> > Define a container struct fsnotify_sb_info to hold per-sb state,
> > including the reference to sb marks connector.
> >
> > Allocate the fsnotify_sb_info state before attaching connector to any
> > object on the sb and free it only when killing sb.
> >
> > This state is going to be used for storing per priority watched objects
> > counters.
> >
> > Suggested-by: Jan Kara <jack@suse.cz>
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > ---
> >  fs/notify/fsnotify.c             | 16 +++++++++++++---
> >  fs/notify/fsnotify.h             |  9 ++++++++-
> >  fs/notify/mark.c                 | 32 +++++++++++++++++++++++++++++++-
> >  include/linux/fs.h               |  8 ++++----
> >  include/linux/fsnotify_backend.h | 17 +++++++++++++++++
> >  5 files changed, 73 insertions(+), 9 deletions(-)
> >
> > diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
> > index 503e7c75e777..fb3f36bc6ea9 100644
> > --- a/fs/notify/fsnotify.c
> > +++ b/fs/notify/fsnotify.c
> > @@ -89,11 +89,18 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
> >
> >  void fsnotify_sb_delete(struct super_block *sb)
> >  {
> > +     struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
> > +
> > +     /* Were any marks ever added to any object on this sb? */
> > +     if (!sbinfo)
> > +             return;
> > +
> >       fsnotify_unmount_inodes(sb);
> >       fsnotify_clear_marks_by_sb(sb);
> >       /* Wait for outstanding object references from connectors */
> >       wait_var_event(fsnotify_sb_watched_objects(sb),
> >                      !atomic_long_read(fsnotify_sb_watched_objects(sb)));
> > +     kfree(sbinfo);
> >  }
> >
> >  /*
> > @@ -489,6 +496,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
> >  {
> >       const struct path *path = fsnotify_data_path(data, data_type);
> >       struct super_block *sb = fsnotify_data_sb(data, data_type);
> > +     struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
> >       struct fsnotify_iter_info iter_info = {};
> >       struct mount *mnt = NULL;
> >       struct inode *inode2 = NULL;
> > @@ -525,7 +533,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
> >        * SRCU because we have no references to any objects and do not
> >        * need SRCU to keep them "alive".
> >        */
> > -     if (!sb->s_fsnotify_marks &&
> > +     if ((!sbinfo || !sbinfo->sb_marks) &&
> >           (!mnt || !mnt->mnt_fsnotify_marks) &&
> >           (!inode || !inode->i_fsnotify_marks) &&
> >           (!inode2 || !inode2->i_fsnotify_marks))
> > @@ -552,8 +560,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
> >
> >       iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
> >
> > -     iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
> > -             fsnotify_first_mark(&sb->s_fsnotify_marks);
> > +     if (sbinfo) {
> > +             iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
> > +                     fsnotify_first_mark(&sbinfo->sb_marks);
> > +     }
> >       if (mnt) {
> >               iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] =
> >                       fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
> > diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
> > index 8b73ad45cc71..378f9ec6d64b 100644
> > --- a/fs/notify/fsnotify.h
> > +++ b/fs/notify/fsnotify.h
> > @@ -53,6 +53,13 @@ static inline struct super_block *fsnotify_connector_sb(
> >       return fsnotify_object_sb(conn->obj, conn->type);
> >  }
> >
> > +static inline fsnotify_connp_t *fsnotify_sb_marks(struct super_block *sb)
> > +{
> > +     struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
> > +
> > +     return sbinfo ? &sbinfo->sb_marks : NULL;
> > +}
> > +
> >  /* destroy all events sitting in this groups notification queue */
> >  extern void fsnotify_flush_notify(struct fsnotify_group *group);
> >
> > @@ -78,7 +85,7 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
> >  /* run the list of all marks associated with sb and destroy them */
> >  static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
> >  {
> > -     fsnotify_destroy_marks(&sb->s_fsnotify_marks);
> > +     fsnotify_destroy_marks(fsnotify_sb_marks(sb));
> >  }
> >
> >  /*
> > diff --git a/fs/notify/mark.c b/fs/notify/mark.c
> > index 0b703f9e6344..db053e0e218d 100644
> > --- a/fs/notify/mark.c
> > +++ b/fs/notify/mark.c
> > @@ -105,7 +105,7 @@ static fsnotify_connp_t *fsnotify_object_connp(void *obj, int obj_type)
> >       case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
> >               return &real_mount(obj)->mnt_fsnotify_marks;
> >       case FSNOTIFY_OBJ_TYPE_SB:
> > -             return &((struct super_block *)obj)->s_fsnotify_marks;
> > +             return fsnotify_sb_marks(obj);
> >       default:
> >               return NULL;
> >       }
> > @@ -568,6 +568,26 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
> >       return -1;
> >  }
> >
> > +static int fsnotify_attach_info_to_sb(struct super_block *sb)
> > +{
> > +     struct fsnotify_sb_info *sbinfo;
> > +
> > +     /* sb info is freed on fsnotify_sb_delete() */
> > +     sbinfo = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
> > +     if (!sbinfo)
> > +             return -ENOMEM;
> > +
> > +     /*
> > +      * cmpxchg() provides the barrier so that callers of fsnotify_sb_info()
> > +      * will observe an initialized structure
> > +      */
> > +     if (cmpxchg(&sb->s_fsnotify_info, NULL, sbinfo)) {
> > +             /* Someone else created sbinfo for us */
> > +             kfree(sbinfo);
> > +     }
>
> Alternatively, you could consider using wait_var_event() to let
> concurrent attachers wait for s_fsnotify_info to be initialized using a
> sentinel value to indicate that the caller should wait. But not sure if
> it's worth it.

Not worth it IMO. Adding watches is an extremely rare event
in the grand picture.

Thanks,
Amir.
Jan Kara March 20, 2024, 9:51 a.m. UTC | #3
On Wed 20-03-24 11:37:57, Amir Goldstein wrote:
> On Wed, Mar 20, 2024 at 10:47 AM Christian Brauner <brauner@kernel.org> wrote:
> >
> > On Sun, Mar 17, 2024 at 08:41:51PM +0200, Amir Goldstein wrote:
> > > Define a container struct fsnotify_sb_info to hold per-sb state,
> > > including the reference to sb marks connector.
> > >
> > > Allocate the fsnotify_sb_info state before attaching connector to any
> > > object on the sb and free it only when killing sb.
> > >
> > > This state is going to be used for storing per priority watched objects
> > > counters.
> > >
> > > Suggested-by: Jan Kara <jack@suse.cz>
> > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > ---
> > >  fs/notify/fsnotify.c             | 16 +++++++++++++---
> > >  fs/notify/fsnotify.h             |  9 ++++++++-
> > >  fs/notify/mark.c                 | 32 +++++++++++++++++++++++++++++++-
> > >  include/linux/fs.h               |  8 ++++----
> > >  include/linux/fsnotify_backend.h | 17 +++++++++++++++++
> > >  5 files changed, 73 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
> > > index 503e7c75e777..fb3f36bc6ea9 100644
> > > --- a/fs/notify/fsnotify.c
> > > +++ b/fs/notify/fsnotify.c
> > > @@ -89,11 +89,18 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
> > >
> > >  void fsnotify_sb_delete(struct super_block *sb)
> > >  {
> > > +     struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
> > > +
> > > +     /* Were any marks ever added to any object on this sb? */
> > > +     if (!sbinfo)
> > > +             return;
> > > +
> > >       fsnotify_unmount_inodes(sb);
> > >       fsnotify_clear_marks_by_sb(sb);
> > >       /* Wait for outstanding object references from connectors */
> > >       wait_var_event(fsnotify_sb_watched_objects(sb),
> > >                      !atomic_long_read(fsnotify_sb_watched_objects(sb)));
> > > +     kfree(sbinfo);
> > >  }
> > >
> > >  /*
> > > @@ -489,6 +496,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
> > >  {
> > >       const struct path *path = fsnotify_data_path(data, data_type);
> > >       struct super_block *sb = fsnotify_data_sb(data, data_type);
> > > +     struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
> > >       struct fsnotify_iter_info iter_info = {};
> > >       struct mount *mnt = NULL;
> > >       struct inode *inode2 = NULL;
> > > @@ -525,7 +533,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
> > >        * SRCU because we have no references to any objects and do not
> > >        * need SRCU to keep them "alive".
> > >        */
> > > -     if (!sb->s_fsnotify_marks &&
> > > +     if ((!sbinfo || !sbinfo->sb_marks) &&
> > >           (!mnt || !mnt->mnt_fsnotify_marks) &&
> > >           (!inode || !inode->i_fsnotify_marks) &&
> > >           (!inode2 || !inode2->i_fsnotify_marks))
> > > @@ -552,8 +560,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
> > >
> > >       iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
> > >
> > > -     iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
> > > -             fsnotify_first_mark(&sb->s_fsnotify_marks);
> > > +     if (sbinfo) {
> > > +             iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
> > > +                     fsnotify_first_mark(&sbinfo->sb_marks);
> > > +     }
> > >       if (mnt) {
> > >               iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] =
> > >                       fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
> > > diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
> > > index 8b73ad45cc71..378f9ec6d64b 100644
> > > --- a/fs/notify/fsnotify.h
> > > +++ b/fs/notify/fsnotify.h
> > > @@ -53,6 +53,13 @@ static inline struct super_block *fsnotify_connector_sb(
> > >       return fsnotify_object_sb(conn->obj, conn->type);
> > >  }
> > >
> > > +static inline fsnotify_connp_t *fsnotify_sb_marks(struct super_block *sb)
> > > +{
> > > +     struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
> > > +
> > > +     return sbinfo ? &sbinfo->sb_marks : NULL;
> > > +}
> > > +
> > >  /* destroy all events sitting in this groups notification queue */
> > >  extern void fsnotify_flush_notify(struct fsnotify_group *group);
> > >
> > > @@ -78,7 +85,7 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
> > >  /* run the list of all marks associated with sb and destroy them */
> > >  static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
> > >  {
> > > -     fsnotify_destroy_marks(&sb->s_fsnotify_marks);
> > > +     fsnotify_destroy_marks(fsnotify_sb_marks(sb));
> > >  }
> > >
> > >  /*
> > > diff --git a/fs/notify/mark.c b/fs/notify/mark.c
> > > index 0b703f9e6344..db053e0e218d 100644
> > > --- a/fs/notify/mark.c
> > > +++ b/fs/notify/mark.c
> > > @@ -105,7 +105,7 @@ static fsnotify_connp_t *fsnotify_object_connp(void *obj, int obj_type)
> > >       case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
> > >               return &real_mount(obj)->mnt_fsnotify_marks;
> > >       case FSNOTIFY_OBJ_TYPE_SB:
> > > -             return &((struct super_block *)obj)->s_fsnotify_marks;
> > > +             return fsnotify_sb_marks(obj);
> > >       default:
> > >               return NULL;
> > >       }
> > > @@ -568,6 +568,26 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
> > >       return -1;
> > >  }
> > >
> > > +static int fsnotify_attach_info_to_sb(struct super_block *sb)
> > > +{
> > > +     struct fsnotify_sb_info *sbinfo;
> > > +
> > > +     /* sb info is freed on fsnotify_sb_delete() */
> > > +     sbinfo = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
> > > +     if (!sbinfo)
> > > +             return -ENOMEM;
> > > +
> > > +     /*
> > > +      * cmpxchg() provides the barrier so that callers of fsnotify_sb_info()
> > > +      * will observe an initialized structure
> > > +      */
> > > +     if (cmpxchg(&sb->s_fsnotify_info, NULL, sbinfo)) {
> > > +             /* Someone else created sbinfo for us */
> > > +             kfree(sbinfo);
> > > +     }
> >
> > Alternatively, you could consider using wait_var_event() to let
> > concurrent attachers wait for s_fsnotify_info to be initialized using a
> > sentinel value to indicate that the caller should wait. But not sure if
> > it's worth it.
> 
> Not worth it IMO. Adding watches is an extremely rare event
> in the grand picture.

Agreed. The cmpxchg() scheme has generally proven to be good enough in
similar situations and simple enough to understand...

								Honza
Christian Brauner March 20, 2024, 10:13 a.m. UTC | #4
On Wed, Mar 20, 2024 at 10:51:34AM +0100, Jan Kara wrote:
> On Wed 20-03-24 11:37:57, Amir Goldstein wrote:
> > On Wed, Mar 20, 2024 at 10:47 AM Christian Brauner <brauner@kernel.org> wrote:
> > >
> > > On Sun, Mar 17, 2024 at 08:41:51PM +0200, Amir Goldstein wrote:
> > > > Define a container struct fsnotify_sb_info to hold per-sb state,
> > > > including the reference to sb marks connector.
> > > >
> > > > Allocate the fsnotify_sb_info state before attaching connector to any
> > > > object on the sb and free it only when killing sb.
> > > >
> > > > This state is going to be used for storing per priority watched objects
> > > > counters.
> > > >
> > > > Suggested-by: Jan Kara <jack@suse.cz>
> > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > > > ---
> > > >  fs/notify/fsnotify.c             | 16 +++++++++++++---
> > > >  fs/notify/fsnotify.h             |  9 ++++++++-
> > > >  fs/notify/mark.c                 | 32 +++++++++++++++++++++++++++++++-
> > > >  include/linux/fs.h               |  8 ++++----
> > > >  include/linux/fsnotify_backend.h | 17 +++++++++++++++++
> > > >  5 files changed, 73 insertions(+), 9 deletions(-)
> > > >
> > > > diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
> > > > index 503e7c75e777..fb3f36bc6ea9 100644
> > > > --- a/fs/notify/fsnotify.c
> > > > +++ b/fs/notify/fsnotify.c
> > > > @@ -89,11 +89,18 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
> > > >
> > > >  void fsnotify_sb_delete(struct super_block *sb)
> > > >  {
> > > > +     struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
> > > > +
> > > > +     /* Were any marks ever added to any object on this sb? */
> > > > +     if (!sbinfo)
> > > > +             return;
> > > > +
> > > >       fsnotify_unmount_inodes(sb);
> > > >       fsnotify_clear_marks_by_sb(sb);
> > > >       /* Wait for outstanding object references from connectors */
> > > >       wait_var_event(fsnotify_sb_watched_objects(sb),
> > > >                      !atomic_long_read(fsnotify_sb_watched_objects(sb)));
> > > > +     kfree(sbinfo);
> > > >  }
> > > >
> > > >  /*
> > > > @@ -489,6 +496,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
> > > >  {
> > > >       const struct path *path = fsnotify_data_path(data, data_type);
> > > >       struct super_block *sb = fsnotify_data_sb(data, data_type);
> > > > +     struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
> > > >       struct fsnotify_iter_info iter_info = {};
> > > >       struct mount *mnt = NULL;
> > > >       struct inode *inode2 = NULL;
> > > > @@ -525,7 +533,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
> > > >        * SRCU because we have no references to any objects and do not
> > > >        * need SRCU to keep them "alive".
> > > >        */
> > > > -     if (!sb->s_fsnotify_marks &&
> > > > +     if ((!sbinfo || !sbinfo->sb_marks) &&
> > > >           (!mnt || !mnt->mnt_fsnotify_marks) &&
> > > >           (!inode || !inode->i_fsnotify_marks) &&
> > > >           (!inode2 || !inode2->i_fsnotify_marks))
> > > > @@ -552,8 +560,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
> > > >
> > > >       iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
> > > >
> > > > -     iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
> > > > -             fsnotify_first_mark(&sb->s_fsnotify_marks);
> > > > +     if (sbinfo) {
> > > > +             iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
> > > > +                     fsnotify_first_mark(&sbinfo->sb_marks);
> > > > +     }
> > > >       if (mnt) {
> > > >               iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] =
> > > >                       fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
> > > > diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
> > > > index 8b73ad45cc71..378f9ec6d64b 100644
> > > > --- a/fs/notify/fsnotify.h
> > > > +++ b/fs/notify/fsnotify.h
> > > > @@ -53,6 +53,13 @@ static inline struct super_block *fsnotify_connector_sb(
> > > >       return fsnotify_object_sb(conn->obj, conn->type);
> > > >  }
> > > >
> > > > +static inline fsnotify_connp_t *fsnotify_sb_marks(struct super_block *sb)
> > > > +{
> > > > +     struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
> > > > +
> > > > +     return sbinfo ? &sbinfo->sb_marks : NULL;
> > > > +}
> > > > +
> > > >  /* destroy all events sitting in this groups notification queue */
> > > >  extern void fsnotify_flush_notify(struct fsnotify_group *group);
> > > >
> > > > @@ -78,7 +85,7 @@ static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
> > > >  /* run the list of all marks associated with sb and destroy them */
> > > >  static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
> > > >  {
> > > > -     fsnotify_destroy_marks(&sb->s_fsnotify_marks);
> > > > +     fsnotify_destroy_marks(fsnotify_sb_marks(sb));
> > > >  }
> > > >
> > > >  /*
> > > > diff --git a/fs/notify/mark.c b/fs/notify/mark.c
> > > > index 0b703f9e6344..db053e0e218d 100644
> > > > --- a/fs/notify/mark.c
> > > > +++ b/fs/notify/mark.c
> > > > @@ -105,7 +105,7 @@ static fsnotify_connp_t *fsnotify_object_connp(void *obj, int obj_type)
> > > >       case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
> > > >               return &real_mount(obj)->mnt_fsnotify_marks;
> > > >       case FSNOTIFY_OBJ_TYPE_SB:
> > > > -             return &((struct super_block *)obj)->s_fsnotify_marks;
> > > > +             return fsnotify_sb_marks(obj);
> > > >       default:
> > > >               return NULL;
> > > >       }
> > > > @@ -568,6 +568,26 @@ int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
> > > >       return -1;
> > > >  }
> > > >
> > > > +static int fsnotify_attach_info_to_sb(struct super_block *sb)
> > > > +{
> > > > +     struct fsnotify_sb_info *sbinfo;
> > > > +
> > > > +     /* sb info is freed on fsnotify_sb_delete() */
> > > > +     sbinfo = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
> > > > +     if (!sbinfo)
> > > > +             return -ENOMEM;
> > > > +
> > > > +     /*
> > > > +      * cmpxchg() provides the barrier so that callers of fsnotify_sb_info()
> > > > +      * will observe an initialized structure
> > > > +      */
> > > > +     if (cmpxchg(&sb->s_fsnotify_info, NULL, sbinfo)) {
> > > > +             /* Someone else created sbinfo for us */
> > > > +             kfree(sbinfo);
> > > > +     }
> > >
> > > Alternatively, you could consider using wait_var_event() to let
> > > concurrent attachers wait for s_fsnotify_info to be initialized using a
> > > sentinel value to indicate that the caller should wait. But not sure if
> > > it's worth it.
> > 
> > Not worth it IMO. Adding watches is an extremely rare event
> > in the grand picture.
> 
> Agreed. The cmpxchg() scheme has generally proven to be good enough in
> similar situations and simple enough to understand...

Thanks, sounds good to me.
diff mbox series

Patch

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 503e7c75e777..fb3f36bc6ea9 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -89,11 +89,18 @@  static void fsnotify_unmount_inodes(struct super_block *sb)
 
 void fsnotify_sb_delete(struct super_block *sb)
 {
+	struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
+
+	/* Were any marks ever added to any object on this sb? */
+	if (!sbinfo)
+		return;
+
 	fsnotify_unmount_inodes(sb);
 	fsnotify_clear_marks_by_sb(sb);
 	/* Wait for outstanding object references from connectors */
 	wait_var_event(fsnotify_sb_watched_objects(sb),
 		       !atomic_long_read(fsnotify_sb_watched_objects(sb)));
+	kfree(sbinfo);
 }
 
 /*
@@ -489,6 +496,7 @@  int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 {
 	const struct path *path = fsnotify_data_path(data, data_type);
 	struct super_block *sb = fsnotify_data_sb(data, data_type);
+	struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
 	struct fsnotify_iter_info iter_info = {};
 	struct mount *mnt = NULL;
 	struct inode *inode2 = NULL;
@@ -525,7 +533,7 @@  int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 	 * SRCU because we have no references to any objects and do not
 	 * need SRCU to keep them "alive".
 	 */
-	if (!sb->s_fsnotify_marks &&
+	if ((!sbinfo || !sbinfo->sb_marks) &&
 	    (!mnt || !mnt->mnt_fsnotify_marks) &&
 	    (!inode || !inode->i_fsnotify_marks) &&
 	    (!inode2 || !inode2->i_fsnotify_marks))
@@ -552,8 +560,10 @@  int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 
 	iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
 
-	iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
-		fsnotify_first_mark(&sb->s_fsnotify_marks);
+	if (sbinfo) {
+		iter_info.marks[FSNOTIFY_ITER_TYPE_SB] =
+			fsnotify_first_mark(&sbinfo->sb_marks);
+	}
 	if (mnt) {
 		iter_info.marks[FSNOTIFY_ITER_TYPE_VFSMOUNT] =
 			fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h
index 8b73ad45cc71..378f9ec6d64b 100644
--- a/fs/notify/fsnotify.h
+++ b/fs/notify/fsnotify.h
@@ -53,6 +53,13 @@  static inline struct super_block *fsnotify_connector_sb(
 	return fsnotify_object_sb(conn->obj, conn->type);
 }
 
+static inline fsnotify_connp_t *fsnotify_sb_marks(struct super_block *sb)
+{
+	struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb);
+
+	return sbinfo ? &sbinfo->sb_marks : NULL;
+}
+
 /* destroy all events sitting in this groups notification queue */
 extern void fsnotify_flush_notify(struct fsnotify_group *group);
 
@@ -78,7 +85,7 @@  static inline void fsnotify_clear_marks_by_mount(struct vfsmount *mnt)
 /* run the list of all marks associated with sb and destroy them */
 static inline void fsnotify_clear_marks_by_sb(struct super_block *sb)
 {
-	fsnotify_destroy_marks(&sb->s_fsnotify_marks);
+	fsnotify_destroy_marks(fsnotify_sb_marks(sb));
 }
 
 /*
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 0b703f9e6344..db053e0e218d 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -105,7 +105,7 @@  static fsnotify_connp_t *fsnotify_object_connp(void *obj, int obj_type)
 	case FSNOTIFY_OBJ_TYPE_VFSMOUNT:
 		return &real_mount(obj)->mnt_fsnotify_marks;
 	case FSNOTIFY_OBJ_TYPE_SB:
-		return &((struct super_block *)obj)->s_fsnotify_marks;
+		return fsnotify_sb_marks(obj);
 	default:
 		return NULL;
 	}
@@ -568,6 +568,26 @@  int fsnotify_compare_groups(struct fsnotify_group *a, struct fsnotify_group *b)
 	return -1;
 }
 
+static int fsnotify_attach_info_to_sb(struct super_block *sb)
+{
+	struct fsnotify_sb_info *sbinfo;
+
+	/* sb info is freed on fsnotify_sb_delete() */
+	sbinfo = kzalloc(sizeof(*sbinfo), GFP_KERNEL);
+	if (!sbinfo)
+		return -ENOMEM;
+
+	/*
+	 * cmpxchg() provides the barrier so that callers of fsnotify_sb_info()
+	 * will observe an initialized structure
+	 */
+	if (cmpxchg(&sb->s_fsnotify_info, NULL, sbinfo)) {
+		/* Someone else created sbinfo for us */
+		kfree(sbinfo);
+	}
+	return 0;
+}
+
 static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
 					       void *obj, unsigned int obj_type)
 {
@@ -639,6 +659,16 @@  static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj,
 	if (WARN_ON(!fsnotify_valid_obj_type(obj_type)))
 		return -EINVAL;
 
+	/*
+	 * Attach the sb info before attaching a connector to any object on sb.
+	 * The sb info will remain attached as long as sb lives.
+	 */
+	if (!fsnotify_sb_info(sb)) {
+		err = fsnotify_attach_info_to_sb(sb);
+		if (err)
+			return err;
+	}
+
 	connp = fsnotify_object_connp(obj, obj_type);
 restart:
 	spin_lock(&mark->lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 00fc429b0af0..7f40b592f711 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -73,6 +73,8 @@  struct fscrypt_inode_info;
 struct fscrypt_operations;
 struct fsverity_info;
 struct fsverity_operations;
+struct fsnotify_mark_connector;
+struct fsnotify_sb_info;
 struct fs_context;
 struct fs_parameter_spec;
 struct fileattr;
@@ -620,8 +622,6 @@  is_uncached_acl(struct posix_acl *acl)
 #define IOP_XATTR	0x0008
 #define IOP_DEFAULT_READLINK	0x0010
 
-struct fsnotify_mark_connector;
-
 /*
  * Keep mostly read-only and often accessed (especially for
  * the RCU path lookup and 'stat' data) fields at the beginning
@@ -1249,7 +1249,7 @@  struct super_block {
 
 	/*
 	 * Keep s_fs_info, s_time_gran, s_fsnotify_mask, and
-	 * s_fsnotify_marks together for cache efficiency. They are frequently
+	 * s_fsnotify_info together for cache efficiency. They are frequently
 	 * accessed and rarely modified.
 	 */
 	void			*s_fs_info;	/* Filesystem private info */
@@ -1261,7 +1261,7 @@  struct super_block {
 	time64_t		   s_time_max;
 #ifdef CONFIG_FSNOTIFY
 	__u32			s_fsnotify_mask;
-	struct fsnotify_mark_connector __rcu	*s_fsnotify_marks;
+	struct fsnotify_sb_info	*s_fsnotify_info;
 #endif
 
 	/*
diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 83004d9e07a3..c9f2b2f6b493 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -477,6 +477,23 @@  struct fsnotify_mark_connector {
 	struct hlist_head list;
 };
 
+/*
+ * Container for per-sb fsnotify state (sb marks and more).
+ * Attached lazily on first marked object on the sb and freed when killing sb.
+ */
+struct fsnotify_sb_info {
+	struct fsnotify_mark_connector __rcu *sb_marks;
+};
+
+static inline struct fsnotify_sb_info *fsnotify_sb_info(struct super_block *sb)
+{
+#ifdef CONFIG_FSNOTIFY
+	return READ_ONCE(sb->s_fsnotify_info);
+#else
+	return NULL;
+#endif
+}
+
 static inline atomic_long_t *fsnotify_sb_watched_objects(struct super_block *sb)
 {
 	return &sb->s_fsnotify_connectors;