diff mbox series

[v2] VFS: generate FS_CREATE before FS_OPEN when ->atomic_open used.

Message ID 171817619547.14261.975798725161704336@noble.neil.brown.name (mailing list archive)
State New
Headers show
Series [v2] VFS: generate FS_CREATE before FS_OPEN when ->atomic_open used. | expand

Commit Message

NeilBrown June 12, 2024, 7:09 a.m. UTC
When a file is opened and created with open(..., O_CREAT) we get
both the CREATE and OPEN fsnotify events and would expect them in that
order.   For most filesystems we get them in that order because
open_last_lookups() calls fsnofify_create() and then do_open() (from
path_openat()) calls vfs_open()->do_dentry_open() which calls
fsnotify_open().

However when ->atomic_open is used, the
   do_dentry_open() -> fsnotify_open()
call happens from finish_open() which is called from the ->atomic_open
handler in lookup_open() which is called *before* open_last_lookups()
calls fsnotify_create.  So we get the "open" notification before
"create" - which is backwards.  ltp testcase inotify02 tests this and
reports the inconsistency.

This patch lifts the fsnotify_open() call out of do_dentry_open() and
places it higher up the call stack.  There are three callers of
do_dentry_open().

For vfs_open() and kernel_file_open() the fsnotify_open() is placed
directly in that caller so there should be no behavioural change.

For finish_open() there are two cases:
 - finish_open is used in ->atomic_open handlers.  For these we add a
   call to fsnotify_open() at the top of do_open() if FMODE_OPENED is
   set - which means do_dentry_open() has been called.
 - finish_open is used in ->tmpfile() handlers.  For these a similar
   call to fsnotify_open() is added to vfs_tmpfile()

With this patch NFSv3 is restored to its previous behaviour (before
->atomic_open support was added) of generating CREATE notifications
before OPEN, and NFSv4 now has that same correct ordering that is has
not had before.  I haven't tested other filesystems.

Fixes: 7c6c5249f061 ("NFS: add atomic_open for NFSv3 to handle O_TRUNC correctly.")
Reported-by: James Clark <james.clark@arm.com>
Closes: https://lore.kernel.org/all/01c3bf2e-eb1f-4b7f-a54f-d2a05dd3d8c8@arm.com
Signed-off-by: NeilBrown <neilb@suse.de>
---
 fs/namei.c |  5 +++++
 fs/open.c  | 19 ++++++++++++-------
 2 files changed, 17 insertions(+), 7 deletions(-)

Comments

James Clark June 12, 2024, 10:27 a.m. UTC | #1
On 12/06/2024 08:09, NeilBrown wrote:
> 
> When a file is opened and created with open(..., O_CREAT) we get
> both the CREATE and OPEN fsnotify events and would expect them in that
> order.   For most filesystems we get them in that order because
> open_last_lookups() calls fsnofify_create() and then do_open() (from
> path_openat()) calls vfs_open()->do_dentry_open() which calls
> fsnotify_open().
> 
> However when ->atomic_open is used, the
>    do_dentry_open() -> fsnotify_open()
> call happens from finish_open() which is called from the ->atomic_open
> handler in lookup_open() which is called *before* open_last_lookups()
> calls fsnotify_create.  So we get the "open" notification before
> "create" - which is backwards.  ltp testcase inotify02 tests this and
> reports the inconsistency.
> 
> This patch lifts the fsnotify_open() call out of do_dentry_open() and
> places it higher up the call stack.  There are three callers of
> do_dentry_open().
> 
> For vfs_open() and kernel_file_open() the fsnotify_open() is placed
> directly in that caller so there should be no behavioural change.
> 
> For finish_open() there are two cases:
>  - finish_open is used in ->atomic_open handlers.  For these we add a
>    call to fsnotify_open() at the top of do_open() if FMODE_OPENED is
>    set - which means do_dentry_open() has been called.
>  - finish_open is used in ->tmpfile() handlers.  For these a similar
>    call to fsnotify_open() is added to vfs_tmpfile()
> 
> With this patch NFSv3 is restored to its previous behaviour (before
> ->atomic_open support was added) of generating CREATE notifications
> before OPEN, and NFSv4 now has that same correct ordering that is has
> not had before.  I haven't tested other filesystems.
> 
> Fixes: 7c6c5249f061 ("NFS: add atomic_open for NFSv3 to handle O_TRUNC correctly.")
> Reported-by: James Clark <james.clark@arm.com>
> Closes: https://lore.kernel.org/all/01c3bf2e-eb1f-4b7f-a54f-d2a05dd3d8c8@arm.com
> Signed-off-by: NeilBrown <neilb@suse.de>

That's passing for me now on NFSv3:

Tested-by: James Clark <james.clark@arm.com>

> ---
>  fs/namei.c |  5 +++++
>  fs/open.c  | 19 ++++++++++++-------
>  2 files changed, 17 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/namei.c b/fs/namei.c
> index 37fb0a8aa09a..057afacc4b60 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -3612,6 +3612,9 @@ static int do_open(struct nameidata *nd,
>  	int acc_mode;
>  	int error;
>  
> +	if (file->f_mode & FMODE_OPENED)
> +		fsnotify_open(file);
> +
>  	if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
>  		error = complete_walk(nd);
>  		if (error)
> @@ -3700,6 +3703,8 @@ int vfs_tmpfile(struct mnt_idmap *idmap,
>  	mode = vfs_prepare_mode(idmap, dir, mode, mode, mode);
>  	error = dir->i_op->tmpfile(idmap, dir, file, mode);
>  	dput(child);
> +	if (file->f_mode & FMODE_OPENED)
> +		fsnotify_open(file);
>  	if (error)
>  		return error;
>  	/* Don't check for other permissions, the inode was just created */
> diff --git a/fs/open.c b/fs/open.c
> index 89cafb572061..970f299c0e77 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -1004,11 +1004,6 @@ static int do_dentry_open(struct file *f,
>  		}
>  	}
>  
> -	/*
> -	 * Once we return a file with FMODE_OPENED, __fput() will call
> -	 * fsnotify_close(), so we need fsnotify_open() here for symmetry.
> -	 */
> -	fsnotify_open(f);
>  	return 0;
>  
>  cleanup_all:
> @@ -1085,8 +1080,17 @@ EXPORT_SYMBOL(file_path);
>   */
>  int vfs_open(const struct path *path, struct file *file)
>  {
> +	int ret;
> +
>  	file->f_path = *path;
> -	return do_dentry_open(file, NULL);
> +	ret = do_dentry_open(file, NULL);
> +	if (!ret)
> +		/*
> +		 * Once we return a file with FMODE_OPENED, __fput() will call
> +		 * fsnotify_close(), so we need fsnotify_open() here for symmetry.
> +		 */
> +		fsnotify_open(file);
> +	return ret;
>  }
>  
>  struct file *dentry_open(const struct path *path, int flags,
> @@ -1178,7 +1182,8 @@ struct file *kernel_file_open(const struct path *path, int flags,
>  	if (error) {
>  		fput(f);
>  		f = ERR_PTR(error);
> -	}
> +	} else
> +		fsnotify_open(f);
>  	return f;
>  }
>  EXPORT_SYMBOL_GPL(kernel_file_open);
Amir Goldstein June 12, 2024, 10:34 a.m. UTC | #2
On Wed, Jun 12, 2024 at 10:10 AM NeilBrown <neilb@suse.de> wrote:
>
>
> When a file is opened and created with open(..., O_CREAT) we get
> both the CREATE and OPEN fsnotify events and would expect them in that
> order.   For most filesystems we get them in that order because
> open_last_lookups() calls fsnofify_create() and then do_open() (from
> path_openat()) calls vfs_open()->do_dentry_open() which calls
> fsnotify_open().
>
> However when ->atomic_open is used, the
>    do_dentry_open() -> fsnotify_open()
> call happens from finish_open() which is called from the ->atomic_open
> handler in lookup_open() which is called *before* open_last_lookups()
> calls fsnotify_create.  So we get the "open" notification before
> "create" - which is backwards.  ltp testcase inotify02 tests this and
> reports the inconsistency.
>
> This patch lifts the fsnotify_open() call out of do_dentry_open() and
> places it higher up the call stack.  There are three callers of
> do_dentry_open().
>
> For vfs_open() and kernel_file_open() the fsnotify_open() is placed
> directly in that caller so there should be no behavioural change.
>
> For finish_open() there are two cases:
>  - finish_open is used in ->atomic_open handlers.  For these we add a
>    call to fsnotify_open() at the top of do_open() if FMODE_OPENED is
>    set - which means do_dentry_open() has been called.
>  - finish_open is used in ->tmpfile() handlers.  For these a similar
>    call to fsnotify_open() is added to vfs_tmpfile()

Any handlers other than ovl_tmpfile()?
This one is a very recent and pretty special case.
Did open(O_TMPFILE) used to emit an OPEN event before that change?

>
> With this patch NFSv3 is restored to its previous behaviour (before
> ->atomic_open support was added) of generating CREATE notifications
> before OPEN, and NFSv4 now has that same correct ordering that is has
> not had before.  I haven't tested other filesystems.
>
> Fixes: 7c6c5249f061 ("NFS: add atomic_open for NFSv3 to handle O_TRUNC correctly.")
> Reported-by: James Clark <james.clark@arm.com>
> Closes: https://lore.kernel.org/all/01c3bf2e-eb1f-4b7f-a54f-d2a05dd3d8c8@arm.com
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/namei.c |  5 +++++
>  fs/open.c  | 19 ++++++++++++-------
>  2 files changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/fs/namei.c b/fs/namei.c
> index 37fb0a8aa09a..057afacc4b60 100644
> --- a/fs/namei.c
> +++ b/fs/namei.c
> @@ -3612,6 +3612,9 @@ static int do_open(struct nameidata *nd,
>         int acc_mode;
>         int error;
>
> +       if (file->f_mode & FMODE_OPENED)
> +               fsnotify_open(file);
> +
>         if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
>                 error = complete_walk(nd);
>                 if (error)
> @@ -3700,6 +3703,8 @@ int vfs_tmpfile(struct mnt_idmap *idmap,
>         mode = vfs_prepare_mode(idmap, dir, mode, mode, mode);
>         error = dir->i_op->tmpfile(idmap, dir, file, mode);
>         dput(child);
> +       if (file->f_mode & FMODE_OPENED)
> +               fsnotify_open(file);
>         if (error)
>                 return error;
>         /* Don't check for other permissions, the inode was just created */
> diff --git a/fs/open.c b/fs/open.c
> index 89cafb572061..970f299c0e77 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -1004,11 +1004,6 @@ static int do_dentry_open(struct file *f,
>                 }
>         }
>
> -       /*
> -        * Once we return a file with FMODE_OPENED, __fput() will call
> -        * fsnotify_close(), so we need fsnotify_open() here for symmetry.
> -        */
> -       fsnotify_open(f);
>         return 0;
>
>  cleanup_all:
> @@ -1085,8 +1080,17 @@ EXPORT_SYMBOL(file_path);
>   */
>  int vfs_open(const struct path *path, struct file *file)
>  {
> +       int ret;
> +
>         file->f_path = *path;
> -       return do_dentry_open(file, NULL);
> +       ret = do_dentry_open(file, NULL);
> +       if (!ret)
> +               /*
> +                * Once we return a file with FMODE_OPENED, __fput() will call
> +                * fsnotify_close(), so we need fsnotify_open() here for symmetry.
> +                */
> +               fsnotify_open(file);

I agree that this change preserves the logic, but (my own) comment
above is inconsistent with the case of:

        if ((f->f_flags & O_DIRECT) && !(f->f_mode & FMODE_CAN_ODIRECT))
                return -EINVAL;

Which does set FMODE_OPENED, but does not emit an OPEN event.

I have a feeling that the comment is correct about the CLOSE event in
that case, but honestly, I don't think this corner case is that important,
just maybe the comment needs to be slightly clarified?

Thanks,
Amir.

> +       return ret;
>  }
>
>  struct file *dentry_open(const struct path *path, int flags,
> @@ -1178,7 +1182,8 @@ struct file *kernel_file_open(const struct path *path, int flags,
>         if (error) {
>                 fput(f);
>                 f = ERR_PTR(error);
> -       }
> +       } else
> +               fsnotify_open(f);
>         return f;
>  }
>  EXPORT_SYMBOL_GPL(kernel_file_open);
> --
> 2.44.0
>
NeilBrown June 12, 2024, 11:47 a.m. UTC | #3
On Wed, 12 Jun 2024, Amir Goldstein wrote:
> On Wed, Jun 12, 2024 at 10:10 AM NeilBrown <neilb@suse.de> wrote:
> >
> >
> > When a file is opened and created with open(..., O_CREAT) we get
> > both the CREATE and OPEN fsnotify events and would expect them in that
> > order.   For most filesystems we get them in that order because
> > open_last_lookups() calls fsnofify_create() and then do_open() (from
> > path_openat()) calls vfs_open()->do_dentry_open() which calls
> > fsnotify_open().
> >
> > However when ->atomic_open is used, the
> >    do_dentry_open() -> fsnotify_open()
> > call happens from finish_open() which is called from the ->atomic_open
> > handler in lookup_open() which is called *before* open_last_lookups()
> > calls fsnotify_create.  So we get the "open" notification before
> > "create" - which is backwards.  ltp testcase inotify02 tests this and
> > reports the inconsistency.
> >
> > This patch lifts the fsnotify_open() call out of do_dentry_open() and
> > places it higher up the call stack.  There are three callers of
> > do_dentry_open().
> >
> > For vfs_open() and kernel_file_open() the fsnotify_open() is placed
> > directly in that caller so there should be no behavioural change.
> >
> > For finish_open() there are two cases:
> >  - finish_open is used in ->atomic_open handlers.  For these we add a
> >    call to fsnotify_open() at the top of do_open() if FMODE_OPENED is
> >    set - which means do_dentry_open() has been called.
> >  - finish_open is used in ->tmpfile() handlers.  For these a similar
> >    call to fsnotify_open() is added to vfs_tmpfile()
> 
> Any handlers other than ovl_tmpfile()?

Local filesystems tend to call finish_open_simple() which is a trivial
wrapper around finish_open().
Every .tmpfile handler calls either finish_open() or finish_open_simple().

> This one is a very recent and pretty special case.
> Did open(O_TMPFILE) used to emit an OPEN event before that change?

I believe so, yes.

Thanks,
NeilBrown

> 
> >
> > With this patch NFSv3 is restored to its previous behaviour (before
> > ->atomic_open support was added) of generating CREATE notifications
> > before OPEN, and NFSv4 now has that same correct ordering that is has
> > not had before.  I haven't tested other filesystems.
> >
> > Fixes: 7c6c5249f061 ("NFS: add atomic_open for NFSv3 to handle O_TRUNC correctly.")
> > Reported-by: James Clark <james.clark@arm.com>
> > Closes: https://lore.kernel.org/all/01c3bf2e-eb1f-4b7f-a54f-d2a05dd3d8c8@arm.com
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  fs/namei.c |  5 +++++
> >  fs/open.c  | 19 ++++++++++++-------
> >  2 files changed, 17 insertions(+), 7 deletions(-)
> >
> > diff --git a/fs/namei.c b/fs/namei.c
> > index 37fb0a8aa09a..057afacc4b60 100644
> > --- a/fs/namei.c
> > +++ b/fs/namei.c
> > @@ -3612,6 +3612,9 @@ static int do_open(struct nameidata *nd,
> >         int acc_mode;
> >         int error;
> >
> > +       if (file->f_mode & FMODE_OPENED)
> > +               fsnotify_open(file);
> > +
> >         if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
> >                 error = complete_walk(nd);
> >                 if (error)
> > @@ -3700,6 +3703,8 @@ int vfs_tmpfile(struct mnt_idmap *idmap,
> >         mode = vfs_prepare_mode(idmap, dir, mode, mode, mode);
> >         error = dir->i_op->tmpfile(idmap, dir, file, mode);
> >         dput(child);
> > +       if (file->f_mode & FMODE_OPENED)
> > +               fsnotify_open(file);
> >         if (error)
> >                 return error;
> >         /* Don't check for other permissions, the inode was just created */
> > diff --git a/fs/open.c b/fs/open.c
> > index 89cafb572061..970f299c0e77 100644
> > --- a/fs/open.c
> > +++ b/fs/open.c
> > @@ -1004,11 +1004,6 @@ static int do_dentry_open(struct file *f,
> >                 }
> >         }
> >
> > -       /*
> > -        * Once we return a file with FMODE_OPENED, __fput() will call
> > -        * fsnotify_close(), so we need fsnotify_open() here for symmetry.
> > -        */
> > -       fsnotify_open(f);
> >         return 0;
> >
> >  cleanup_all:
> > @@ -1085,8 +1080,17 @@ EXPORT_SYMBOL(file_path);
> >   */
> >  int vfs_open(const struct path *path, struct file *file)
> >  {
> > +       int ret;
> > +
> >         file->f_path = *path;
> > -       return do_dentry_open(file, NULL);
> > +       ret = do_dentry_open(file, NULL);
> > +       if (!ret)
> > +               /*
> > +                * Once we return a file with FMODE_OPENED, __fput() will call
> > +                * fsnotify_close(), so we need fsnotify_open() here for symmetry.
> > +                */
> > +               fsnotify_open(file);
> 
> I agree that this change preserves the logic, but (my own) comment
> above is inconsistent with the case of:
> 
>         if ((f->f_flags & O_DIRECT) && !(f->f_mode & FMODE_CAN_ODIRECT))
>                 return -EINVAL;
> 
> Which does set FMODE_OPENED, but does not emit an OPEN event.

If I understand correctly, that case doesn't emit an OPEN event before
my patch, but will result in a CLOSE event.
After my patch ... I think it still doesn't emit OPEN.

I wonder if, instead of adding the the fsnotify_open() in do_open(), we
should put it in the\
	if (file->f_mode & (FMODE_OPENED | FMODE_CREATED)) {
case of open_last_lookups().

Or maybe it really doesn't hurt to have a CLOSE event without and OPEN. 
OPEN without CLOSE would be problematic, but the other way around
shouldn't matter....  It feels untidy, but maybe we don't care.

Thanks,
NeilBrown


> 
> I have a feeling that the comment is correct about the CLOSE event in
> that case, but honestly, I don't think this corner case is that important,
> just maybe the comment needs to be slightly clarified?
> 
> Thanks,
> Amir.
> 
> > +       return ret;
> >  }
> >
> >  struct file *dentry_open(const struct path *path, int flags,
> > @@ -1178,7 +1182,8 @@ struct file *kernel_file_open(const struct path *path, int flags,
> >         if (error) {
> >                 fput(f);
> >                 f = ERR_PTR(error);
> > -       }
> > +       } else
> > +               fsnotify_open(f);
> >         return f;
> >  }
> >  EXPORT_SYMBOL_GPL(kernel_file_open);
> > --
> > 2.44.0
> >
>
Christian Brauner June 12, 2024, 1:53 p.m. UTC | #4
On Wed, Jun 12, 2024 at 05:09:55PM +1000, NeilBrown wrote:
> 
> When a file is opened and created with open(..., O_CREAT) we get
> both the CREATE and OPEN fsnotify events and would expect them in that
> order.   For most filesystems we get them in that order because
> open_last_lookups() calls fsnofify_create() and then do_open() (from
> path_openat()) calls vfs_open()->do_dentry_open() which calls
> fsnotify_open().
> 
> However when ->atomic_open is used, the
>    do_dentry_open() -> fsnotify_open()
> call happens from finish_open() which is called from the ->atomic_open
> handler in lookup_open() which is called *before* open_last_lookups()
> calls fsnotify_create.  So we get the "open" notification before
> "create" - which is backwards.  ltp testcase inotify02 tests this and
> reports the inconsistency.
> 
> This patch lifts the fsnotify_open() call out of do_dentry_open() and
> places it higher up the call stack.  There are three callers of
> do_dentry_open().
> 
> For vfs_open() and kernel_file_open() the fsnotify_open() is placed
> directly in that caller so there should be no behavioural change.
> 
> For finish_open() there are two cases:
>  - finish_open is used in ->atomic_open handlers.  For these we add a
>    call to fsnotify_open() at the top of do_open() if FMODE_OPENED is
>    set - which means do_dentry_open() has been called.
>  - finish_open is used in ->tmpfile() handlers.  For these a similar
>    call to fsnotify_open() is added to vfs_tmpfile()
> 
> With this patch NFSv3 is restored to its previous behaviour (before
> ->atomic_open support was added) of generating CREATE notifications
> before OPEN, and NFSv4 now has that same correct ordering that is has
> not had before.  I haven't tested other filesystems.
> 
> Fixes: 7c6c5249f061 ("NFS: add atomic_open for NFSv3 to handle O_TRUNC correctly.")
> Reported-by: James Clark <james.clark@arm.com>
> Closes: https://lore.kernel.org/all/01c3bf2e-eb1f-4b7f-a54f-d2a05dd3d8c8@arm.com
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---

We should take this is a bugfix because it doesn't change behavior.

But then we should follow this up with a patch series that tries to
rectify the open/close imbalance because I find that pretty ugly. That's
at least my opinion.

We should aim to only generate an open event when may_open() succeeds
and don't generate a close event when the open has failed. Maybe:

+#ifdef CONFIG_FSNOTIFY
+#define file_nonotify(f) ((f)->f_mode |= __FMODE_NONOTIFY)
+#else
+#define file_nonotify(f) ((void)(f))
+#endif

will do.

Basic open permissions failing should count as failure to open and thus
also turn of a close event.

The somewhat ugly part is imho that security hooks introduce another
layer of complexity. While we do count security_file_permission() as
a failure to open we wouldn't e.g., count security_file_post_open() as a
failure to open (Though granted here that "*_post_open()" makes it
easier.). But it is really ugly that LSMs get to say "no" _after_ the
file has been opened. I suspect this is some IMA or EVM thing where they
hash the contents or something but it's royally ugly and I complained
about this before. But maybe such things should just generate an LSM
layer event via fsnotify in the future (FSNOTIFY_MAC) or something...
Then userspace can see "Hey, the VFS said yes but then the MAC stuff
said no."
Amir Goldstein June 13, 2024, 7:31 a.m. UTC | #5
On Wed, Jun 12, 2024 at 4:53 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Wed, Jun 12, 2024 at 05:09:55PM +1000, NeilBrown wrote:
> >
> > When a file is opened and created with open(..., O_CREAT) we get
> > both the CREATE and OPEN fsnotify events and would expect them in that
> > order.   For most filesystems we get them in that order because
> > open_last_lookups() calls fsnofify_create() and then do_open() (from
> > path_openat()) calls vfs_open()->do_dentry_open() which calls
> > fsnotify_open().
> >
> > However when ->atomic_open is used, the
> >    do_dentry_open() -> fsnotify_open()
> > call happens from finish_open() which is called from the ->atomic_open
> > handler in lookup_open() which is called *before* open_last_lookups()
> > calls fsnotify_create.  So we get the "open" notification before
> > "create" - which is backwards.  ltp testcase inotify02 tests this and
> > reports the inconsistency.
> >
> > This patch lifts the fsnotify_open() call out of do_dentry_open() and
> > places it higher up the call stack.  There are three callers of
> > do_dentry_open().
> >
> > For vfs_open() and kernel_file_open() the fsnotify_open() is placed
> > directly in that caller so there should be no behavioural change.
> >
> > For finish_open() there are two cases:
> >  - finish_open is used in ->atomic_open handlers.  For these we add a
> >    call to fsnotify_open() at the top of do_open() if FMODE_OPENED is
> >    set - which means do_dentry_open() has been called.
> >  - finish_open is used in ->tmpfile() handlers.  For these a similar
> >    call to fsnotify_open() is added to vfs_tmpfile()
> >
> > With this patch NFSv3 is restored to its previous behaviour (before
> > ->atomic_open support was added) of generating CREATE notifications
> > before OPEN, and NFSv4 now has that same correct ordering that is has
> > not had before.  I haven't tested other filesystems.
> >
> > Fixes: 7c6c5249f061 ("NFS: add atomic_open for NFSv3 to handle O_TRUNC correctly.")

I think it is better to add (also?)
Fixes: 7b8c9d7bb457 ("fsnotify: move fsnotify_open() hook into
do_dentry_open()")
because this is when the test case was regressed for other atomic_open() fs

> > Reported-by: James Clark <james.clark@arm.com>
> > Closes: https://lore.kernel.org/all/01c3bf2e-eb1f-4b7f-a54f-d2a05dd3d8c8@arm.com
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
>
> We should take this is a bugfix because it doesn't change behavior.
>

I agree.
I would love for this to be backported to at least v6.9.y
because FAN_CREATE events supported on fuse,nfs, (zero f_fsid)
only since v6.8, which triggered my fix to fanotify16 LTP test.

> But then we should follow this up with a patch series that tries to
> rectify the open/close imbalance because I find that pretty ugly. That's
> at least my opinion.
>
> We should aim to only generate an open event when may_open() succeeds
> and don't generate a close event when the open has failed. Maybe:
>
> +#ifdef CONFIG_FSNOTIFY
> +#define file_nonotify(f) ((f)->f_mode |= __FMODE_NONOTIFY)
> +#else
> +#define file_nonotify(f) ((void)(f))
> +#endif
>
> will do.

Why bother with the ifdef? __FMODE_NONOTIFY is always defined.

Maybe something like this (untested partial patch):


+static inline int fsnotify_open_error(struct file *f, int error)
+{
+       /*
+        * Once we return a file with FMODE_OPENED, __fput() will call
+        * fsnotify_close(), so we need to either call fsnotify_open() or
+        * set __FMODE_NONOTIFY to suppress fsnotify_close() for symmetry.
+        */
+       if (error)
+               f->f_mode |= __FMODE_NONOTIFY;
+       else
+               fsnotify_open(f);
+       return error;
+}
+
 static int do_dentry_open(struct file *f,
                          int (*open)(struct inode *, struct file *))
 {
@@ -1004,11 +1018,6 @@ static int do_dentry_open(struct file *f,
                }
        }

-       /*
-        * Once we return a file with FMODE_OPENED, __fput() will call
-        * fsnotify_close(), so we need fsnotify_open() here for symmetry.
-        */
-       fsnotify_open(f);
        return 0;

 cleanup_all:
@@ -1085,8 +1094,11 @@ EXPORT_SYMBOL(file_path);
  */
 int vfs_open(const struct path *path, struct file *file)
 {
+       int error;
+
        file->f_path = *path;
-       return do_dentry_open(file, NULL);
+       error = do_dentry_open(file, NULL);
+       return fsnotify_open_error(file, error);
 }

 struct file *dentry_open(const struct path *path, int flags,
@@ -1175,6 +1187,7 @@ struct file *kernel_file_open(const struct path
*path, int flags,

        f->f_path = *path;
        error = do_dentry_open(f, NULL);
+       fsnotify_open_error(f, error);
        if (error) {
                fput(f);
                f = ERR_PTR(error);


>
> Basic open permissions failing should count as failure to open and thus
> also turn of a close event.
>
> The somewhat ugly part is imho that security hooks introduce another
> layer of complexity. While we do count security_file_permission() as
> a failure to open we wouldn't e.g., count security_file_post_open() as a
> failure to open (Though granted here that "*_post_open()" makes it
> easier.). But it is really ugly that LSMs get to say "no" _after_ the
> file has been opened. I suspect this is some IMA or EVM thing where they
> hash the contents or something but it's royally ugly and I complained
> about this before. But maybe such things should just generate an LSM
> layer event via fsnotify in the future (FSNOTIFY_MAC) or something...
> Then userspace can see "Hey, the VFS said yes but then the MAC stuff
> said no."

Not sure what IMA/EVM needs so cannot comment about this proposal.

Thanks,
Amir.
Amir Goldstein June 13, 2024, 7:38 a.m. UTC | #6
On Wed, Jun 12, 2024 at 2:47 PM NeilBrown <neilb@suse.de> wrote:
>
> On Wed, 12 Jun 2024, Amir Goldstein wrote:
> > On Wed, Jun 12, 2024 at 10:10 AM NeilBrown <neilb@suse.de> wrote:
> > >
> > >
> > > When a file is opened and created with open(..., O_CREAT) we get
> > > both the CREATE and OPEN fsnotify events and would expect them in that
> > > order.   For most filesystems we get them in that order because
> > > open_last_lookups() calls fsnofify_create() and then do_open() (from
> > > path_openat()) calls vfs_open()->do_dentry_open() which calls
> > > fsnotify_open().
> > >
> > > However when ->atomic_open is used, the
> > >    do_dentry_open() -> fsnotify_open()
> > > call happens from finish_open() which is called from the ->atomic_open
> > > handler in lookup_open() which is called *before* open_last_lookups()
> > > calls fsnotify_create.  So we get the "open" notification before
> > > "create" - which is backwards.  ltp testcase inotify02 tests this and
> > > reports the inconsistency.
> > >
> > > This patch lifts the fsnotify_open() call out of do_dentry_open() and
> > > places it higher up the call stack.  There are three callers of
> > > do_dentry_open().
> > >
> > > For vfs_open() and kernel_file_open() the fsnotify_open() is placed
> > > directly in that caller so there should be no behavioural change.
> > >
> > > For finish_open() there are two cases:
> > >  - finish_open is used in ->atomic_open handlers.  For these we add a
> > >    call to fsnotify_open() at the top of do_open() if FMODE_OPENED is
> > >    set - which means do_dentry_open() has been called.
> > >  - finish_open is used in ->tmpfile() handlers.  For these a similar
> > >    call to fsnotify_open() is added to vfs_tmpfile()
> >
> > Any handlers other than ovl_tmpfile()?
>
> Local filesystems tend to call finish_open_simple() which is a trivial
> wrapper around finish_open().
> Every .tmpfile handler calls either finish_open() or finish_open_simple().
>
> > This one is a very recent and pretty special case.
> > Did open(O_TMPFILE) used to emit an OPEN event before that change?
>
> I believe so, yes.
>

Right. Thanks for clarifying.

> Thanks,
> NeilBrown
>
> >
> > >
> > > With this patch NFSv3 is restored to its previous behaviour (before
> > > ->atomic_open support was added) of generating CREATE notifications
> > > before OPEN, and NFSv4 now has that same correct ordering that is has
> > > not had before.  I haven't tested other filesystems.
> > >
> > > Fixes: 7c6c5249f061 ("NFS: add atomic_open for NFSv3 to handle O_TRUNC correctly.")
> > > Reported-by: James Clark <james.clark@arm.com>
> > > Closes: https://lore.kernel.org/all/01c3bf2e-eb1f-4b7f-a54f-d2a05dd3d8c8@arm.com
> > > Signed-off-by: NeilBrown <neilb@suse.de>
> > > ---
> > >  fs/namei.c |  5 +++++
> > >  fs/open.c  | 19 ++++++++++++-------
> > >  2 files changed, 17 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/fs/namei.c b/fs/namei.c
> > > index 37fb0a8aa09a..057afacc4b60 100644
> > > --- a/fs/namei.c
> > > +++ b/fs/namei.c
> > > @@ -3612,6 +3612,9 @@ static int do_open(struct nameidata *nd,
> > >         int acc_mode;
> > >         int error;
> > >
> > > +       if (file->f_mode & FMODE_OPENED)
> > > +               fsnotify_open(file);
> > > +
> > >         if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
> > >                 error = complete_walk(nd);
> > >                 if (error)
> > > @@ -3700,6 +3703,8 @@ int vfs_tmpfile(struct mnt_idmap *idmap,
> > >         mode = vfs_prepare_mode(idmap, dir, mode, mode, mode);
> > >         error = dir->i_op->tmpfile(idmap, dir, file, mode);
> > >         dput(child);
> > > +       if (file->f_mode & FMODE_OPENED)
> > > +               fsnotify_open(file);
> > >         if (error)
> > >                 return error;
> > >         /* Don't check for other permissions, the inode was just created */
> > > diff --git a/fs/open.c b/fs/open.c
> > > index 89cafb572061..970f299c0e77 100644
> > > --- a/fs/open.c
> > > +++ b/fs/open.c
> > > @@ -1004,11 +1004,6 @@ static int do_dentry_open(struct file *f,
> > >                 }
> > >         }
> > >
> > > -       /*
> > > -        * Once we return a file with FMODE_OPENED, __fput() will call
> > > -        * fsnotify_close(), so we need fsnotify_open() here for symmetry.
> > > -        */
> > > -       fsnotify_open(f);
> > >         return 0;
> > >
> > >  cleanup_all:
> > > @@ -1085,8 +1080,17 @@ EXPORT_SYMBOL(file_path);
> > >   */
> > >  int vfs_open(const struct path *path, struct file *file)
> > >  {
> > > +       int ret;
> > > +
> > >         file->f_path = *path;
> > > -       return do_dentry_open(file, NULL);
> > > +       ret = do_dentry_open(file, NULL);
> > > +       if (!ret)
> > > +               /*
> > > +                * Once we return a file with FMODE_OPENED, __fput() will call
> > > +                * fsnotify_close(), so we need fsnotify_open() here for symmetry.
> > > +                */
> > > +               fsnotify_open(file);
> >
> > I agree that this change preserves the logic, but (my own) comment
> > above is inconsistent with the case of:
> >
> >         if ((f->f_flags & O_DIRECT) && !(f->f_mode & FMODE_CAN_ODIRECT))
> >                 return -EINVAL;
> >
> > Which does set FMODE_OPENED, but does not emit an OPEN event.
>
> If I understand correctly, that case doesn't emit an OPEN event before
> my patch, but will result in a CLOSE event.
> After my patch ... I think it still doesn't emit OPEN.
>
> I wonder if, instead of adding the the fsnotify_open() in do_open(), we
> should put it in the\
>         if (file->f_mode & (FMODE_OPENED | FMODE_CREATED)) {
> case of open_last_lookups().
>

We cannot do that.
See the reasoning for 7b8c9d7bb457 ("fsnotify: move fsnotify_open() hook into
do_dentry_open()") - we need the events for other callers of vfs_open(),
like overlayfs and nfsd.

> Or maybe it really doesn't hurt to have a CLOSE event without and OPEN.
> OPEN without CLOSE would be problematic, but the other way around
> shouldn't matter....  It feels untidy, but maybe we don't care.
>

We have had unmatched CLOSE events for a very long time before
7b8c9d7bb457 ("fsnotify: move fsnotify_open() hook into do_dentry_open()")
and I do not know of any complaints.

When I made this change, its purpose was not to match all OPEN/CLOSE
but to add missing OPEN events. However, I did try to avoid unmatched
CLOSE at least for the common cases.

Thanks,
Amir.
Christian Brauner June 15, 2024, 5:35 a.m. UTC | #7
On Wed, 12 Jun 2024 17:09:55 +1000, NeilBrown wrote:
> When a file is opened and created with open(..., O_CREAT) we get
> both the CREATE and OPEN fsnotify events and would expect them in that
> order.   For most filesystems we get them in that order because
> open_last_lookups() calls fsnofify_create() and then do_open() (from
> path_openat()) calls vfs_open()->do_dentry_open() which calls
> fsnotify_open().
> 
> [...]

Applied to the vfs.fixes branch of the vfs/vfs.git tree.
Patches in the vfs.fixes branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs.fixes

[1/1] VFS: generate FS_CREATE before FS_OPEN when ->atomic_open used.
      https://git.kernel.org/vfs/vfs/c/7536b2f06724
Jan Kara June 17, 2024, 9:37 a.m. UTC | #8
On Sat 15-06-24 07:35:42, Christian Brauner wrote:
> On Wed, 12 Jun 2024 17:09:55 +1000, NeilBrown wrote:
> > When a file is opened and created with open(..., O_CREAT) we get
> > both the CREATE and OPEN fsnotify events and would expect them in that
> > order.   For most filesystems we get them in that order because
> > open_last_lookups() calls fsnofify_create() and then do_open() (from
> > path_openat()) calls vfs_open()->do_dentry_open() which calls
> > fsnotify_open().
> > 
> > [...]
> 
> Applied to the vfs.fixes branch of the vfs/vfs.git tree.
> Patches in the vfs.fixes branch should appear in linux-next soon.
> 
> Please report any outstanding bugs that were missed during review in a
> new review to the original patch series allowing us to drop it.
> 
> It's encouraged to provide Acked-bys and Reviewed-bys even though the
> patch has now been applied. If possible patch trailers will be updated.
> 
> Note that commit hashes shown below are subject to change due to rebase,
> trailer updates or similar. If in doubt, please check the listed branch.
> 
> tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
> branch: vfs.fixes
> 
> [1/1] VFS: generate FS_CREATE before FS_OPEN when ->atomic_open used.
>       https://git.kernel.org/vfs/vfs/c/7536b2f06724

I have reviewed the patch you've committed since I wasn't quite sure which
changes you're going to apply after your discussion with Amir. And I have
two comments:

@@ -1085,8 +1080,17 @@ EXPORT_SYMBOL(file_path);
  */
 int vfs_open(const struct path *path, struct file *file)
 {
+	int ret;
+
 	file->f_path = *path;
-	return do_dentry_open(file, NULL);
+	ret = do_dentry_open(file, NULL);
+	if (!ret)
+		/*
+		 * Once we return a file with FMODE_OPENED, __fput() will call
+		 * fsnotify_close(), so we need fsnotify_open() here for symmetry.
+		 */
+		fsnotify_open(file);
+	return ret;
 }

AFAICT this will have a side-effect that now fsnotify_open() will be
generated even for O_PATH open. It is true that fsnotify_close() is getting
generated for them already and we should strive for symmetry. Conceptually
it doesn't make sense to me to generate fsnotify events for O_PATH
opens/closes but maybe I miss something. Amir, any opinion here?

@@ -3612,6 +3612,9 @@ static int do_open(struct nameidata *nd,
 	int acc_mode;
 	int error;
 
+	if (file->f_mode & FMODE_OPENED)
+		fsnotify_open(file);
+
 	if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
 		error = complete_walk(nd);
 		if (error)

Frankly, this works but looks as an odd place to put this notification to.
Why not just placing it just next to where fsnotify_create() is generated
in open_last_lookups()? Like:

        if (open_flag & O_CREAT)
                inode_lock(dir->d_inode);
        else
                inode_lock_shared(dir->d_inode);
        dentry = lookup_open(nd, file, op, got_write);
-	if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED))
-		fsnotify_create(dir->d_inode, dentry);
+	if (!IS_ERR(dentry)) {
+		if (file->f_mode & FMODE_CREATED)
+	                fsnotify_create(dir->d_inode, dentry);
+		if (file->f_mode & FMODE_OPENED)
+			fsnotify_open(file);
+	}
        if (open_flag & O_CREAT)
                inode_unlock(dir->d_inode);
        else
                inode_unlock_shared(dir->d_inode);

That looks like a place where it is much more obvious this is for
atomic_open() handling? Now I admit I'm not really closely familiar with
the atomic_open() paths so maybe I miss something and do_open() is better.

								Honza
Amir Goldstein June 17, 2024, 12:09 p.m. UTC | #9
On Mon, Jun 17, 2024 at 12:37 PM Jan Kara <jack@suse.cz> wrote:
>
> On Sat 15-06-24 07:35:42, Christian Brauner wrote:
> > On Wed, 12 Jun 2024 17:09:55 +1000, NeilBrown wrote:
> > > When a file is opened and created with open(..., O_CREAT) we get
> > > both the CREATE and OPEN fsnotify events and would expect them in that
> > > order.   For most filesystems we get them in that order because
> > > open_last_lookups() calls fsnofify_create() and then do_open() (from
> > > path_openat()) calls vfs_open()->do_dentry_open() which calls
> > > fsnotify_open().
> > >
> > > [...]
> >
> > Applied to the vfs.fixes branch of the vfs/vfs.git tree.
> > Patches in the vfs.fixes branch should appear in linux-next soon.
> >
> > Please report any outstanding bugs that were missed during review in a
> > new review to the original patch series allowing us to drop it.
> >
> > It's encouraged to provide Acked-bys and Reviewed-bys even though the
> > patch has now been applied. If possible patch trailers will be updated.
> >
> > Note that commit hashes shown below are subject to change due to rebase,
> > trailer updates or similar. If in doubt, please check the listed branch.
> >
> > tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
> > branch: vfs.fixes
> >
> > [1/1] VFS: generate FS_CREATE before FS_OPEN when ->atomic_open used.
> >       https://git.kernel.org/vfs/vfs/c/7536b2f06724
>
> I have reviewed the patch you've committed since I wasn't quite sure which
> changes you're going to apply after your discussion with Amir. And I have
> two comments:
>
> @@ -1085,8 +1080,17 @@ EXPORT_SYMBOL(file_path);
>   */
>  int vfs_open(const struct path *path, struct file *file)
>  {
> +       int ret;
> +
>         file->f_path = *path;
> -       return do_dentry_open(file, NULL);
> +       ret = do_dentry_open(file, NULL);
> +       if (!ret)
> +               /*
> +                * Once we return a file with FMODE_OPENED, __fput() will call
> +                * fsnotify_close(), so we need fsnotify_open() here for symmetry.
> +                */
> +               fsnotify_open(file);

Please add { } around multi line indented text.

> +       return ret;
>  }
>
> AFAICT this will have a side-effect that now fsnotify_open() will be
> generated even for O_PATH open. It is true that fsnotify_close() is getting
> generated for them already and we should strive for symmetry. Conceptually
> it doesn't make sense to me to generate fsnotify events for O_PATH
> opens/closes but maybe I miss something. Amir, any opinion here?

Good catch!

I agree that we do not need OPEN nor CLOSE events for O_PATH.
I suggest to solve it with:

@@ -915,7 +929,7 @@ static int do_dentry_open(struct file *f,
        f->f_sb_err = file_sample_sb_err(f);

        if (unlikely(f->f_flags & O_PATH)) {
-               f->f_mode = FMODE_PATH | FMODE_OPENED;
+               f->f_mode = FMODE_PATH | FMODE_OPENED | __FMODE_NONOTIFY;
                f->f_op = &empty_fops;
                return 0;
        }

>
> @@ -3612,6 +3612,9 @@ static int do_open(struct nameidata *nd,
>         int acc_mode;
>         int error;
>
> +       if (file->f_mode & FMODE_OPENED)
> +               fsnotify_open(file);
> +
>         if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
>                 error = complete_walk(nd);
>                 if (error)
>
> Frankly, this works but looks as an odd place to put this notification to.
> Why not just placing it just next to where fsnotify_create() is generated
> in open_last_lookups()? Like:
>
>         if (open_flag & O_CREAT)
>                 inode_lock(dir->d_inode);
>         else
>                 inode_lock_shared(dir->d_inode);
>         dentry = lookup_open(nd, file, op, got_write);
> -       if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED))
> -               fsnotify_create(dir->d_inode, dentry);
> +       if (!IS_ERR(dentry)) {
> +               if (file->f_mode & FMODE_CREATED)
> +                       fsnotify_create(dir->d_inode, dentry);
> +               if (file->f_mode & FMODE_OPENED)
> +                       fsnotify_open(file);
> +       }
>         if (open_flag & O_CREAT)
>                 inode_unlock(dir->d_inode);
>         else
>                 inode_unlock_shared(dir->d_inode);
>
> That looks like a place where it is much more obvious this is for
> atomic_open() handling? Now I admit I'm not really closely familiar with
> the atomic_open() paths so maybe I miss something and do_open() is better.

It looks nice, but I think it is missing the fast lookup case without O_CREAT
(i.e. goto finish_lookup).

Thanks,
Amir.
Jan Kara June 17, 2024, 1:07 p.m. UTC | #10
On Mon 17-06-24 15:09:09, Amir Goldstein wrote:
> On Mon, Jun 17, 2024 at 12:37 PM Jan Kara <jack@suse.cz> wrote:
> > On Sat 15-06-24 07:35:42, Christian Brauner wrote:
> > > On Wed, 12 Jun 2024 17:09:55 +1000, NeilBrown wrote:
> > > > When a file is opened and created with open(..., O_CREAT) we get
> > > > both the CREATE and OPEN fsnotify events and would expect them in that
> > > > order.   For most filesystems we get them in that order because
> > > > open_last_lookups() calls fsnofify_create() and then do_open() (from
> > > > path_openat()) calls vfs_open()->do_dentry_open() which calls
> > > > fsnotify_open().
> > > >
> > > > [...]
> > >
> > > Applied to the vfs.fixes branch of the vfs/vfs.git tree.
> > > Patches in the vfs.fixes branch should appear in linux-next soon.
> > >
> > > Please report any outstanding bugs that were missed during review in a
> > > new review to the original patch series allowing us to drop it.
> > >
> > > It's encouraged to provide Acked-bys and Reviewed-bys even though the
> > > patch has now been applied. If possible patch trailers will be updated.
> > >
> > > Note that commit hashes shown below are subject to change due to rebase,
> > > trailer updates or similar. If in doubt, please check the listed branch.
> > >
> > > tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
> > > branch: vfs.fixes
> > >
> > > [1/1] VFS: generate FS_CREATE before FS_OPEN when ->atomic_open used.
> > >       https://git.kernel.org/vfs/vfs/c/7536b2f06724
> >
> > I have reviewed the patch you've committed since I wasn't quite sure which
> > changes you're going to apply after your discussion with Amir. And I have
> > two comments:
> >
> > @@ -1085,8 +1080,17 @@ EXPORT_SYMBOL(file_path);
> >   */
> >  int vfs_open(const struct path *path, struct file *file)
> >  {
> > +       int ret;
> > +
> >         file->f_path = *path;
> > -       return do_dentry_open(file, NULL);
> > +       ret = do_dentry_open(file, NULL);
> > +       if (!ret)
> > +               /*
> > +                * Once we return a file with FMODE_OPENED, __fput() will call
> > +                * fsnotify_close(), so we need fsnotify_open() here for symmetry.
> > +                */
> > +               fsnotify_open(file);
> 
> Please add { } around multi line indented text.
> 
> > +       return ret;
> >  }
> >
> > AFAICT this will have a side-effect that now fsnotify_open() will be
> > generated even for O_PATH open. It is true that fsnotify_close() is getting
> > generated for them already and we should strive for symmetry. Conceptually
> > it doesn't make sense to me to generate fsnotify events for O_PATH
> > opens/closes but maybe I miss something. Amir, any opinion here?
> 
> Good catch!
> 
> I agree that we do not need OPEN nor CLOSE events for O_PATH.
> I suggest to solve it with:
> 
> @@ -915,7 +929,7 @@ static int do_dentry_open(struct file *f,
>         f->f_sb_err = file_sample_sb_err(f);
> 
>         if (unlikely(f->f_flags & O_PATH)) {
> -               f->f_mode = FMODE_PATH | FMODE_OPENED;
> +               f->f_mode = FMODE_PATH | FMODE_OPENED | __FMODE_NONOTIFY;
>                 f->f_op = &empty_fops;
>                 return 0;
>         }

First I was somewhat nervous about this as it results in returning O_PATH
fd with __FMODE_NONOTIFY to userspace and I was afraid it may influence
generation of events *somewhere*. But checking a bit, we use 'file' for
generating only open, access, modify, and close events so yes, this should
be safe. Alternatively we could add explicit checks for !O_PATH when
generating open / close events.

> > @@ -3612,6 +3612,9 @@ static int do_open(struct nameidata *nd,
> >         int acc_mode;
> >         int error;
> >
> > +       if (file->f_mode & FMODE_OPENED)
> > +               fsnotify_open(file);
> > +
> >         if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
> >                 error = complete_walk(nd);
> >                 if (error)
> >
> > Frankly, this works but looks as an odd place to put this notification to.
> > Why not just placing it just next to where fsnotify_create() is generated
> > in open_last_lookups()? Like:
> >
> >         if (open_flag & O_CREAT)
> >                 inode_lock(dir->d_inode);
> >         else
> >                 inode_lock_shared(dir->d_inode);
> >         dentry = lookup_open(nd, file, op, got_write);
> > -       if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED))
> > -               fsnotify_create(dir->d_inode, dentry);
> > +       if (!IS_ERR(dentry)) {
> > +               if (file->f_mode & FMODE_CREATED)
> > +                       fsnotify_create(dir->d_inode, dentry);
> > +               if (file->f_mode & FMODE_OPENED)
> > +                       fsnotify_open(file);
> > +       }
> >         if (open_flag & O_CREAT)
> >                 inode_unlock(dir->d_inode);
> >         else
> >                 inode_unlock_shared(dir->d_inode);
> >
> > That looks like a place where it is much more obvious this is for
> > atomic_open() handling? Now I admit I'm not really closely familiar with
> > the atomic_open() paths so maybe I miss something and do_open() is better.
> 
> It looks nice, but I think it is missing the fast lookup case without O_CREAT
> (i.e. goto finish_lookup).

I don't think so. AFAICT that case will generate the event in vfs_open()
anyway and not in open_last_lookups() / do_open(). Am I missing something?

								Honza
Amir Goldstein June 17, 2024, 1:49 p.m. UTC | #11
On Mon, Jun 17, 2024 at 4:07 PM Jan Kara <jack@suse.cz> wrote:
>
> On Mon 17-06-24 15:09:09, Amir Goldstein wrote:
> > On Mon, Jun 17, 2024 at 12:37 PM Jan Kara <jack@suse.cz> wrote:
> > > On Sat 15-06-24 07:35:42, Christian Brauner wrote:
> > > > On Wed, 12 Jun 2024 17:09:55 +1000, NeilBrown wrote:
> > > > > When a file is opened and created with open(..., O_CREAT) we get
> > > > > both the CREATE and OPEN fsnotify events and would expect them in that
> > > > > order.   For most filesystems we get them in that order because
> > > > > open_last_lookups() calls fsnofify_create() and then do_open() (from
> > > > > path_openat()) calls vfs_open()->do_dentry_open() which calls
> > > > > fsnotify_open().
> > > > >
> > > > > [...]
> > > >
> > > > Applied to the vfs.fixes branch of the vfs/vfs.git tree.
> > > > Patches in the vfs.fixes branch should appear in linux-next soon.
> > > >
> > > > Please report any outstanding bugs that were missed during review in a
> > > > new review to the original patch series allowing us to drop it.
> > > >
> > > > It's encouraged to provide Acked-bys and Reviewed-bys even though the
> > > > patch has now been applied. If possible patch trailers will be updated.
> > > >
> > > > Note that commit hashes shown below are subject to change due to rebase,
> > > > trailer updates or similar. If in doubt, please check the listed branch.
> > > >
> > > > tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
> > > > branch: vfs.fixes
> > > >
> > > > [1/1] VFS: generate FS_CREATE before FS_OPEN when ->atomic_open used.
> > > >       https://git.kernel.org/vfs/vfs/c/7536b2f06724
> > >
> > > I have reviewed the patch you've committed since I wasn't quite sure which
> > > changes you're going to apply after your discussion with Amir. And I have
> > > two comments:
> > >
> > > @@ -1085,8 +1080,17 @@ EXPORT_SYMBOL(file_path);
> > >   */
> > >  int vfs_open(const struct path *path, struct file *file)
> > >  {
> > > +       int ret;
> > > +
> > >         file->f_path = *path;
> > > -       return do_dentry_open(file, NULL);
> > > +       ret = do_dentry_open(file, NULL);
> > > +       if (!ret)
> > > +               /*
> > > +                * Once we return a file with FMODE_OPENED, __fput() will call
> > > +                * fsnotify_close(), so we need fsnotify_open() here for symmetry.
> > > +                */
> > > +               fsnotify_open(file);
> >
> > Please add { } around multi line indented text.
> >
> > > +       return ret;
> > >  }
> > >
> > > AFAICT this will have a side-effect that now fsnotify_open() will be
> > > generated even for O_PATH open. It is true that fsnotify_close() is getting
> > > generated for them already and we should strive for symmetry. Conceptually
> > > it doesn't make sense to me to generate fsnotify events for O_PATH
> > > opens/closes but maybe I miss something. Amir, any opinion here?
> >
> > Good catch!
> >
> > I agree that we do not need OPEN nor CLOSE events for O_PATH.
> > I suggest to solve it with:
> >
> > @@ -915,7 +929,7 @@ static int do_dentry_open(struct file *f,
> >         f->f_sb_err = file_sample_sb_err(f);
> >
> >         if (unlikely(f->f_flags & O_PATH)) {
> > -               f->f_mode = FMODE_PATH | FMODE_OPENED;
> > +               f->f_mode = FMODE_PATH | FMODE_OPENED | __FMODE_NONOTIFY;
> >                 f->f_op = &empty_fops;
> >                 return 0;
> >         }
>
> First I was somewhat nervous about this as it results in returning O_PATH
> fd with __FMODE_NONOTIFY to userspace and I was afraid it may influence
> generation of events *somewhere*.

It influences my POC code of future lookup permission event:
https://github.com/amir73il/linux/commits/fan_lookup_perm/
which is supposed to generate events on lookup with O_PATH fd.

> But checking a bit, we use 'file' for
> generating only open, access, modify, and close events so yes, this should
> be safe. Alternatively we could add explicit checks for !O_PATH when
> generating open / close events.
>

So yeh, this would be better:

--- a/include/linux/fsnotify.h
+++ b/include/linux/fsnotify.h
@@ -112,7 +112,7 @@ static inline int fsnotify_file(struct file *file,
__u32 mask)
 {
        const struct path *path;

-       if (file->f_mode & FMODE_NONOTIFY)
+       if (file->f_mode & (FMODE_NONOTIFY | FMODE_PATH))
                return 0;

        path = &file->f_path;
--

It is a dilemma, if this patch should be separate.
On the one hand it fixes unbalanced CLOSE events on O_PATH fd,
so it is an independent fix.
OTOH, it is a requirement for moving fsnotify_open() out of
do_dentry_open(), so not so healthy to separate them, when it is less clear
that they need to be backported together.

> > > @@ -3612,6 +3612,9 @@ static int do_open(struct nameidata *nd,
> > >         int acc_mode;
> > >         int error;
> > >
> > > +       if (file->f_mode & FMODE_OPENED)
> > > +               fsnotify_open(file);
> > > +
> > >         if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
> > >                 error = complete_walk(nd);
> > >                 if (error)
> > >
> > > Frankly, this works but looks as an odd place to put this notification to.
> > > Why not just placing it just next to where fsnotify_create() is generated
> > > in open_last_lookups()? Like:
> > >
> > >         if (open_flag & O_CREAT)
> > >                 inode_lock(dir->d_inode);
> > >         else
> > >                 inode_lock_shared(dir->d_inode);
> > >         dentry = lookup_open(nd, file, op, got_write);
> > > -       if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED))
> > > -               fsnotify_create(dir->d_inode, dentry);
> > > +       if (!IS_ERR(dentry)) {
> > > +               if (file->f_mode & FMODE_CREATED)
> > > +                       fsnotify_create(dir->d_inode, dentry);
> > > +               if (file->f_mode & FMODE_OPENED)
> > > +                       fsnotify_open(file);
> > > +       }
> > >         if (open_flag & O_CREAT)
> > >                 inode_unlock(dir->d_inode);
> > >         else
> > >                 inode_unlock_shared(dir->d_inode);
> > >
> > > That looks like a place where it is much more obvious this is for
> > > atomic_open() handling? Now I admit I'm not really closely familiar with
> > > the atomic_open() paths so maybe I miss something and do_open() is better.
> >
> > It looks nice, but I think it is missing the fast lookup case without O_CREAT
> > (i.e. goto finish_lookup).
>
> I don't think so. AFAICT that case will generate the event in vfs_open()
> anyway and not in open_last_lookups() / do_open(). Am I missing something?

No. I am. This code is hard to follow.
I am fine with your variant, but maybe after so many in-tree changes
including the extra change of O_PATH, perhaps it would be better
to move this patch to your fsnotify tree?

Thanks,
Amir.
Jan Kara June 17, 2024, 3:51 p.m. UTC | #12
On Mon 17-06-24 16:49:55, Amir Goldstein wrote:
> On Mon, Jun 17, 2024 at 4:07 PM Jan Kara <jack@suse.cz> wrote:
> >
> > On Mon 17-06-24 15:09:09, Amir Goldstein wrote:
> > > On Mon, Jun 17, 2024 at 12:37 PM Jan Kara <jack@suse.cz> wrote:
> > > > On Sat 15-06-24 07:35:42, Christian Brauner wrote:
> > > > > On Wed, 12 Jun 2024 17:09:55 +1000, NeilBrown wrote:
> > > > > > When a file is opened and created with open(..., O_CREAT) we get
> > > > > > both the CREATE and OPEN fsnotify events and would expect them in that
> > > > > > order.   For most filesystems we get them in that order because
> > > > > > open_last_lookups() calls fsnofify_create() and then do_open() (from
> > > > > > path_openat()) calls vfs_open()->do_dentry_open() which calls
> > > > > > fsnotify_open().
> > > > > >
> > > > > > [...]
> > > > >
> > > > > Applied to the vfs.fixes branch of the vfs/vfs.git tree.
> > > > > Patches in the vfs.fixes branch should appear in linux-next soon.
> > > > >
> > > > > Please report any outstanding bugs that were missed during review in a
> > > > > new review to the original patch series allowing us to drop it.
> > > > >
> > > > > It's encouraged to provide Acked-bys and Reviewed-bys even though the
> > > > > patch has now been applied. If possible patch trailers will be updated.
> > > > >
> > > > > Note that commit hashes shown below are subject to change due to rebase,
> > > > > trailer updates or similar. If in doubt, please check the listed branch.
> > > > >
> > > > > tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
> > > > > branch: vfs.fixes
> > > > >
> > > > > [1/1] VFS: generate FS_CREATE before FS_OPEN when ->atomic_open used.
> > > > >       https://git.kernel.org/vfs/vfs/c/7536b2f06724
> > > >
> > > > I have reviewed the patch you've committed since I wasn't quite sure which
> > > > changes you're going to apply after your discussion with Amir. And I have
> > > > two comments:
> > > >
> > > > @@ -1085,8 +1080,17 @@ EXPORT_SYMBOL(file_path);
> > > >   */
> > > >  int vfs_open(const struct path *path, struct file *file)
> > > >  {
> > > > +       int ret;
> > > > +
> > > >         file->f_path = *path;
> > > > -       return do_dentry_open(file, NULL);
> > > > +       ret = do_dentry_open(file, NULL);
> > > > +       if (!ret)
> > > > +               /*
> > > > +                * Once we return a file with FMODE_OPENED, __fput() will call
> > > > +                * fsnotify_close(), so we need fsnotify_open() here for symmetry.
> > > > +                */
> > > > +               fsnotify_open(file);
> > >
> > > Please add { } around multi line indented text.
> > >
> > > > +       return ret;
> > > >  }
> > > >
> > > > AFAICT this will have a side-effect that now fsnotify_open() will be
> > > > generated even for O_PATH open. It is true that fsnotify_close() is getting
> > > > generated for them already and we should strive for symmetry. Conceptually
> > > > it doesn't make sense to me to generate fsnotify events for O_PATH
> > > > opens/closes but maybe I miss something. Amir, any opinion here?
> > >
> > > Good catch!
> > >
> > > I agree that we do not need OPEN nor CLOSE events for O_PATH.
> > > I suggest to solve it with:
> > >
> > > @@ -915,7 +929,7 @@ static int do_dentry_open(struct file *f,
> > >         f->f_sb_err = file_sample_sb_err(f);
> > >
> > >         if (unlikely(f->f_flags & O_PATH)) {
> > > -               f->f_mode = FMODE_PATH | FMODE_OPENED;
> > > +               f->f_mode = FMODE_PATH | FMODE_OPENED | __FMODE_NONOTIFY;
> > >                 f->f_op = &empty_fops;
> > >                 return 0;
> > >         }
> >
> > First I was somewhat nervous about this as it results in returning O_PATH
> > fd with __FMODE_NONOTIFY to userspace and I was afraid it may influence
> > generation of events *somewhere*.
> 
> It influences my POC code of future lookup permission event:
> https://github.com/amir73il/linux/commits/fan_lookup_perm/
> which is supposed to generate events on lookup with O_PATH fd.
> 
> > But checking a bit, we use 'file' for
> > generating only open, access, modify, and close events so yes, this should
> > be safe. Alternatively we could add explicit checks for !O_PATH when
> > generating open / close events.
> >
> 
> So yeh, this would be better:
> 
> --- a/include/linux/fsnotify.h
> +++ b/include/linux/fsnotify.h
> @@ -112,7 +112,7 @@ static inline int fsnotify_file(struct file *file,
> __u32 mask)
>  {
>         const struct path *path;
> 
> -       if (file->f_mode & FMODE_NONOTIFY)
> +       if (file->f_mode & (FMODE_NONOTIFY | FMODE_PATH))
>                 return 0;
> 
>         path = &file->f_path;
> --
> 
> It is a dilemma, if this patch should be separate.
> On the one hand it fixes unbalanced CLOSE events on O_PATH fd,
> so it is an independent fix.
> OTOH, it is a requirement for moving fsnotify_open() out of
> do_dentry_open(), so not so healthy to separate them, when it is less clear
> that they need to be backported together.

Yeah, I guess a separate patch would be better because it also needs a good
changelog explaining this.

> > > > @@ -3612,6 +3612,9 @@ static int do_open(struct nameidata *nd,
> > > >         int acc_mode;
> > > >         int error;
> > > >
> > > > +       if (file->f_mode & FMODE_OPENED)
> > > > +               fsnotify_open(file);
> > > > +
> > > >         if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
> > > >                 error = complete_walk(nd);
> > > >                 if (error)
> > > >
> > > > Frankly, this works but looks as an odd place to put this notification to.
> > > > Why not just placing it just next to where fsnotify_create() is generated
> > > > in open_last_lookups()? Like:
> > > >
> > > >         if (open_flag & O_CREAT)
> > > >                 inode_lock(dir->d_inode);
> > > >         else
> > > >                 inode_lock_shared(dir->d_inode);
> > > >         dentry = lookup_open(nd, file, op, got_write);
> > > > -       if (!IS_ERR(dentry) && (file->f_mode & FMODE_CREATED))
> > > > -               fsnotify_create(dir->d_inode, dentry);
> > > > +       if (!IS_ERR(dentry)) {
> > > > +               if (file->f_mode & FMODE_CREATED)
> > > > +                       fsnotify_create(dir->d_inode, dentry);
> > > > +               if (file->f_mode & FMODE_OPENED)
> > > > +                       fsnotify_open(file);
> > > > +       }
> > > >         if (open_flag & O_CREAT)
> > > >                 inode_unlock(dir->d_inode);
> > > >         else
> > > >                 inode_unlock_shared(dir->d_inode);
> > > >
> > > > That looks like a place where it is much more obvious this is for
> > > > atomic_open() handling? Now I admit I'm not really closely familiar with
> > > > the atomic_open() paths so maybe I miss something and do_open() is better.
> > >
> > > It looks nice, but I think it is missing the fast lookup case without O_CREAT
> > > (i.e. goto finish_lookup).
> >
> > I don't think so. AFAICT that case will generate the event in vfs_open()
> > anyway and not in open_last_lookups() / do_open(). Am I missing something?
> 
> No. I am. This code is hard to follow.
> I am fine with your variant, but maybe after so many in-tree changes
> including the extra change of O_PATH, perhaps it would be better
> to move this patch to your fsnotify tree?

I don't care much which tree this goes through as the actual amount of
context is minimal. But I definitely want to send another version of the
series out to the tree. I guess I'll do it because Neil might have trouble
justifying the O_PATH change in the changelog :).

								Honza
diff mbox series

Patch

diff --git a/fs/namei.c b/fs/namei.c
index 37fb0a8aa09a..057afacc4b60 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3612,6 +3612,9 @@  static int do_open(struct nameidata *nd,
 	int acc_mode;
 	int error;
 
+	if (file->f_mode & FMODE_OPENED)
+		fsnotify_open(file);
+
 	if (!(file->f_mode & (FMODE_OPENED | FMODE_CREATED))) {
 		error = complete_walk(nd);
 		if (error)
@@ -3700,6 +3703,8 @@  int vfs_tmpfile(struct mnt_idmap *idmap,
 	mode = vfs_prepare_mode(idmap, dir, mode, mode, mode);
 	error = dir->i_op->tmpfile(idmap, dir, file, mode);
 	dput(child);
+	if (file->f_mode & FMODE_OPENED)
+		fsnotify_open(file);
 	if (error)
 		return error;
 	/* Don't check for other permissions, the inode was just created */
diff --git a/fs/open.c b/fs/open.c
index 89cafb572061..970f299c0e77 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1004,11 +1004,6 @@  static int do_dentry_open(struct file *f,
 		}
 	}
 
-	/*
-	 * Once we return a file with FMODE_OPENED, __fput() will call
-	 * fsnotify_close(), so we need fsnotify_open() here for symmetry.
-	 */
-	fsnotify_open(f);
 	return 0;
 
 cleanup_all:
@@ -1085,8 +1080,17 @@  EXPORT_SYMBOL(file_path);
  */
 int vfs_open(const struct path *path, struct file *file)
 {
+	int ret;
+
 	file->f_path = *path;
-	return do_dentry_open(file, NULL);
+	ret = do_dentry_open(file, NULL);
+	if (!ret)
+		/*
+		 * Once we return a file with FMODE_OPENED, __fput() will call
+		 * fsnotify_close(), so we need fsnotify_open() here for symmetry.
+		 */
+		fsnotify_open(file);
+	return ret;
 }
 
 struct file *dentry_open(const struct path *path, int flags,
@@ -1178,7 +1182,8 @@  struct file *kernel_file_open(const struct path *path, int flags,
 	if (error) {
 		fput(f);
 		f = ERR_PTR(error);
-	}
+	} else
+		fsnotify_open(f);
 	return f;
 }
 EXPORT_SYMBOL_GPL(kernel_file_open);