diff mbox series

[4/5] fanotify: add support for exclusive create of mark

Message ID 20220307155741.1352405-5-amir73il@gmail.com (mailing list archive)
State New, archived
Headers show
Series Volatile fanotify marks | expand

Commit Message

Amir Goldstein March 7, 2022, 3:57 p.m. UTC
Similar to inotify's IN_MARK_CREATE, adding an fanotify mark with flag
FAN_MARK_CREATE will fail with error EEXIST if an fanotify mark already
exists on the object.

Unlike inotify's IN_MARK_CREATE, FAN_MARK_CREATE has to supplied in
combination with FAN_MARK_ADD (FAN_MARK_ADD is like inotify_add_watch()
and the behavior of IN_MARK_ADD is the default for fanotify_mark()).

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
---
 fs/notify/fanotify/fanotify_user.c | 13 ++++++++++---
 include/linux/fanotify.h           |  8 +++++---
 include/uapi/linux/fanotify.h      |  1 +
 3 files changed, 16 insertions(+), 6 deletions(-)

Comments

Jan Kara March 17, 2022, 3:34 p.m. UTC | #1
On Mon 07-03-22 17:57:40, Amir Goldstein wrote:
> Similar to inotify's IN_MARK_CREATE, adding an fanotify mark with flag
> FAN_MARK_CREATE will fail with error EEXIST if an fanotify mark already
> exists on the object.
> 
> Unlike inotify's IN_MARK_CREATE, FAN_MARK_CREATE has to supplied in
> combination with FAN_MARK_ADD (FAN_MARK_ADD is like inotify_add_watch()
> and the behavior of IN_MARK_ADD is the default for fanotify_mark()).
> 
> Signed-off-by: Amir Goldstein <amir73il@gmail.com>

What I'm missing in this changelog is "why". Is it just about feature
parity with inotify? I don't find this feature particularly useful...

								Honza

> ---
>  fs/notify/fanotify/fanotify_user.c | 13 ++++++++++---
>  include/linux/fanotify.h           |  8 +++++---
>  include/uapi/linux/fanotify.h      |  1 +
>  3 files changed, 16 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
> index 9b32b76a9c30..99c5ced6abd8 100644
> --- a/fs/notify/fanotify/fanotify_user.c
> +++ b/fs/notify/fanotify/fanotify_user.c
> @@ -1185,6 +1185,9 @@ static int fanotify_add_mark(struct fsnotify_group *group,
>  			mutex_unlock(&group->mark_mutex);
>  			return PTR_ERR(fsn_mark);
>  		}
> +	} else if (flags & FAN_MARK_CREATE) {
> +		ret = -EEXIST;
> +		goto out;
>  	}
>  
>  	/*
> @@ -1510,6 +1513,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
>  	__kernel_fsid_t __fsid, *fsid = NULL;
>  	u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
>  	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
> +	unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
>  	bool ignored = flags & FAN_MARK_IGNORED_MASK;
>  	unsigned int obj_type, fid_mode;
>  	u32 umask = 0;
> @@ -1539,7 +1543,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
>  		return -EINVAL;
>  	}
>  
> -	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
> +	if (flags & FAN_MARK_CREATE && mark_cmd != FAN_MARK_ADD)
> +		return -EINVAL;
> +
> +	switch (mark_cmd) {
>  	case FAN_MARK_ADD:
>  	case FAN_MARK_REMOVE:
>  		if (!mask)
> @@ -1671,7 +1678,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
>  	}
>  
>  	/* create/update an inode mark */
> -	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
> +	switch (mark_cmd) {
>  	case FAN_MARK_ADD:
>  		if (mark_type == FAN_MARK_MOUNT)
>  			ret = fanotify_add_vfsmount_mark(group, mnt, mask,
> @@ -1749,7 +1756,7 @@ static int __init fanotify_user_setup(void)
>  
>  	BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
>  	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12);
> -	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
> +	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 10);
>  
>  	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
>  					 SLAB_PANIC|SLAB_ACCOUNT);
> diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
> index 419cadcd7ff5..780f4b17d4c9 100644
> --- a/include/linux/fanotify.h
> +++ b/include/linux/fanotify.h
> @@ -59,14 +59,16 @@
>  #define FANOTIFY_MARK_TYPE_BITS	(FAN_MARK_INODE | FAN_MARK_MOUNT | \
>  				 FAN_MARK_FILESYSTEM)
>  
> +#define FANOTIFY_MARK_CMD_BITS	(FAN_MARK_ADD | FAN_MARK_REMOVE | \
> +				 FAN_MARK_FLUSH)
> +
>  #define FANOTIFY_MARK_FLAGS	(FANOTIFY_MARK_TYPE_BITS | \
> -				 FAN_MARK_ADD | \
> -				 FAN_MARK_REMOVE | \
> +				 FANOTIFY_MARK_CMD_BITS | \
>  				 FAN_MARK_DONT_FOLLOW | \
>  				 FAN_MARK_ONLYDIR | \
>  				 FAN_MARK_IGNORED_MASK | \
>  				 FAN_MARK_IGNORED_SURV_MODIFY | \
> -				 FAN_MARK_FLUSH)
> +				 FAN_MARK_CREATE)
>  
>  /*
>   * Events that can be reported with data type FSNOTIFY_EVENT_PATH.
> diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
> index e8ac38cc2fd6..c41feac21fe9 100644
> --- a/include/uapi/linux/fanotify.h
> +++ b/include/uapi/linux/fanotify.h
> @@ -82,6 +82,7 @@
>  #define FAN_MARK_IGNORED_SURV_MODIFY	0x00000040
>  #define FAN_MARK_FLUSH		0x00000080
>  /* FAN_MARK_FILESYSTEM is	0x00000100 */
> +#define FAN_MARK_CREATE		0x00000200
>  
>  /* These are NOT bitwise flags.  Both bits can be used togther.  */
>  #define FAN_MARK_INODE		0x00000000
> -- 
> 2.25.1
>
Jan Kara March 17, 2022, 3:45 p.m. UTC | #2
On Thu 17-03-22 16:34:43, Jan Kara wrote:
> On Mon 07-03-22 17:57:40, Amir Goldstein wrote:
> > Similar to inotify's IN_MARK_CREATE, adding an fanotify mark with flag
> > FAN_MARK_CREATE will fail with error EEXIST if an fanotify mark already
> > exists on the object.
> > 
> > Unlike inotify's IN_MARK_CREATE, FAN_MARK_CREATE has to supplied in
> > combination with FAN_MARK_ADD (FAN_MARK_ADD is like inotify_add_watch()
> > and the behavior of IN_MARK_ADD is the default for fanotify_mark()).
> > 
> > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> 
> What I'm missing in this changelog is "why". Is it just about feature
> parity with inotify? I don't find this feature particularly useful...

OK, now I understand after reading patch 5/5. Hum, but I'm not quite happy
about the limitation to non-existing mark as much as I understand why you
need it. Let me think...

								Honza
Amir Goldstein March 18, 2022, 3:13 a.m. UTC | #3
On Thu, Mar 17, 2022 at 5:45 PM Jan Kara <jack@suse.cz> wrote:
>
> On Thu 17-03-22 16:34:43, Jan Kara wrote:
> > On Mon 07-03-22 17:57:40, Amir Goldstein wrote:
> > > Similar to inotify's IN_MARK_CREATE, adding an fanotify mark with flag
> > > FAN_MARK_CREATE will fail with error EEXIST if an fanotify mark already
> > > exists on the object.
> > >
> > > Unlike inotify's IN_MARK_CREATE, FAN_MARK_CREATE has to supplied in
> > > combination with FAN_MARK_ADD (FAN_MARK_ADD is like inotify_add_watch()
> > > and the behavior of IN_MARK_ADD is the default for fanotify_mark()).
> > >
> > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> >
> > What I'm missing in this changelog is "why". Is it just about feature
> > parity with inotify? I don't find this feature particularly useful...
>
> OK, now I understand after reading patch 5/5. Hum, but I'm not quite happy
> about the limitation to non-existing mark as much as I understand why you
> need it. Let me think...
>

Sorry for not articulating the problem better.
Let me write up the problem and maybe someone can come up with a better
solution than I did.

The problem that I was trying to avoid with FAN_MARK_VOLATILE is similar
to an existing UAPI problem with FAN_MARK_IGNORED_SURV_MODIFY -
This flag can only be set and not cleared and when set it affects all the events
set in the mask prior to that time, leading to unpredictable results.

Let's say a user sets FAN_CLOSE in ignored mask without _SURV_MODIFY
and later sets FAN_OPEN  in ignored mask with _SURV_MODIFY.
Does the ignored mask now include FAN_CLOSE? That depends
whether or not FAN_MODIFY event took place between the two calls.

That is one of the reasons I was trying to get rid of _SURV_MODIFY with
new FAN_MARK_IGNORE flag. The trickery in FAN_MARK_CREATE is
that the problem is averted - if a mark property can only be set and never
cleared and if it affects all past and future changes to mask, allow to set
this property during mark creation time and only during mark creation time.

I don't think there is a real use case for changing the _SURV_MODIFY
nor _VOLATILE property of a mark and indeed with new FAN_MARK_IGNORE
semantics, we may only allow to set _SURV_MODIFY along with
FAN_MARK_CREATE, so there are two problems solved using this method.

The fact that FAN_MARK_CREATE has feature parity with inotify is not
the reason to add it, but it does help to swallow this somewhat awkward
solution. And it is certainly easy to document.

As the commit message implies, I was contemplating whether
FAN_MARK_CREATE should be an alternative to FAN_MARK_ADD
or an ORed flag.
Semantics-wise we could decide either way.
I chose the option that seemed easier to implement and document
the behavior of FAN_MARK_VOLATILE.
Using FAN_MARK_CREATE as an alternative to FAN_MARK_ADD may
be a bit more elegant for UAPI though.
We could use a macro to get UAPI elegance without compromising simplicity:

#define FAN_MARK_NEW (FAN_MARK_ADD | FAN_MARK_CREATE)

Thanks,
Amir.
Jan Kara March 18, 2022, 10:32 a.m. UTC | #4
On Fri 18-03-22 05:13:01, Amir Goldstein wrote:
> On Thu, Mar 17, 2022 at 5:45 PM Jan Kara <jack@suse.cz> wrote:
> >
> > On Thu 17-03-22 16:34:43, Jan Kara wrote:
> > > On Mon 07-03-22 17:57:40, Amir Goldstein wrote:
> > > > Similar to inotify's IN_MARK_CREATE, adding an fanotify mark with flag
> > > > FAN_MARK_CREATE will fail with error EEXIST if an fanotify mark already
> > > > exists on the object.
> > > >
> > > > Unlike inotify's IN_MARK_CREATE, FAN_MARK_CREATE has to supplied in
> > > > combination with FAN_MARK_ADD (FAN_MARK_ADD is like inotify_add_watch()
> > > > and the behavior of IN_MARK_ADD is the default for fanotify_mark()).
> > > >
> > > > Signed-off-by: Amir Goldstein <amir73il@gmail.com>
> > >
> > > What I'm missing in this changelog is "why". Is it just about feature
> > > parity with inotify? I don't find this feature particularly useful...
> >
> > OK, now I understand after reading patch 5/5. Hum, but I'm not quite happy
> > about the limitation to non-existing mark as much as I understand why you
> > need it. Let me think...
> >
> 
> Sorry for not articulating the problem better.
> Let me write up the problem and maybe someone can come up with a better
> solution than I did.
> 
> The problem that I was trying to avoid with FAN_MARK_VOLATILE is similar
> to an existing UAPI problem with FAN_MARK_IGNORED_SURV_MODIFY -
> This flag can only be set and not cleared and when set it affects all the events
> set in the mask prior to that time, leading to unpredictable results.
>
> Let's say a user sets FAN_CLOSE in ignored mask without _SURV_MODIFY
> and later sets FAN_OPEN  in ignored mask with _SURV_MODIFY.
> Does the ignored mask now include FAN_CLOSE? That depends
> whether or not FAN_MODIFY event took place between the two calls.

Yeah, but with FAN_MARK_VOLATILE the problem also goes the other way
around. If I set FAN_MARK_VOLATILE on some inode and later add something to
a normal mask, I might be rightfully surprised when the mark gets evicted
and thus I will not get events I'm expecting. Granted the application would
be stepping on its own toes because marks are "merged" only for the same
notification group but still it could be surprising and avoiding such
mishaps would probably involve extra tracking on the application side.

The problem essentially lies in mixing mark "flags" (ONDIR, ON_CHILD,
VOLATILE, SURV_MODIFY) with mark mask. Mark operations with identical set
of flags can be merged without troubles but once flags are different
results of the merge are always "interesting". So far the consequences were
mostly benign (getting more events than the application may have expected)
but with FAN_MARK_VOLATILE we can also start loosing events and that is
more serious.

So far my thinking is that we either follow the path of possibly generating
more events than necessary (i.e., any merge of two masks that do not both
have FAN_MARK_VOLATILE set will clear FAN_MARK_VOLATILE) or we rework the
whole mark API (and implementation!) to completely avoid these strange
effects of flag merging. I don't like FAN_MARK_CREATE much because IMO it
solves only half of the problem - when new mark with a flag wants to merge
with an existing mark, but does not solve the other half when some other
mark wants to merge to a mark with a flag. Thoughts?

								Honza
Amir Goldstein March 18, 2022, 11:04 a.m. UTC | #5
> > The problem that I was trying to avoid with FAN_MARK_VOLATILE is similar
> > to an existing UAPI problem with FAN_MARK_IGNORED_SURV_MODIFY -
> > This flag can only be set and not cleared and when set it affects all the events
> > set in the mask prior to that time, leading to unpredictable results.
> >
> > Let's say a user sets FAN_CLOSE in ignored mask without _SURV_MODIFY
> > and later sets FAN_OPEN  in ignored mask with _SURV_MODIFY.
> > Does the ignored mask now include FAN_CLOSE? That depends
> > whether or not FAN_MODIFY event took place between the two calls.
>
> Yeah, but with FAN_MARK_VOLATILE the problem also goes the other way
> around. If I set FAN_MARK_VOLATILE on some inode and later add something to
> a normal mask, I might be rightfully surprised when the mark gets evicted
> and thus I will not get events I'm expecting. Granted the application would
> be stepping on its own toes because marks are "merged" only for the same
> notification group but still it could be surprising and avoiding such
> mishaps would probably involve extra tracking on the application side.
>
> The problem essentially lies in mixing mark "flags" (ONDIR, ON_CHILD,
> VOLATILE, SURV_MODIFY) with mark mask. Mark operations with identical set
> of flags can be merged without troubles but once flags are different
> results of the merge are always "interesting". So far the consequences were
> mostly benign (getting more events than the application may have expected)
> but with FAN_MARK_VOLATILE we can also start loosing events and that is
> more serious.
>
> So far my thinking is that we either follow the path of possibly generating
> more events than necessary (i.e., any merge of two masks that do not both
> have FAN_MARK_VOLATILE set will clear FAN_MARK_VOLATILE) or we rework the
> whole mark API (and implementation!) to completely avoid these strange
> effects of flag merging. I don't like FAN_MARK_CREATE much because IMO it
> solves only half of the problem - when new mark with a flag wants to merge
> with an existing mark, but does not solve the other half when some other
> mark wants to merge to a mark with a flag. Thoughts?
>

Yes. Just one thought.
My applications never needed to change the mark mask after it was
set and I don't really see a huge use case for changing the mask
once it was set (besides removing the entire mark).

So instead of FAN_MARK_CREATE, we may try to see if FAN_MARK_CONST
results in something that is useful and not too complicated to implement
and document.

IMO using a "const" initialization for the "volatile" mark is not such a big
limitation and should not cripple the feature.

Thoughts?

Thanks,
Amir.
Jan Kara March 18, 2022, 2:09 p.m. UTC | #6
On Fri 18-03-22 13:04:51, Amir Goldstein wrote:
> > > The problem that I was trying to avoid with FAN_MARK_VOLATILE is similar
> > > to an existing UAPI problem with FAN_MARK_IGNORED_SURV_MODIFY -
> > > This flag can only be set and not cleared and when set it affects all the events
> > > set in the mask prior to that time, leading to unpredictable results.
> > >
> > > Let's say a user sets FAN_CLOSE in ignored mask without _SURV_MODIFY
> > > and later sets FAN_OPEN  in ignored mask with _SURV_MODIFY.
> > > Does the ignored mask now include FAN_CLOSE? That depends
> > > whether or not FAN_MODIFY event took place between the two calls.
> >
> > Yeah, but with FAN_MARK_VOLATILE the problem also goes the other way
> > around. If I set FAN_MARK_VOLATILE on some inode and later add something to
> > a normal mask, I might be rightfully surprised when the mark gets evicted
> > and thus I will not get events I'm expecting. Granted the application would
> > be stepping on its own toes because marks are "merged" only for the same
> > notification group but still it could be surprising and avoiding such
> > mishaps would probably involve extra tracking on the application side.
> >
> > The problem essentially lies in mixing mark "flags" (ONDIR, ON_CHILD,
> > VOLATILE, SURV_MODIFY) with mark mask. Mark operations with identical set
> > of flags can be merged without troubles but once flags are different
> > results of the merge are always "interesting". So far the consequences were
> > mostly benign (getting more events than the application may have expected)
> > but with FAN_MARK_VOLATILE we can also start loosing events and that is
> > more serious.
> >
> > So far my thinking is that we either follow the path of possibly generating
> > more events than necessary (i.e., any merge of two masks that do not both
> > have FAN_MARK_VOLATILE set will clear FAN_MARK_VOLATILE) or we rework the
> > whole mark API (and implementation!) to completely avoid these strange
> > effects of flag merging. I don't like FAN_MARK_CREATE much because IMO it
> > solves only half of the problem - when new mark with a flag wants to merge
> > with an existing mark, but does not solve the other half when some other
> > mark wants to merge to a mark with a flag. Thoughts?
> >
> 
> Yes. Just one thought.
> My applications never needed to change the mark mask after it was
> set and I don't really see a huge use case for changing the mask
> once it was set (besides removing the entire mark).
> 
> So instead of FAN_MARK_CREATE, we may try to see if FAN_MARK_CONST
> results in something that is useful and not too complicated to implement
> and document.
> 
> IMO using a "const" initialization for the "volatile" mark is not such a big
> limitation and should not cripple the feature.

OK, so basically if there's mark already placed at the inode and we try to
add FAN_MARK_CONST, the addition would fail, and similarly if we later tried
to add further mark to the inode with FAN_MARK_CONST mark, it would fail?

Thinking out loud: What does FAN_MARK_CONST bring compared to the
suggestion to go via the path of possibly generating more events by
clearing FAN_MARK_VOLATILE? I guess some additional safety if you would add
another mark to the same inode by an accident. Because if you never update
marks, there's no problem with additional mark flags. Is the new flag worth
it? Not sure...  :)

								Honza
Amir Goldstein March 18, 2022, 4:06 p.m. UTC | #7
> > > So far my thinking is that we either follow the path of possibly generating
> > > more events than necessary (i.e., any merge of two masks that do not both
> > > have FAN_MARK_VOLATILE set will clear FAN_MARK_VOLATILE)

I agree that would provide predictable behavior which is also similar to
that of _SURV_MODIFY.
But IMO, this is very weird to explain/document in the wider sense.
However, if we only document that
"FAN_MARK_VOLATILE cannot be set on an existing mark and any update
 of the mask without FAN_MARK_VOLATILE clears that flag"
(i.e. we make _VOLATILE imply the _CREATE behavior)
then the merge logic is the same as you suggested, but easier to explain.

> > > or we rework the
> > > whole mark API (and implementation!) to completely avoid these strange
> > > effects of flag merging. I don't like FAN_MARK_CREATE much because IMO it
> > > solves only half of the problem - when new mark with a flag wants to merge
> > > with an existing mark, but does not solve the other half when some other
> > > mark wants to merge to a mark with a flag. Thoughts?
> > >
> >
> > Yes. Just one thought.
> > My applications never needed to change the mark mask after it was
> > set and I don't really see a huge use case for changing the mask
> > once it was set (besides removing the entire mark).
> >
> > So instead of FAN_MARK_CREATE, we may try to see if FAN_MARK_CONST
> > results in something that is useful and not too complicated to implement
> > and document.
> >
> > IMO using a "const" initialization for the "volatile" mark is not such a big
> > limitation and should not cripple the feature.
>
> OK, so basically if there's mark already placed at the inode and we try to
> add FAN_MARK_CONST, the addition would fail, and similarly if we later tried
> to add further mark to the inode with FAN_MARK_CONST mark, it would fail?
>
> Thinking out loud: What does FAN_MARK_CONST bring compared to the
> suggestion to go via the path of possibly generating more events by
> clearing FAN_MARK_VOLATILE? I guess some additional safety if you would add
> another mark to the same inode by an accident. Because if you never update
> marks, there's no problem with additional mark flags.
> Is the new flag worth it? Not sure...  :)

I rather not add new flags if we can do without them.

To summarize my last proposal:

1. On fanotify_mark() with FAN_MARK_VOLATILE
1.a. If the mark is new, the HAS_IREF flag is not set and no ihold()
1.b. If mark already exists without HAS_IREF flag, mask is updated
1.c. If mark already exists with HAS_IREF flag, mark add fails with EEXISTS

2. On fanotify_mark() without FAN_MARK_VOLATILE
2.a. If the mark is new or exists without HAS_IREF, the HAS_IREF flag
is set and ihold()
2.b. If mark already exists with HAS_IREF flag, mask is updated

Do we have a winner?

Thanks,
Amir.
Amir Goldstein March 20, 2022, 1 p.m. UTC | #8
On Fri, Mar 18, 2022 at 6:06 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> > > > So far my thinking is that we either follow the path of possibly generating
> > > > more events than necessary (i.e., any merge of two masks that do not both
> > > > have FAN_MARK_VOLATILE set will clear FAN_MARK_VOLATILE)
>
> I agree that would provide predictable behavior which is also similar to
> that of _SURV_MODIFY.
> But IMO, this is very weird to explain/document in the wider sense.
> However, if we only document that
> "FAN_MARK_VOLATILE cannot be set on an existing mark and any update
>  of the mask without FAN_MARK_VOLATILE clears that flag"
> (i.e. we make _VOLATILE imply the _CREATE behavior)
> then the merge logic is the same as you suggested, but easier to explain.
>

[...]

>
> To summarize my last proposal:
>
> 1. On fanotify_mark() with FAN_MARK_VOLATILE
> 1.a. If the mark is new, the HAS_IREF flag is not set and no ihold()
> 1.b. If mark already exists without HAS_IREF flag, mask is updated
> 1.c. If mark already exists with HAS_IREF flag, mark add fails with EEXISTS
>
> 2. On fanotify_mark() without FAN_MARK_VOLATILE
> 2.a. If the mark is new or exists without HAS_IREF, the HAS_IREF flag
> is set and ihold()
> 2.b. If mark already exists with HAS_IREF flag, mask is updated
>
> Do we have a winner?
>

FYI, I've implemented the above and pushed to branch fan_evictable.
Yes, I also changed the name of the flag to be more coherent with the
documented behavior:

    fanotify: add support for "evictable" inode marks

    When an inode mark is created with flag FAN_MARK_EVICTABLE, it will not
    pin the marked inode to inode cache, so when inode is evicted from cache
    due to memory pressure, the mark will be lost.

    When an inode mark with flag FAN_MARK_EVICATBLE is updated without using
    this flag, the marked inode is pinned to inode cache.

    When an inode mark is updated with flag FAN_MARK_EVICTABLE but an
    existing mark already has the inode pinned, the mark update fails with
    error EEXIST.

I also took care of avoiding direct reclaim deadlocks from fanotify_add_mark().
If you agree to the proposed UAPI I will post v2 patches.

Thanks,
Amir.
Jan Kara March 21, 2022, 9:09 a.m. UTC | #9
On Fri 18-03-22 18:06:40, Amir Goldstein wrote:
> > > > So far my thinking is that we either follow the path of possibly generating
> > > > more events than necessary (i.e., any merge of two masks that do not both
> > > > have FAN_MARK_VOLATILE set will clear FAN_MARK_VOLATILE)
> 
> I agree that would provide predictable behavior which is also similar to
> that of _SURV_MODIFY.
> But IMO, this is very weird to explain/document in the wider sense.
> However, if we only document that
> "FAN_MARK_VOLATILE cannot be set on an existing mark and any update
>  of the mask without FAN_MARK_VOLATILE clears that flag"
> (i.e. we make _VOLATILE imply the _CREATE behavior)
> then the merge logic is the same as you suggested, but easier to explain.

Yes, makes sense.

> > > > or we rework the
> > > > whole mark API (and implementation!) to completely avoid these strange
> > > > effects of flag merging. I don't like FAN_MARK_CREATE much because IMO it
> > > > solves only half of the problem - when new mark with a flag wants to merge
> > > > with an existing mark, but does not solve the other half when some other
> > > > mark wants to merge to a mark with a flag. Thoughts?
> > > >
> > >
> > > Yes. Just one thought.
> > > My applications never needed to change the mark mask after it was
> > > set and I don't really see a huge use case for changing the mask
> > > once it was set (besides removing the entire mark).
> > >
> > > So instead of FAN_MARK_CREATE, we may try to see if FAN_MARK_CONST
> > > results in something that is useful and not too complicated to implement
> > > and document.
> > >
> > > IMO using a "const" initialization for the "volatile" mark is not such a big
> > > limitation and should not cripple the feature.
> >
> > OK, so basically if there's mark already placed at the inode and we try to
> > add FAN_MARK_CONST, the addition would fail, and similarly if we later tried
> > to add further mark to the inode with FAN_MARK_CONST mark, it would fail?
> >
> > Thinking out loud: What does FAN_MARK_CONST bring compared to the
> > suggestion to go via the path of possibly generating more events by
> > clearing FAN_MARK_VOLATILE? I guess some additional safety if you would add
> > another mark to the same inode by an accident. Because if you never update
> > marks, there's no problem with additional mark flags.
> > Is the new flag worth it? Not sure...  :)
> 
> I rather not add new flags if we can do without them.
> 
> To summarize my last proposal:
> 
> 1. On fanotify_mark() with FAN_MARK_VOLATILE
> 1.a. If the mark is new, the HAS_IREF flag is not set and no ihold()
> 1.b. If mark already exists without HAS_IREF flag, mask is updated
> 1.c. If mark already exists with HAS_IREF flag, mark add fails with EEXISTS
> 
> 2. On fanotify_mark() without FAN_MARK_VOLATILE
> 2.a. If the mark is new or exists without HAS_IREF, the HAS_IREF flag
> is set and ihold()
> 2.b. If mark already exists with HAS_IREF flag, mask is updated
> 
> Do we have a winner?

Sounds good to me.

								Honza
Amir Goldstein March 22, 2022, 4:44 p.m. UTC | #10
> FYI, I've implemented the above and pushed to branch fan_evictable.
> Yes, I also changed the name of the flag to be more coherent with the
> documented behavior:
>
>     fanotify: add support for "evictable" inode marks
>
>     When an inode mark is created with flag FAN_MARK_EVICTABLE, it will not
>     pin the marked inode to inode cache, so when inode is evicted from cache
>     due to memory pressure, the mark will be lost.
>
>     When an inode mark with flag FAN_MARK_EVICATBLE is updated without using
>     this flag, the marked inode is pinned to inode cache.
>
>     When an inode mark is updated with flag FAN_MARK_EVICTABLE but an
>     existing mark already has the inode pinned, the mark update fails with
>     error EEXIST.
>
> I also took care of avoiding direct reclaim deadlocks from fanotify_add_mark().
> If you agree to the proposed UAPI I will post v2 patches.

Jan,

Before implementing your suggested solution, I wrote a test patch
that reproduces the deadlock.
It took me a while to get to a reproducible scenario and I ended up using
a debug feature called FAN_MARK_LARGE to get there.

You can see the test patches at the tip of these kernel and ltp branches:
https://github.com/amir73il/linux/commits/fan_evictable
https://github.com/amir73il/ltp/commits/fan_evictable

The question is how can we test this in release kernels?
Should we include FAN_MARK_LARGE as a hidden (admin only)
feature? Use sysctl knob to enable it? use an ioctl? something else?

Thanks,
Amir.
Amir Goldstein March 23, 2022, 9:16 a.m. UTC | #11
On Tue, Mar 22, 2022 at 6:44 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> > FYI, I've implemented the above and pushed to branch fan_evictable.
> > Yes, I also changed the name of the flag to be more coherent with the
> > documented behavior:
> >
> >     fanotify: add support for "evictable" inode marks
> >
> >     When an inode mark is created with flag FAN_MARK_EVICTABLE, it will not
> >     pin the marked inode to inode cache, so when inode is evicted from cache
> >     due to memory pressure, the mark will be lost.
> >
> >     When an inode mark with flag FAN_MARK_EVICATBLE is updated without using
> >     this flag, the marked inode is pinned to inode cache.
> >
> >     When an inode mark is updated with flag FAN_MARK_EVICTABLE but an
> >     existing mark already has the inode pinned, the mark update fails with
> >     error EEXIST.
> >
> > I also took care of avoiding direct reclaim deadlocks from fanotify_add_mark().
> > If you agree to the proposed UAPI I will post v2 patches.
>
> Jan,
>
> Before implementing your suggested solution, I wrote a test patch
> that reproduces the deadlock.
> It took me a while to get to a reproducible scenario and I ended up using
> a debug feature called FAN_MARK_LARGE to get there.
>
> You can see the test patches at the tip of these kernel and ltp branches:
> https://github.com/amir73il/linux/commits/fan_evictable
> https://github.com/amir73il/ltp/commits/fan_evictable
>
> The question is how can we test this in release kernels?
> Should we include FAN_MARK_LARGE as a hidden (admin only)
> feature? Use sysctl knob to enable it? use an ioctl? something else?
>

I went for ioctl and I kind of like it like.
Pushed.

Thanks,
Amir.
diff mbox series

Patch

diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 9b32b76a9c30..99c5ced6abd8 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -1185,6 +1185,9 @@  static int fanotify_add_mark(struct fsnotify_group *group,
 			mutex_unlock(&group->mark_mutex);
 			return PTR_ERR(fsn_mark);
 		}
+	} else if (flags & FAN_MARK_CREATE) {
+		ret = -EEXIST;
+		goto out;
 	}
 
 	/*
@@ -1510,6 +1513,7 @@  static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	__kernel_fsid_t __fsid, *fsid = NULL;
 	u32 valid_mask = FANOTIFY_EVENTS | FANOTIFY_EVENT_FLAGS;
 	unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
+	unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS;
 	bool ignored = flags & FAN_MARK_IGNORED_MASK;
 	unsigned int obj_type, fid_mode;
 	u32 umask = 0;
@@ -1539,7 +1543,10 @@  static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 		return -EINVAL;
 	}
 
-	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
+	if (flags & FAN_MARK_CREATE && mark_cmd != FAN_MARK_ADD)
+		return -EINVAL;
+
+	switch (mark_cmd) {
 	case FAN_MARK_ADD:
 	case FAN_MARK_REMOVE:
 		if (!mask)
@@ -1671,7 +1678,7 @@  static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
 	}
 
 	/* create/update an inode mark */
-	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE)) {
+	switch (mark_cmd) {
 	case FAN_MARK_ADD:
 		if (mark_type == FAN_MARK_MOUNT)
 			ret = fanotify_add_vfsmount_mark(group, mnt, mask,
@@ -1749,7 +1756,7 @@  static int __init fanotify_user_setup(void)
 
 	BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS);
 	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 12);
-	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 9);
+	BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 10);
 
 	fanotify_mark_cache = KMEM_CACHE(fsnotify_mark,
 					 SLAB_PANIC|SLAB_ACCOUNT);
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index 419cadcd7ff5..780f4b17d4c9 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -59,14 +59,16 @@ 
 #define FANOTIFY_MARK_TYPE_BITS	(FAN_MARK_INODE | FAN_MARK_MOUNT | \
 				 FAN_MARK_FILESYSTEM)
 
+#define FANOTIFY_MARK_CMD_BITS	(FAN_MARK_ADD | FAN_MARK_REMOVE | \
+				 FAN_MARK_FLUSH)
+
 #define FANOTIFY_MARK_FLAGS	(FANOTIFY_MARK_TYPE_BITS | \
-				 FAN_MARK_ADD | \
-				 FAN_MARK_REMOVE | \
+				 FANOTIFY_MARK_CMD_BITS | \
 				 FAN_MARK_DONT_FOLLOW | \
 				 FAN_MARK_ONLYDIR | \
 				 FAN_MARK_IGNORED_MASK | \
 				 FAN_MARK_IGNORED_SURV_MODIFY | \
-				 FAN_MARK_FLUSH)
+				 FAN_MARK_CREATE)
 
 /*
  * Events that can be reported with data type FSNOTIFY_EVENT_PATH.
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index e8ac38cc2fd6..c41feac21fe9 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -82,6 +82,7 @@ 
 #define FAN_MARK_IGNORED_SURV_MODIFY	0x00000040
 #define FAN_MARK_FLUSH		0x00000080
 /* FAN_MARK_FILESYSTEM is	0x00000100 */
+#define FAN_MARK_CREATE		0x00000200
 
 /* These are NOT bitwise flags.  Both bits can be used togther.  */
 #define FAN_MARK_INODE		0x00000000