diff mbox series

[RFC] inotify: add support watch open exec event

Message ID 20200914172737.GA5011@192.168.3.9 (mailing list archive)
State New, archived
Headers show
Series [RFC] inotify: add support watch open exec event | expand

Commit Message

Weiping Zhang Sept. 14, 2020, 5:27 p.m. UTC
Now the IN_OPEN event can report all open events for a file, but it can
not distinguish if the file was opened for execute or read/write.
This patch add a new event IN_OPEN_EXEC to support that. If user only
want to monitor a file was opened for execute, they can pass a more
precise event IN_OPEN_EXEC to inotify_add_watch.

Signed-off-by: Weiping Zhang <zhangweiping@didiglobal.com>
---
 fs/notify/inotify/inotify_user.c | 3 ++-
 include/linux/inotify.h          | 2 +-
 include/uapi/linux/inotify.h     | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

Comments

Jan Kara Sept. 15, 2020, 7:08 a.m. UTC | #1
On Tue 15-09-20 01:27:43, Weiping Zhang wrote:
> Now the IN_OPEN event can report all open events for a file, but it can
> not distinguish if the file was opened for execute or read/write.
> This patch add a new event IN_OPEN_EXEC to support that. If user only
> want to monitor a file was opened for execute, they can pass a more
> precise event IN_OPEN_EXEC to inotify_add_watch.
> 
> Signed-off-by: Weiping Zhang <zhangweiping@didiglobal.com>

Thanks for the patch but what I'm missing is a justification for it. Is
there any application that cannot use fanotify that needs to distinguish
IN_OPEN and IN_OPEN_EXEC? The OPEN_EXEC notification is for rather
specialized purposes (e.g. audit) which are generally priviledged and need
to use fanotify anyway so I don't see this as an interesting feature for
inotify...

								Honza

> ---
>  fs/notify/inotify/inotify_user.c | 3 ++-
>  include/linux/inotify.h          | 2 +-
>  include/uapi/linux/inotify.h     | 3 ++-
>  3 files changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
> index 186722ba3894..eb42d11a9988 100644
> --- a/fs/notify/inotify/inotify_user.c
> +++ b/fs/notify/inotify/inotify_user.c
> @@ -819,8 +819,9 @@ static int __init inotify_user_setup(void)
>  	BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
>  	BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
>  	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
> +	BUILD_BUG_ON(IN_OPEN_EXEC != FS_OPEN_EXEC);
>  
> -	BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22);
> +	BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 23);
>  
>  	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark,
>  					       SLAB_PANIC|SLAB_ACCOUNT);
> diff --git a/include/linux/inotify.h b/include/linux/inotify.h
> index 6a24905f6e1e..88fc82c8cf2a 100644
> --- a/include/linux/inotify.h
> +++ b/include/linux/inotify.h
> @@ -15,7 +15,7 @@ extern struct ctl_table inotify_table[]; /* for sysctl */
>  #define ALL_INOTIFY_BITS (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \
>  			  IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | \
>  			  IN_MOVED_TO | IN_CREATE | IN_DELETE | \
> -			  IN_DELETE_SELF | IN_MOVE_SELF | IN_UNMOUNT | \
> +			  IN_DELETE_SELF | IN_MOVE_SELF | IN_OPEN_EXEC | IN_UNMOUNT | \
>  			  IN_Q_OVERFLOW | IN_IGNORED | IN_ONLYDIR | \
>  			  IN_DONT_FOLLOW | IN_EXCL_UNLINK | IN_MASK_ADD | \
>  			  IN_MASK_CREATE | IN_ISDIR | IN_ONESHOT)
> diff --git a/include/uapi/linux/inotify.h b/include/uapi/linux/inotify.h
> index 884b4846b630..f19ea046cc87 100644
> --- a/include/uapi/linux/inotify.h
> +++ b/include/uapi/linux/inotify.h
> @@ -39,6 +39,7 @@ struct inotify_event {
>  #define IN_DELETE		0x00000200	/* Subfile was deleted */
>  #define IN_DELETE_SELF		0x00000400	/* Self was deleted */
>  #define IN_MOVE_SELF		0x00000800	/* Self was moved */
> +#define IN_OPEN_EXEC		0x00001000	/* File was opened */
>  
>  /* the following are legal events.  they are sent as needed to any watch */
>  #define IN_UNMOUNT		0x00002000	/* Backing fs was unmounted */
> @@ -66,7 +67,7 @@ struct inotify_event {
>  #define IN_ALL_EVENTS	(IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \
>  			 IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | \
>  			 IN_MOVED_TO | IN_DELETE | IN_CREATE | IN_DELETE_SELF | \
> -			 IN_MOVE_SELF)
> +			 IN_MOVE_SELF | IN_OPEN_EXEC)
>  
>  /* Flags for sys_inotify_init1.  */
>  #define IN_CLOEXEC O_CLOEXEC
> -- 
> 2.18.2
>
Amir Goldstein Sept. 15, 2020, 8:33 a.m. UTC | #2
On Tue, Sep 15, 2020 at 10:08 AM Jan Kara <jack@suse.cz> wrote:
>
> On Tue 15-09-20 01:27:43, Weiping Zhang wrote:
> > Now the IN_OPEN event can report all open events for a file, but it can
> > not distinguish if the file was opened for execute or read/write.
> > This patch add a new event IN_OPEN_EXEC to support that. If user only
> > want to monitor a file was opened for execute, they can pass a more
> > precise event IN_OPEN_EXEC to inotify_add_watch.
> >
> > Signed-off-by: Weiping Zhang <zhangweiping@didiglobal.com>
>
> Thanks for the patch but what I'm missing is a justification for it. Is
> there any application that cannot use fanotify that needs to distinguish
> IN_OPEN and IN_OPEN_EXEC? The OPEN_EXEC notification is for rather
> specialized purposes (e.g. audit) which are generally priviledged and need
> to use fanotify anyway so I don't see this as an interesting feature for
> inotify...

That would be my queue to re- bring up FAN_UNPRIVILEGED [1].
Last time this was discussed [2], FAN_UNPRIVILEGED did not have
feature parity with inotify, but now it mostly does, short of (AFAIK):
1. Rename cookie (*)
2. System tunables for limits

The question is - should I pursue it?

You asked about incentive to use feature parity fanotify and not intotify.
One answer is the ignored mask. It may be a useful feature to some.

But mostly, using the same interface for both priv and unpriv is convenient
for developers and it is convenient for kernel maintainers.
I'd like to be able to make the statement that inotify code is maintained in
bug fixes only mode, which has mostly been the reality for a long time.
But in order to be able to say "no reason to add IN_OPEN_EXEC", we
do need to stand behind the feature parity with intotify.

So I apologize to Weiping for hijacking his thread, but I think we should
get our plans declared before deciding on IN_OPEN_EXEC, because
whether there is a valid use case for non-priv user who needs IN_OPEN_EXEC
event is not the main issue IMO. Even if there isn't, we need an answer for
the next proposed inotify feature that does have a non-priv user use case.

Thanks,
Amir.

[1] https://github.com/amir73il/linux/commits/fanotify_unpriv
[2] https://lore.kernel.org/linux-fsdevel/20181114135744.GB20704@quack2.suse.cz/

(*) I got an internal complaint about missing the rename cookie with
FAN_REPORT_NAME, so I had to carry a small patch internally.
The problem is not that the rename cookie is really needed, but that without
the rename cookie, events can be re-ordered across renames and that can
generate some non-deterministic event sequences.

So I am thinking of keeping the rename cookie in the kernel event just for
no-merge indication and then userspace can use object fid to match
MOVED_FROM/MOVED_TO events.
Weiping Zhang Sept. 15, 2020, 12:09 p.m. UTC | #3
On Tue, Sep 15, 2020 at 4:34 PM Amir Goldstein <amir73il@gmail.com> wrote:
>
> On Tue, Sep 15, 2020 at 10:08 AM Jan Kara <jack@suse.cz> wrote:
> >
> > On Tue 15-09-20 01:27:43, Weiping Zhang wrote:
> > > Now the IN_OPEN event can report all open events for a file, but it can
> > > not distinguish if the file was opened for execute or read/write.
> > > This patch add a new event IN_OPEN_EXEC to support that. If user only
> > > want to monitor a file was opened for execute, they can pass a more
> > > precise event IN_OPEN_EXEC to inotify_add_watch.
> > >
> > > Signed-off-by: Weiping Zhang <zhangweiping@didiglobal.com>
> >
> > Thanks for the patch but what I'm missing is a justification for it. Is
> > there any application that cannot use fanotify that needs to distinguish
> > IN_OPEN and IN_OPEN_EXEC? The OPEN_EXEC notification is for rather
> > specialized purposes (e.g. audit) which are generally priviledged and need
> > to use fanotify anyway so I don't see this as an interesting feature for
> > inotify...
>
fanotify meets my requirement, thanks.

> That would be my queue to re- bring up FAN_UNPRIVILEGED [1].
> Last time this was discussed [2], FAN_UNPRIVILEGED did not have
> feature parity with inotify, but now it mostly does, short of (AFAIK):
> 1. Rename cookie (*)
> 2. System tunables for limits
>
> The question is - should I pursue it?
>
> You asked about incentive to use feature parity fanotify and not intotify.
> One answer is the ignored mask. It may be a useful feature to some.
>
> But mostly, using the same interface for both priv and unpriv is convenient
> for developers and it is convenient for kernel maintainers.
> I'd like to be able to make the statement that inotify code is maintained in
> bug fixes only mode, which has mostly been the reality for a long time.
> But in order to be able to say "no reason to add IN_OPEN_EXEC", we
> do need to stand behind the feature parity with intotify.
>
> So I apologize to Weiping for hijacking his thread, but I think we should
> get our plans declared before deciding on IN_OPEN_EXEC, because
> whether there is a valid use case for non-priv user who needs IN_OPEN_EXEC
> event is not the main issue IMO. Even if there isn't, we need an answer for
> the next proposed inotify feature that does have a non-priv user use case.
>
> Thanks,
> Amir.
>
> [1] https://github.com/amir73il/linux/commits/fanotify_unpriv
> [2] https://lore.kernel.org/linux-fsdevel/20181114135744.GB20704@quack2.suse.cz/
>
> (*) I got an internal complaint about missing the rename cookie with
> FAN_REPORT_NAME, so I had to carry a small patch internally.
> The problem is not that the rename cookie is really needed, but that without
> the rename cookie, events can be re-ordered across renames and that can
> generate some non-deterministic event sequences.
>
> So I am thinking of keeping the rename cookie in the kernel event just for
> no-merge indication and then userspace can use object fid to match
> MOVED_FROM/MOVED_TO events.
Jan Kara Oct. 1, 2020, 11 a.m. UTC | #4
I'm sorry for late reply on this one...

On Tue 15-09-20 11:33:41, Amir Goldstein wrote:
> On Tue, Sep 15, 2020 at 10:08 AM Jan Kara <jack@suse.cz> wrote:
> >
> > On Tue 15-09-20 01:27:43, Weiping Zhang wrote:
> > > Now the IN_OPEN event can report all open events for a file, but it can
> > > not distinguish if the file was opened for execute or read/write.
> > > This patch add a new event IN_OPEN_EXEC to support that. If user only
> > > want to monitor a file was opened for execute, they can pass a more
> > > precise event IN_OPEN_EXEC to inotify_add_watch.
> > >
> > > Signed-off-by: Weiping Zhang <zhangweiping@didiglobal.com>
> >
> > Thanks for the patch but what I'm missing is a justification for it. Is
> > there any application that cannot use fanotify that needs to distinguish
> > IN_OPEN and IN_OPEN_EXEC? The OPEN_EXEC notification is for rather
> > specialized purposes (e.g. audit) which are generally priviledged and need
> > to use fanotify anyway so I don't see this as an interesting feature for
> > inotify...
> 
> That would be my queue to re- bring up FAN_UNPRIVILEGED [1].
> Last time this was discussed [2], FAN_UNPRIVILEGED did not have
> feature parity with inotify, but now it mostly does, short of (AFAIK):
> 1. Rename cookie (*)
> 2. System tunables for limits
> 
> The question is - should I pursue it?

So I think that at this point some form less priviledged fanotify use
starts to make sense. So let's discuss how it would look like... What comes
to my mind:

1) We'd need to make max_user_instances, max_user_watches, and
max_queued_events configurable similarly as for inotify. The first two
using ucounts so that the configuration is actually per-namespace as for
inotify.

2) I don't quite like the FAN_UNPRIVILEDGED flag. I'd rather see the checks
being done based on functionality requested in fanotify_init() /
fanotify_mark(). E.g. FAN_UNLIMITED_QUEUE or permission events will require
CAP_SYS_ADMIN, mount/sb marks will require CAP_DAC_READ_SEARCH, etc.
We should also consider which capability checks should be system-global and
which can be just user-namespace ones...

> You asked about incentive to use feature parity fanotify and not intotify.
> One answer is the ignored mask. It may be a useful feature to some.
> 
> But mostly, using the same interface for both priv and unpriv is convenient
> for developers and it is convenient for kernel maintainers.

I agree about userspace developers, for kernel I think that allowing
unpriviledged fanotify has actually additional maintenance cost - all that
additional code with limits & capability checks, larger attack surface
available for unpriviledged tasks so more security scrutiny & CVE handling,
etc. And we have to maintain inotify exactly as much as previously at least
for the following decade, likely even longer.

> I'd like to be able to make the statement that inotify code is maintained in
> bug fixes only mode, which has mostly been the reality for a long time.

Yes, I agree that inotify is in maintenance only mode.

> But in order to be able to say "no reason to add IN_OPEN_EXEC", we
> do need to stand behind the feature parity with intotify.
> 
> So I apologize to Weiping for hijacking his thread, but I think we should
> get our plans declared before deciding on IN_OPEN_EXEC, because
> whether there is a valid use case for non-priv user who needs IN_OPEN_EXEC
> event is not the main issue IMO. Even if there isn't, we need an answer for
> the next proposed inotify feature that does have a non-priv user use case.

Here I disagree. How I see it is that *if* there's real serious user for
IN_OPEN_EXEC which cannot currently use FAN_OPEN_EXEC, we should either
make FAN_OPEN_EXEC available to it or bite the bullet, do exception, and
extend inotify. But the "if" part isn't currently true so I don't see
IN_OPEN_EXEC query force us either way...

> [1] https://github.com/amir73il/linux/commits/fanotify_unpriv
> [2] https://lore.kernel.org/linux-fsdevel/20181114135744.GB20704@quack2.suse.cz/
> 
> (*) I got an internal complaint about missing the rename cookie with
> FAN_REPORT_NAME, so I had to carry a small patch internally.
> The problem is not that the rename cookie is really needed, but that without
> the rename cookie, events can be re-ordered across renames and that can
> generate some non-deterministic event sequences.
> 
> So I am thinking of keeping the rename cookie in the kernel event just for
> no-merge indication and then userspace can use object fid to match
> MOVED_FROM/MOVED_TO events.

Well, the event sequences are always non-deterministic due to event
merging. So I'm somewhat surprised that rename events particularly matter.
I suspect the code relying on "determinism" is buggy, it just perhaps
doesn't manifest in practice for other event types...

								Honza
Amir Goldstein Oct. 1, 2020, 1:08 p.m. UTC | #5
On Thu, Oct 1, 2020 at 2:00 PM Jan Kara <jack@suse.cz> wrote:
>
> I'm sorry for late reply on this one...
>
> On Tue 15-09-20 11:33:41, Amir Goldstein wrote:
> > On Tue, Sep 15, 2020 at 10:08 AM Jan Kara <jack@suse.cz> wrote:
> > >
> > > On Tue 15-09-20 01:27:43, Weiping Zhang wrote:
> > > > Now the IN_OPEN event can report all open events for a file, but it can
> > > > not distinguish if the file was opened for execute or read/write.
> > > > This patch add a new event IN_OPEN_EXEC to support that. If user only
> > > > want to monitor a file was opened for execute, they can pass a more
> > > > precise event IN_OPEN_EXEC to inotify_add_watch.
> > > >
> > > > Signed-off-by: Weiping Zhang <zhangweiping@didiglobal.com>
> > >
> > > Thanks for the patch but what I'm missing is a justification for it. Is
> > > there any application that cannot use fanotify that needs to distinguish
> > > IN_OPEN and IN_OPEN_EXEC? The OPEN_EXEC notification is for rather
> > > specialized purposes (e.g. audit) which are generally priviledged and need
> > > to use fanotify anyway so I don't see this as an interesting feature for
> > > inotify...
> >
> > That would be my queue to re- bring up FAN_UNPRIVILEGED [1].
> > Last time this was discussed [2], FAN_UNPRIVILEGED did not have
> > feature parity with inotify, but now it mostly does, short of (AFAIK):
> > 1. Rename cookie (*)
> > 2. System tunables for limits
> >
> > The question is - should I pursue it?
>
> So I think that at this point some form less priviledged fanotify use
> starts to make sense. So let's discuss how it would look like... What comes
> to my mind:
>
> 1) We'd need to make max_user_instances, max_user_watches, and
> max_queued_events configurable similarly as for inotify. The first two
> using ucounts so that the configuration is actually per-namespace as for
> inotify.
>
> 2) I don't quite like the FAN_UNPRIVILEDGED flag. I'd rather see the checks
> being done based on functionality requested in fanotify_init() /
> fanotify_mark(). E.g. FAN_UNLIMITED_QUEUE or permission events will require
> CAP_SYS_ADMIN, mount/sb marks will require CAP_DAC_READ_SEARCH, etc.
> We should also consider which capability checks should be system-global and
> which can be just user-namespace ones...

OK. That is not a problem to do.
But FAN_UNPRIVILEDGED flag also impacts:

    An unprivileged event listener does not get an open file descriptor in
    the event nor the process pid of another process.

Obviously, I can check CAP_SYS_ADMIN on fanotify_init() and set the
FAN_UNPRIVILEDGED flag as an internal flag.

The advantage of explicit FAN_UNPRIVILEDGED flag is that a privileged process
can create an unprivileged listener and pass the fd to another process.
Not a critical functionality at this point.

Thoughts?

Thanks,
Amir.
Amir Goldstein Oct. 1, 2020, 1:23 p.m. UTC | #6
> > (*) I got an internal complaint about missing the rename cookie with
> > FAN_REPORT_NAME, so I had to carry a small patch internally.
> > The problem is not that the rename cookie is really needed, but that without
> > the rename cookie, events can be re-ordered across renames and that can
> > generate some non-deterministic event sequences.
> >
> > So I am thinking of keeping the rename cookie in the kernel event just for
> > no-merge indication and then userspace can use object fid to match
> > MOVED_FROM/MOVED_TO events.
>
> Well, the event sequences are always non-deterministic due to event
> merging. So I'm somewhat surprised that rename events particularly matter.
> I suspect the code relying on "determinism" is buggy, it just perhaps
> doesn't manifest in practice for other event types...
>

Maybe I did not explain the issue correctly.

The use case is matching pairs of MOVED_FROM/MOVED_TO events,
regardless of re-ordering with other event types.

In inotify, both those events carry a unique cookie, so they are never merged
with any other event type. The events themselves have the same cookie but
differ in filename, so are not merged.

In vfs code, fsnotify_move() is called under lock_rename() so:
1. Cross-parent MOVED event pairs are fully serialized
2. Same-parent MOVED event pairs are serialized in each parent...
3. ... but may be interleaved with MOVED event pairs in other parents
4. Subsequent MOVED event pairs of the same object are also
    serialized per moved object

The rules above apply to fanotify MOVED events as well, but in fanotify,
because cookie is not stored in event and not participating in merge,
the MOVED_FROM/MOVED_TO events can be merged with other
event types and therefore re-ordered amongst themselves.

Our userspace service needs to be able to match MOVED_FROM/MOVED_TO
event pairs for several reasons and I believe this is quite a common
need.

This need is regardless of re-ordering the MOVED_FROM/MOVED_TO event
pair with other events such as CREATE/DELETE.

To mention a concrete example, if listener can reliably match a MOVED pair
and the DFID/FID object is found locally in the MOVED_TO name, then a remote
command to the backup destination to rename from the MOVED_FROM
name can be attempted.

All we need to do in order to allow for fanotify listeners to use DFID/FID
to match MOVED_FROM/MOVED_TO interleaved event pairs is to
NOT merge MOVED events with other event types if group has all
these flags (FAN_REPORT_DFID_NAME | FAN_REPORT_FID).

IMO, there is not much to lose with this minor change in event merge
condition and there is much to gain.

The documentation is a bit more tricky, but it is generally sufficient
to document that MOVED_FROM/MOVED_TO events on the same
object (FID) are not re-ordered amongst themselves with respective
group flag combination.

Do you agree?

Thanks,
Amir.
Jan Kara Oct. 2, 2020, 8:27 a.m. UTC | #7
On Thu 01-10-20 16:08:50, Amir Goldstein wrote:
> On Thu, Oct 1, 2020 at 2:00 PM Jan Kara <jack@suse.cz> wrote:
> >
> > I'm sorry for late reply on this one...
> >
> > On Tue 15-09-20 11:33:41, Amir Goldstein wrote:
> > > On Tue, Sep 15, 2020 at 10:08 AM Jan Kara <jack@suse.cz> wrote:
> > > >
> > > > On Tue 15-09-20 01:27:43, Weiping Zhang wrote:
> > > > > Now the IN_OPEN event can report all open events for a file, but it can
> > > > > not distinguish if the file was opened for execute or read/write.
> > > > > This patch add a new event IN_OPEN_EXEC to support that. If user only
> > > > > want to monitor a file was opened for execute, they can pass a more
> > > > > precise event IN_OPEN_EXEC to inotify_add_watch.
> > > > >
> > > > > Signed-off-by: Weiping Zhang <zhangweiping@didiglobal.com>
> > > >
> > > > Thanks for the patch but what I'm missing is a justification for it. Is
> > > > there any application that cannot use fanotify that needs to distinguish
> > > > IN_OPEN and IN_OPEN_EXEC? The OPEN_EXEC notification is for rather
> > > > specialized purposes (e.g. audit) which are generally priviledged and need
> > > > to use fanotify anyway so I don't see this as an interesting feature for
> > > > inotify...
> > >
> > > That would be my queue to re- bring up FAN_UNPRIVILEGED [1].
> > > Last time this was discussed [2], FAN_UNPRIVILEGED did not have
> > > feature parity with inotify, but now it mostly does, short of (AFAIK):
> > > 1. Rename cookie (*)
> > > 2. System tunables for limits
> > >
> > > The question is - should I pursue it?
> >
> > So I think that at this point some form less priviledged fanotify use
> > starts to make sense. So let's discuss how it would look like... What comes
> > to my mind:
> >
> > 1) We'd need to make max_user_instances, max_user_watches, and
> > max_queued_events configurable similarly as for inotify. The first two
> > using ucounts so that the configuration is actually per-namespace as for
> > inotify.
> >
> > 2) I don't quite like the FAN_UNPRIVILEDGED flag. I'd rather see the checks
> > being done based on functionality requested in fanotify_init() /
> > fanotify_mark(). E.g. FAN_UNLIMITED_QUEUE or permission events will require
> > CAP_SYS_ADMIN, mount/sb marks will require CAP_DAC_READ_SEARCH, etc.
> > We should also consider which capability checks should be system-global and
> > which can be just user-namespace ones...
> 
> OK. That is not a problem to do.
> But FAN_UNPRIVILEDGED flag also impacts:
> 
>     An unprivileged event listener does not get an open file descriptor in
>     the event nor the process pid of another process.

Well, are these really sensitive that they should be forbidden? If we allow
only inode marks and given inode is opened in the context of process
reading the event, I don't see how fd would be any sensitive? And similarly
for pid I'd say...

> Obviously, I can check CAP_SYS_ADMIN on fanotify_init() and set the
> FAN_UNPRIVILEDGED flag as an internal flag.
> 
> The advantage of explicit FAN_UNPRIVILEDGED flag is that a privileged process
> can create an unprivileged listener and pass the fd to another process.
> Not a critical functionality at this point.

I'd prefer to keep the flag internal if you're convinced we need one - but
I'm not yet convinced we need even internal FAN_UNPRIVILEDGED flag because
I don't think this will end up being a yes/no thing. I imagine that
depending on exact process capabilities, different kinds of fanotify
functionality will be allowed as I outlined in 2). So we'll be checking
against current process capabilities at the time of action and not against
some internal fanotify flag...

								Honza
Amir Goldstein Oct. 2, 2020, 9:06 a.m. UTC | #8
On Fri, Oct 2, 2020 at 11:27 AM Jan Kara <jack@suse.cz> wrote:
>
> On Thu 01-10-20 16:08:50, Amir Goldstein wrote:
> > On Thu, Oct 1, 2020 at 2:00 PM Jan Kara <jack@suse.cz> wrote:
> > >
> > > I'm sorry for late reply on this one...
> > >
> > > On Tue 15-09-20 11:33:41, Amir Goldstein wrote:
> > > > On Tue, Sep 15, 2020 at 10:08 AM Jan Kara <jack@suse.cz> wrote:
> > > > >
> > > > > On Tue 15-09-20 01:27:43, Weiping Zhang wrote:
> > > > > > Now the IN_OPEN event can report all open events for a file, but it can
> > > > > > not distinguish if the file was opened for execute or read/write.
> > > > > > This patch add a new event IN_OPEN_EXEC to support that. If user only
> > > > > > want to monitor a file was opened for execute, they can pass a more
> > > > > > precise event IN_OPEN_EXEC to inotify_add_watch.
> > > > > >
> > > > > > Signed-off-by: Weiping Zhang <zhangweiping@didiglobal.com>
> > > > >
> > > > > Thanks for the patch but what I'm missing is a justification for it. Is
> > > > > there any application that cannot use fanotify that needs to distinguish
> > > > > IN_OPEN and IN_OPEN_EXEC? The OPEN_EXEC notification is for rather
> > > > > specialized purposes (e.g. audit) which are generally priviledged and need
> > > > > to use fanotify anyway so I don't see this as an interesting feature for
> > > > > inotify...
> > > >
> > > > That would be my queue to re- bring up FAN_UNPRIVILEGED [1].
> > > > Last time this was discussed [2], FAN_UNPRIVILEGED did not have
> > > > feature parity with inotify, but now it mostly does, short of (AFAIK):
> > > > 1. Rename cookie (*)
> > > > 2. System tunables for limits
> > > >
> > > > The question is - should I pursue it?
> > >
> > > So I think that at this point some form less priviledged fanotify use
> > > starts to make sense. So let's discuss how it would look like... What comes
> > > to my mind:
> > >
> > > 1) We'd need to make max_user_instances, max_user_watches, and
> > > max_queued_events configurable similarly as for inotify. The first two
> > > using ucounts so that the configuration is actually per-namespace as for
> > > inotify.
> > >
> > > 2) I don't quite like the FAN_UNPRIVILEDGED flag. I'd rather see the checks
> > > being done based on functionality requested in fanotify_init() /
> > > fanotify_mark(). E.g. FAN_UNLIMITED_QUEUE or permission events will require
> > > CAP_SYS_ADMIN, mount/sb marks will require CAP_DAC_READ_SEARCH, etc.
> > > We should also consider which capability checks should be system-global and
> > > which can be just user-namespace ones...
> >
> > OK. That is not a problem to do.
> > But FAN_UNPRIVILEDGED flag also impacts:
> >
> >     An unprivileged event listener does not get an open file descriptor in
> >     the event nor the process pid of another process.
>
> Well, are these really sensitive that they should be forbidden? If we allow
> only inode marks and given inode is opened in the context of process
> reading the event, I don't see how fd would be any sensitive? And similarly
> for pid I'd say...
>

Because I was under the impression that we are going to allow a dir watch
on children, just like inotify and process may have permission to access dir,
but no permission to open a child.

That said, it's true that we can decide whether or not to export a RDONLY
open fd based on CAP_DAC_READ_SEARCH of the reader process.

Regarding exposing pid, I am not familiar with the capabilities required to
"spy" on another process' actions using other facilities, so I thought we
should take a conservative approach and require at least CAP_SYS_PTRACE
to expose information about the process generating the event.

> > Obviously, I can check CAP_SYS_ADMIN on fanotify_init() and set the
> > FAN_UNPRIVILEDGED flag as an internal flag.
> >
> > The advantage of explicit FAN_UNPRIVILEDGED flag is that a privileged process
> > can create an unprivileged listener and pass the fd to another process.
> > Not a critical functionality at this point.
>
> I'd prefer to keep the flag internal if you're convinced we need one - but
> I'm not yet convinced we need even internal FAN_UNPRIVILEDGED flag because
> I don't think this will end up being a yes/no thing. I imagine that
> depending on exact process capabilities, different kinds of fanotify
> functionality will be allowed as I outlined in 2). So we'll be checking
> against current process capabilities at the time of action and not against
> some internal fanotify flag...

Fair enough. I take a swing at getting rid of the flag entirely.
It may take me a while though to context switch back to fanotify.

Thanks,
Amir.
Jan Kara Oct. 2, 2020, 9:51 a.m. UTC | #9
On Fri 02-10-20 12:06:48, Amir Goldstein wrote:
> On Fri, Oct 2, 2020 at 11:27 AM Jan Kara <jack@suse.cz> wrote:
> >
> > On Thu 01-10-20 16:08:50, Amir Goldstein wrote:
> > > On Thu, Oct 1, 2020 at 2:00 PM Jan Kara <jack@suse.cz> wrote:
> > > >
> > > > I'm sorry for late reply on this one...
> > > >
> > > > On Tue 15-09-20 11:33:41, Amir Goldstein wrote:
> > > > > On Tue, Sep 15, 2020 at 10:08 AM Jan Kara <jack@suse.cz> wrote:
> > > > > >
> > > > > > On Tue 15-09-20 01:27:43, Weiping Zhang wrote:
> > > > > > > Now the IN_OPEN event can report all open events for a file, but it can
> > > > > > > not distinguish if the file was opened for execute or read/write.
> > > > > > > This patch add a new event IN_OPEN_EXEC to support that. If user only
> > > > > > > want to monitor a file was opened for execute, they can pass a more
> > > > > > > precise event IN_OPEN_EXEC to inotify_add_watch.
> > > > > > >
> > > > > > > Signed-off-by: Weiping Zhang <zhangweiping@didiglobal.com>
> > > > > >
> > > > > > Thanks for the patch but what I'm missing is a justification for it. Is
> > > > > > there any application that cannot use fanotify that needs to distinguish
> > > > > > IN_OPEN and IN_OPEN_EXEC? The OPEN_EXEC notification is for rather
> > > > > > specialized purposes (e.g. audit) which are generally priviledged and need
> > > > > > to use fanotify anyway so I don't see this as an interesting feature for
> > > > > > inotify...
> > > > >
> > > > > That would be my queue to re- bring up FAN_UNPRIVILEGED [1].
> > > > > Last time this was discussed [2], FAN_UNPRIVILEGED did not have
> > > > > feature parity with inotify, but now it mostly does, short of (AFAIK):
> > > > > 1. Rename cookie (*)
> > > > > 2. System tunables for limits
> > > > >
> > > > > The question is - should I pursue it?
> > > >
> > > > So I think that at this point some form less priviledged fanotify use
> > > > starts to make sense. So let's discuss how it would look like... What comes
> > > > to my mind:
> > > >
> > > > 1) We'd need to make max_user_instances, max_user_watches, and
> > > > max_queued_events configurable similarly as for inotify. The first two
> > > > using ucounts so that the configuration is actually per-namespace as for
> > > > inotify.
> > > >
> > > > 2) I don't quite like the FAN_UNPRIVILEDGED flag. I'd rather see the checks
> > > > being done based on functionality requested in fanotify_init() /
> > > > fanotify_mark(). E.g. FAN_UNLIMITED_QUEUE or permission events will require
> > > > CAP_SYS_ADMIN, mount/sb marks will require CAP_DAC_READ_SEARCH, etc.
> > > > We should also consider which capability checks should be system-global and
> > > > which can be just user-namespace ones...
> > >
> > > OK. That is not a problem to do.
> > > But FAN_UNPRIVILEDGED flag also impacts:
> > >
> > >     An unprivileged event listener does not get an open file descriptor in
> > >     the event nor the process pid of another process.
> >
> > Well, are these really sensitive that they should be forbidden? If we allow
> > only inode marks and given inode is opened in the context of process
> > reading the event, I don't see how fd would be any sensitive? And similarly
> > for pid I'd say...
> >
> 
> Because I was under the impression that we are going to allow a dir watch
> on children, just like inotify and process may have permission to access dir,
> but no permission to open a child.

Right, I agree FAN_EVENT_ON_CHILD should be allowed with less priviledge as
well. But can't we just check for 'x' permission on parent dir when
generating event to task that does not have CAP_DAC_READ_SEARCH and
may_open() after that? We have all info available when handling
FAN_EVENT_ON_CHILD events AFAICT...

> That said, it's true that we can decide whether or not to export a RDONLY
> open fd based on CAP_DAC_READ_SEARCH of the reader process.

Or that but I guess may_open() check may be still needed...

> Regarding exposing pid, I am not familiar with the capabilities required to
> "spy" on another process' actions using other facilities, so I thought we
> should take a conservative approach and require at least CAP_SYS_PTRACE
> to expose information about the process generating the event.

Anybody can learn PID of a process in his own namespace. So PID itself is
not secret. The fact that someone accessed a file is no secret either (you
can poll atime / mtime). The fact that a particular process accessed a
particular file - well, that's revealing something. Not sure whether it is
relevant but I guess let's be cautious, we can always relax this later.

								Honza
diff mbox series

Patch

diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 186722ba3894..eb42d11a9988 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -819,8 +819,9 @@  static int __init inotify_user_setup(void)
 	BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
 	BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
 	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
+	BUILD_BUG_ON(IN_OPEN_EXEC != FS_OPEN_EXEC);
 
-	BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 22);
+	BUILD_BUG_ON(HWEIGHT32(ALL_INOTIFY_BITS) != 23);
 
 	inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark,
 					       SLAB_PANIC|SLAB_ACCOUNT);
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index 6a24905f6e1e..88fc82c8cf2a 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -15,7 +15,7 @@  extern struct ctl_table inotify_table[]; /* for sysctl */
 #define ALL_INOTIFY_BITS (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \
 			  IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | \
 			  IN_MOVED_TO | IN_CREATE | IN_DELETE | \
-			  IN_DELETE_SELF | IN_MOVE_SELF | IN_UNMOUNT | \
+			  IN_DELETE_SELF | IN_MOVE_SELF | IN_OPEN_EXEC | IN_UNMOUNT | \
 			  IN_Q_OVERFLOW | IN_IGNORED | IN_ONLYDIR | \
 			  IN_DONT_FOLLOW | IN_EXCL_UNLINK | IN_MASK_ADD | \
 			  IN_MASK_CREATE | IN_ISDIR | IN_ONESHOT)
diff --git a/include/uapi/linux/inotify.h b/include/uapi/linux/inotify.h
index 884b4846b630..f19ea046cc87 100644
--- a/include/uapi/linux/inotify.h
+++ b/include/uapi/linux/inotify.h
@@ -39,6 +39,7 @@  struct inotify_event {
 #define IN_DELETE		0x00000200	/* Subfile was deleted */
 #define IN_DELETE_SELF		0x00000400	/* Self was deleted */
 #define IN_MOVE_SELF		0x00000800	/* Self was moved */
+#define IN_OPEN_EXEC		0x00001000	/* File was opened */
 
 /* the following are legal events.  they are sent as needed to any watch */
 #define IN_UNMOUNT		0x00002000	/* Backing fs was unmounted */
@@ -66,7 +67,7 @@  struct inotify_event {
 #define IN_ALL_EVENTS	(IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \
 			 IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | \
 			 IN_MOVED_TO | IN_DELETE | IN_CREATE | IN_DELETE_SELF | \
-			 IN_MOVE_SELF)
+			 IN_MOVE_SELF | IN_OPEN_EXEC)
 
 /* Flags for sys_inotify_init1.  */
 #define IN_CLOEXEC O_CLOEXEC