diff mbox series

[v2,1/4] Landlock: Add signal control

Message ID 49557e48c1904d2966b8aa563215d2e1733dad95.1722966592.git.fahimitahera@gmail.com (mailing list archive)
State Handled Elsewhere
Headers show
Series Landlock: Signal Scoping Support | expand

Commit Message

Tahera Fahimi Aug. 6, 2024, 6:10 p.m. UTC
Currently, a sandbox process is not restricted to send a signal
(e.g. SIGKILL) to a process outside of the sandbox environment.
Ability to sending a signal for a sandboxed process should be
scoped the same way abstract unix sockets are scoped. Therefore,
we extend "scoped" field in a ruleset with
"LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
sending any signal from within a sandbox process to its
parent(i.e. any parent sandbox or non-sandboxed procsses).

Signed-off-by: Tahera Fahimi <fahimitahera@gmail.com>
---

Chenges in versions:
V2:
* Remove signal_is_scoped function
* Applying reviews of V1
V1:
* Introducing LANDLOCK_SCOPE_SIGNAL
* Adding two hooks, hook_task_kill and hook_file_send_sigiotask
  for signal scoping.
---
 include/uapi/linux/landlock.h |  3 +++
 security/landlock/limits.h    |  2 +-
 security/landlock/task.c      | 43 +++++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)

Comments

Jann Horn Aug. 6, 2024, 6:56 p.m. UTC | #1
On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> Currently, a sandbox process is not restricted to send a signal
> (e.g. SIGKILL) to a process outside of the sandbox environment.
> Ability to sending a signal for a sandboxed process should be
> scoped the same way abstract unix sockets are scoped. Therefore,
> we extend "scoped" field in a ruleset with
> "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> sending any signal from within a sandbox process to its
> parent(i.e. any parent sandbox or non-sandboxed procsses).
[...]
> diff --git a/security/landlock/task.c b/security/landlock/task.c
> index 7e8579ebae83..a73cff27bb91 100644
> --- a/security/landlock/task.c
> +++ b/security/landlock/task.c
> @@ -261,11 +261,54 @@ static int hook_unix_may_send(struct socket *const sock,
>         return -EPERM;
>  }
>
> +static int hook_task_kill(struct task_struct *const p,
> +                         struct kernel_siginfo *const info, const int sig,
> +                         const struct cred *const cred)
> +{
> +       bool is_scoped;
> +       const struct landlock_ruleset *target_dom;
> +
> +       /* rcu is already locked */
> +       target_dom = landlock_get_task_domain(p);
> +       if (cred)
> +               /* dealing with USB IO */
> +               is_scoped = domain_IPC_scope(landlock_cred(cred)->domain,
> +                                            target_dom,
> +                                            LANDLOCK_SCOPED_SIGNAL);
> +       else
> +               is_scoped = domain_IPC_scope(landlock_get_current_domain(),
> +                                            target_dom,
> +                                            LANDLOCK_SCOPED_SIGNAL);

This might be a bit more concise if you turn it into something like:

/* only USB IO supplies creds */
cred = cred ?: current_cred();
is_scoped = domain_IPC_scope(landlock_cred(cred)->domain,
    target_dom, LANDLOCK_SCOPED_SIGNAL);

but that's just a question of style, feel free to keep it as-is
depending on what you prefer.

> +       if (is_scoped)
> +               return 0;
> +
> +       return -EPERM;
> +}
> +
> +static int hook_file_send_sigiotask(struct task_struct *tsk,
> +                                   struct fown_struct *fown, int signum)
> +{
> +       bool is_scoped;
> +       const struct landlock_ruleset *dom, *target_dom;
> +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);

I'm not an expert on how the fowner stuff works, but I think this will
probably give you "result = NULL" if the file owner PID has already
exited, and then the following landlock_get_task_domain() would
probably crash? But I'm not entirely sure about how this works.

I think the intended way to use this hook would be to instead use the
"file_set_fowner" hook to record the owning domain (though the setup
for that is going to be kind of a pain...), see the Smack and SELinux
definitions of that hook. Or alternatively maybe it would be even
nicer to change the fown_struct to record a cred* instead of a uid and
euid and then use the domain from those credentials for this hook...
I'm not sure which of those would be easier.

> +       /* rcu is already locked! */
> +       dom = landlock_get_task_domain(result);
> +       target_dom = landlock_get_task_domain(tsk);
> +       is_scoped = domain_IPC_scope(dom, target_dom, LANDLOCK_SCOPED_SIGNAL);
> +       put_task_struct(result);
> +       if (is_scoped)
> +               return 0;
> +       return -EPERM;
> +}
Jann Horn Aug. 6, 2024, 9:55 p.m. UTC | #2
On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > Currently, a sandbox process is not restricted to send a signal
> > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > Ability to sending a signal for a sandboxed process should be
> > scoped the same way abstract unix sockets are scoped. Therefore,
> > we extend "scoped" field in a ruleset with
> > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > sending any signal from within a sandbox process to its
> > parent(i.e. any parent sandbox or non-sandboxed procsses).
[...]
> > +       if (is_scoped)
> > +               return 0;
> > +
> > +       return -EPERM;
> > +}
> > +
> > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > +                                   struct fown_struct *fown, int signum)
> > +{
> > +       bool is_scoped;
> > +       const struct landlock_ruleset *dom, *target_dom;
> > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
>
> I'm not an expert on how the fowner stuff works, but I think this will
> probably give you "result = NULL" if the file owner PID has already
> exited, and then the following landlock_get_task_domain() would
> probably crash? But I'm not entirely sure about how this works.
>
> I think the intended way to use this hook would be to instead use the
> "file_set_fowner" hook to record the owning domain (though the setup
> for that is going to be kind of a pain...), see the Smack and SELinux
> definitions of that hook. Or alternatively maybe it would be even
> nicer to change the fown_struct to record a cred* instead of a uid and
> euid and then use the domain from those credentials for this hook...
> I'm not sure which of those would be easier.

(For what it's worth, I think the first option would probably be
easier to implement and ship for now, since you can basically copy
what Smack and SELinux are already doing in their implementations of
these hooks. I think the second option would theoretically result in
nicer code, but it might require a bit more work, and you'd have to
include the maintainers of the file locking code in the review of such
refactoring and have them approve those changes. So if you want to get
this patchset into the kernel quickly, the first option might be
better for now?)
Tahera Fahimi Aug. 6, 2024, 10 p.m. UTC | #3
On Tue, Aug 06, 2024 at 08:56:15PM +0200, Jann Horn wrote:
> On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > Currently, a sandbox process is not restricted to send a signal
> > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > Ability to sending a signal for a sandboxed process should be
> > scoped the same way abstract unix sockets are scoped. Therefore,
> > we extend "scoped" field in a ruleset with
> > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > sending any signal from within a sandbox process to its
> > parent(i.e. any parent sandbox or non-sandboxed procsses).
> [...]
> > diff --git a/security/landlock/task.c b/security/landlock/task.c
> > index 7e8579ebae83..a73cff27bb91 100644
> > --- a/security/landlock/task.c
> > +++ b/security/landlock/task.c
> > @@ -261,11 +261,54 @@ static int hook_unix_may_send(struct socket *const sock,
> >         return -EPERM;
> >  }
> >
> > +static int hook_task_kill(struct task_struct *const p,
> > +                         struct kernel_siginfo *const info, const int sig,
> > +                         const struct cred *const cred)
> > +{
> > +       bool is_scoped;
> > +       const struct landlock_ruleset *target_dom;
> > +
> > +       /* rcu is already locked */
> > +       target_dom = landlock_get_task_domain(p);
> > +       if (cred)
> > +               /* dealing with USB IO */
> > +               is_scoped = domain_IPC_scope(landlock_cred(cred)->domain,
> > +                                            target_dom,
> > +                                            LANDLOCK_SCOPED_SIGNAL);
> > +       else
> > +               is_scoped = domain_IPC_scope(landlock_get_current_domain(),
> > +                                            target_dom,
> > +                                            LANDLOCK_SCOPED_SIGNAL);
> 
> This might be a bit more concise if you turn it into something like:
> 
> /* only USB IO supplies creds */
> cred = cred ?: current_cred();
> is_scoped = domain_IPC_scope(landlock_cred(cred)->domain,
>     target_dom, LANDLOCK_SCOPED_SIGNAL);
> 
> but that's just a question of style, feel free to keep it as-is
> depending on what you prefer.
Hi Jann,
Thanks for the feedback:)
> > +       if (is_scoped)
> > +               return 0;
> > +
> > +       return -EPERM;
> > +}
> > +
> > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > +                                   struct fown_struct *fown, int signum)
> > +{
> > +       bool is_scoped;
> > +       const struct landlock_ruleset *dom, *target_dom;
> > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> 
> I'm not an expert on how the fowner stuff works, but I think this will
> probably give you "result = NULL" if the file owner PID has already
> exited, and then the following landlock_get_task_domain() would
> probably crash? But I'm not entirely sure about how this works.
I considered since the file structure can always be obtained, then the
file owner PID always exist. I can check if we can use the credentials
stored in struct file instead.

> I think the intended way to use this hook would be to instead use the
> "file_set_fowner" hook to record the owning domain (though the setup
> for that is going to be kind of a pain...), see the Smack and SELinux
> definitions of that hook. Or alternatively maybe it would be even
> nicer to change the fown_struct to record a cred* instead of a uid and
> euid and then use the domain from those credentials for this hook...
> I'm not sure which of those would be easier.
Because Landlock does not use any security blob for this purpose, I am
not sure how to record the owner's doamin.
The alternative way looks nice.
> > +       /* rcu is already locked! */
> > +       dom = landlock_get_task_domain(result);
> > +       target_dom = landlock_get_task_domain(tsk);
> > +       is_scoped = domain_IPC_scope(dom, target_dom, LANDLOCK_SCOPED_SIGNAL);
> > +       put_task_struct(result);
> > +       if (is_scoped)
> > +               return 0;
> > +       return -EPERM;
> > +}
Jann Horn Aug. 6, 2024, 10:55 p.m. UTC | #4
Hi!

On Wed, Aug 7, 2024 at 12:00 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> On Tue, Aug 06, 2024 at 08:56:15PM +0200, Jann Horn wrote:
> > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > +       if (is_scoped)
> > > +               return 0;
> > > +
> > > +       return -EPERM;
> > > +}
> > > +
> > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > +                                   struct fown_struct *fown, int signum)
> > > +{
> > > +       bool is_scoped;
> > > +       const struct landlock_ruleset *dom, *target_dom;
> > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> >
> > I'm not an expert on how the fowner stuff works, but I think this will
> > probably give you "result = NULL" if the file owner PID has already
> > exited, and then the following landlock_get_task_domain() would
> > probably crash? But I'm not entirely sure about how this works.
> I considered since the file structure can always be obtained, then the
> file owner PID always exist.

I think the file owner "struct pid" always exists here; but I think
the PID does not necessarily point to a task_struct.

I think you can have a scenario where userspace does something like this:

int fd = <open some file descriptor somehow>;
fcntl(fd, F_SETOWN, <PID of some other process>);
pid_t child_pid = fork();
if (child_pid == 0) {
  /* this executes in the child process */
  sleep(5);
  <do something on the fd that triggers send_sigio() or send_sigurg()>
  sleep(5);
  exit(0);
}
/* this continues executing in the parent process */
exit(0);

At the fcntl(fd, F_SETOWN, ...) call, a reference-counted reference to
the "struct pid" of the parent process will be stored in the file
(this happens in "f_modown", which increments the reference count of
the "struct pid" using "get_pid(...)"). But when the parent process
exits, the pointer from the parent's "struct pid" to the parent's
"task_struct" is removed, and the "task_struct" will eventually be
deleted from memory.

So when, at a later time, something triggers send_sigio() or
send_sigurg() and enters this LSM hook, I think
"get_pid_task(fown->pid, fown->pid_type)" can return NULL.

> I can check if we can use the credentials
> stored in struct file instead.

(sort of relevant: The manpage
https://man7.org/linux/man-pages/man2/fcntl.2.html says "Note: The
F_SETOWN operation records the caller's credentials at the time of the
fcntl() call, and it is these saved credentials that are used for the
permission checks.")

You mean the credentials in the "f_cred" member of "struct file", right? If so:

I don't think Landlock can use the existing credentials stored in
file->f_cred for this. Consider the following scenario:

1. A process (with no landlock restrictions so far) opens a file. At
this point, the caller's credentials (which indicate no landlock
restrictions) are recorded in file->f_cred.
2. The process installs a landlock filter that restricts the ability
to send signals. (This changes the credentials pointer of the process
to a new set of credentials that points to the new landlock domain.)
3. Malicious code starts executing in the sandboxed process.
4. The malicious code uses F_SETOWN on the already-open file.
5. Something causes sending of a signal via send_sigio() or
send_sigurg() on this file.

In this scenario, if the LSM hook used the landlock restrictions
attached to file->f_cred, it would think the operation is not filtered
by any landlock domain.

> > I think the intended way to use this hook would be to instead use the
> > "file_set_fowner" hook to record the owning domain (though the setup
> > for that is going to be kind of a pain...), see the Smack and SELinux
> > definitions of that hook. Or alternatively maybe it would be even
> > nicer to change the fown_struct to record a cred* instead of a uid and
> > euid and then use the domain from those credentials for this hook...
> > I'm not sure which of those would be easier.
> Because Landlock does not use any security blob for this purpose, I am
> not sure how to record the owner's doamin.

I think landlock already has a landlock_file_security blob attached to
"struct file" that you could use to store an extra landlock domain
pointer for this, kinda like the "fown_sid" in SELinux? But doing this
for landlock would be a little more annoying because you'd have to
store a reference-counted pointer to the landlock domain in the
landlock_file_security blob, so you'd have to make sure to also
decrement the reference count when the file is freed (with the
file_free_security hook), and you'd have to use locking (or atomic
operations) to make sure that two concurrent file_set_fowner calls
don't race in a dangerous way...

So I think it's fairly straightforward, but it would be kinda ugly.

> The alternative way looks nice.

Yeah, I agree that it looks nicer. (If you decide to implement it by
refactoring the fown_struct, it would be a good idea to make that a
separate patch in your patch series - partly because that way, the
maintainers of that fs/fcntl.c code can review just the refactoring,
and partly just because splitting logical changes into separate
patches makes it easier to review the individual patches.)

> > > +       /* rcu is already locked! */
> > > +       dom = landlock_get_task_domain(result);
> > > +       target_dom = landlock_get_task_domain(tsk);
> > > +       is_scoped = domain_IPC_scope(dom, target_dom, LANDLOCK_SCOPED_SIGNAL);
> > > +       put_task_struct(result);
> > > +       if (is_scoped)
> > > +               return 0;
> > > +       return -EPERM;
> > > +}
Mickaël Salaün Aug. 7, 2024, 6:16 p.m. UTC | #5
On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > Currently, a sandbox process is not restricted to send a signal
> > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > Ability to sending a signal for a sandboxed process should be
> > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > we extend "scoped" field in a ruleset with
> > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > sending any signal from within a sandbox process to its
> > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> [...]
> > > +       if (is_scoped)
> > > +               return 0;
> > > +
> > > +       return -EPERM;
> > > +}
> > > +
> > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > +                                   struct fown_struct *fown, int signum)

I was wondering if we should handle this case, but I guess it makes
sense to have a consistent policy for all kind of user-triggerable
signals.

> > > +{
> > > +       bool is_scoped;
> > > +       const struct landlock_ruleset *dom, *target_dom;
> > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> >
> > I'm not an expert on how the fowner stuff works, but I think this will
> > probably give you "result = NULL" if the file owner PID has already
> > exited, and then the following landlock_get_task_domain() would
> > probably crash? But I'm not entirely sure about how this works.
> >
> > I think the intended way to use this hook would be to instead use the
> > "file_set_fowner" hook to record the owning domain (though the setup
> > for that is going to be kind of a pain...), see the Smack and SELinux
> > definitions of that hook. Or alternatively maybe it would be even
> > nicer to change the fown_struct to record a cred* instead of a uid and
> > euid and then use the domain from those credentials for this hook...
> > I'm not sure which of those would be easier.
> 
> (For what it's worth, I think the first option would probably be
> easier to implement and ship for now, since you can basically copy
> what Smack and SELinux are already doing in their implementations of
> these hooks. I think the second option would theoretically result in
> nicer code, but it might require a bit more work, and you'd have to
> include the maintainers of the file locking code in the review of such
> refactoring and have them approve those changes. So if you want to get
> this patchset into the kernel quickly, the first option might be
> better for now?)
> 

I agree, let's extend landlock_file_security with a new "fown" pointer
to a Landlock domain. We'll need to call landlock_get_ruleset() in
hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
hook_file_free_security().

I would be nice to to replace the redundant informations in fown_struct
but that can wait.
Tahera Fahimi Aug. 7, 2024, 11:36 p.m. UTC | #6
On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > Currently, a sandbox process is not restricted to send a signal
> > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > Ability to sending a signal for a sandboxed process should be
> > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > we extend "scoped" field in a ruleset with
> > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > sending any signal from within a sandbox process to its
> > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > [...]
> > > > +       if (is_scoped)
> > > > +               return 0;
> > > > +
> > > > +       return -EPERM;
> > > > +}
> > > > +
> > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > +                                   struct fown_struct *fown, int signum)
> 
> I was wondering if we should handle this case, but I guess it makes
> sense to have a consistent policy for all kind of user-triggerable
> signals.
> 
> > > > +{
> > > > +       bool is_scoped;
> > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > >
> > > I'm not an expert on how the fowner stuff works, but I think this will
> > > probably give you "result = NULL" if the file owner PID has already
> > > exited, and then the following landlock_get_task_domain() would
> > > probably crash? But I'm not entirely sure about how this works.
> > >
> > > I think the intended way to use this hook would be to instead use the
> > > "file_set_fowner" hook to record the owning domain (though the setup
> > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > definitions of that hook. Or alternatively maybe it would be even
> > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > euid and then use the domain from those credentials for this hook...
> > > I'm not sure which of those would be easier.
> > 
> > (For what it's worth, I think the first option would probably be
> > easier to implement and ship for now, since you can basically copy
> > what Smack and SELinux are already doing in their implementations of
> > these hooks. I think the second option would theoretically result in
> > nicer code, but it might require a bit more work, and you'd have to
> > include the maintainers of the file locking code in the review of such
> > refactoring and have them approve those changes. So if you want to get
> > this patchset into the kernel quickly, the first option might be
> > better for now?)
> > 
> 
> I agree, let's extend landlock_file_security with a new "fown" pointer
> to a Landlock domain. We'll need to call landlock_get_ruleset() in
> hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> hook_file_free_security().
I think we should add a new hook (hook_file_set_owner()) to initialize
the "fown" pointer and call landlock_get_ruleset() in that? If we do not
have hook_file_set_owner to store domain in "fown", can you please give
me a hint on where to do that?
Thanks 
> I would be nice to to replace the redundant informations in fown_struct
> but that can wait.
Jann Horn Aug. 8, 2024, 1:10 a.m. UTC | #7
On Thu, Aug 8, 2024 at 1:36 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> > On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > Currently, a sandbox process is not restricted to send a signal
> > > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > > Ability to sending a signal for a sandboxed process should be
> > > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > > we extend "scoped" field in a ruleset with
> > > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > > sending any signal from within a sandbox process to its
> > > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > > [...]
> > > > > +       if (is_scoped)
> > > > > +               return 0;
> > > > > +
> > > > > +       return -EPERM;
> > > > > +}
> > > > > +
> > > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > > +                                   struct fown_struct *fown, int signum)
> >
> > I was wondering if we should handle this case, but I guess it makes
> > sense to have a consistent policy for all kind of user-triggerable
> > signals.
> >
> > > > > +{
> > > > > +       bool is_scoped;
> > > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > > >
> > > > I'm not an expert on how the fowner stuff works, but I think this will
> > > > probably give you "result = NULL" if the file owner PID has already
> > > > exited, and then the following landlock_get_task_domain() would
> > > > probably crash? But I'm not entirely sure about how this works.
> > > >
> > > > I think the intended way to use this hook would be to instead use the
> > > > "file_set_fowner" hook to record the owning domain (though the setup
> > > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > > definitions of that hook. Or alternatively maybe it would be even
> > > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > > euid and then use the domain from those credentials for this hook...
> > > > I'm not sure which of those would be easier.
> > >
> > > (For what it's worth, I think the first option would probably be
> > > easier to implement and ship for now, since you can basically copy
> > > what Smack and SELinux are already doing in their implementations of
> > > these hooks. I think the second option would theoretically result in
> > > nicer code, but it might require a bit more work, and you'd have to
> > > include the maintainers of the file locking code in the review of such
> > > refactoring and have them approve those changes. So if you want to get
> > > this patchset into the kernel quickly, the first option might be
> > > better for now?)
> > >
> >
> > I agree, let's extend landlock_file_security with a new "fown" pointer
> > to a Landlock domain. We'll need to call landlock_get_ruleset() in
> > hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> > hook_file_free_security().
> I think we should add a new hook (hook_file_set_owner()) to initialize
> the "fown" pointer and call landlock_get_ruleset() in that?

Yeah. Initialize the pointer in the file_set_fowner hook, and read the
pointer in the file_send_sigiotask hook.

Note that in the file_set_fowner hook, you'll probably need to use
both landlock_get_ruleset() (to take a reference on the ruleset you're
storing in the fown pointer) and landlock_put_ruleset() (to drop the
reference to the ruleset that the fown pointer was pointing to
before). And you'll need to use some kind of lock to protect the fown
pointer - either by adding an appropriate lock next to your fown
pointer or by using some appropriate existing lock in "struct file".
Probably it's cleanest to have your own lock for this? (This lock will
have to be something like a spinlock, not a mutex, since you need to
be able to acquire it in the file_set_fowner hook, which runs inside
an RCU read-side critical section, where sleeping is forbidden -
acquiring a mutex can sleep and therefore is forbidden in this
context, acquiring a spinlock can't sleep.)

> If we do not
> have hook_file_set_owner to store domain in "fown", can you please give
> me a hint on where to do that?
> Thanks
> > I would be nice to to replace the redundant informations in fown_struct
> > but that can wait.
Mickaël Salaün Aug. 8, 2024, 2:09 p.m. UTC | #8
On Thu, Aug 08, 2024 at 03:10:54AM +0200, Jann Horn wrote:
> On Thu, Aug 8, 2024 at 1:36 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> > > On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > > > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > Currently, a sandbox process is not restricted to send a signal
> > > > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > > > Ability to sending a signal for a sandboxed process should be
> > > > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > > > we extend "scoped" field in a ruleset with
> > > > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > > > sending any signal from within a sandbox process to its
> > > > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > > > [...]
> > > > > > +       if (is_scoped)
> > > > > > +               return 0;
> > > > > > +
> > > > > > +       return -EPERM;
> > > > > > +}
> > > > > > +
> > > > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > > > +                                   struct fown_struct *fown, int signum)
> > >
> > > I was wondering if we should handle this case, but I guess it makes
> > > sense to have a consistent policy for all kind of user-triggerable
> > > signals.
> > >
> > > > > > +{
> > > > > > +       bool is_scoped;
> > > > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > > > >
> > > > > I'm not an expert on how the fowner stuff works, but I think this will
> > > > > probably give you "result = NULL" if the file owner PID has already
> > > > > exited, and then the following landlock_get_task_domain() would
> > > > > probably crash? But I'm not entirely sure about how this works.
> > > > >
> > > > > I think the intended way to use this hook would be to instead use the
> > > > > "file_set_fowner" hook to record the owning domain (though the setup
> > > > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > > > definitions of that hook. Or alternatively maybe it would be even
> > > > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > > > euid and then use the domain from those credentials for this hook...
> > > > > I'm not sure which of those would be easier.
> > > >
> > > > (For what it's worth, I think the first option would probably be
> > > > easier to implement and ship for now, since you can basically copy
> > > > what Smack and SELinux are already doing in their implementations of
> > > > these hooks. I think the second option would theoretically result in
> > > > nicer code, but it might require a bit more work, and you'd have to
> > > > include the maintainers of the file locking code in the review of such
> > > > refactoring and have them approve those changes. So if you want to get
> > > > this patchset into the kernel quickly, the first option might be
> > > > better for now?)
> > > >
> > >
> > > I agree, let's extend landlock_file_security with a new "fown" pointer
> > > to a Landlock domain. We'll need to call landlock_get_ruleset() in
> > > hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> > > hook_file_free_security().
> > I think we should add a new hook (hook_file_set_owner()) to initialize
> > the "fown" pointer and call landlock_get_ruleset() in that?
> 
> Yeah. Initialize the pointer in the file_set_fowner hook, and read the
> pointer in the file_send_sigiotask hook.
> 
> Note that in the file_set_fowner hook, you'll probably need to use
> both landlock_get_ruleset() (to take a reference on the ruleset you're
> storing in the fown pointer) and landlock_put_ruleset() (to drop the
> reference to the ruleset that the fown pointer was pointing to
> before). And you'll need to use some kind of lock to protect the fown
> pointer - either by adding an appropriate lock next to your fown
> pointer or by using some appropriate existing lock in "struct file".
> Probably it's cleanest to have your own lock for this? (This lock will
> have to be something like a spinlock, not a mutex, since you need to
> be able to acquire it in the file_set_fowner hook, which runs inside
> an RCU read-side critical section, where sleeping is forbidden -
> acquiring a mutex can sleep and therefore is forbidden in this
> context, acquiring a spinlock can't sleep.)

Yes, I think this should work for file_set_fowner:

struct landlock_ruleset *prev_dom, *new_dom;

new_dom = landlock_get_current_domain();
landlock_get_ruleset(new_dom);

/* Cf. f_modown() */
write_lock_irq(&filp->f_owner.lock);
prev_dom = rcu_replace_pointer(&landlock_file(file)->fown_domain,
	new_dom, lockdep_is_held(&filp->f_owner.lock));
write_unlock_irq(&filp->f_owner.lock);

landlock_put_ruleset_rcu(prev_dom);


With landlock_put_ruleset_rcu() define with this:

diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index a93bdbf52fff..897116205520 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -524,6 +524,20 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
 	}
 }
 
+static void free_ruleset_rcu(struct rcu_head *const head)
+{
+	struct landlock_ruleset *ruleset;
+
+	ruleset = container_of(head, struct landlock_ruleset, rcu);
+	free_ruleset(ruleset);
+}
+
+void landlock_put_ruleset_rcu(struct landlock_ruleset *const ruleset)
+{
+	if (ruleset && refcount_dec_and_test(&ruleset->usage))
+		call_rcu(&ruleset->rcu, free_ruleset_rcu);
+}
+
 /**
  * landlock_merge_ruleset - Merge a ruleset with a domain
  *
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index c749fa0b3ecd..c930b39174b0 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -190,19 +190,35 @@ struct landlock_ruleset {
 		 * @work_free: Enables to free a ruleset within a lockless
 		 * section.  This is only used by
 		 * landlock_put_ruleset_deferred() when @usage reaches zero.
-		 * The fields @lock, @usage, @num_rules, @num_layers and
+		 * The fields @rcu, @lock, @usage, @num_rules, @num_layers and
 		 * @access_masks are then unused.
 		 */
 		struct work_struct work_free;
 		struct {
-			/**
-			 * @lock: Protects against concurrent modifications of
-			 * @root, if @usage is greater than zero.
-			 */
-			struct mutex lock;
+			union {
+				/**
+				 * @rcu: Protects RCU read-side critical
+				 * sections.  This is only used by
+				 * landlock_put_ruleset_rcu() when @usage
+				 * reaches zero.
+				 *
+				 * Only used for domains.
+				 */
+				struct rcu_head rcu;
+				/**
+				 * @lock: Protects against concurrent
+				 * modifications of @root_inode and
+				 * @root_net_port, if @usage is greater than
+				 * zero.
+				 *
+				 * Only used for rulesets.
+				 */
+				struct mutex lock;
+			};
 			/**
 			 * @usage: Number of processes (i.e. domains) or file
-			 * descriptors referencing this ruleset.
+			 * descriptors referencing this ruleset.  It can be
+			 * zero in RCU read-side critical sections.
 			 */
 			refcount_t usage;
 			/**
@@ -241,6 +257,7 @@ landlock_create_ruleset(const access_mask_t access_mask_fs,
 
 void landlock_put_ruleset(struct landlock_ruleset *const ruleset);
 void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset);
+void landlock_put_ruleset_rcu(struct landlock_ruleset *const ruleset);
 
 int landlock_insert_rule(struct landlock_ruleset *const ruleset,
 			 const struct landlock_id id,
Jann Horn Aug. 8, 2024, 2:42 p.m. UTC | #9
On Thu, Aug 8, 2024 at 4:09 PM Mickaël Salaün <mic@digikod.net> wrote:
> On Thu, Aug 08, 2024 at 03:10:54AM +0200, Jann Horn wrote:
> > On Thu, Aug 8, 2024 at 1:36 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> > > > On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > > > > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > > > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > > Currently, a sandbox process is not restricted to send a signal
> > > > > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > > > > Ability to sending a signal for a sandboxed process should be
> > > > > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > > > > we extend "scoped" field in a ruleset with
> > > > > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > > > > sending any signal from within a sandbox process to its
> > > > > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > > > > [...]
> > > > > > > +       if (is_scoped)
> > > > > > > +               return 0;
> > > > > > > +
> > > > > > > +       return -EPERM;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > > > > +                                   struct fown_struct *fown, int signum)
> > > >
> > > > I was wondering if we should handle this case, but I guess it makes
> > > > sense to have a consistent policy for all kind of user-triggerable
> > > > signals.
> > > >
> > > > > > > +{
> > > > > > > +       bool is_scoped;
> > > > > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > > > > >
> > > > > > I'm not an expert on how the fowner stuff works, but I think this will
> > > > > > probably give you "result = NULL" if the file owner PID has already
> > > > > > exited, and then the following landlock_get_task_domain() would
> > > > > > probably crash? But I'm not entirely sure about how this works.
> > > > > >
> > > > > > I think the intended way to use this hook would be to instead use the
> > > > > > "file_set_fowner" hook to record the owning domain (though the setup
> > > > > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > > > > definitions of that hook. Or alternatively maybe it would be even
> > > > > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > > > > euid and then use the domain from those credentials for this hook...
> > > > > > I'm not sure which of those would be easier.
> > > > >
> > > > > (For what it's worth, I think the first option would probably be
> > > > > easier to implement and ship for now, since you can basically copy
> > > > > what Smack and SELinux are already doing in their implementations of
> > > > > these hooks. I think the second option would theoretically result in
> > > > > nicer code, but it might require a bit more work, and you'd have to
> > > > > include the maintainers of the file locking code in the review of such
> > > > > refactoring and have them approve those changes. So if you want to get
> > > > > this patchset into the kernel quickly, the first option might be
> > > > > better for now?)
> > > > >
> > > >
> > > > I agree, let's extend landlock_file_security with a new "fown" pointer
> > > > to a Landlock domain. We'll need to call landlock_get_ruleset() in
> > > > hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> > > > hook_file_free_security().
> > > I think we should add a new hook (hook_file_set_owner()) to initialize
> > > the "fown" pointer and call landlock_get_ruleset() in that?
> >
> > Yeah. Initialize the pointer in the file_set_fowner hook, and read the
> > pointer in the file_send_sigiotask hook.
> >
> > Note that in the file_set_fowner hook, you'll probably need to use
> > both landlock_get_ruleset() (to take a reference on the ruleset you're
> > storing in the fown pointer) and landlock_put_ruleset() (to drop the
> > reference to the ruleset that the fown pointer was pointing to
> > before). And you'll need to use some kind of lock to protect the fown
> > pointer - either by adding an appropriate lock next to your fown
> > pointer or by using some appropriate existing lock in "struct file".
> > Probably it's cleanest to have your own lock for this? (This lock will
> > have to be something like a spinlock, not a mutex, since you need to
> > be able to acquire it in the file_set_fowner hook, which runs inside
> > an RCU read-side critical section, where sleeping is forbidden -
> > acquiring a mutex can sleep and therefore is forbidden in this
> > context, acquiring a spinlock can't sleep.)
>
> Yes, I think this should work for file_set_fowner:
>
> struct landlock_ruleset *prev_dom, *new_dom;
>
> new_dom = landlock_get_current_domain();
> landlock_get_ruleset(new_dom);
>
> /* Cf. f_modown() */
> write_lock_irq(&filp->f_owner.lock);
> prev_dom = rcu_replace_pointer(&landlock_file(file)->fown_domain,
>         new_dom, lockdep_is_held(&filp->f_owner.lock));
> write_unlock_irq(&filp->f_owner.lock);
>
> landlock_put_ruleset_rcu(prev_dom);
>
>
> With landlock_put_ruleset_rcu() define with this:
>
> diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
> index a93bdbf52fff..897116205520 100644
> --- a/security/landlock/ruleset.c
> +++ b/security/landlock/ruleset.c
> @@ -524,6 +524,20 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
>         }
>  }
>
> +static void free_ruleset_rcu(struct rcu_head *const head)
> +{
> +       struct landlock_ruleset *ruleset;
> +
> +       ruleset = container_of(head, struct landlock_ruleset, rcu);
> +       free_ruleset(ruleset);
> +}

free_ruleset() can block but RCU callbacks aren't allowed to block,
that's why landlock_put_ruleset_deferred() exists.

> +
> +void landlock_put_ruleset_rcu(struct landlock_ruleset *const ruleset)
> +{
> +       if (ruleset && refcount_dec_and_test(&ruleset->usage))
> +               call_rcu(&ruleset->rcu, free_ruleset_rcu);
> +}

No, this pattern of combining refcounting and RCU doesn't work.

One legal pattern is:
*The entire object* is subject to RCU; any refcount decrement that
drops the refcount to 0 does call_rcu().
(This is the usual RCU refcounting pattern in the kernel.)

Another legal pattern is:
One particular *reference* is subject to RCU; when dropping this
reference, *the refcount decrement is delayed with call_rcu()*.
(This is basically the RCU pattern used for stuff like the reference
from "struct pid" to "struct task_struct".)

But you can't use call_rcu() depending on whether the last dropped
reference happened to be a reference that required RCU; what if the
refcount is 2, then you first call landlock_put_ruleset_rcu() which
decrements the refcount to 1, and immediately afterwards you call
landlock_put_ruleset() which drops the refcount to 0 and frees the
object without waiting for an RCU grace period? Like so:

thread A         thread B
========         ========
rcu_read_lock()
ruleset = rcu_dereference(...->fown_domain)
                 ruleset = rcu_replace_pointer(...->fown_domain, new_dom, ...)
                 landlock_put_ruleset_rcu(ruleset)
                 landlock_put_ruleset(ruleset)
                   free_ruleset(ruleset)
                     kfree(ruleset)
access ruleset [UAF]
rcu_read_unlock()

So if you want to use RCU lifetime for this, I think you'll have to
turn landlock_put_ruleset() and landlock_put_ruleset_deferred() into
one common function that always, when reaching refcount 0, schedules
an RCU callback which then schedules a work_struct which then does
free_ruleset().

I think that would be a little ugly, and it would look nicer to just
use normal locking in the file_send_sigiotask hook?
Mickaël Salaün Aug. 9, 2024, 10:59 a.m. UTC | #10
On Thu, Aug 08, 2024 at 04:42:23PM +0200, Jann Horn wrote:
> On Thu, Aug 8, 2024 at 4:09 PM Mickaël Salaün <mic@digikod.net> wrote:
> > On Thu, Aug 08, 2024 at 03:10:54AM +0200, Jann Horn wrote:
> > > On Thu, Aug 8, 2024 at 1:36 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> > > > > On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > > > > > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > > > > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > > > Currently, a sandbox process is not restricted to send a signal
> > > > > > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > > > > > Ability to sending a signal for a sandboxed process should be
> > > > > > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > > > > > we extend "scoped" field in a ruleset with
> > > > > > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > > > > > sending any signal from within a sandbox process to its
> > > > > > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > > > > > [...]
> > > > > > > > +       if (is_scoped)
> > > > > > > > +               return 0;
> > > > > > > > +
> > > > > > > > +       return -EPERM;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > > > > > +                                   struct fown_struct *fown, int signum)
> > > > >
> > > > > I was wondering if we should handle this case, but I guess it makes
> > > > > sense to have a consistent policy for all kind of user-triggerable
> > > > > signals.
> > > > >
> > > > > > > > +{
> > > > > > > > +       bool is_scoped;
> > > > > > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > > > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > > > > > >
> > > > > > > I'm not an expert on how the fowner stuff works, but I think this will
> > > > > > > probably give you "result = NULL" if the file owner PID has already
> > > > > > > exited, and then the following landlock_get_task_domain() would
> > > > > > > probably crash? But I'm not entirely sure about how this works.
> > > > > > >
> > > > > > > I think the intended way to use this hook would be to instead use the
> > > > > > > "file_set_fowner" hook to record the owning domain (though the setup
> > > > > > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > > > > > definitions of that hook. Or alternatively maybe it would be even
> > > > > > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > > > > > euid and then use the domain from those credentials for this hook...
> > > > > > > I'm not sure which of those would be easier.
> > > > > >
> > > > > > (For what it's worth, I think the first option would probably be
> > > > > > easier to implement and ship for now, since you can basically copy
> > > > > > what Smack and SELinux are already doing in their implementations of
> > > > > > these hooks. I think the second option would theoretically result in
> > > > > > nicer code, but it might require a bit more work, and you'd have to
> > > > > > include the maintainers of the file locking code in the review of such
> > > > > > refactoring and have them approve those changes. So if you want to get
> > > > > > this patchset into the kernel quickly, the first option might be
> > > > > > better for now?)
> > > > > >
> > > > >
> > > > > I agree, let's extend landlock_file_security with a new "fown" pointer
> > > > > to a Landlock domain. We'll need to call landlock_get_ruleset() in
> > > > > hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> > > > > hook_file_free_security().
> > > > I think we should add a new hook (hook_file_set_owner()) to initialize
> > > > the "fown" pointer and call landlock_get_ruleset() in that?
> > >
> > > Yeah. Initialize the pointer in the file_set_fowner hook, and read the
> > > pointer in the file_send_sigiotask hook.
> > >
> > > Note that in the file_set_fowner hook, you'll probably need to use
> > > both landlock_get_ruleset() (to take a reference on the ruleset you're
> > > storing in the fown pointer) and landlock_put_ruleset() (to drop the
> > > reference to the ruleset that the fown pointer was pointing to
> > > before). And you'll need to use some kind of lock to protect the fown
> > > pointer - either by adding an appropriate lock next to your fown
> > > pointer or by using some appropriate existing lock in "struct file".
> > > Probably it's cleanest to have your own lock for this? (This lock will
> > > have to be something like a spinlock, not a mutex, since you need to
> > > be able to acquire it in the file_set_fowner hook, which runs inside
> > > an RCU read-side critical section, where sleeping is forbidden -
> > > acquiring a mutex can sleep and therefore is forbidden in this
> > > context, acquiring a spinlock can't sleep.)
> >
> > Yes, I think this should work for file_set_fowner:
> >
> > struct landlock_ruleset *prev_dom, *new_dom;
> >
> > new_dom = landlock_get_current_domain();
> > landlock_get_ruleset(new_dom);
> >
> > /* Cf. f_modown() */
> > write_lock_irq(&filp->f_owner.lock);
> > prev_dom = rcu_replace_pointer(&landlock_file(file)->fown_domain,
> >         new_dom, lockdep_is_held(&filp->f_owner.lock));
> > write_unlock_irq(&filp->f_owner.lock);
> >
> > landlock_put_ruleset_rcu(prev_dom);
> >
> >
> > With landlock_put_ruleset_rcu() define with this:
> >
> > diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
> > index a93bdbf52fff..897116205520 100644
> > --- a/security/landlock/ruleset.c
> > +++ b/security/landlock/ruleset.c
> > @@ -524,6 +524,20 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
> >         }
> >  }
> >
> > +static void free_ruleset_rcu(struct rcu_head *const head)
> > +{
> > +       struct landlock_ruleset *ruleset;
> > +
> > +       ruleset = container_of(head, struct landlock_ruleset, rcu);
> > +       free_ruleset(ruleset);
> > +}
> 
> free_ruleset() can block but RCU callbacks aren't allowed to block,
> that's why landlock_put_ruleset_deferred() exists.

Yes, but landlock_put_ruleset_deferred() doesn't wait for RCU read-side
critical sections.

> 
> > +
> > +void landlock_put_ruleset_rcu(struct landlock_ruleset *const ruleset)
> > +{
> > +       if (ruleset && refcount_dec_and_test(&ruleset->usage))
> > +               call_rcu(&ruleset->rcu, free_ruleset_rcu);
> > +}
> 
> No, this pattern of combining refcounting and RCU doesn't work.
> 
> One legal pattern is:
> *The entire object* is subject to RCU; any refcount decrement that
> drops the refcount to 0 does call_rcu().
> (This is the usual RCU refcounting pattern in the kernel.)
> 
> Another legal pattern is:
> One particular *reference* is subject to RCU; when dropping this
> reference, *the refcount decrement is delayed with call_rcu()*.
> (This is basically the RCU pattern used for stuff like the reference
> from "struct pid" to "struct task_struct".)
> 
> But you can't use call_rcu() depending on whether the last dropped
> reference happened to be a reference that required RCU; what if the
> refcount is 2, then you first call landlock_put_ruleset_rcu() which
> decrements the refcount to 1, and immediately afterwards you call
> landlock_put_ruleset() which drops the refcount to 0 and frees the
> object without waiting for an RCU grace period? Like so:
> 
> thread A         thread B
> ========         ========
> rcu_read_lock()
> ruleset = rcu_dereference(...->fown_domain)
>                  ruleset = rcu_replace_pointer(...->fown_domain, new_dom, ...)
>                  landlock_put_ruleset_rcu(ruleset)
>                  landlock_put_ruleset(ruleset)
>                    free_ruleset(ruleset)
>                      kfree(ruleset)
> access ruleset [UAF]
> rcu_read_unlock()

Indeed

> 
> So if you want to use RCU lifetime for this, I think you'll have to
> turn landlock_put_ruleset() and landlock_put_ruleset_deferred() into
> one common function that always, when reaching refcount 0, schedules
> an RCU callback which then schedules a work_struct which then does
> free_ruleset().
> 
> I think that would be a little ugly, and it would look nicer to just
> use normal locking in the file_send_sigiotask hook?

I don't see how we can do that without delaying the free_ruleset() call
to after the RCU read-side critical section in f_setown().

What about calling refcount_dec_and_test() in free_ruleset_rcu()?  That
would almost always queue this call but it looks safe.

An alternative might be to call synchronize_rcu() in free_ruleset(), but
it's a big ugly too.

BTW, I don't understand why neither SELinux nor Smack use (explicit)
atomic operations nor lock.  And it looks weird that
security_file_set_fowner() isn't called by f_modown() with the same
locking to avoid races.
Mickaël Salaün Aug. 9, 2024, 12:40 p.m. UTC | #11
On Fri, Aug 09, 2024 at 12:59:48PM +0200, Mickaël Salaün wrote:
> On Thu, Aug 08, 2024 at 04:42:23PM +0200, Jann Horn wrote:
> > On Thu, Aug 8, 2024 at 4:09 PM Mickaël Salaün <mic@digikod.net> wrote:
> > > On Thu, Aug 08, 2024 at 03:10:54AM +0200, Jann Horn wrote:
> > > > On Thu, Aug 8, 2024 at 1:36 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> > > > > > On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > > > > > > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > > > > > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > > > > Currently, a sandbox process is not restricted to send a signal
> > > > > > > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > > > > > > Ability to sending a signal for a sandboxed process should be
> > > > > > > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > > > > > > we extend "scoped" field in a ruleset with
> > > > > > > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > > > > > > sending any signal from within a sandbox process to its
> > > > > > > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > > > > > > [...]
> > > > > > > > > +       if (is_scoped)
> > > > > > > > > +               return 0;
> > > > > > > > > +
> > > > > > > > > +       return -EPERM;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > > > > > > +                                   struct fown_struct *fown, int signum)
> > > > > >
> > > > > > I was wondering if we should handle this case, but I guess it makes
> > > > > > sense to have a consistent policy for all kind of user-triggerable
> > > > > > signals.
> > > > > >
> > > > > > > > > +{
> > > > > > > > > +       bool is_scoped;
> > > > > > > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > > > > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > > > > > > >
> > > > > > > > I'm not an expert on how the fowner stuff works, but I think this will
> > > > > > > > probably give you "result = NULL" if the file owner PID has already
> > > > > > > > exited, and then the following landlock_get_task_domain() would
> > > > > > > > probably crash? But I'm not entirely sure about how this works.
> > > > > > > >
> > > > > > > > I think the intended way to use this hook would be to instead use the
> > > > > > > > "file_set_fowner" hook to record the owning domain (though the setup
> > > > > > > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > > > > > > definitions of that hook. Or alternatively maybe it would be even
> > > > > > > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > > > > > > euid and then use the domain from those credentials for this hook...
> > > > > > > > I'm not sure which of those would be easier.
> > > > > > >
> > > > > > > (For what it's worth, I think the first option would probably be
> > > > > > > easier to implement and ship for now, since you can basically copy
> > > > > > > what Smack and SELinux are already doing in their implementations of
> > > > > > > these hooks. I think the second option would theoretically result in
> > > > > > > nicer code, but it might require a bit more work, and you'd have to
> > > > > > > include the maintainers of the file locking code in the review of such
> > > > > > > refactoring and have them approve those changes. So if you want to get
> > > > > > > this patchset into the kernel quickly, the first option might be
> > > > > > > better for now?)
> > > > > > >
> > > > > >
> > > > > > I agree, let's extend landlock_file_security with a new "fown" pointer
> > > > > > to a Landlock domain. We'll need to call landlock_get_ruleset() in
> > > > > > hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> > > > > > hook_file_free_security().
> > > > > I think we should add a new hook (hook_file_set_owner()) to initialize
> > > > > the "fown" pointer and call landlock_get_ruleset() in that?
> > > >
> > > > Yeah. Initialize the pointer in the file_set_fowner hook, and read the
> > > > pointer in the file_send_sigiotask hook.
> > > >
> > > > Note that in the file_set_fowner hook, you'll probably need to use
> > > > both landlock_get_ruleset() (to take a reference on the ruleset you're
> > > > storing in the fown pointer) and landlock_put_ruleset() (to drop the
> > > > reference to the ruleset that the fown pointer was pointing to
> > > > before). And you'll need to use some kind of lock to protect the fown
> > > > pointer - either by adding an appropriate lock next to your fown
> > > > pointer or by using some appropriate existing lock in "struct file".
> > > > Probably it's cleanest to have your own lock for this? (This lock will
> > > > have to be something like a spinlock, not a mutex, since you need to
> > > > be able to acquire it in the file_set_fowner hook, which runs inside
> > > > an RCU read-side critical section, where sleeping is forbidden -
> > > > acquiring a mutex can sleep and therefore is forbidden in this
> > > > context, acquiring a spinlock can't sleep.)
> > >
> > > Yes, I think this should work for file_set_fowner:
> > >
> > > struct landlock_ruleset *prev_dom, *new_dom;
> > >
> > > new_dom = landlock_get_current_domain();
> > > landlock_get_ruleset(new_dom);
> > >
> > > /* Cf. f_modown() */
> > > write_lock_irq(&filp->f_owner.lock);
> > > prev_dom = rcu_replace_pointer(&landlock_file(file)->fown_domain,
> > >         new_dom, lockdep_is_held(&filp->f_owner.lock));
> > > write_unlock_irq(&filp->f_owner.lock);
> > >
> > > landlock_put_ruleset_rcu(prev_dom);
> > >
> > >
> > > With landlock_put_ruleset_rcu() define with this:
> > >
> > > diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
> > > index a93bdbf52fff..897116205520 100644
> > > --- a/security/landlock/ruleset.c
> > > +++ b/security/landlock/ruleset.c
> > > @@ -524,6 +524,20 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
> > >         }
> > >  }
> > >
> > > +static void free_ruleset_rcu(struct rcu_head *const head)
> > > +{
> > > +       struct landlock_ruleset *ruleset;
> > > +
> > > +       ruleset = container_of(head, struct landlock_ruleset, rcu);
> > > +       free_ruleset(ruleset);
> > > +}
> > 
> > free_ruleset() can block but RCU callbacks aren't allowed to block,
> > that's why landlock_put_ruleset_deferred() exists.
> 
> Yes, but landlock_put_ruleset_deferred() doesn't wait for RCU read-side
> critical sections.
> 
> > 
> > > +
> > > +void landlock_put_ruleset_rcu(struct landlock_ruleset *const ruleset)
> > > +{
> > > +       if (ruleset && refcount_dec_and_test(&ruleset->usage))
> > > +               call_rcu(&ruleset->rcu, free_ruleset_rcu);
> > > +}
> > 
> > No, this pattern of combining refcounting and RCU doesn't work.
> > 
> > One legal pattern is:
> > *The entire object* is subject to RCU; any refcount decrement that
> > drops the refcount to 0 does call_rcu().
> > (This is the usual RCU refcounting pattern in the kernel.)
> > 
> > Another legal pattern is:
> > One particular *reference* is subject to RCU; when dropping this
> > reference, *the refcount decrement is delayed with call_rcu()*.
> > (This is basically the RCU pattern used for stuff like the reference
> > from "struct pid" to "struct task_struct".)
> > 
> > But you can't use call_rcu() depending on whether the last dropped
> > reference happened to be a reference that required RCU; what if the
> > refcount is 2, then you first call landlock_put_ruleset_rcu() which
> > decrements the refcount to 1, and immediately afterwards you call
> > landlock_put_ruleset() which drops the refcount to 0 and frees the
> > object without waiting for an RCU grace period? Like so:
> > 
> > thread A         thread B
> > ========         ========
> > rcu_read_lock()
> > ruleset = rcu_dereference(...->fown_domain)
> >                  ruleset = rcu_replace_pointer(...->fown_domain, new_dom, ...)
> >                  landlock_put_ruleset_rcu(ruleset)
> >                  landlock_put_ruleset(ruleset)
> >                    free_ruleset(ruleset)
> >                      kfree(ruleset)
> > access ruleset [UAF]
> > rcu_read_unlock()
> 
> Indeed
> 
> > 
> > So if you want to use RCU lifetime for this, I think you'll have to
> > turn landlock_put_ruleset() and landlock_put_ruleset_deferred() into
> > one common function that always, when reaching refcount 0, schedules
> > an RCU callback which then schedules a work_struct which then does
> > free_ruleset().
> > 
> > I think that would be a little ugly, and it would look nicer to just
> > use normal locking in the file_send_sigiotask hook?
> 
> I don't see how we can do that without delaying the free_ruleset() call
> to after the RCU read-side critical section in f_setown().
> 
> What about calling refcount_dec_and_test() in free_ruleset_rcu()?  That
> would almost always queue this call but it looks safe.

That's the second pattern you pointed out just above.  Because we should
only use the related struct rcu_head for this call site (which is
protected with f_owner.lock), we should make that explicit with a
"_fown" suffix for the function (landlock_put_ruleset_rcu_fown) and the
ruleset's field (rcu_fown).

> 
> An alternative might be to call synchronize_rcu() in free_ruleset(), but
> it's a big ugly too.
> 
> BTW, I don't understand why neither SELinux nor Smack use (explicit)
> atomic operations nor lock.  And it looks weird that
> security_file_set_fowner() isn't called by f_modown() with the same
> locking to avoid races.
Jann Horn Aug. 9, 2024, 12:44 p.m. UTC | #12
On Fri, Aug 9, 2024 at 12:59 PM Mickaël Salaün <mic@digikod.net> wrote:
> On Thu, Aug 08, 2024 at 04:42:23PM +0200, Jann Horn wrote:
> > On Thu, Aug 8, 2024 at 4:09 PM Mickaël Salaün <mic@digikod.net> wrote:
> > > On Thu, Aug 08, 2024 at 03:10:54AM +0200, Jann Horn wrote:
> > > > On Thu, Aug 8, 2024 at 1:36 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> > > > > > On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > > > > > > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > > > > > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > > > > Currently, a sandbox process is not restricted to send a signal
> > > > > > > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > > > > > > Ability to sending a signal for a sandboxed process should be
> > > > > > > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > > > > > > we extend "scoped" field in a ruleset with
> > > > > > > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > > > > > > sending any signal from within a sandbox process to its
> > > > > > > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > > > > > > [...]
> > > > > > > > > +       if (is_scoped)
> > > > > > > > > +               return 0;
> > > > > > > > > +
> > > > > > > > > +       return -EPERM;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > > > > > > +                                   struct fown_struct *fown, int signum)
> > > > > >
> > > > > > I was wondering if we should handle this case, but I guess it makes
> > > > > > sense to have a consistent policy for all kind of user-triggerable
> > > > > > signals.
> > > > > >
> > > > > > > > > +{
> > > > > > > > > +       bool is_scoped;
> > > > > > > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > > > > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > > > > > > >
> > > > > > > > I'm not an expert on how the fowner stuff works, but I think this will
> > > > > > > > probably give you "result = NULL" if the file owner PID has already
> > > > > > > > exited, and then the following landlock_get_task_domain() would
> > > > > > > > probably crash? But I'm not entirely sure about how this works.
> > > > > > > >
> > > > > > > > I think the intended way to use this hook would be to instead use the
> > > > > > > > "file_set_fowner" hook to record the owning domain (though the setup
> > > > > > > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > > > > > > definitions of that hook. Or alternatively maybe it would be even
> > > > > > > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > > > > > > euid and then use the domain from those credentials for this hook...
> > > > > > > > I'm not sure which of those would be easier.
> > > > > > >
> > > > > > > (For what it's worth, I think the first option would probably be
> > > > > > > easier to implement and ship for now, since you can basically copy
> > > > > > > what Smack and SELinux are already doing in their implementations of
> > > > > > > these hooks. I think the second option would theoretically result in
> > > > > > > nicer code, but it might require a bit more work, and you'd have to
> > > > > > > include the maintainers of the file locking code in the review of such
> > > > > > > refactoring and have them approve those changes. So if you want to get
> > > > > > > this patchset into the kernel quickly, the first option might be
> > > > > > > better for now?)
> > > > > > >
> > > > > >
> > > > > > I agree, let's extend landlock_file_security with a new "fown" pointer
> > > > > > to a Landlock domain. We'll need to call landlock_get_ruleset() in
> > > > > > hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> > > > > > hook_file_free_security().
> > > > > I think we should add a new hook (hook_file_set_owner()) to initialize
> > > > > the "fown" pointer and call landlock_get_ruleset() in that?
> > > >
> > > > Yeah. Initialize the pointer in the file_set_fowner hook, and read the
> > > > pointer in the file_send_sigiotask hook.
> > > >
> > > > Note that in the file_set_fowner hook, you'll probably need to use
> > > > both landlock_get_ruleset() (to take a reference on the ruleset you're
> > > > storing in the fown pointer) and landlock_put_ruleset() (to drop the
> > > > reference to the ruleset that the fown pointer was pointing to
> > > > before). And you'll need to use some kind of lock to protect the fown
> > > > pointer - either by adding an appropriate lock next to your fown
> > > > pointer or by using some appropriate existing lock in "struct file".
> > > > Probably it's cleanest to have your own lock for this? (This lock will
> > > > have to be something like a spinlock, not a mutex, since you need to
> > > > be able to acquire it in the file_set_fowner hook, which runs inside
> > > > an RCU read-side critical section, where sleeping is forbidden -
> > > > acquiring a mutex can sleep and therefore is forbidden in this
> > > > context, acquiring a spinlock can't sleep.)
> > >
> > > Yes, I think this should work for file_set_fowner:
> > >
> > > struct landlock_ruleset *prev_dom, *new_dom;
> > >
> > > new_dom = landlock_get_current_domain();
> > > landlock_get_ruleset(new_dom);
> > >
> > > /* Cf. f_modown() */
> > > write_lock_irq(&filp->f_owner.lock);
> > > prev_dom = rcu_replace_pointer(&landlock_file(file)->fown_domain,
> > >         new_dom, lockdep_is_held(&filp->f_owner.lock));
> > > write_unlock_irq(&filp->f_owner.lock);
> > >
> > > landlock_put_ruleset_rcu(prev_dom);
> > >
> > >
> > > With landlock_put_ruleset_rcu() define with this:
> > >
> > > diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
> > > index a93bdbf52fff..897116205520 100644
> > > --- a/security/landlock/ruleset.c
> > > +++ b/security/landlock/ruleset.c
> > > @@ -524,6 +524,20 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
> > >         }
> > >  }
> > >
> > > +static void free_ruleset_rcu(struct rcu_head *const head)
> > > +{
> > > +       struct landlock_ruleset *ruleset;
> > > +
> > > +       ruleset = container_of(head, struct landlock_ruleset, rcu);
> > > +       free_ruleset(ruleset);
> > > +}
> >
> > free_ruleset() can block but RCU callbacks aren't allowed to block,
> > that's why landlock_put_ruleset_deferred() exists.
>
> Yes, but landlock_put_ruleset_deferred() doesn't wait for RCU read-side
> critical sections.

Ah, I phrased that badly - I didn't mean to suggest that you should
use landlock_put_ruleset_deferred() as a replacement for call_rcu().

[...]
> > So if you want to use RCU lifetime for this, I think you'll have to
> > turn landlock_put_ruleset() and landlock_put_ruleset_deferred() into
> > one common function that always, when reaching refcount 0, schedules
> > an RCU callback which then schedules a work_struct which then does
> > free_ruleset().
> >
> > I think that would be a little ugly, and it would look nicer to just
> > use normal locking in the file_send_sigiotask hook?
>
> I don't see how we can do that without delaying the free_ruleset() call
> to after the RCU read-side critical section in f_setown().

It should work if you used landlock_put_ruleset_deferred() instead of
landlock_put_ruleset().

> What about calling refcount_dec_and_test() in free_ruleset_rcu()?  That
> would almost always queue this call but it looks safe.

Every queued RCU invocation needs to have its own rcu_head - I think
the approach you're suggesting could end up queuing the same rcu_head
multiple times?

> An alternative might be to call synchronize_rcu() in free_ruleset(), but
> it's a big ugly too.
>
> BTW, I don't understand why neither SELinux nor Smack use (explicit)
> atomic operations nor lock.

Yeah, I think they're sloppy and kinda wrong - but it sorta works in
practice mostly because they don't have to do any refcounting around
this?

> And it looks weird that
> security_file_set_fowner() isn't called by f_modown() with the same
> locking to avoid races.

True. I imagine maybe the thought behind this design could have been
that LSMs should have their own locking, and that calling an LSM hook
with IRQs off is a little weird? But the way the LSMs actually use the
hook now, it might make sense to call the LSM with the lock held and
IRQs off...
Mickaël Salaün Aug. 9, 2024, 12:45 p.m. UTC | #13
On Fri, Aug 09, 2024 at 02:40:17PM +0200, Mickaël Salaün wrote:
> On Fri, Aug 09, 2024 at 12:59:48PM +0200, Mickaël Salaün wrote:
> > On Thu, Aug 08, 2024 at 04:42:23PM +0200, Jann Horn wrote:
> > > On Thu, Aug 8, 2024 at 4:09 PM Mickaël Salaün <mic@digikod.net> wrote:
> > > > On Thu, Aug 08, 2024 at 03:10:54AM +0200, Jann Horn wrote:
> > > > > On Thu, Aug 8, 2024 at 1:36 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> > > > > > > On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > > > > > > > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > > > > > > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > > > > > Currently, a sandbox process is not restricted to send a signal
> > > > > > > > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > > > > > > > Ability to sending a signal for a sandboxed process should be
> > > > > > > > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > > > > > > > we extend "scoped" field in a ruleset with
> > > > > > > > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > > > > > > > sending any signal from within a sandbox process to its
> > > > > > > > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > > > > > > > [...]
> > > > > > > > > > +       if (is_scoped)
> > > > > > > > > > +               return 0;
> > > > > > > > > > +
> > > > > > > > > > +       return -EPERM;
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > > > > > > > +                                   struct fown_struct *fown, int signum)
> > > > > > >
> > > > > > > I was wondering if we should handle this case, but I guess it makes
> > > > > > > sense to have a consistent policy for all kind of user-triggerable
> > > > > > > signals.
> > > > > > >
> > > > > > > > > > +{
> > > > > > > > > > +       bool is_scoped;
> > > > > > > > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > > > > > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > > > > > > > >
> > > > > > > > > I'm not an expert on how the fowner stuff works, but I think this will
> > > > > > > > > probably give you "result = NULL" if the file owner PID has already
> > > > > > > > > exited, and then the following landlock_get_task_domain() would
> > > > > > > > > probably crash? But I'm not entirely sure about how this works.
> > > > > > > > >
> > > > > > > > > I think the intended way to use this hook would be to instead use the
> > > > > > > > > "file_set_fowner" hook to record the owning domain (though the setup
> > > > > > > > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > > > > > > > definitions of that hook. Or alternatively maybe it would be even
> > > > > > > > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > > > > > > > euid and then use the domain from those credentials for this hook...
> > > > > > > > > I'm not sure which of those would be easier.
> > > > > > > >
> > > > > > > > (For what it's worth, I think the first option would probably be
> > > > > > > > easier to implement and ship for now, since you can basically copy
> > > > > > > > what Smack and SELinux are already doing in their implementations of
> > > > > > > > these hooks. I think the second option would theoretically result in
> > > > > > > > nicer code, but it might require a bit more work, and you'd have to
> > > > > > > > include the maintainers of the file locking code in the review of such
> > > > > > > > refactoring and have them approve those changes. So if you want to get
> > > > > > > > this patchset into the kernel quickly, the first option might be
> > > > > > > > better for now?)
> > > > > > > >
> > > > > > >
> > > > > > > I agree, let's extend landlock_file_security with a new "fown" pointer
> > > > > > > to a Landlock domain. We'll need to call landlock_get_ruleset() in
> > > > > > > hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> > > > > > > hook_file_free_security().
> > > > > > I think we should add a new hook (hook_file_set_owner()) to initialize
> > > > > > the "fown" pointer and call landlock_get_ruleset() in that?
> > > > >
> > > > > Yeah. Initialize the pointer in the file_set_fowner hook, and read the
> > > > > pointer in the file_send_sigiotask hook.
> > > > >
> > > > > Note that in the file_set_fowner hook, you'll probably need to use
> > > > > both landlock_get_ruleset() (to take a reference on the ruleset you're
> > > > > storing in the fown pointer) and landlock_put_ruleset() (to drop the
> > > > > reference to the ruleset that the fown pointer was pointing to
> > > > > before). And you'll need to use some kind of lock to protect the fown
> > > > > pointer - either by adding an appropriate lock next to your fown
> > > > > pointer or by using some appropriate existing lock in "struct file".
> > > > > Probably it's cleanest to have your own lock for this? (This lock will
> > > > > have to be something like a spinlock, not a mutex, since you need to
> > > > > be able to acquire it in the file_set_fowner hook, which runs inside
> > > > > an RCU read-side critical section, where sleeping is forbidden -
> > > > > acquiring a mutex can sleep and therefore is forbidden in this
> > > > > context, acquiring a spinlock can't sleep.)
> > > >
> > > > Yes, I think this should work for file_set_fowner:
> > > >
> > > > struct landlock_ruleset *prev_dom, *new_dom;
> > > >
> > > > new_dom = landlock_get_current_domain();
> > > > landlock_get_ruleset(new_dom);
> > > >
> > > > /* Cf. f_modown() */
> > > > write_lock_irq(&filp->f_owner.lock);
> > > > prev_dom = rcu_replace_pointer(&landlock_file(file)->fown_domain,
> > > >         new_dom, lockdep_is_held(&filp->f_owner.lock));
> > > > write_unlock_irq(&filp->f_owner.lock);
> > > >
> > > > landlock_put_ruleset_rcu(prev_dom);
> > > >
> > > >
> > > > With landlock_put_ruleset_rcu() define with this:
> > > >
> > > > diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
> > > > index a93bdbf52fff..897116205520 100644
> > > > --- a/security/landlock/ruleset.c
> > > > +++ b/security/landlock/ruleset.c
> > > > @@ -524,6 +524,20 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
> > > >         }
> > > >  }
> > > >
> > > > +static void free_ruleset_rcu(struct rcu_head *const head)
> > > > +{
> > > > +       struct landlock_ruleset *ruleset;
> > > > +
> > > > +       ruleset = container_of(head, struct landlock_ruleset, rcu);
> > > > +       free_ruleset(ruleset);
> > > > +}
> > > 
> > > free_ruleset() can block but RCU callbacks aren't allowed to block,
> > > that's why landlock_put_ruleset_deferred() exists.
> > 
> > Yes, but landlock_put_ruleset_deferred() doesn't wait for RCU read-side
> > critical sections.
> > 
> > > 
> > > > +
> > > > +void landlock_put_ruleset_rcu(struct landlock_ruleset *const ruleset)
> > > > +{
> > > > +       if (ruleset && refcount_dec_and_test(&ruleset->usage))
> > > > +               call_rcu(&ruleset->rcu, free_ruleset_rcu);
> > > > +}
> > > 
> > > No, this pattern of combining refcounting and RCU doesn't work.
> > > 
> > > One legal pattern is:
> > > *The entire object* is subject to RCU; any refcount decrement that
> > > drops the refcount to 0 does call_rcu().
> > > (This is the usual RCU refcounting pattern in the kernel.)
> > > 
> > > Another legal pattern is:
> > > One particular *reference* is subject to RCU; when dropping this
> > > reference, *the refcount decrement is delayed with call_rcu()*.
> > > (This is basically the RCU pattern used for stuff like the reference
> > > from "struct pid" to "struct task_struct".)
> > > 
> > > But you can't use call_rcu() depending on whether the last dropped
> > > reference happened to be a reference that required RCU; what if the
> > > refcount is 2, then you first call landlock_put_ruleset_rcu() which
> > > decrements the refcount to 1, and immediately afterwards you call
> > > landlock_put_ruleset() which drops the refcount to 0 and frees the
> > > object without waiting for an RCU grace period? Like so:
> > > 
> > > thread A         thread B
> > > ========         ========
> > > rcu_read_lock()
> > > ruleset = rcu_dereference(...->fown_domain)
> > >                  ruleset = rcu_replace_pointer(...->fown_domain, new_dom, ...)
> > >                  landlock_put_ruleset_rcu(ruleset)
> > >                  landlock_put_ruleset(ruleset)
> > >                    free_ruleset(ruleset)
> > >                      kfree(ruleset)
> > > access ruleset [UAF]
> > > rcu_read_unlock()
> > 
> > Indeed
> > 
> > > 
> > > So if you want to use RCU lifetime for this, I think you'll have to
> > > turn landlock_put_ruleset() and landlock_put_ruleset_deferred() into
> > > one common function that always, when reaching refcount 0, schedules
> > > an RCU callback which then schedules a work_struct which then does
> > > free_ruleset().
> > > 
> > > I think that would be a little ugly, and it would look nicer to just
> > > use normal locking in the file_send_sigiotask hook?
> > 
> > I don't see how we can do that without delaying the free_ruleset() call
> > to after the RCU read-side critical section in f_setown().
> > 
> > What about calling refcount_dec_and_test() in free_ruleset_rcu()?  That
> > would almost always queue this call but it looks safe.
> 
> That's the second pattern you pointed out just above.  Because we should
> only use the related struct rcu_head for this call site (which is
> protected with f_owner.lock), we should make that explicit with a
> "_fown" suffix for the function (landlock_put_ruleset_rcu_fown) and the
> ruleset's field (rcu_fown).

Hmm, this is not enough because this function site could still be called
with the same ruleset several time, which could lead to memory leak and
potential rcu_head pointers inconsistencies...

> 
> > 
> > An alternative might be to call synchronize_rcu() in free_ruleset(), but
> > it's a big ugly too.
> > 
> > BTW, I don't understand why neither SELinux nor Smack use (explicit)
> > atomic operations nor lock.  And it looks weird that
> > security_file_set_fowner() isn't called by f_modown() with the same
> > locking to avoid races.
Mickaël Salaün Aug. 9, 2024, 1:17 p.m. UTC | #14
Talking about f_modown() and security_file_set_fowner(), it looks like
there are some issues:

On Fri, Aug 09, 2024 at 02:44:06PM +0200, Jann Horn wrote:
> On Fri, Aug 9, 2024 at 12:59 PM Mickaël Salaün <mic@digikod.net> wrote:

[...]

> > BTW, I don't understand why neither SELinux nor Smack use (explicit)
> > atomic operations nor lock.
> 
> Yeah, I think they're sloppy and kinda wrong - but it sorta works in
> practice mostly because they don't have to do any refcounting around
> this?
> 
> > And it looks weird that
> > security_file_set_fowner() isn't called by f_modown() with the same
> > locking to avoid races.
> 
> True. I imagine maybe the thought behind this design could have been
> that LSMs should have their own locking, and that calling an LSM hook
> with IRQs off is a little weird? But the way the LSMs actually use the
> hook now, it might make sense to call the LSM with the lock held and
> IRQs off...
> 

Would it be OK (for VFS, SELinux, and Smack maintainers) to move the
security_file_set_fowner() call into f_modown(), especially where
UID/EUID are populated.  That would only call security_file_set_fowner()
when the fown is actually set, which I think could also fix a bug for
SELinux and Smack.

Could we replace the uid and euid fields with a pointer to the current
credentials?  This would enables LSMs to not copy the same kind of
credential informations and save some memory, simplify credential
management, and improve consistency.
Mickaël Salaün Aug. 9, 2024, 1:37 p.m. UTC | #15
On Fri, Aug 09, 2024 at 02:44:06PM +0200, Jann Horn wrote:
> On Fri, Aug 9, 2024 at 12:59 PM Mickaël Salaün <mic@digikod.net> wrote:
> > On Thu, Aug 08, 2024 at 04:42:23PM +0200, Jann Horn wrote:
> > > On Thu, Aug 8, 2024 at 4:09 PM Mickaël Salaün <mic@digikod.net> wrote:
> > > > On Thu, Aug 08, 2024 at 03:10:54AM +0200, Jann Horn wrote:
> > > > > On Thu, Aug 8, 2024 at 1:36 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> > > > > > > On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > > > > > > > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > > > > > > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > > > > > Currently, a sandbox process is not restricted to send a signal
> > > > > > > > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > > > > > > > Ability to sending a signal for a sandboxed process should be
> > > > > > > > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > > > > > > > we extend "scoped" field in a ruleset with
> > > > > > > > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > > > > > > > sending any signal from within a sandbox process to its
> > > > > > > > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > > > > > > > [...]
> > > > > > > > > > +       if (is_scoped)
> > > > > > > > > > +               return 0;
> > > > > > > > > > +
> > > > > > > > > > +       return -EPERM;
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > > > > > > > +                                   struct fown_struct *fown, int signum)
> > > > > > >
> > > > > > > I was wondering if we should handle this case, but I guess it makes
> > > > > > > sense to have a consistent policy for all kind of user-triggerable
> > > > > > > signals.
> > > > > > >
> > > > > > > > > > +{
> > > > > > > > > > +       bool is_scoped;
> > > > > > > > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > > > > > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > > > > > > > >
> > > > > > > > > I'm not an expert on how the fowner stuff works, but I think this will
> > > > > > > > > probably give you "result = NULL" if the file owner PID has already
> > > > > > > > > exited, and then the following landlock_get_task_domain() would
> > > > > > > > > probably crash? But I'm not entirely sure about how this works.
> > > > > > > > >
> > > > > > > > > I think the intended way to use this hook would be to instead use the
> > > > > > > > > "file_set_fowner" hook to record the owning domain (though the setup
> > > > > > > > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > > > > > > > definitions of that hook. Or alternatively maybe it would be even
> > > > > > > > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > > > > > > > euid and then use the domain from those credentials for this hook...
> > > > > > > > > I'm not sure which of those would be easier.
> > > > > > > >
> > > > > > > > (For what it's worth, I think the first option would probably be
> > > > > > > > easier to implement and ship for now, since you can basically copy
> > > > > > > > what Smack and SELinux are already doing in their implementations of
> > > > > > > > these hooks. I think the second option would theoretically result in
> > > > > > > > nicer code, but it might require a bit more work, and you'd have to
> > > > > > > > include the maintainers of the file locking code in the review of such
> > > > > > > > refactoring and have them approve those changes. So if you want to get
> > > > > > > > this patchset into the kernel quickly, the first option might be
> > > > > > > > better for now?)
> > > > > > > >
> > > > > > >
> > > > > > > I agree, let's extend landlock_file_security with a new "fown" pointer
> > > > > > > to a Landlock domain. We'll need to call landlock_get_ruleset() in
> > > > > > > hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> > > > > > > hook_file_free_security().
> > > > > > I think we should add a new hook (hook_file_set_owner()) to initialize
> > > > > > the "fown" pointer and call landlock_get_ruleset() in that?
> > > > >
> > > > > Yeah. Initialize the pointer in the file_set_fowner hook, and read the
> > > > > pointer in the file_send_sigiotask hook.
> > > > >
> > > > > Note that in the file_set_fowner hook, you'll probably need to use
> > > > > both landlock_get_ruleset() (to take a reference on the ruleset you're
> > > > > storing in the fown pointer) and landlock_put_ruleset() (to drop the
> > > > > reference to the ruleset that the fown pointer was pointing to
> > > > > before). And you'll need to use some kind of lock to protect the fown
> > > > > pointer - either by adding an appropriate lock next to your fown
> > > > > pointer or by using some appropriate existing lock in "struct file".
> > > > > Probably it's cleanest to have your own lock for this? (This lock will
> > > > > have to be something like a spinlock, not a mutex, since you need to
> > > > > be able to acquire it in the file_set_fowner hook, which runs inside
> > > > > an RCU read-side critical section, where sleeping is forbidden -
> > > > > acquiring a mutex can sleep and therefore is forbidden in this
> > > > > context, acquiring a spinlock can't sleep.)
> > > >
> > > > Yes, I think this should work for file_set_fowner:
> > > >
> > > > struct landlock_ruleset *prev_dom, *new_dom;
> > > >
> > > > new_dom = landlock_get_current_domain();
> > > > landlock_get_ruleset(new_dom);
> > > >
> > > > /* Cf. f_modown() */
> > > > write_lock_irq(&filp->f_owner.lock);
> > > > prev_dom = rcu_replace_pointer(&landlock_file(file)->fown_domain,
> > > >         new_dom, lockdep_is_held(&filp->f_owner.lock));
> > > > write_unlock_irq(&filp->f_owner.lock);
> > > >
> > > > landlock_put_ruleset_rcu(prev_dom);
> > > >
> > > >
> > > > With landlock_put_ruleset_rcu() define with this:
> > > >
> > > > diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
> > > > index a93bdbf52fff..897116205520 100644
> > > > --- a/security/landlock/ruleset.c
> > > > +++ b/security/landlock/ruleset.c
> > > > @@ -524,6 +524,20 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
> > > >         }
> > > >  }
> > > >
> > > > +static void free_ruleset_rcu(struct rcu_head *const head)
> > > > +{
> > > > +       struct landlock_ruleset *ruleset;
> > > > +
> > > > +       ruleset = container_of(head, struct landlock_ruleset, rcu);
> > > > +       free_ruleset(ruleset);
> > > > +}
> > >
> > > free_ruleset() can block but RCU callbacks aren't allowed to block,
> > > that's why landlock_put_ruleset_deferred() exists.
> >
> > Yes, but landlock_put_ruleset_deferred() doesn't wait for RCU read-side
> > critical sections.
> 
> Ah, I phrased that badly - I didn't mean to suggest that you should
> use landlock_put_ruleset_deferred() as a replacement for call_rcu().
> 
> [...]
> > > So if you want to use RCU lifetime for this, I think you'll have to
> > > turn landlock_put_ruleset() and landlock_put_ruleset_deferred() into
> > > one common function that always, when reaching refcount 0, schedules
> > > an RCU callback which then schedules a work_struct which then does
> > > free_ruleset().
> > >
> > > I think that would be a little ugly, and it would look nicer to just
> > > use normal locking in the file_send_sigiotask hook?
> >
> > I don't see how we can do that without delaying the free_ruleset() call
> > to after the RCU read-side critical section in f_setown().
> 
> It should work if you used landlock_put_ruleset_deferred() instead of
> landlock_put_ruleset().

Calling landlock_put_ruleset_deferred() in hook_file_set_fowner() or
replacing all landlock_put_ruleset() calls?

The deferred work queue is not guarantee to run after all concurrent RCU
read-side critical sections right?  Calling synchronize_rcu() in
free_ruleset_work() should give this guarantee, but it's not nice.  We
could add a boolean in landlock_ruleset to only call synchronize_rcu()
when required (i.e. called from file_set_fowner).

> 
> > What about calling refcount_dec_and_test() in free_ruleset_rcu()?  That
> > would almost always queue this call but it looks safe.
> 
> Every queued RCU invocation needs to have its own rcu_head - I think
> the approach you're suggesting could end up queuing the same rcu_head
> multiple times?

Right

> 
> > An alternative might be to call synchronize_rcu() in free_ruleset(), but
> > it's a big ugly too.
Jann Horn Aug. 9, 2024, 1:57 p.m. UTC | #16
On Fri, Aug 9, 2024 at 3:37 PM Mickaël Salaün <mic@digikod.net> wrote:
> On Fri, Aug 09, 2024 at 02:44:06PM +0200, Jann Horn wrote:
> > On Fri, Aug 9, 2024 at 12:59 PM Mickaël Salaün <mic@digikod.net> wrote:
> > > On Thu, Aug 08, 2024 at 04:42:23PM +0200, Jann Horn wrote:
[...]
> > > > So if you want to use RCU lifetime for this, I think you'll have to
> > > > turn landlock_put_ruleset() and landlock_put_ruleset_deferred() into
> > > > one common function that always, when reaching refcount 0, schedules
> > > > an RCU callback which then schedules a work_struct which then does
> > > > free_ruleset().
> > > >
> > > > I think that would be a little ugly, and it would look nicer to just
> > > > use normal locking in the file_send_sigiotask hook?
> > >
> > > I don't see how we can do that without delaying the free_ruleset() call
> > > to after the RCU read-side critical section in f_setown().
> >
> > It should work if you used landlock_put_ruleset_deferred() instead of
> > landlock_put_ruleset().
>
> Calling landlock_put_ruleset_deferred() in hook_file_set_fowner() or
> replacing all landlock_put_ruleset() calls?

Calling landlock_put_ruleset_deferred() in hook_file_set_fowner().

> The deferred work queue is not guarantee to run after all concurrent RCU
> read-side critical sections right?

Yes, I was talking about my "it would look nicer to just use normal
locking in the file_send_sigiotask hook" suggestion - don't use any
RCU stuff, just use the same lock in file_set_fowner and
file_send_sigiotask.
Jann Horn Aug. 9, 2024, 2 p.m. UTC | #17
On Fri, Aug 9, 2024 at 3:18 PM Mickaël Salaün <mic@digikod.net> wrote:
> Talking about f_modown() and security_file_set_fowner(), it looks like
> there are some issues:
>
> On Fri, Aug 09, 2024 at 02:44:06PM +0200, Jann Horn wrote:
> > On Fri, Aug 9, 2024 at 12:59 PM Mickaël Salaün <mic@digikod.net> wrote:
>
> [...]
>
> > > BTW, I don't understand why neither SELinux nor Smack use (explicit)
> > > atomic operations nor lock.
> >
> > Yeah, I think they're sloppy and kinda wrong - but it sorta works in
> > practice mostly because they don't have to do any refcounting around
> > this?
> >
> > > And it looks weird that
> > > security_file_set_fowner() isn't called by f_modown() with the same
> > > locking to avoid races.
> >
> > True. I imagine maybe the thought behind this design could have been
> > that LSMs should have their own locking, and that calling an LSM hook
> > with IRQs off is a little weird? But the way the LSMs actually use the
> > hook now, it might make sense to call the LSM with the lock held and
> > IRQs off...
> >
>
> Would it be OK (for VFS, SELinux, and Smack maintainers) to move the
> security_file_set_fowner() call into f_modown(), especially where
> UID/EUID are populated.  That would only call security_file_set_fowner()
> when the fown is actually set, which I think could also fix a bug for
> SELinux and Smack.
>
> Could we replace the uid and euid fields with a pointer to the current
> credentials?  This would enables LSMs to not copy the same kind of
> credential informations and save some memory, simplify credential
> management, and improve consistency.

To clarify: These two paragraphs are supposed to be two alternative
options, right? One option is to call security_file_set_fowner() with
the lock held, the other option is to completely rip out the
security_file_set_fowner() hook and instead let the VFS provide LSMs
with the creds they need for the file_send_sigiotask hook?
Christian Brauner Aug. 9, 2024, 2:44 p.m. UTC | #18
On Fri, Aug 09, 2024 at 04:00:41PM GMT, Jann Horn wrote:
> On Fri, Aug 9, 2024 at 3:18 PM Mickaël Salaün <mic@digikod.net> wrote:
> > Talking about f_modown() and security_file_set_fowner(), it looks like
> > there are some issues:
> >
> > On Fri, Aug 09, 2024 at 02:44:06PM +0200, Jann Horn wrote:
> > > On Fri, Aug 9, 2024 at 12:59 PM Mickaël Salaün <mic@digikod.net> wrote:
> >
> > [...]
> >
> > > > BTW, I don't understand why neither SELinux nor Smack use (explicit)
> > > > atomic operations nor lock.
> > >
> > > Yeah, I think they're sloppy and kinda wrong - but it sorta works in
> > > practice mostly because they don't have to do any refcounting around
> > > this?
> > >
> > > > And it looks weird that
> > > > security_file_set_fowner() isn't called by f_modown() with the same
> > > > locking to avoid races.
> > >
> > > True. I imagine maybe the thought behind this design could have been
> > > that LSMs should have their own locking, and that calling an LSM hook
> > > with IRQs off is a little weird? But the way the LSMs actually use the
> > > hook now, it might make sense to call the LSM with the lock held and
> > > IRQs off...
> > >
> >
> > Would it be OK (for VFS, SELinux, and Smack maintainers) to move the
> > security_file_set_fowner() call into f_modown(), especially where
> > UID/EUID are populated.  That would only call security_file_set_fowner()
> > when the fown is actually set, which I think could also fix a bug for
> > SELinux and Smack.
> >
> > Could we replace the uid and euid fields with a pointer to the current
> > credentials?  This would enables LSMs to not copy the same kind of
> > credential informations and save some memory, simplify credential
> > management, and improve consistency.
> 
> To clarify: These two paragraphs are supposed to be two alternative
> options, right? One option is to call security_file_set_fowner() with
> the lock held, the other option is to completely rip out the
> security_file_set_fowner() hook and instead let the VFS provide LSMs
> with the creds they need for the file_send_sigiotask hook?

I think it would be fine to stash the credentials in struct fown_struct
same as we do for struct file itself. Calling security_file_set_fowner()
with the irq lock held seems suboptimal to me. Plus, this also means one
less LSM hook and that seems like a win too.
Paul Moore Aug. 11, 2024, 10:04 p.m. UTC | #19
On Fri, Aug 9, 2024 at 10:01 AM Jann Horn <jannh@google.com> wrote:
> On Fri, Aug 9, 2024 at 3:18 PM Mickaël Salaün <mic@digikod.net> wrote:
> > Talking about f_modown() and security_file_set_fowner(), it looks like
> > there are some issues:
> >
> > On Fri, Aug 09, 2024 at 02:44:06PM +0200, Jann Horn wrote:
> > > On Fri, Aug 9, 2024 at 12:59 PM Mickaël Salaün <mic@digikod.net> wrote:
> >
> > [...]
> >
> > > > BTW, I don't understand why neither SELinux nor Smack use (explicit)
> > > > atomic operations nor lock.
> > >
> > > Yeah, I think they're sloppy and kinda wrong - but it sorta works in
> > > practice mostly because they don't have to do any refcounting around
> > > this?
> > >
> > > > And it looks weird that
> > > > security_file_set_fowner() isn't called by f_modown() with the same
> > > > locking to avoid races.
> > >
> > > True. I imagine maybe the thought behind this design could have been
> > > that LSMs should have their own locking, and that calling an LSM hook
> > > with IRQs off is a little weird? But the way the LSMs actually use the
> > > hook now, it might make sense to call the LSM with the lock held and
> > > IRQs off...
> > >
> >
> > Would it be OK (for VFS, SELinux, and Smack maintainers) to move the
> > security_file_set_fowner() call into f_modown(), especially where
> > UID/EUID are populated.  That would only call security_file_set_fowner()
> > when the fown is actually set, which I think could also fix a bug for
> > SELinux and Smack.
> >
> > Could we replace the uid and euid fields with a pointer to the current
> > credentials?  This would enables LSMs to not copy the same kind of
> > credential informations and save some memory, simplify credential
> > management, and improve consistency.
>
> To clarify: These two paragraphs are supposed to be two alternative
> options, right? One option is to call security_file_set_fowner() with
> the lock held, the other option is to completely rip out the
> security_file_set_fowner() hook and instead let the VFS provide LSMs
> with the creds they need for the file_send_sigiotask hook?

I'm not entirely clear on what is being proposed either.  Some quick
pseudo code might do wonders here to help clarify things.

From a LSM perspective I suspect we are always going to need some sort
of hook in the F_SETOWN code path as the LSM needs to potentially
capture state/attributes/something-LSM-specific at that
context/point-in-time.  While I think it is okay if we want to
consider relocating the security_file_set_fowner() within the F_SETOWN
call path, I don't think we can remove it, even if we add additional
LSM security blobs.
Jann Horn Aug. 12, 2024, 1:09 p.m. UTC | #20
On Mon, Aug 12, 2024 at 12:04 AM Paul Moore <paul@paul-moore.com> wrote:
>
> On Fri, Aug 9, 2024 at 10:01 AM Jann Horn <jannh@google.com> wrote:
> > On Fri, Aug 9, 2024 at 3:18 PM Mickaël Salaün <mic@digikod.net> wrote:
> > > Talking about f_modown() and security_file_set_fowner(), it looks like
> > > there are some issues:
> > >
> > > On Fri, Aug 09, 2024 at 02:44:06PM +0200, Jann Horn wrote:
> > > > On Fri, Aug 9, 2024 at 12:59 PM Mickaël Salaün <mic@digikod.net> wrote:
> > >
> > > [...]
> > >
> > > > > BTW, I don't understand why neither SELinux nor Smack use (explicit)
> > > > > atomic operations nor lock.
> > > >
> > > > Yeah, I think they're sloppy and kinda wrong - but it sorta works in
> > > > practice mostly because they don't have to do any refcounting around
> > > > this?
> > > >
> > > > > And it looks weird that
> > > > > security_file_set_fowner() isn't called by f_modown() with the same
> > > > > locking to avoid races.
> > > >
> > > > True. I imagine maybe the thought behind this design could have been
> > > > that LSMs should have their own locking, and that calling an LSM hook
> > > > with IRQs off is a little weird? But the way the LSMs actually use the
> > > > hook now, it might make sense to call the LSM with the lock held and
> > > > IRQs off...
> > > >
> > >
> > > Would it be OK (for VFS, SELinux, and Smack maintainers) to move the
> > > security_file_set_fowner() call into f_modown(), especially where
> > > UID/EUID are populated.  That would only call security_file_set_fowner()
> > > when the fown is actually set, which I think could also fix a bug for
> > > SELinux and Smack.
> > >
> > > Could we replace the uid and euid fields with a pointer to the current
> > > credentials?  This would enables LSMs to not copy the same kind of
> > > credential informations and save some memory, simplify credential
> > > management, and improve consistency.
> >
> > To clarify: These two paragraphs are supposed to be two alternative
> > options, right? One option is to call security_file_set_fowner() with
> > the lock held, the other option is to completely rip out the
> > security_file_set_fowner() hook and instead let the VFS provide LSMs
> > with the creds they need for the file_send_sigiotask hook?
>
> I'm not entirely clear on what is being proposed either.  Some quick
> pseudo code might do wonders here to help clarify things.
>
> From a LSM perspective I suspect we are always going to need some sort
> of hook in the F_SETOWN code path as the LSM needs to potentially
> capture state/attributes/something-LSM-specific at that
> context/point-in-time.

The only thing LSMs currently do there is capture state from
current->cred. So if the VFS takes care of capturing current->cred
there, we should be able to rip out all the file_set_fowner stuff.
Something like this (totally untested):

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 300e5d9ad913..17f159bf625f 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -98,8 +98,9 @@ static void f_modown(struct file *filp, struct pid
*pid, enum pid_type type,

                 if (pid) {
                         const struct cred *cred = current_cred();
-                        filp->f_owner.uid = cred->uid;
-                        filp->f_owner.euid = cred->euid;
+                        if (filp->f_owner.owner_cred)
+                                put_cred(filp->f_owner.owner_cred);
+                        filp->f_owner.owner_cred = get_current_cred();
                 }
         }
         write_unlock_irq(&filp->f_owner.lock);
@@ -108,7 +109,6 @@ static void f_modown(struct file *filp, struct pid
*pid, enum pid_type type,
 void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
                 int force)
 {
-        security_file_set_fowner(filp);
         f_modown(filp, pid, type, force);
 }
 EXPORT_SYMBOL(__f_setown);
diff --git a/fs/file_table.c b/fs/file_table.c
index ca7843dde56d..440796fc8e91 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -426,6 +426,8 @@ static void __fput(struct file *file)
         }
         fops_put(file->f_op);
         put_pid(file->f_owner.pid);
+        if (file->f_owner.owner_cred)
+                put_cred(file->f_owner.owner_cred);
         put_file_access(file);
         dput(dentry);
         if (unlikely(mode & FMODE_NEED_UNMOUNT))
diff --git a/include/linux/fs.h b/include/linux/fs.h
index fd34b5755c0b..43bfad373bf9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -950,7 +950,7 @@ struct fown_struct {
         rwlock_t lock;          /* protects pid, uid, euid fields */
         struct pid *pid;        /* pid or -pgrp where SIGIO should be sent */
         enum pid_type pid_type;        /* Kind of process group SIGIO
should be sent to */
-        kuid_t uid, euid;        /* uid/euid of process setting the owner */
+        const struct cred __rcu *owner_cred;
         int signum;                /* posix.1b rt signal to be
delivered on IO */
 };

diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 855db460e08b..2c0935dd079e 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -197,7 +197,6 @@ LSM_HOOK(int, 0, file_mprotect, struct vm_area_struct *vma,
 LSM_HOOK(int, 0, file_lock, struct file *file, unsigned int cmd)
 LSM_HOOK(int, 0, file_fcntl, struct file *file, unsigned int cmd,
          unsigned long arg)
-LSM_HOOK(void, LSM_RET_VOID, file_set_fowner, struct file *file)
 LSM_HOOK(int, 0, file_send_sigiotask, struct task_struct *tsk,
          struct fown_struct *fown, int sig)
 LSM_HOOK(int, 0, file_receive, struct file *file)
diff --git a/include/linux/security.h b/include/linux/security.h
index 1390f1efb4f0..3343db05fa2e 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1079,11 +1079,6 @@ static inline int security_file_fcntl(struct
file *file, unsigned int cmd,
         return 0;
 }

-static inline void security_file_set_fowner(struct file *file)
-{
-        return;
-}
-
 static inline int security_file_send_sigiotask(struct task_struct *tsk,
                                                struct fown_struct *fown,
                                                int sig)
diff --git a/security/security.c b/security/security.c
index 8cee5b6c6e6d..a53d8d7fe815 100644
--- a/security/security.c
+++ b/security/security.c
@@ -2924,20 +2924,6 @@ int security_file_fcntl(struct file *file,
unsigned int cmd, unsigned long arg)
         return call_int_hook(file_fcntl, file, cmd, arg);
 }

-/**
- * security_file_set_fowner() - Set the file owner info in the LSM blob
- * @file: the file
- *
- * Save owner security information (typically from current->security) in
- * file->f_security for later use by the send_sigiotask hook.
- *
- * Return: Returns 0 on success.
- */
-void security_file_set_fowner(struct file *file)
-{
-        call_void_hook(file_set_fowner, file);
-}
-
 /**
  * security_file_send_sigiotask() - Check if sending SIGIO/SIGURG is allowed
  * @tsk: target task
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 55c78c318ccd..37675d280837 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3649,7 +3649,6 @@ static int selinux_file_alloc_security(struct file *file)
         u32 sid = current_sid();

         fsec->sid = sid;
-        fsec->fown_sid = sid;

         return 0;
 }
@@ -3923,24 +3922,16 @@ static int selinux_file_fcntl(struct file
*file, unsigned int cmd,
         return err;
 }

-static void selinux_file_set_fowner(struct file *file)
-{
-        struct file_security_struct *fsec;
-
-        fsec = selinux_file(file);
-        fsec->fown_sid = current_sid();
-}
-
 static int selinux_file_send_sigiotask(struct task_struct *tsk,
                                        struct fown_struct *fown, int signum)
 {
-        struct file *file;
+        /* struct fown_struct is never outside the context of a struct file */
+        struct file *file = container_of(fown, struct file, f_owner);
         u32 sid = task_sid_obj(tsk);
         u32 perm;
         struct file_security_struct *fsec;
-
-        /* struct fown_struct is never outside the context of a struct file */
-        file = container_of(fown, struct file, f_owner);
+        struct cred_struct *fown_cred = rcu_dereference(fown->owner_cred);
+        u32 fown_sid = cred_sid(fown_cred ?: file->f_cred);

         fsec = selinux_file(file);

@@ -3949,7 +3940,7 @@ static int selinux_file_send_sigiotask(struct
task_struct *tsk,
         else
                 perm = signal_to_av(signum);

-        return avc_has_perm(fsec->fown_sid, sid,
+        return avc_has_perm(fown_sid, sid,
                             SECCLASS_PROCESS, perm, NULL);
 }

diff --git a/security/selinux/include/objsec.h
b/security/selinux/include/objsec.h
index dea1d6f3ed2d..d55b7f8d3a3d 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -56,7 +56,6 @@ struct inode_security_struct {

 struct file_security_struct {
         u32 sid; /* SID of open file description */
-        u32 fown_sid; /* SID of file owner (for SIGIO) */
         u32 isid; /* SID of inode at the time of file open */
         u32 pseqno; /* Policy seqno at the time of file open */
 };
diff --git a/security/smack/smack.h b/security/smack/smack.h
index 041688e5a77a..06bac00cc796 100644
--- a/security/smack/smack.h
+++ b/security/smack/smack.h
@@ -328,12 +328,6 @@ static inline struct task_smack *smack_cred(const
struct cred *cred)
         return cred->security + smack_blob_sizes.lbs_cred;
 }

-static inline struct smack_known **smack_file(const struct file *file)
-{
-        return (struct smack_known **)(file->f_security +
-                                       smack_blob_sizes.lbs_file);
-}
-
 static inline struct inode_smack *smack_inode(const struct inode *inode)
 {
         return inode->i_security + smack_blob_sizes.lbs_inode;
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 4164699cd4f6..02caa8b9d456 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1675,26 +1675,6 @@ static void smack_inode_getsecid(struct inode
*inode, u32 *secid)
  * label changing that SELinux does.
  */

-/**
- * smack_file_alloc_security - assign a file security blob
- * @file: the object
- *
- * The security blob for a file is a pointer to the master
- * label list, so no allocation is done.
- *
- * f_security is the owner security information. It
- * isn't used on file access checks, it's for send_sigio.
- *
- * Returns 0
- */
-static int smack_file_alloc_security(struct file *file)
-{
-        struct smack_known **blob = smack_file(file);
-
-        *blob = smk_of_current();
-        return 0;
-}
-
 /**
  * smack_file_ioctl - Smack check on ioctls
  * @file: the object
@@ -1913,18 +1893,6 @@ static int smack_mmap_file(struct file *file,
         return rc;
 }

-/**
- * smack_file_set_fowner - set the file security blob value
- * @file: object in question
- *
- */
-static void smack_file_set_fowner(struct file *file)
-{
-        struct smack_known **blob = smack_file(file);
-
-        *blob = smk_of_current();
-}
-
 /**
  * smack_file_send_sigiotask - Smack on sigio
  * @tsk: The target task
@@ -1946,6 +1914,7 @@ static int smack_file_send_sigiotask(struct
task_struct *tsk,
         struct file *file;
         int rc;
         struct smk_audit_info ad;
+        struct cred_struct *fown_cred = rcu_dereference(fown->owner_cred);

         /*
          * struct fown_struct is never outside the context of a struct file
@@ -1953,8 +1922,7 @@ static int smack_file_send_sigiotask(struct
task_struct *tsk,
         file = container_of(fown, struct file, f_owner);

         /* we don't log here as rc can be overriden */
-        blob = smack_file(file);
-        skp = *blob;
+        skp = smk_of_task(fown_cred ?: file->f_cred);
         rc = smk_access(skp, tkp, MAY_DELIVER, NULL);
         rc = smk_bu_note("sigiotask", skp, tkp, MAY_DELIVER, rc);

@@ -5045,7 +5013,6 @@ static int smack_uring_cmd(struct io_uring_cmd *ioucmd)

 struct lsm_blob_sizes smack_blob_sizes __ro_after_init = {
         .lbs_cred = sizeof(struct task_smack),
-        .lbs_file = sizeof(struct smack_known *),
         .lbs_inode = sizeof(struct inode_smack),
         .lbs_ipc = sizeof(struct smack_known *),
         .lbs_msg_msg = sizeof(struct smack_known *),
@@ -5104,7 +5071,6 @@ static struct security_hook_list smack_hooks[]
__ro_after_init = {
         LSM_HOOK_INIT(file_fcntl, smack_file_fcntl),
         LSM_HOOK_INIT(mmap_file, smack_mmap_file),
         LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),
-        LSM_HOOK_INIT(file_set_fowner, smack_file_set_fowner),
         LSM_HOOK_INIT(file_send_sigiotask, smack_file_send_sigiotask),
         LSM_HOOK_INIT(file_receive, smack_file_receive),


> While I think it is okay if we want to
> consider relocating the security_file_set_fowner() within the F_SETOWN
> call path, I don't think we can remove it, even if we add additional
> LSM security blobs.
Mickaël Salaün Aug. 12, 2024, 2:55 p.m. UTC | #21
On Mon, Aug 12, 2024 at 03:09:08PM +0200, Jann Horn wrote:
> On Mon, Aug 12, 2024 at 12:04 AM Paul Moore <paul@paul-moore.com> wrote:
> >
> > On Fri, Aug 9, 2024 at 10:01 AM Jann Horn <jannh@google.com> wrote:
> > > On Fri, Aug 9, 2024 at 3:18 PM Mickaël Salaün <mic@digikod.net> wrote:
> > > > Talking about f_modown() and security_file_set_fowner(), it looks like
> > > > there are some issues:
> > > >
> > > > On Fri, Aug 09, 2024 at 02:44:06PM +0200, Jann Horn wrote:
> > > > > On Fri, Aug 9, 2024 at 12:59 PM Mickaël Salaün <mic@digikod.net> wrote:
> > > >
> > > > [...]
> > > >
> > > > > > BTW, I don't understand why neither SELinux nor Smack use (explicit)
> > > > > > atomic operations nor lock.
> > > > >
> > > > > Yeah, I think they're sloppy and kinda wrong - but it sorta works in
> > > > > practice mostly because they don't have to do any refcounting around
> > > > > this?
> > > > >
> > > > > > And it looks weird that
> > > > > > security_file_set_fowner() isn't called by f_modown() with the same
> > > > > > locking to avoid races.
> > > > >
> > > > > True. I imagine maybe the thought behind this design could have been
> > > > > that LSMs should have their own locking, and that calling an LSM hook
> > > > > with IRQs off is a little weird? But the way the LSMs actually use the
> > > > > hook now, it might make sense to call the LSM with the lock held and
> > > > > IRQs off...
> > > > >
> > > >
> > > > Would it be OK (for VFS, SELinux, and Smack maintainers) to move the
> > > > security_file_set_fowner() call into f_modown(), especially where
> > > > UID/EUID are populated.  That would only call security_file_set_fowner()
> > > > when the fown is actually set, which I think could also fix a bug for
> > > > SELinux and Smack.
> > > >
> > > > Could we replace the uid and euid fields with a pointer to the current
> > > > credentials?  This would enables LSMs to not copy the same kind of
> > > > credential informations and save some memory, simplify credential
> > > > management, and improve consistency.
> > >
> > > To clarify: These two paragraphs are supposed to be two alternative
> > > options, right? One option is to call security_file_set_fowner() with
> > > the lock held, the other option is to completely rip out the
> > > security_file_set_fowner() hook and instead let the VFS provide LSMs
> > > with the creds they need for the file_send_sigiotask hook?
> >
> > I'm not entirely clear on what is being proposed either.  Some quick
> > pseudo code might do wonders here to help clarify things.
> >
> > From a LSM perspective I suspect we are always going to need some sort
> > of hook in the F_SETOWN code path as the LSM needs to potentially
> > capture state/attributes/something-LSM-specific at that
> > context/point-in-time.
> 
> The only thing LSMs currently do there is capture state from
> current->cred. So if the VFS takes care of capturing current->cred
> there, we should be able to rip out all the file_set_fowner stuff.
> Something like this (totally untested):

I just sent a quite similar patch just before syncing my emails...  The
main difference seems to be related to the initialization of the
f_owner's credentials.

> 
> diff --git a/fs/fcntl.c b/fs/fcntl.c
> index 300e5d9ad913..17f159bf625f 100644
> --- a/fs/fcntl.c
> +++ b/fs/fcntl.c
> @@ -98,8 +98,9 @@ static void f_modown(struct file *filp, struct pid
> *pid, enum pid_type type,
> 
>                  if (pid) {
>                          const struct cred *cred = current_cred();
> -                        filp->f_owner.uid = cred->uid;
> -                        filp->f_owner.euid = cred->euid;
> +                        if (filp->f_owner.owner_cred)
> +                                put_cred(filp->f_owner.owner_cred);
> +                        filp->f_owner.owner_cred = get_current_cred();
>                  }
>          }
>          write_unlock_irq(&filp->f_owner.lock);
> @@ -108,7 +109,6 @@ static void f_modown(struct file *filp, struct pid
> *pid, enum pid_type type,
>  void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
>                  int force)
>  {
> -        security_file_set_fowner(filp);
>          f_modown(filp, pid, type, force);
>  }
>  EXPORT_SYMBOL(__f_setown);
> diff --git a/fs/file_table.c b/fs/file_table.c
> index ca7843dde56d..440796fc8e91 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -426,6 +426,8 @@ static void __fput(struct file *file)
>          }
>          fops_put(file->f_op);
>          put_pid(file->f_owner.pid);
> +        if (file->f_owner.owner_cred)
> +                put_cred(file->f_owner.owner_cred);
>          put_file_access(file);
>          dput(dentry);
>          if (unlikely(mode & FMODE_NEED_UNMOUNT))
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index fd34b5755c0b..43bfad373bf9 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -950,7 +950,7 @@ struct fown_struct {
>          rwlock_t lock;          /* protects pid, uid, euid fields */
>          struct pid *pid;        /* pid or -pgrp where SIGIO should be sent */
>          enum pid_type pid_type;        /* Kind of process group SIGIO
> should be sent to */
> -        kuid_t uid, euid;        /* uid/euid of process setting the owner */
> +        const struct cred __rcu *owner_cred;
>          int signum;                /* posix.1b rt signal to be
> delivered on IO */
>  };
> 
> diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
> index 855db460e08b..2c0935dd079e 100644
> --- a/include/linux/lsm_hook_defs.h
> +++ b/include/linux/lsm_hook_defs.h
> @@ -197,7 +197,6 @@ LSM_HOOK(int, 0, file_mprotect, struct vm_area_struct *vma,
>  LSM_HOOK(int, 0, file_lock, struct file *file, unsigned int cmd)
>  LSM_HOOK(int, 0, file_fcntl, struct file *file, unsigned int cmd,
>           unsigned long arg)
> -LSM_HOOK(void, LSM_RET_VOID, file_set_fowner, struct file *file)
>  LSM_HOOK(int, 0, file_send_sigiotask, struct task_struct *tsk,
>           struct fown_struct *fown, int sig)
>  LSM_HOOK(int, 0, file_receive, struct file *file)
> diff --git a/include/linux/security.h b/include/linux/security.h
> index 1390f1efb4f0..3343db05fa2e 100644
> --- a/include/linux/security.h
> +++ b/include/linux/security.h
> @@ -1079,11 +1079,6 @@ static inline int security_file_fcntl(struct
> file *file, unsigned int cmd,
>          return 0;
>  }
> 
> -static inline void security_file_set_fowner(struct file *file)
> -{
> -        return;
> -}
> -
>  static inline int security_file_send_sigiotask(struct task_struct *tsk,
>                                                 struct fown_struct *fown,
>                                                 int sig)
> diff --git a/security/security.c b/security/security.c
> index 8cee5b6c6e6d..a53d8d7fe815 100644
> --- a/security/security.c
> +++ b/security/security.c
> @@ -2924,20 +2924,6 @@ int security_file_fcntl(struct file *file,
> unsigned int cmd, unsigned long arg)
>          return call_int_hook(file_fcntl, file, cmd, arg);
>  }
> 
> -/**
> - * security_file_set_fowner() - Set the file owner info in the LSM blob
> - * @file: the file
> - *
> - * Save owner security information (typically from current->security) in
> - * file->f_security for later use by the send_sigiotask hook.
> - *
> - * Return: Returns 0 on success.
> - */
> -void security_file_set_fowner(struct file *file)
> -{
> -        call_void_hook(file_set_fowner, file);
> -}
> -
>  /**
>   * security_file_send_sigiotask() - Check if sending SIGIO/SIGURG is allowed
>   * @tsk: target task
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index 55c78c318ccd..37675d280837 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -3649,7 +3649,6 @@ static int selinux_file_alloc_security(struct file *file)
>          u32 sid = current_sid();
> 
>          fsec->sid = sid;
> -        fsec->fown_sid = sid;
> 
>          return 0;
>  }
> @@ -3923,24 +3922,16 @@ static int selinux_file_fcntl(struct file
> *file, unsigned int cmd,
>          return err;
>  }
> 
> -static void selinux_file_set_fowner(struct file *file)
> -{
> -        struct file_security_struct *fsec;
> -
> -        fsec = selinux_file(file);
> -        fsec->fown_sid = current_sid();
> -}
> -
>  static int selinux_file_send_sigiotask(struct task_struct *tsk,
>                                         struct fown_struct *fown, int signum)
>  {
> -        struct file *file;
> +        /* struct fown_struct is never outside the context of a struct file */
> +        struct file *file = container_of(fown, struct file, f_owner);
>          u32 sid = task_sid_obj(tsk);
>          u32 perm;
>          struct file_security_struct *fsec;
> -
> -        /* struct fown_struct is never outside the context of a struct file */
> -        file = container_of(fown, struct file, f_owner);
> +        struct cred_struct *fown_cred = rcu_dereference(fown->owner_cred);
> +        u32 fown_sid = cred_sid(fown_cred ?: file->f_cred);
> 
>          fsec = selinux_file(file);
> 
> @@ -3949,7 +3940,7 @@ static int selinux_file_send_sigiotask(struct
> task_struct *tsk,
>          else
>                  perm = signal_to_av(signum);
> 
> -        return avc_has_perm(fsec->fown_sid, sid,
> +        return avc_has_perm(fown_sid, sid,
>                              SECCLASS_PROCESS, perm, NULL);
>  }
> 
> diff --git a/security/selinux/include/objsec.h
> b/security/selinux/include/objsec.h
> index dea1d6f3ed2d..d55b7f8d3a3d 100644
> --- a/security/selinux/include/objsec.h
> +++ b/security/selinux/include/objsec.h
> @@ -56,7 +56,6 @@ struct inode_security_struct {
> 
>  struct file_security_struct {
>          u32 sid; /* SID of open file description */
> -        u32 fown_sid; /* SID of file owner (for SIGIO) */
>          u32 isid; /* SID of inode at the time of file open */
>          u32 pseqno; /* Policy seqno at the time of file open */
>  };
> diff --git a/security/smack/smack.h b/security/smack/smack.h
> index 041688e5a77a..06bac00cc796 100644
> --- a/security/smack/smack.h
> +++ b/security/smack/smack.h
> @@ -328,12 +328,6 @@ static inline struct task_smack *smack_cred(const
> struct cred *cred)
>          return cred->security + smack_blob_sizes.lbs_cred;
>  }
> 
> -static inline struct smack_known **smack_file(const struct file *file)
> -{
> -        return (struct smack_known **)(file->f_security +
> -                                       smack_blob_sizes.lbs_file);
> -}
> -
>  static inline struct inode_smack *smack_inode(const struct inode *inode)
>  {
>          return inode->i_security + smack_blob_sizes.lbs_inode;
> diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
> index 4164699cd4f6..02caa8b9d456 100644
> --- a/security/smack/smack_lsm.c
> +++ b/security/smack/smack_lsm.c
> @@ -1675,26 +1675,6 @@ static void smack_inode_getsecid(struct inode
> *inode, u32 *secid)
>   * label changing that SELinux does.
>   */
> 
> -/**
> - * smack_file_alloc_security - assign a file security blob
> - * @file: the object
> - *
> - * The security blob for a file is a pointer to the master
> - * label list, so no allocation is done.
> - *
> - * f_security is the owner security information. It
> - * isn't used on file access checks, it's for send_sigio.
> - *
> - * Returns 0
> - */
> -static int smack_file_alloc_security(struct file *file)
> -{
> -        struct smack_known **blob = smack_file(file);
> -
> -        *blob = smk_of_current();
> -        return 0;
> -}
> -
>  /**
>   * smack_file_ioctl - Smack check on ioctls
>   * @file: the object
> @@ -1913,18 +1893,6 @@ static int smack_mmap_file(struct file *file,
>          return rc;
>  }
> 
> -/**
> - * smack_file_set_fowner - set the file security blob value
> - * @file: object in question
> - *
> - */
> -static void smack_file_set_fowner(struct file *file)
> -{
> -        struct smack_known **blob = smack_file(file);
> -
> -        *blob = smk_of_current();
> -}
> -
>  /**
>   * smack_file_send_sigiotask - Smack on sigio
>   * @tsk: The target task
> @@ -1946,6 +1914,7 @@ static int smack_file_send_sigiotask(struct
> task_struct *tsk,
>          struct file *file;
>          int rc;
>          struct smk_audit_info ad;
> +        struct cred_struct *fown_cred = rcu_dereference(fown->owner_cred);
> 
>          /*
>           * struct fown_struct is never outside the context of a struct file
> @@ -1953,8 +1922,7 @@ static int smack_file_send_sigiotask(struct
> task_struct *tsk,
>          file = container_of(fown, struct file, f_owner);
> 
>          /* we don't log here as rc can be overriden */
> -        blob = smack_file(file);
> -        skp = *blob;
> +        skp = smk_of_task(fown_cred ?: file->f_cred);
>          rc = smk_access(skp, tkp, MAY_DELIVER, NULL);
>          rc = smk_bu_note("sigiotask", skp, tkp, MAY_DELIVER, rc);
> 
> @@ -5045,7 +5013,6 @@ static int smack_uring_cmd(struct io_uring_cmd *ioucmd)
> 
>  struct lsm_blob_sizes smack_blob_sizes __ro_after_init = {
>          .lbs_cred = sizeof(struct task_smack),
> -        .lbs_file = sizeof(struct smack_known *),
>          .lbs_inode = sizeof(struct inode_smack),
>          .lbs_ipc = sizeof(struct smack_known *),
>          .lbs_msg_msg = sizeof(struct smack_known *),
> @@ -5104,7 +5071,6 @@ static struct security_hook_list smack_hooks[]
> __ro_after_init = {
>          LSM_HOOK_INIT(file_fcntl, smack_file_fcntl),
>          LSM_HOOK_INIT(mmap_file, smack_mmap_file),
>          LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),
> -        LSM_HOOK_INIT(file_set_fowner, smack_file_set_fowner),
>          LSM_HOOK_INIT(file_send_sigiotask, smack_file_send_sigiotask),
>          LSM_HOOK_INIT(file_receive, smack_file_receive),
> 
> 
> > While I think it is okay if we want to
> > consider relocating the security_file_set_fowner() within the F_SETOWN
> > call path, I don't think we can remove it, even if we add additional
> > LSM security blobs.

>
Paul Moore Aug. 12, 2024, 2:57 p.m. UTC | #22
On Mon, Aug 12, 2024 at 9:09 AM Jann Horn <jannh@google.com> wrote:
> On Mon, Aug 12, 2024 at 12:04 AM Paul Moore <paul@paul-moore.com> wrote:

...

> > From a LSM perspective I suspect we are always going to need some sort
> > of hook in the F_SETOWN code path as the LSM needs to potentially
> > capture state/attributes/something-LSM-specific at that
> > context/point-in-time.
>
> The only thing LSMs currently do there is capture state from
> current->cred. So if the VFS takes care of capturing current->cred
> there, we should be able to rip out all the file_set_fowner stuff.
> Something like this (totally untested):

I've very hesitant to drop the LSM hook from the F_SETOWN path both
because it is reasonable that other LSMs may want to do other things
here, and adding a LSM hook to the kernel, even if it is re-adding a
hook that was previously removed, is a difficult and painful process
with an uncertain outcome.
Jann Horn Aug. 12, 2024, 3:06 p.m. UTC | #23
On Mon, Aug 12, 2024 at 4:57 PM Paul Moore <paul@paul-moore.com> wrote:
> On Mon, Aug 12, 2024 at 9:09 AM Jann Horn <jannh@google.com> wrote:
> > On Mon, Aug 12, 2024 at 12:04 AM Paul Moore <paul@paul-moore.com> wrote:
>
> ...
>
> > > From a LSM perspective I suspect we are always going to need some sort
> > > of hook in the F_SETOWN code path as the LSM needs to potentially
> > > capture state/attributes/something-LSM-specific at that
> > > context/point-in-time.
> >
> > The only thing LSMs currently do there is capture state from
> > current->cred. So if the VFS takes care of capturing current->cred
> > there, we should be able to rip out all the file_set_fowner stuff.
> > Something like this (totally untested):
>
> I've very hesitant to drop the LSM hook from the F_SETOWN path both
> because it is reasonable that other LSMs may want to do other things
> here,

What is an example for other things an LSM might want to do there? As
far as I understand, the whole point of this hook is to record the
identity of the sender of signals - are you talking about an LSM that
might not be storing credentials in struct cred, or something like
that?

> and adding a LSM hook to the kernel, even if it is re-adding a
> hook that was previously removed, is a difficult and painful process
> with an uncertain outcome.

Do you mean that even if the LSM hook ends up with zero users
remaining, you'd still want to keep it around in case it's needed
again later?
Paul Moore Aug. 12, 2024, 4:30 p.m. UTC | #24
On Mon, Aug 12, 2024 at 11:06 AM Jann Horn <jannh@google.com> wrote:
> On Mon, Aug 12, 2024 at 4:57 PM Paul Moore <paul@paul-moore.com> wrote:
> > On Mon, Aug 12, 2024 at 9:09 AM Jann Horn <jannh@google.com> wrote:
> > > On Mon, Aug 12, 2024 at 12:04 AM Paul Moore <paul@paul-moore.com> wrote:
> >
> > ...
> >
> > > > From a LSM perspective I suspect we are always going to need some sort
> > > > of hook in the F_SETOWN code path as the LSM needs to potentially
> > > > capture state/attributes/something-LSM-specific at that
> > > > context/point-in-time.
> > >
> > > The only thing LSMs currently do there is capture state from
> > > current->cred. So if the VFS takes care of capturing current->cred
> > > there, we should be able to rip out all the file_set_fowner stuff.
> > > Something like this (totally untested):
> >
> > I've very hesitant to drop the LSM hook from the F_SETOWN path both
> > because it is reasonable that other LSMs may want to do other things
> > here,
>
> What is an example for other things an LSM might want to do there? As
> far as I understand, the whole point of this hook is to record the
> identity of the sender of signals - are you talking about an LSM that
> might not be storing credentials in struct cred, or something like
> that?

Sure.  The LSM framework is intentionally very vague and limited on
what restrictions it places on individual LSMs; we want to be able to
support a wide range of security models and concepts.  I view the
F_SETOWN hook are important because it is a control point that is used
to set/copy/transfer/whatever security attributes from the current
process to a file/fd for the purpose of managing signals on the fd.

> > and adding a LSM hook to the kernel, even if it is re-adding a
> > hook that was previously removed, is a difficult and painful process
> > with an uncertain outcome.
>
> Do you mean that even if the LSM hook ends up with zero users
> remaining, you'd still want to keep it around in case it's needed
> again later?

I want the security_file_set_fowner() hook to remain a viable hook for
capturing the current task's security attributes, regardless of what
security attributes the LSM is interested in capturing and where those
attributes are stored.
Mickaël Salaün Aug. 12, 2024, 5:27 p.m. UTC | #25
On Mon, Aug 12, 2024 at 12:30:03PM -0400, Paul Moore wrote:
> On Mon, Aug 12, 2024 at 11:06 AM Jann Horn <jannh@google.com> wrote:
> > On Mon, Aug 12, 2024 at 4:57 PM Paul Moore <paul@paul-moore.com> wrote:
> > > On Mon, Aug 12, 2024 at 9:09 AM Jann Horn <jannh@google.com> wrote:
> > > > On Mon, Aug 12, 2024 at 12:04 AM Paul Moore <paul@paul-moore.com> wrote:
> > >
> > > ...
> > >
> > > > > From a LSM perspective I suspect we are always going to need some sort
> > > > > of hook in the F_SETOWN code path as the LSM needs to potentially
> > > > > capture state/attributes/something-LSM-specific at that
> > > > > context/point-in-time.
> > > >
> > > > The only thing LSMs currently do there is capture state from
> > > > current->cred. So if the VFS takes care of capturing current->cred
> > > > there, we should be able to rip out all the file_set_fowner stuff.
> > > > Something like this (totally untested):
> > >
> > > I've very hesitant to drop the LSM hook from the F_SETOWN path both
> > > because it is reasonable that other LSMs may want to do other things
> > > here,
> >
> > What is an example for other things an LSM might want to do there? As
> > far as I understand, the whole point of this hook is to record the
> > identity of the sender of signals - are you talking about an LSM that
> > might not be storing credentials in struct cred, or something like
> > that?
> 
> Sure.  The LSM framework is intentionally very vague and limited on
> what restrictions it places on individual LSMs; we want to be able to
> support a wide range of security models and concepts.  I view the
> F_SETOWN hook are important because it is a control point that is used
> to set/copy/transfer/whatever security attributes from the current
> process to a file/fd for the purpose of managing signals on the fd.
> 
> > > and adding a LSM hook to the kernel, even if it is re-adding a
> > > hook that was previously removed, is a difficult and painful process
> > > with an uncertain outcome.
> >
> > Do you mean that even if the LSM hook ends up with zero users
> > remaining, you'd still want to keep it around in case it's needed
> > again later?
> 
> I want the security_file_set_fowner() hook to remain a viable hook for
> capturing the current task's security attributes, regardless of what
> security attributes the LSM is interested in capturing and where those
> attributes are stored.

I don't see the point to keep an unused hook, we could add it back later
if there is a valid use case, but I'll send a v2 without this removal.
Paul Moore Aug. 12, 2024, 6:17 p.m. UTC | #26
On Mon, Aug 12, 2024 at 1:28 PM Mickaël Salaün <mic@digikod.net> wrote:
> On Mon, Aug 12, 2024 at 12:30:03PM -0400, Paul Moore wrote:
> > On Mon, Aug 12, 2024 at 11:06 AM Jann Horn <jannh@google.com> wrote:
> > > On Mon, Aug 12, 2024 at 4:57 PM Paul Moore <paul@paul-moore.com> wrote:
> > > > On Mon, Aug 12, 2024 at 9:09 AM Jann Horn <jannh@google.com> wrote:
> > > > > On Mon, Aug 12, 2024 at 12:04 AM Paul Moore <paul@paul-moore.com> wrote:
> > > >
> > > > ...
> > > >
> > > > > > From a LSM perspective I suspect we are always going to need some sort
> > > > > > of hook in the F_SETOWN code path as the LSM needs to potentially
> > > > > > capture state/attributes/something-LSM-specific at that
> > > > > > context/point-in-time.
> > > > >
> > > > > The only thing LSMs currently do there is capture state from
> > > > > current->cred. So if the VFS takes care of capturing current->cred
> > > > > there, we should be able to rip out all the file_set_fowner stuff.
> > > > > Something like this (totally untested):
> > > >
> > > > I've very hesitant to drop the LSM hook from the F_SETOWN path both
> > > > because it is reasonable that other LSMs may want to do other things
> > > > here,
> > >
> > > What is an example for other things an LSM might want to do there? As
> > > far as I understand, the whole point of this hook is to record the
> > > identity of the sender of signals - are you talking about an LSM that
> > > might not be storing credentials in struct cred, or something like
> > > that?
> >
> > Sure.  The LSM framework is intentionally very vague and limited on
> > what restrictions it places on individual LSMs; we want to be able to
> > support a wide range of security models and concepts.  I view the
> > F_SETOWN hook are important because it is a control point that is used
> > to set/copy/transfer/whatever security attributes from the current
> > process to a file/fd for the purpose of managing signals on the fd.
> >
> > > > and adding a LSM hook to the kernel, even if it is re-adding a
> > > > hook that was previously removed, is a difficult and painful process
> > > > with an uncertain outcome.
> > >
> > > Do you mean that even if the LSM hook ends up with zero users
> > > remaining, you'd still want to keep it around in case it's needed
> > > again later?
> >
> > I want the security_file_set_fowner() hook to remain a viable hook for
> > capturing the current task's security attributes, regardless of what
> > security attributes the LSM is interested in capturing and where those
> > attributes are stored.
>
> I don't see the point to keep an unused hook, we could add it back later
> if there is a valid use case, but I'll send a v2 without this removal.

If it was simple to add LSM hooks, then I would agree, but history has
shown that not to be the case.
diff mbox series

Patch

diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index ab31e9f53e55..a65fdb507d39 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -292,8 +292,11 @@  struct landlock_net_port_attr {
  *   from connecting to an abstract unix socket created by a process
  *   outside the related Landlock domain (e.g. a parent domain or a
  *   non-sandboxed process).
+ * - %LANDLOCK_SCOPED_SIGNAL: Restrict a sandboxed process from sending a signal
+ *   to another process outside sandbox domain.
  */
 /* clang-format off */
 #define LANDLOCK_SCOPED_ABSTRACT_UNIX_SOCKET		(1ULL << 0)
+#define LANDLOCK_SCOPED_SIGNAL		                (1ULL << 1)
 /* clang-format on*/
 #endif /* _UAPI_LINUX_LANDLOCK_H */
diff --git a/security/landlock/limits.h b/security/landlock/limits.h
index eb01d0fb2165..fa28f9236407 100644
--- a/security/landlock/limits.h
+++ b/security/landlock/limits.h
@@ -26,7 +26,7 @@ 
 #define LANDLOCK_MASK_ACCESS_NET	((LANDLOCK_LAST_ACCESS_NET << 1) - 1)
 #define LANDLOCK_NUM_ACCESS_NET		__const_hweight64(LANDLOCK_MASK_ACCESS_NET)
 
-#define LANDLOCK_LAST_SCOPE		LANDLOCK_SCOPED_ABSTRACT_UNIX_SOCKET
+#define LANDLOCK_LAST_SCOPE		LANDLOCK_SCOPED_SIGNAL
 #define LANDLOCK_MASK_SCOPE		((LANDLOCK_LAST_SCOPE << 1) - 1)
 #define LANDLOCK_NUM_SCOPE		__const_hweight64(LANDLOCK_MASK_SCOPE)
 /* clang-format on */
diff --git a/security/landlock/task.c b/security/landlock/task.c
index 7e8579ebae83..a73cff27bb91 100644
--- a/security/landlock/task.c
+++ b/security/landlock/task.c
@@ -261,11 +261,54 @@  static int hook_unix_may_send(struct socket *const sock,
 	return -EPERM;
 }
 
+static int hook_task_kill(struct task_struct *const p,
+			  struct kernel_siginfo *const info, const int sig,
+			  const struct cred *const cred)
+{
+	bool is_scoped;
+	const struct landlock_ruleset *target_dom;
+
+	/* rcu is already locked */
+	target_dom = landlock_get_task_domain(p);
+	if (cred)
+		/* dealing with USB IO */
+		is_scoped = domain_IPC_scope(landlock_cred(cred)->domain,
+					     target_dom,
+					     LANDLOCK_SCOPED_SIGNAL);
+	else
+		is_scoped = domain_IPC_scope(landlock_get_current_domain(),
+					     target_dom,
+					     LANDLOCK_SCOPED_SIGNAL);
+	if (is_scoped)
+		return 0;
+
+	return -EPERM;
+}
+
+static int hook_file_send_sigiotask(struct task_struct *tsk,
+				    struct fown_struct *fown, int signum)
+{
+	bool is_scoped;
+	const struct landlock_ruleset *dom, *target_dom;
+	struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
+
+	/* rcu is already locked! */
+	dom = landlock_get_task_domain(result);
+	target_dom = landlock_get_task_domain(tsk);
+	is_scoped = domain_IPC_scope(dom, target_dom, LANDLOCK_SCOPED_SIGNAL);
+	put_task_struct(result);
+	if (is_scoped)
+		return 0;
+	return -EPERM;
+}
+
 static struct security_hook_list landlock_hooks[] __ro_after_init = {
 	LSM_HOOK_INIT(ptrace_access_check, hook_ptrace_access_check),
 	LSM_HOOK_INIT(ptrace_traceme, hook_ptrace_traceme),
 	LSM_HOOK_INIT(unix_stream_connect, hook_unix_stream_connect),
 	LSM_HOOK_INIT(unix_may_send, hook_unix_may_send),
+	LSM_HOOK_INIT(task_kill, hook_task_kill),
+	LSM_HOOK_INIT(file_send_sigiotask, hook_file_send_sigiotask),
 };
 
 __init void landlock_add_task_hooks(void)