diff mbox series

[v2,1/4] Landlock: Add signal control

Message ID 49557e48c1904d2966b8aa563215d2e1733dad95.1722966592.git.fahimitahera@gmail.com (mailing list archive)
State Superseded
Headers show
Series Landlock: Signal Scoping Support | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Tahera Fahimi Aug. 6, 2024, 6:10 p.m. UTC
Currently, a sandbox process is not restricted to send a signal
(e.g. SIGKILL) to a process outside of the sandbox environment.
Ability to sending a signal for a sandboxed process should be
scoped the same way abstract unix sockets are scoped. Therefore,
we extend "scoped" field in a ruleset with
"LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
sending any signal from within a sandbox process to its
parent(i.e. any parent sandbox or non-sandboxed procsses).

Signed-off-by: Tahera Fahimi <fahimitahera@gmail.com>
---

Chenges in versions:
V2:
* Remove signal_is_scoped function
* Applying reviews of V1
V1:
* Introducing LANDLOCK_SCOPE_SIGNAL
* Adding two hooks, hook_task_kill and hook_file_send_sigiotask
  for signal scoping.
---
 include/uapi/linux/landlock.h |  3 +++
 security/landlock/limits.h    |  2 +-
 security/landlock/task.c      | 43 +++++++++++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)

Comments

Jann Horn Aug. 6, 2024, 6:56 p.m. UTC | #1
On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> Currently, a sandbox process is not restricted to send a signal
> (e.g. SIGKILL) to a process outside of the sandbox environment.
> Ability to sending a signal for a sandboxed process should be
> scoped the same way abstract unix sockets are scoped. Therefore,
> we extend "scoped" field in a ruleset with
> "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> sending any signal from within a sandbox process to its
> parent(i.e. any parent sandbox or non-sandboxed procsses).
[...]
> diff --git a/security/landlock/task.c b/security/landlock/task.c
> index 7e8579ebae83..a73cff27bb91 100644
> --- a/security/landlock/task.c
> +++ b/security/landlock/task.c
> @@ -261,11 +261,54 @@ static int hook_unix_may_send(struct socket *const sock,
>         return -EPERM;
>  }
>
> +static int hook_task_kill(struct task_struct *const p,
> +                         struct kernel_siginfo *const info, const int sig,
> +                         const struct cred *const cred)
> +{
> +       bool is_scoped;
> +       const struct landlock_ruleset *target_dom;
> +
> +       /* rcu is already locked */
> +       target_dom = landlock_get_task_domain(p);
> +       if (cred)
> +               /* dealing with USB IO */
> +               is_scoped = domain_IPC_scope(landlock_cred(cred)->domain,
> +                                            target_dom,
> +                                            LANDLOCK_SCOPED_SIGNAL);
> +       else
> +               is_scoped = domain_IPC_scope(landlock_get_current_domain(),
> +                                            target_dom,
> +                                            LANDLOCK_SCOPED_SIGNAL);

This might be a bit more concise if you turn it into something like:

/* only USB IO supplies creds */
cred = cred ?: current_cred();
is_scoped = domain_IPC_scope(landlock_cred(cred)->domain,
    target_dom, LANDLOCK_SCOPED_SIGNAL);

but that's just a question of style, feel free to keep it as-is
depending on what you prefer.

> +       if (is_scoped)
> +               return 0;
> +
> +       return -EPERM;
> +}
> +
> +static int hook_file_send_sigiotask(struct task_struct *tsk,
> +                                   struct fown_struct *fown, int signum)
> +{
> +       bool is_scoped;
> +       const struct landlock_ruleset *dom, *target_dom;
> +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);

I'm not an expert on how the fowner stuff works, but I think this will
probably give you "result = NULL" if the file owner PID has already
exited, and then the following landlock_get_task_domain() would
probably crash? But I'm not entirely sure about how this works.

I think the intended way to use this hook would be to instead use the
"file_set_fowner" hook to record the owning domain (though the setup
for that is going to be kind of a pain...), see the Smack and SELinux
definitions of that hook. Or alternatively maybe it would be even
nicer to change the fown_struct to record a cred* instead of a uid and
euid and then use the domain from those credentials for this hook...
I'm not sure which of those would be easier.

> +       /* rcu is already locked! */
> +       dom = landlock_get_task_domain(result);
> +       target_dom = landlock_get_task_domain(tsk);
> +       is_scoped = domain_IPC_scope(dom, target_dom, LANDLOCK_SCOPED_SIGNAL);
> +       put_task_struct(result);
> +       if (is_scoped)
> +               return 0;
> +       return -EPERM;
> +}
Jann Horn Aug. 6, 2024, 9:55 p.m. UTC | #2
On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > Currently, a sandbox process is not restricted to send a signal
> > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > Ability to sending a signal for a sandboxed process should be
> > scoped the same way abstract unix sockets are scoped. Therefore,
> > we extend "scoped" field in a ruleset with
> > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > sending any signal from within a sandbox process to its
> > parent(i.e. any parent sandbox or non-sandboxed procsses).
[...]
> > +       if (is_scoped)
> > +               return 0;
> > +
> > +       return -EPERM;
> > +}
> > +
> > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > +                                   struct fown_struct *fown, int signum)
> > +{
> > +       bool is_scoped;
> > +       const struct landlock_ruleset *dom, *target_dom;
> > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
>
> I'm not an expert on how the fowner stuff works, but I think this will
> probably give you "result = NULL" if the file owner PID has already
> exited, and then the following landlock_get_task_domain() would
> probably crash? But I'm not entirely sure about how this works.
>
> I think the intended way to use this hook would be to instead use the
> "file_set_fowner" hook to record the owning domain (though the setup
> for that is going to be kind of a pain...), see the Smack and SELinux
> definitions of that hook. Or alternatively maybe it would be even
> nicer to change the fown_struct to record a cred* instead of a uid and
> euid and then use the domain from those credentials for this hook...
> I'm not sure which of those would be easier.

(For what it's worth, I think the first option would probably be
easier to implement and ship for now, since you can basically copy
what Smack and SELinux are already doing in their implementations of
these hooks. I think the second option would theoretically result in
nicer code, but it might require a bit more work, and you'd have to
include the maintainers of the file locking code in the review of such
refactoring and have them approve those changes. So if you want to get
this patchset into the kernel quickly, the first option might be
better for now?)
Tahera Fahimi Aug. 6, 2024, 10 p.m. UTC | #3
On Tue, Aug 06, 2024 at 08:56:15PM +0200, Jann Horn wrote:
> On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > Currently, a sandbox process is not restricted to send a signal
> > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > Ability to sending a signal for a sandboxed process should be
> > scoped the same way abstract unix sockets are scoped. Therefore,
> > we extend "scoped" field in a ruleset with
> > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > sending any signal from within a sandbox process to its
> > parent(i.e. any parent sandbox or non-sandboxed procsses).
> [...]
> > diff --git a/security/landlock/task.c b/security/landlock/task.c
> > index 7e8579ebae83..a73cff27bb91 100644
> > --- a/security/landlock/task.c
> > +++ b/security/landlock/task.c
> > @@ -261,11 +261,54 @@ static int hook_unix_may_send(struct socket *const sock,
> >         return -EPERM;
> >  }
> >
> > +static int hook_task_kill(struct task_struct *const p,
> > +                         struct kernel_siginfo *const info, const int sig,
> > +                         const struct cred *const cred)
> > +{
> > +       bool is_scoped;
> > +       const struct landlock_ruleset *target_dom;
> > +
> > +       /* rcu is already locked */
> > +       target_dom = landlock_get_task_domain(p);
> > +       if (cred)
> > +               /* dealing with USB IO */
> > +               is_scoped = domain_IPC_scope(landlock_cred(cred)->domain,
> > +                                            target_dom,
> > +                                            LANDLOCK_SCOPED_SIGNAL);
> > +       else
> > +               is_scoped = domain_IPC_scope(landlock_get_current_domain(),
> > +                                            target_dom,
> > +                                            LANDLOCK_SCOPED_SIGNAL);
> 
> This might be a bit more concise if you turn it into something like:
> 
> /* only USB IO supplies creds */
> cred = cred ?: current_cred();
> is_scoped = domain_IPC_scope(landlock_cred(cred)->domain,
>     target_dom, LANDLOCK_SCOPED_SIGNAL);
> 
> but that's just a question of style, feel free to keep it as-is
> depending on what you prefer.
Hi Jann,
Thanks for the feedback:)
> > +       if (is_scoped)
> > +               return 0;
> > +
> > +       return -EPERM;
> > +}
> > +
> > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > +                                   struct fown_struct *fown, int signum)
> > +{
> > +       bool is_scoped;
> > +       const struct landlock_ruleset *dom, *target_dom;
> > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> 
> I'm not an expert on how the fowner stuff works, but I think this will
> probably give you "result = NULL" if the file owner PID has already
> exited, and then the following landlock_get_task_domain() would
> probably crash? But I'm not entirely sure about how this works.
I considered since the file structure can always be obtained, then the
file owner PID always exist. I can check if we can use the credentials
stored in struct file instead.

> I think the intended way to use this hook would be to instead use the
> "file_set_fowner" hook to record the owning domain (though the setup
> for that is going to be kind of a pain...), see the Smack and SELinux
> definitions of that hook. Or alternatively maybe it would be even
> nicer to change the fown_struct to record a cred* instead of a uid and
> euid and then use the domain from those credentials for this hook...
> I'm not sure which of those would be easier.
Because Landlock does not use any security blob for this purpose, I am
not sure how to record the owner's doamin.
The alternative way looks nice.
> > +       /* rcu is already locked! */
> > +       dom = landlock_get_task_domain(result);
> > +       target_dom = landlock_get_task_domain(tsk);
> > +       is_scoped = domain_IPC_scope(dom, target_dom, LANDLOCK_SCOPED_SIGNAL);
> > +       put_task_struct(result);
> > +       if (is_scoped)
> > +               return 0;
> > +       return -EPERM;
> > +}
Jann Horn Aug. 6, 2024, 10:55 p.m. UTC | #4
Hi!

On Wed, Aug 7, 2024 at 12:00 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> On Tue, Aug 06, 2024 at 08:56:15PM +0200, Jann Horn wrote:
> > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > +       if (is_scoped)
> > > +               return 0;
> > > +
> > > +       return -EPERM;
> > > +}
> > > +
> > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > +                                   struct fown_struct *fown, int signum)
> > > +{
> > > +       bool is_scoped;
> > > +       const struct landlock_ruleset *dom, *target_dom;
> > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> >
> > I'm not an expert on how the fowner stuff works, but I think this will
> > probably give you "result = NULL" if the file owner PID has already
> > exited, and then the following landlock_get_task_domain() would
> > probably crash? But I'm not entirely sure about how this works.
> I considered since the file structure can always be obtained, then the
> file owner PID always exist.

I think the file owner "struct pid" always exists here; but I think
the PID does not necessarily point to a task_struct.

I think you can have a scenario where userspace does something like this:

int fd = <open some file descriptor somehow>;
fcntl(fd, F_SETOWN, <PID of some other process>);
pid_t child_pid = fork();
if (child_pid == 0) {
  /* this executes in the child process */
  sleep(5);
  <do something on the fd that triggers send_sigio() or send_sigurg()>
  sleep(5);
  exit(0);
}
/* this continues executing in the parent process */
exit(0);

At the fcntl(fd, F_SETOWN, ...) call, a reference-counted reference to
the "struct pid" of the parent process will be stored in the file
(this happens in "f_modown", which increments the reference count of
the "struct pid" using "get_pid(...)"). But when the parent process
exits, the pointer from the parent's "struct pid" to the parent's
"task_struct" is removed, and the "task_struct" will eventually be
deleted from memory.

So when, at a later time, something triggers send_sigio() or
send_sigurg() and enters this LSM hook, I think
"get_pid_task(fown->pid, fown->pid_type)" can return NULL.

> I can check if we can use the credentials
> stored in struct file instead.

(sort of relevant: The manpage
https://man7.org/linux/man-pages/man2/fcntl.2.html says "Note: The
F_SETOWN operation records the caller's credentials at the time of the
fcntl() call, and it is these saved credentials that are used for the
permission checks.")

You mean the credentials in the "f_cred" member of "struct file", right? If so:

I don't think Landlock can use the existing credentials stored in
file->f_cred for this. Consider the following scenario:

1. A process (with no landlock restrictions so far) opens a file. At
this point, the caller's credentials (which indicate no landlock
restrictions) are recorded in file->f_cred.
2. The process installs a landlock filter that restricts the ability
to send signals. (This changes the credentials pointer of the process
to a new set of credentials that points to the new landlock domain.)
3. Malicious code starts executing in the sandboxed process.
4. The malicious code uses F_SETOWN on the already-open file.
5. Something causes sending of a signal via send_sigio() or
send_sigurg() on this file.

In this scenario, if the LSM hook used the landlock restrictions
attached to file->f_cred, it would think the operation is not filtered
by any landlock domain.

> > I think the intended way to use this hook would be to instead use the
> > "file_set_fowner" hook to record the owning domain (though the setup
> > for that is going to be kind of a pain...), see the Smack and SELinux
> > definitions of that hook. Or alternatively maybe it would be even
> > nicer to change the fown_struct to record a cred* instead of a uid and
> > euid and then use the domain from those credentials for this hook...
> > I'm not sure which of those would be easier.
> Because Landlock does not use any security blob for this purpose, I am
> not sure how to record the owner's doamin.

I think landlock already has a landlock_file_security blob attached to
"struct file" that you could use to store an extra landlock domain
pointer for this, kinda like the "fown_sid" in SELinux? But doing this
for landlock would be a little more annoying because you'd have to
store a reference-counted pointer to the landlock domain in the
landlock_file_security blob, so you'd have to make sure to also
decrement the reference count when the file is freed (with the
file_free_security hook), and you'd have to use locking (or atomic
operations) to make sure that two concurrent file_set_fowner calls
don't race in a dangerous way...

So I think it's fairly straightforward, but it would be kinda ugly.

> The alternative way looks nice.

Yeah, I agree that it looks nicer. (If you decide to implement it by
refactoring the fown_struct, it would be a good idea to make that a
separate patch in your patch series - partly because that way, the
maintainers of that fs/fcntl.c code can review just the refactoring,
and partly just because splitting logical changes into separate
patches makes it easier to review the individual patches.)

> > > +       /* rcu is already locked! */
> > > +       dom = landlock_get_task_domain(result);
> > > +       target_dom = landlock_get_task_domain(tsk);
> > > +       is_scoped = domain_IPC_scope(dom, target_dom, LANDLOCK_SCOPED_SIGNAL);
> > > +       put_task_struct(result);
> > > +       if (is_scoped)
> > > +               return 0;
> > > +       return -EPERM;
> > > +}
Mickaël Salaün Aug. 7, 2024, 6:16 p.m. UTC | #5
On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > Currently, a sandbox process is not restricted to send a signal
> > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > Ability to sending a signal for a sandboxed process should be
> > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > we extend "scoped" field in a ruleset with
> > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > sending any signal from within a sandbox process to its
> > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> [...]
> > > +       if (is_scoped)
> > > +               return 0;
> > > +
> > > +       return -EPERM;
> > > +}
> > > +
> > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > +                                   struct fown_struct *fown, int signum)

I was wondering if we should handle this case, but I guess it makes
sense to have a consistent policy for all kind of user-triggerable
signals.

> > > +{
> > > +       bool is_scoped;
> > > +       const struct landlock_ruleset *dom, *target_dom;
> > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> >
> > I'm not an expert on how the fowner stuff works, but I think this will
> > probably give you "result = NULL" if the file owner PID has already
> > exited, and then the following landlock_get_task_domain() would
> > probably crash? But I'm not entirely sure about how this works.
> >
> > I think the intended way to use this hook would be to instead use the
> > "file_set_fowner" hook to record the owning domain (though the setup
> > for that is going to be kind of a pain...), see the Smack and SELinux
> > definitions of that hook. Or alternatively maybe it would be even
> > nicer to change the fown_struct to record a cred* instead of a uid and
> > euid and then use the domain from those credentials for this hook...
> > I'm not sure which of those would be easier.
> 
> (For what it's worth, I think the first option would probably be
> easier to implement and ship for now, since you can basically copy
> what Smack and SELinux are already doing in their implementations of
> these hooks. I think the second option would theoretically result in
> nicer code, but it might require a bit more work, and you'd have to
> include the maintainers of the file locking code in the review of such
> refactoring and have them approve those changes. So if you want to get
> this patchset into the kernel quickly, the first option might be
> better for now?)
> 

I agree, let's extend landlock_file_security with a new "fown" pointer
to a Landlock domain. We'll need to call landlock_get_ruleset() in
hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
hook_file_free_security().

I would be nice to to replace the redundant informations in fown_struct
but that can wait.
Tahera Fahimi Aug. 7, 2024, 11:36 p.m. UTC | #6
On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > Currently, a sandbox process is not restricted to send a signal
> > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > Ability to sending a signal for a sandboxed process should be
> > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > we extend "scoped" field in a ruleset with
> > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > sending any signal from within a sandbox process to its
> > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > [...]
> > > > +       if (is_scoped)
> > > > +               return 0;
> > > > +
> > > > +       return -EPERM;
> > > > +}
> > > > +
> > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > +                                   struct fown_struct *fown, int signum)
> 
> I was wondering if we should handle this case, but I guess it makes
> sense to have a consistent policy for all kind of user-triggerable
> signals.
> 
> > > > +{
> > > > +       bool is_scoped;
> > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > >
> > > I'm not an expert on how the fowner stuff works, but I think this will
> > > probably give you "result = NULL" if the file owner PID has already
> > > exited, and then the following landlock_get_task_domain() would
> > > probably crash? But I'm not entirely sure about how this works.
> > >
> > > I think the intended way to use this hook would be to instead use the
> > > "file_set_fowner" hook to record the owning domain (though the setup
> > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > definitions of that hook. Or alternatively maybe it would be even
> > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > euid and then use the domain from those credentials for this hook...
> > > I'm not sure which of those would be easier.
> > 
> > (For what it's worth, I think the first option would probably be
> > easier to implement and ship for now, since you can basically copy
> > what Smack and SELinux are already doing in their implementations of
> > these hooks. I think the second option would theoretically result in
> > nicer code, but it might require a bit more work, and you'd have to
> > include the maintainers of the file locking code in the review of such
> > refactoring and have them approve those changes. So if you want to get
> > this patchset into the kernel quickly, the first option might be
> > better for now?)
> > 
> 
> I agree, let's extend landlock_file_security with a new "fown" pointer
> to a Landlock domain. We'll need to call landlock_get_ruleset() in
> hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> hook_file_free_security().
I think we should add a new hook (hook_file_set_owner()) to initialize
the "fown" pointer and call landlock_get_ruleset() in that? If we do not
have hook_file_set_owner to store domain in "fown", can you please give
me a hint on where to do that?
Thanks 
> I would be nice to to replace the redundant informations in fown_struct
> but that can wait.
Jann Horn Aug. 8, 2024, 1:10 a.m. UTC | #7
On Thu, Aug 8, 2024 at 1:36 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> > On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > Currently, a sandbox process is not restricted to send a signal
> > > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > > Ability to sending a signal for a sandboxed process should be
> > > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > > we extend "scoped" field in a ruleset with
> > > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > > sending any signal from within a sandbox process to its
> > > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > > [...]
> > > > > +       if (is_scoped)
> > > > > +               return 0;
> > > > > +
> > > > > +       return -EPERM;
> > > > > +}
> > > > > +
> > > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > > +                                   struct fown_struct *fown, int signum)
> >
> > I was wondering if we should handle this case, but I guess it makes
> > sense to have a consistent policy for all kind of user-triggerable
> > signals.
> >
> > > > > +{
> > > > > +       bool is_scoped;
> > > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > > >
> > > > I'm not an expert on how the fowner stuff works, but I think this will
> > > > probably give you "result = NULL" if the file owner PID has already
> > > > exited, and then the following landlock_get_task_domain() would
> > > > probably crash? But I'm not entirely sure about how this works.
> > > >
> > > > I think the intended way to use this hook would be to instead use the
> > > > "file_set_fowner" hook to record the owning domain (though the setup
> > > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > > definitions of that hook. Or alternatively maybe it would be even
> > > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > > euid and then use the domain from those credentials for this hook...
> > > > I'm not sure which of those would be easier.
> > >
> > > (For what it's worth, I think the first option would probably be
> > > easier to implement and ship for now, since you can basically copy
> > > what Smack and SELinux are already doing in their implementations of
> > > these hooks. I think the second option would theoretically result in
> > > nicer code, but it might require a bit more work, and you'd have to
> > > include the maintainers of the file locking code in the review of such
> > > refactoring and have them approve those changes. So if you want to get
> > > this patchset into the kernel quickly, the first option might be
> > > better for now?)
> > >
> >
> > I agree, let's extend landlock_file_security with a new "fown" pointer
> > to a Landlock domain. We'll need to call landlock_get_ruleset() in
> > hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> > hook_file_free_security().
> I think we should add a new hook (hook_file_set_owner()) to initialize
> the "fown" pointer and call landlock_get_ruleset() in that?

Yeah. Initialize the pointer in the file_set_fowner hook, and read the
pointer in the file_send_sigiotask hook.

Note that in the file_set_fowner hook, you'll probably need to use
both landlock_get_ruleset() (to take a reference on the ruleset you're
storing in the fown pointer) and landlock_put_ruleset() (to drop the
reference to the ruleset that the fown pointer was pointing to
before). And you'll need to use some kind of lock to protect the fown
pointer - either by adding an appropriate lock next to your fown
pointer or by using some appropriate existing lock in "struct file".
Probably it's cleanest to have your own lock for this? (This lock will
have to be something like a spinlock, not a mutex, since you need to
be able to acquire it in the file_set_fowner hook, which runs inside
an RCU read-side critical section, where sleeping is forbidden -
acquiring a mutex can sleep and therefore is forbidden in this
context, acquiring a spinlock can't sleep.)

> If we do not
> have hook_file_set_owner to store domain in "fown", can you please give
> me a hint on where to do that?
> Thanks
> > I would be nice to to replace the redundant informations in fown_struct
> > but that can wait.
Mickaël Salaün Aug. 8, 2024, 2:09 p.m. UTC | #8
On Thu, Aug 08, 2024 at 03:10:54AM +0200, Jann Horn wrote:
> On Thu, Aug 8, 2024 at 1:36 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> > > On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > > > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > Currently, a sandbox process is not restricted to send a signal
> > > > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > > > Ability to sending a signal for a sandboxed process should be
> > > > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > > > we extend "scoped" field in a ruleset with
> > > > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > > > sending any signal from within a sandbox process to its
> > > > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > > > [...]
> > > > > > +       if (is_scoped)
> > > > > > +               return 0;
> > > > > > +
> > > > > > +       return -EPERM;
> > > > > > +}
> > > > > > +
> > > > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > > > +                                   struct fown_struct *fown, int signum)
> > >
> > > I was wondering if we should handle this case, but I guess it makes
> > > sense to have a consistent policy for all kind of user-triggerable
> > > signals.
> > >
> > > > > > +{
> > > > > > +       bool is_scoped;
> > > > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > > > >
> > > > > I'm not an expert on how the fowner stuff works, but I think this will
> > > > > probably give you "result = NULL" if the file owner PID has already
> > > > > exited, and then the following landlock_get_task_domain() would
> > > > > probably crash? But I'm not entirely sure about how this works.
> > > > >
> > > > > I think the intended way to use this hook would be to instead use the
> > > > > "file_set_fowner" hook to record the owning domain (though the setup
> > > > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > > > definitions of that hook. Or alternatively maybe it would be even
> > > > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > > > euid and then use the domain from those credentials for this hook...
> > > > > I'm not sure which of those would be easier.
> > > >
> > > > (For what it's worth, I think the first option would probably be
> > > > easier to implement and ship for now, since you can basically copy
> > > > what Smack and SELinux are already doing in their implementations of
> > > > these hooks. I think the second option would theoretically result in
> > > > nicer code, but it might require a bit more work, and you'd have to
> > > > include the maintainers of the file locking code in the review of such
> > > > refactoring and have them approve those changes. So if you want to get
> > > > this patchset into the kernel quickly, the first option might be
> > > > better for now?)
> > > >
> > >
> > > I agree, let's extend landlock_file_security with a new "fown" pointer
> > > to a Landlock domain. We'll need to call landlock_get_ruleset() in
> > > hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> > > hook_file_free_security().
> > I think we should add a new hook (hook_file_set_owner()) to initialize
> > the "fown" pointer and call landlock_get_ruleset() in that?
> 
> Yeah. Initialize the pointer in the file_set_fowner hook, and read the
> pointer in the file_send_sigiotask hook.
> 
> Note that in the file_set_fowner hook, you'll probably need to use
> both landlock_get_ruleset() (to take a reference on the ruleset you're
> storing in the fown pointer) and landlock_put_ruleset() (to drop the
> reference to the ruleset that the fown pointer was pointing to
> before). And you'll need to use some kind of lock to protect the fown
> pointer - either by adding an appropriate lock next to your fown
> pointer or by using some appropriate existing lock in "struct file".
> Probably it's cleanest to have your own lock for this? (This lock will
> have to be something like a spinlock, not a mutex, since you need to
> be able to acquire it in the file_set_fowner hook, which runs inside
> an RCU read-side critical section, where sleeping is forbidden -
> acquiring a mutex can sleep and therefore is forbidden in this
> context, acquiring a spinlock can't sleep.)

Yes, I think this should work for file_set_fowner:

struct landlock_ruleset *prev_dom, *new_dom;

new_dom = landlock_get_current_domain();
landlock_get_ruleset(new_dom);

/* Cf. f_modown() */
write_lock_irq(&filp->f_owner.lock);
prev_dom = rcu_replace_pointer(&landlock_file(file)->fown_domain,
	new_dom, lockdep_is_held(&filp->f_owner.lock));
write_unlock_irq(&filp->f_owner.lock);

landlock_put_ruleset_rcu(prev_dom);


With landlock_put_ruleset_rcu() define with this:

diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
index a93bdbf52fff..897116205520 100644
--- a/security/landlock/ruleset.c
+++ b/security/landlock/ruleset.c
@@ -524,6 +524,20 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
 	}
 }
 
+static void free_ruleset_rcu(struct rcu_head *const head)
+{
+	struct landlock_ruleset *ruleset;
+
+	ruleset = container_of(head, struct landlock_ruleset, rcu);
+	free_ruleset(ruleset);
+}
+
+void landlock_put_ruleset_rcu(struct landlock_ruleset *const ruleset)
+{
+	if (ruleset && refcount_dec_and_test(&ruleset->usage))
+		call_rcu(&ruleset->rcu, free_ruleset_rcu);
+}
+
 /**
  * landlock_merge_ruleset - Merge a ruleset with a domain
  *
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
index c749fa0b3ecd..c930b39174b0 100644
--- a/security/landlock/ruleset.h
+++ b/security/landlock/ruleset.h
@@ -190,19 +190,35 @@ struct landlock_ruleset {
 		 * @work_free: Enables to free a ruleset within a lockless
 		 * section.  This is only used by
 		 * landlock_put_ruleset_deferred() when @usage reaches zero.
-		 * The fields @lock, @usage, @num_rules, @num_layers and
+		 * The fields @rcu, @lock, @usage, @num_rules, @num_layers and
 		 * @access_masks are then unused.
 		 */
 		struct work_struct work_free;
 		struct {
-			/**
-			 * @lock: Protects against concurrent modifications of
-			 * @root, if @usage is greater than zero.
-			 */
-			struct mutex lock;
+			union {
+				/**
+				 * @rcu: Protects RCU read-side critical
+				 * sections.  This is only used by
+				 * landlock_put_ruleset_rcu() when @usage
+				 * reaches zero.
+				 *
+				 * Only used for domains.
+				 */
+				struct rcu_head rcu;
+				/**
+				 * @lock: Protects against concurrent
+				 * modifications of @root_inode and
+				 * @root_net_port, if @usage is greater than
+				 * zero.
+				 *
+				 * Only used for rulesets.
+				 */
+				struct mutex lock;
+			};
 			/**
 			 * @usage: Number of processes (i.e. domains) or file
-			 * descriptors referencing this ruleset.
+			 * descriptors referencing this ruleset.  It can be
+			 * zero in RCU read-side critical sections.
 			 */
 			refcount_t usage;
 			/**
@@ -241,6 +257,7 @@ landlock_create_ruleset(const access_mask_t access_mask_fs,
 
 void landlock_put_ruleset(struct landlock_ruleset *const ruleset);
 void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset);
+void landlock_put_ruleset_rcu(struct landlock_ruleset *const ruleset);
 
 int landlock_insert_rule(struct landlock_ruleset *const ruleset,
 			 const struct landlock_id id,
Jann Horn Aug. 8, 2024, 2:42 p.m. UTC | #9
On Thu, Aug 8, 2024 at 4:09 PM Mickaël Salaün <mic@digikod.net> wrote:
> On Thu, Aug 08, 2024 at 03:10:54AM +0200, Jann Horn wrote:
> > On Thu, Aug 8, 2024 at 1:36 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> > > > On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > > > > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > > > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > > Currently, a sandbox process is not restricted to send a signal
> > > > > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > > > > Ability to sending a signal for a sandboxed process should be
> > > > > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > > > > we extend "scoped" field in a ruleset with
> > > > > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > > > > sending any signal from within a sandbox process to its
> > > > > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > > > > [...]
> > > > > > > +       if (is_scoped)
> > > > > > > +               return 0;
> > > > > > > +
> > > > > > > +       return -EPERM;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > > > > +                                   struct fown_struct *fown, int signum)
> > > >
> > > > I was wondering if we should handle this case, but I guess it makes
> > > > sense to have a consistent policy for all kind of user-triggerable
> > > > signals.
> > > >
> > > > > > > +{
> > > > > > > +       bool is_scoped;
> > > > > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > > > > >
> > > > > > I'm not an expert on how the fowner stuff works, but I think this will
> > > > > > probably give you "result = NULL" if the file owner PID has already
> > > > > > exited, and then the following landlock_get_task_domain() would
> > > > > > probably crash? But I'm not entirely sure about how this works.
> > > > > >
> > > > > > I think the intended way to use this hook would be to instead use the
> > > > > > "file_set_fowner" hook to record the owning domain (though the setup
> > > > > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > > > > definitions of that hook. Or alternatively maybe it would be even
> > > > > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > > > > euid and then use the domain from those credentials for this hook...
> > > > > > I'm not sure which of those would be easier.
> > > > >
> > > > > (For what it's worth, I think the first option would probably be
> > > > > easier to implement and ship for now, since you can basically copy
> > > > > what Smack and SELinux are already doing in their implementations of
> > > > > these hooks. I think the second option would theoretically result in
> > > > > nicer code, but it might require a bit more work, and you'd have to
> > > > > include the maintainers of the file locking code in the review of such
> > > > > refactoring and have them approve those changes. So if you want to get
> > > > > this patchset into the kernel quickly, the first option might be
> > > > > better for now?)
> > > > >
> > > >
> > > > I agree, let's extend landlock_file_security with a new "fown" pointer
> > > > to a Landlock domain. We'll need to call landlock_get_ruleset() in
> > > > hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> > > > hook_file_free_security().
> > > I think we should add a new hook (hook_file_set_owner()) to initialize
> > > the "fown" pointer and call landlock_get_ruleset() in that?
> >
> > Yeah. Initialize the pointer in the file_set_fowner hook, and read the
> > pointer in the file_send_sigiotask hook.
> >
> > Note that in the file_set_fowner hook, you'll probably need to use
> > both landlock_get_ruleset() (to take a reference on the ruleset you're
> > storing in the fown pointer) and landlock_put_ruleset() (to drop the
> > reference to the ruleset that the fown pointer was pointing to
> > before). And you'll need to use some kind of lock to protect the fown
> > pointer - either by adding an appropriate lock next to your fown
> > pointer or by using some appropriate existing lock in "struct file".
> > Probably it's cleanest to have your own lock for this? (This lock will
> > have to be something like a spinlock, not a mutex, since you need to
> > be able to acquire it in the file_set_fowner hook, which runs inside
> > an RCU read-side critical section, where sleeping is forbidden -
> > acquiring a mutex can sleep and therefore is forbidden in this
> > context, acquiring a spinlock can't sleep.)
>
> Yes, I think this should work for file_set_fowner:
>
> struct landlock_ruleset *prev_dom, *new_dom;
>
> new_dom = landlock_get_current_domain();
> landlock_get_ruleset(new_dom);
>
> /* Cf. f_modown() */
> write_lock_irq(&filp->f_owner.lock);
> prev_dom = rcu_replace_pointer(&landlock_file(file)->fown_domain,
>         new_dom, lockdep_is_held(&filp->f_owner.lock));
> write_unlock_irq(&filp->f_owner.lock);
>
> landlock_put_ruleset_rcu(prev_dom);
>
>
> With landlock_put_ruleset_rcu() define with this:
>
> diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
> index a93bdbf52fff..897116205520 100644
> --- a/security/landlock/ruleset.c
> +++ b/security/landlock/ruleset.c
> @@ -524,6 +524,20 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
>         }
>  }
>
> +static void free_ruleset_rcu(struct rcu_head *const head)
> +{
> +       struct landlock_ruleset *ruleset;
> +
> +       ruleset = container_of(head, struct landlock_ruleset, rcu);
> +       free_ruleset(ruleset);
> +}

free_ruleset() can block but RCU callbacks aren't allowed to block,
that's why landlock_put_ruleset_deferred() exists.

> +
> +void landlock_put_ruleset_rcu(struct landlock_ruleset *const ruleset)
> +{
> +       if (ruleset && refcount_dec_and_test(&ruleset->usage))
> +               call_rcu(&ruleset->rcu, free_ruleset_rcu);
> +}

No, this pattern of combining refcounting and RCU doesn't work.

One legal pattern is:
*The entire object* is subject to RCU; any refcount decrement that
drops the refcount to 0 does call_rcu().
(This is the usual RCU refcounting pattern in the kernel.)

Another legal pattern is:
One particular *reference* is subject to RCU; when dropping this
reference, *the refcount decrement is delayed with call_rcu()*.
(This is basically the RCU pattern used for stuff like the reference
from "struct pid" to "struct task_struct".)

But you can't use call_rcu() depending on whether the last dropped
reference happened to be a reference that required RCU; what if the
refcount is 2, then you first call landlock_put_ruleset_rcu() which
decrements the refcount to 1, and immediately afterwards you call
landlock_put_ruleset() which drops the refcount to 0 and frees the
object without waiting for an RCU grace period? Like so:

thread A         thread B
========         ========
rcu_read_lock()
ruleset = rcu_dereference(...->fown_domain)
                 ruleset = rcu_replace_pointer(...->fown_domain, new_dom, ...)
                 landlock_put_ruleset_rcu(ruleset)
                 landlock_put_ruleset(ruleset)
                   free_ruleset(ruleset)
                     kfree(ruleset)
access ruleset [UAF]
rcu_read_unlock()

So if you want to use RCU lifetime for this, I think you'll have to
turn landlock_put_ruleset() and landlock_put_ruleset_deferred() into
one common function that always, when reaching refcount 0, schedules
an RCU callback which then schedules a work_struct which then does
free_ruleset().

I think that would be a little ugly, and it would look nicer to just
use normal locking in the file_send_sigiotask hook?
Mickaël Salaün Aug. 9, 2024, 10:59 a.m. UTC | #10
On Thu, Aug 08, 2024 at 04:42:23PM +0200, Jann Horn wrote:
> On Thu, Aug 8, 2024 at 4:09 PM Mickaël Salaün <mic@digikod.net> wrote:
> > On Thu, Aug 08, 2024 at 03:10:54AM +0200, Jann Horn wrote:
> > > On Thu, Aug 8, 2024 at 1:36 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> > > > > On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > > > > > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > > > > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > > > Currently, a sandbox process is not restricted to send a signal
> > > > > > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > > > > > Ability to sending a signal for a sandboxed process should be
> > > > > > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > > > > > we extend "scoped" field in a ruleset with
> > > > > > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > > > > > sending any signal from within a sandbox process to its
> > > > > > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > > > > > [...]
> > > > > > > > +       if (is_scoped)
> > > > > > > > +               return 0;
> > > > > > > > +
> > > > > > > > +       return -EPERM;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > > > > > +                                   struct fown_struct *fown, int signum)
> > > > >
> > > > > I was wondering if we should handle this case, but I guess it makes
> > > > > sense to have a consistent policy for all kind of user-triggerable
> > > > > signals.
> > > > >
> > > > > > > > +{
> > > > > > > > +       bool is_scoped;
> > > > > > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > > > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > > > > > >
> > > > > > > I'm not an expert on how the fowner stuff works, but I think this will
> > > > > > > probably give you "result = NULL" if the file owner PID has already
> > > > > > > exited, and then the following landlock_get_task_domain() would
> > > > > > > probably crash? But I'm not entirely sure about how this works.
> > > > > > >
> > > > > > > I think the intended way to use this hook would be to instead use the
> > > > > > > "file_set_fowner" hook to record the owning domain (though the setup
> > > > > > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > > > > > definitions of that hook. Or alternatively maybe it would be even
> > > > > > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > > > > > euid and then use the domain from those credentials for this hook...
> > > > > > > I'm not sure which of those would be easier.
> > > > > >
> > > > > > (For what it's worth, I think the first option would probably be
> > > > > > easier to implement and ship for now, since you can basically copy
> > > > > > what Smack and SELinux are already doing in their implementations of
> > > > > > these hooks. I think the second option would theoretically result in
> > > > > > nicer code, but it might require a bit more work, and you'd have to
> > > > > > include the maintainers of the file locking code in the review of such
> > > > > > refactoring and have them approve those changes. So if you want to get
> > > > > > this patchset into the kernel quickly, the first option might be
> > > > > > better for now?)
> > > > > >
> > > > >
> > > > > I agree, let's extend landlock_file_security with a new "fown" pointer
> > > > > to a Landlock domain. We'll need to call landlock_get_ruleset() in
> > > > > hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> > > > > hook_file_free_security().
> > > > I think we should add a new hook (hook_file_set_owner()) to initialize
> > > > the "fown" pointer and call landlock_get_ruleset() in that?
> > >
> > > Yeah. Initialize the pointer in the file_set_fowner hook, and read the
> > > pointer in the file_send_sigiotask hook.
> > >
> > > Note that in the file_set_fowner hook, you'll probably need to use
> > > both landlock_get_ruleset() (to take a reference on the ruleset you're
> > > storing in the fown pointer) and landlock_put_ruleset() (to drop the
> > > reference to the ruleset that the fown pointer was pointing to
> > > before). And you'll need to use some kind of lock to protect the fown
> > > pointer - either by adding an appropriate lock next to your fown
> > > pointer or by using some appropriate existing lock in "struct file".
> > > Probably it's cleanest to have your own lock for this? (This lock will
> > > have to be something like a spinlock, not a mutex, since you need to
> > > be able to acquire it in the file_set_fowner hook, which runs inside
> > > an RCU read-side critical section, where sleeping is forbidden -
> > > acquiring a mutex can sleep and therefore is forbidden in this
> > > context, acquiring a spinlock can't sleep.)
> >
> > Yes, I think this should work for file_set_fowner:
> >
> > struct landlock_ruleset *prev_dom, *new_dom;
> >
> > new_dom = landlock_get_current_domain();
> > landlock_get_ruleset(new_dom);
> >
> > /* Cf. f_modown() */
> > write_lock_irq(&filp->f_owner.lock);
> > prev_dom = rcu_replace_pointer(&landlock_file(file)->fown_domain,
> >         new_dom, lockdep_is_held(&filp->f_owner.lock));
> > write_unlock_irq(&filp->f_owner.lock);
> >
> > landlock_put_ruleset_rcu(prev_dom);
> >
> >
> > With landlock_put_ruleset_rcu() define with this:
> >
> > diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
> > index a93bdbf52fff..897116205520 100644
> > --- a/security/landlock/ruleset.c
> > +++ b/security/landlock/ruleset.c
> > @@ -524,6 +524,20 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
> >         }
> >  }
> >
> > +static void free_ruleset_rcu(struct rcu_head *const head)
> > +{
> > +       struct landlock_ruleset *ruleset;
> > +
> > +       ruleset = container_of(head, struct landlock_ruleset, rcu);
> > +       free_ruleset(ruleset);
> > +}
> 
> free_ruleset() can block but RCU callbacks aren't allowed to block,
> that's why landlock_put_ruleset_deferred() exists.

Yes, but landlock_put_ruleset_deferred() doesn't wait for RCU read-side
critical sections.

> 
> > +
> > +void landlock_put_ruleset_rcu(struct landlock_ruleset *const ruleset)
> > +{
> > +       if (ruleset && refcount_dec_and_test(&ruleset->usage))
> > +               call_rcu(&ruleset->rcu, free_ruleset_rcu);
> > +}
> 
> No, this pattern of combining refcounting and RCU doesn't work.
> 
> One legal pattern is:
> *The entire object* is subject to RCU; any refcount decrement that
> drops the refcount to 0 does call_rcu().
> (This is the usual RCU refcounting pattern in the kernel.)
> 
> Another legal pattern is:
> One particular *reference* is subject to RCU; when dropping this
> reference, *the refcount decrement is delayed with call_rcu()*.
> (This is basically the RCU pattern used for stuff like the reference
> from "struct pid" to "struct task_struct".)
> 
> But you can't use call_rcu() depending on whether the last dropped
> reference happened to be a reference that required RCU; what if the
> refcount is 2, then you first call landlock_put_ruleset_rcu() which
> decrements the refcount to 1, and immediately afterwards you call
> landlock_put_ruleset() which drops the refcount to 0 and frees the
> object without waiting for an RCU grace period? Like so:
> 
> thread A         thread B
> ========         ========
> rcu_read_lock()
> ruleset = rcu_dereference(...->fown_domain)
>                  ruleset = rcu_replace_pointer(...->fown_domain, new_dom, ...)
>                  landlock_put_ruleset_rcu(ruleset)
>                  landlock_put_ruleset(ruleset)
>                    free_ruleset(ruleset)
>                      kfree(ruleset)
> access ruleset [UAF]
> rcu_read_unlock()

Indeed

> 
> So if you want to use RCU lifetime for this, I think you'll have to
> turn landlock_put_ruleset() and landlock_put_ruleset_deferred() into
> one common function that always, when reaching refcount 0, schedules
> an RCU callback which then schedules a work_struct which then does
> free_ruleset().
> 
> I think that would be a little ugly, and it would look nicer to just
> use normal locking in the file_send_sigiotask hook?

I don't see how we can do that without delaying the free_ruleset() call
to after the RCU read-side critical section in f_setown().

What about calling refcount_dec_and_test() in free_ruleset_rcu()?  That
would almost always queue this call but it looks safe.

An alternative might be to call synchronize_rcu() in free_ruleset(), but
it's a big ugly too.

BTW, I don't understand why neither SELinux nor Smack use (explicit)
atomic operations nor lock.  And it looks weird that
security_file_set_fowner() isn't called by f_modown() with the same
locking to avoid races.
Mickaël Salaün Aug. 9, 2024, 12:40 p.m. UTC | #11
On Fri, Aug 09, 2024 at 12:59:48PM +0200, Mickaël Salaün wrote:
> On Thu, Aug 08, 2024 at 04:42:23PM +0200, Jann Horn wrote:
> > On Thu, Aug 8, 2024 at 4:09 PM Mickaël Salaün <mic@digikod.net> wrote:
> > > On Thu, Aug 08, 2024 at 03:10:54AM +0200, Jann Horn wrote:
> > > > On Thu, Aug 8, 2024 at 1:36 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> > > > > > On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > > > > > > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > > > > > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > > > > Currently, a sandbox process is not restricted to send a signal
> > > > > > > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > > > > > > Ability to sending a signal for a sandboxed process should be
> > > > > > > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > > > > > > we extend "scoped" field in a ruleset with
> > > > > > > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > > > > > > sending any signal from within a sandbox process to its
> > > > > > > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > > > > > > [...]
> > > > > > > > > +       if (is_scoped)
> > > > > > > > > +               return 0;
> > > > > > > > > +
> > > > > > > > > +       return -EPERM;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > > > > > > +                                   struct fown_struct *fown, int signum)
> > > > > >
> > > > > > I was wondering if we should handle this case, but I guess it makes
> > > > > > sense to have a consistent policy for all kind of user-triggerable
> > > > > > signals.
> > > > > >
> > > > > > > > > +{
> > > > > > > > > +       bool is_scoped;
> > > > > > > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > > > > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > > > > > > >
> > > > > > > > I'm not an expert on how the fowner stuff works, but I think this will
> > > > > > > > probably give you "result = NULL" if the file owner PID has already
> > > > > > > > exited, and then the following landlock_get_task_domain() would
> > > > > > > > probably crash? But I'm not entirely sure about how this works.
> > > > > > > >
> > > > > > > > I think the intended way to use this hook would be to instead use the
> > > > > > > > "file_set_fowner" hook to record the owning domain (though the setup
> > > > > > > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > > > > > > definitions of that hook. Or alternatively maybe it would be even
> > > > > > > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > > > > > > euid and then use the domain from those credentials for this hook...
> > > > > > > > I'm not sure which of those would be easier.
> > > > > > >
> > > > > > > (For what it's worth, I think the first option would probably be
> > > > > > > easier to implement and ship for now, since you can basically copy
> > > > > > > what Smack and SELinux are already doing in their implementations of
> > > > > > > these hooks. I think the second option would theoretically result in
> > > > > > > nicer code, but it might require a bit more work, and you'd have to
> > > > > > > include the maintainers of the file locking code in the review of such
> > > > > > > refactoring and have them approve those changes. So if you want to get
> > > > > > > this patchset into the kernel quickly, the first option might be
> > > > > > > better for now?)
> > > > > > >
> > > > > >
> > > > > > I agree, let's extend landlock_file_security with a new "fown" pointer
> > > > > > to a Landlock domain. We'll need to call landlock_get_ruleset() in
> > > > > > hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> > > > > > hook_file_free_security().
> > > > > I think we should add a new hook (hook_file_set_owner()) to initialize
> > > > > the "fown" pointer and call landlock_get_ruleset() in that?
> > > >
> > > > Yeah. Initialize the pointer in the file_set_fowner hook, and read the
> > > > pointer in the file_send_sigiotask hook.
> > > >
> > > > Note that in the file_set_fowner hook, you'll probably need to use
> > > > both landlock_get_ruleset() (to take a reference on the ruleset you're
> > > > storing in the fown pointer) and landlock_put_ruleset() (to drop the
> > > > reference to the ruleset that the fown pointer was pointing to
> > > > before). And you'll need to use some kind of lock to protect the fown
> > > > pointer - either by adding an appropriate lock next to your fown
> > > > pointer or by using some appropriate existing lock in "struct file".
> > > > Probably it's cleanest to have your own lock for this? (This lock will
> > > > have to be something like a spinlock, not a mutex, since you need to
> > > > be able to acquire it in the file_set_fowner hook, which runs inside
> > > > an RCU read-side critical section, where sleeping is forbidden -
> > > > acquiring a mutex can sleep and therefore is forbidden in this
> > > > context, acquiring a spinlock can't sleep.)
> > >
> > > Yes, I think this should work for file_set_fowner:
> > >
> > > struct landlock_ruleset *prev_dom, *new_dom;
> > >
> > > new_dom = landlock_get_current_domain();
> > > landlock_get_ruleset(new_dom);
> > >
> > > /* Cf. f_modown() */
> > > write_lock_irq(&filp->f_owner.lock);
> > > prev_dom = rcu_replace_pointer(&landlock_file(file)->fown_domain,
> > >         new_dom, lockdep_is_held(&filp->f_owner.lock));
> > > write_unlock_irq(&filp->f_owner.lock);
> > >
> > > landlock_put_ruleset_rcu(prev_dom);
> > >
> > >
> > > With landlock_put_ruleset_rcu() define with this:
> > >
> > > diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
> > > index a93bdbf52fff..897116205520 100644
> > > --- a/security/landlock/ruleset.c
> > > +++ b/security/landlock/ruleset.c
> > > @@ -524,6 +524,20 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
> > >         }
> > >  }
> > >
> > > +static void free_ruleset_rcu(struct rcu_head *const head)
> > > +{
> > > +       struct landlock_ruleset *ruleset;
> > > +
> > > +       ruleset = container_of(head, struct landlock_ruleset, rcu);
> > > +       free_ruleset(ruleset);
> > > +}
> > 
> > free_ruleset() can block but RCU callbacks aren't allowed to block,
> > that's why landlock_put_ruleset_deferred() exists.
> 
> Yes, but landlock_put_ruleset_deferred() doesn't wait for RCU read-side
> critical sections.
> 
> > 
> > > +
> > > +void landlock_put_ruleset_rcu(struct landlock_ruleset *const ruleset)
> > > +{
> > > +       if (ruleset && refcount_dec_and_test(&ruleset->usage))
> > > +               call_rcu(&ruleset->rcu, free_ruleset_rcu);
> > > +}
> > 
> > No, this pattern of combining refcounting and RCU doesn't work.
> > 
> > One legal pattern is:
> > *The entire object* is subject to RCU; any refcount decrement that
> > drops the refcount to 0 does call_rcu().
> > (This is the usual RCU refcounting pattern in the kernel.)
> > 
> > Another legal pattern is:
> > One particular *reference* is subject to RCU; when dropping this
> > reference, *the refcount decrement is delayed with call_rcu()*.
> > (This is basically the RCU pattern used for stuff like the reference
> > from "struct pid" to "struct task_struct".)
> > 
> > But you can't use call_rcu() depending on whether the last dropped
> > reference happened to be a reference that required RCU; what if the
> > refcount is 2, then you first call landlock_put_ruleset_rcu() which
> > decrements the refcount to 1, and immediately afterwards you call
> > landlock_put_ruleset() which drops the refcount to 0 and frees the
> > object without waiting for an RCU grace period? Like so:
> > 
> > thread A         thread B
> > ========         ========
> > rcu_read_lock()
> > ruleset = rcu_dereference(...->fown_domain)
> >                  ruleset = rcu_replace_pointer(...->fown_domain, new_dom, ...)
> >                  landlock_put_ruleset_rcu(ruleset)
> >                  landlock_put_ruleset(ruleset)
> >                    free_ruleset(ruleset)
> >                      kfree(ruleset)
> > access ruleset [UAF]
> > rcu_read_unlock()
> 
> Indeed
> 
> > 
> > So if you want to use RCU lifetime for this, I think you'll have to
> > turn landlock_put_ruleset() and landlock_put_ruleset_deferred() into
> > one common function that always, when reaching refcount 0, schedules
> > an RCU callback which then schedules a work_struct which then does
> > free_ruleset().
> > 
> > I think that would be a little ugly, and it would look nicer to just
> > use normal locking in the file_send_sigiotask hook?
> 
> I don't see how we can do that without delaying the free_ruleset() call
> to after the RCU read-side critical section in f_setown().
> 
> What about calling refcount_dec_and_test() in free_ruleset_rcu()?  That
> would almost always queue this call but it looks safe.

That's the second pattern you pointed out just above.  Because we should
only use the related struct rcu_head for this call site (which is
protected with f_owner.lock), we should make that explicit with a
"_fown" suffix for the function (landlock_put_ruleset_rcu_fown) and the
ruleset's field (rcu_fown).

> 
> An alternative might be to call synchronize_rcu() in free_ruleset(), but
> it's a big ugly too.
> 
> BTW, I don't understand why neither SELinux nor Smack use (explicit)
> atomic operations nor lock.  And it looks weird that
> security_file_set_fowner() isn't called by f_modown() with the same
> locking to avoid races.
Jann Horn Aug. 9, 2024, 12:44 p.m. UTC | #12
On Fri, Aug 9, 2024 at 12:59 PM Mickaël Salaün <mic@digikod.net> wrote:
> On Thu, Aug 08, 2024 at 04:42:23PM +0200, Jann Horn wrote:
> > On Thu, Aug 8, 2024 at 4:09 PM Mickaël Salaün <mic@digikod.net> wrote:
> > > On Thu, Aug 08, 2024 at 03:10:54AM +0200, Jann Horn wrote:
> > > > On Thu, Aug 8, 2024 at 1:36 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> > > > > > On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > > > > > > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > > > > > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > > > > Currently, a sandbox process is not restricted to send a signal
> > > > > > > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > > > > > > Ability to sending a signal for a sandboxed process should be
> > > > > > > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > > > > > > we extend "scoped" field in a ruleset with
> > > > > > > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > > > > > > sending any signal from within a sandbox process to its
> > > > > > > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > > > > > > [...]
> > > > > > > > > +       if (is_scoped)
> > > > > > > > > +               return 0;
> > > > > > > > > +
> > > > > > > > > +       return -EPERM;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > > > > > > +                                   struct fown_struct *fown, int signum)
> > > > > >
> > > > > > I was wondering if we should handle this case, but I guess it makes
> > > > > > sense to have a consistent policy for all kind of user-triggerable
> > > > > > signals.
> > > > > >
> > > > > > > > > +{
> > > > > > > > > +       bool is_scoped;
> > > > > > > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > > > > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > > > > > > >
> > > > > > > > I'm not an expert on how the fowner stuff works, but I think this will
> > > > > > > > probably give you "result = NULL" if the file owner PID has already
> > > > > > > > exited, and then the following landlock_get_task_domain() would
> > > > > > > > probably crash? But I'm not entirely sure about how this works.
> > > > > > > >
> > > > > > > > I think the intended way to use this hook would be to instead use the
> > > > > > > > "file_set_fowner" hook to record the owning domain (though the setup
> > > > > > > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > > > > > > definitions of that hook. Or alternatively maybe it would be even
> > > > > > > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > > > > > > euid and then use the domain from those credentials for this hook...
> > > > > > > > I'm not sure which of those would be easier.
> > > > > > >
> > > > > > > (For what it's worth, I think the first option would probably be
> > > > > > > easier to implement and ship for now, since you can basically copy
> > > > > > > what Smack and SELinux are already doing in their implementations of
> > > > > > > these hooks. I think the second option would theoretically result in
> > > > > > > nicer code, but it might require a bit more work, and you'd have to
> > > > > > > include the maintainers of the file locking code in the review of such
> > > > > > > refactoring and have them approve those changes. So if you want to get
> > > > > > > this patchset into the kernel quickly, the first option might be
> > > > > > > better for now?)
> > > > > > >
> > > > > >
> > > > > > I agree, let's extend landlock_file_security with a new "fown" pointer
> > > > > > to a Landlock domain. We'll need to call landlock_get_ruleset() in
> > > > > > hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> > > > > > hook_file_free_security().
> > > > > I think we should add a new hook (hook_file_set_owner()) to initialize
> > > > > the "fown" pointer and call landlock_get_ruleset() in that?
> > > >
> > > > Yeah. Initialize the pointer in the file_set_fowner hook, and read the
> > > > pointer in the file_send_sigiotask hook.
> > > >
> > > > Note that in the file_set_fowner hook, you'll probably need to use
> > > > both landlock_get_ruleset() (to take a reference on the ruleset you're
> > > > storing in the fown pointer) and landlock_put_ruleset() (to drop the
> > > > reference to the ruleset that the fown pointer was pointing to
> > > > before). And you'll need to use some kind of lock to protect the fown
> > > > pointer - either by adding an appropriate lock next to your fown
> > > > pointer or by using some appropriate existing lock in "struct file".
> > > > Probably it's cleanest to have your own lock for this? (This lock will
> > > > have to be something like a spinlock, not a mutex, since you need to
> > > > be able to acquire it in the file_set_fowner hook, which runs inside
> > > > an RCU read-side critical section, where sleeping is forbidden -
> > > > acquiring a mutex can sleep and therefore is forbidden in this
> > > > context, acquiring a spinlock can't sleep.)
> > >
> > > Yes, I think this should work for file_set_fowner:
> > >
> > > struct landlock_ruleset *prev_dom, *new_dom;
> > >
> > > new_dom = landlock_get_current_domain();
> > > landlock_get_ruleset(new_dom);
> > >
> > > /* Cf. f_modown() */
> > > write_lock_irq(&filp->f_owner.lock);
> > > prev_dom = rcu_replace_pointer(&landlock_file(file)->fown_domain,
> > >         new_dom, lockdep_is_held(&filp->f_owner.lock));
> > > write_unlock_irq(&filp->f_owner.lock);
> > >
> > > landlock_put_ruleset_rcu(prev_dom);
> > >
> > >
> > > With landlock_put_ruleset_rcu() define with this:
> > >
> > > diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
> > > index a93bdbf52fff..897116205520 100644
> > > --- a/security/landlock/ruleset.c
> > > +++ b/security/landlock/ruleset.c
> > > @@ -524,6 +524,20 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
> > >         }
> > >  }
> > >
> > > +static void free_ruleset_rcu(struct rcu_head *const head)
> > > +{
> > > +       struct landlock_ruleset *ruleset;
> > > +
> > > +       ruleset = container_of(head, struct landlock_ruleset, rcu);
> > > +       free_ruleset(ruleset);
> > > +}
> >
> > free_ruleset() can block but RCU callbacks aren't allowed to block,
> > that's why landlock_put_ruleset_deferred() exists.
>
> Yes, but landlock_put_ruleset_deferred() doesn't wait for RCU read-side
> critical sections.

Ah, I phrased that badly - I didn't mean to suggest that you should
use landlock_put_ruleset_deferred() as a replacement for call_rcu().

[...]
> > So if you want to use RCU lifetime for this, I think you'll have to
> > turn landlock_put_ruleset() and landlock_put_ruleset_deferred() into
> > one common function that always, when reaching refcount 0, schedules
> > an RCU callback which then schedules a work_struct which then does
> > free_ruleset().
> >
> > I think that would be a little ugly, and it would look nicer to just
> > use normal locking in the file_send_sigiotask hook?
>
> I don't see how we can do that without delaying the free_ruleset() call
> to after the RCU read-side critical section in f_setown().

It should work if you used landlock_put_ruleset_deferred() instead of
landlock_put_ruleset().

> What about calling refcount_dec_and_test() in free_ruleset_rcu()?  That
> would almost always queue this call but it looks safe.

Every queued RCU invocation needs to have its own rcu_head - I think
the approach you're suggesting could end up queuing the same rcu_head
multiple times?

> An alternative might be to call synchronize_rcu() in free_ruleset(), but
> it's a big ugly too.
>
> BTW, I don't understand why neither SELinux nor Smack use (explicit)
> atomic operations nor lock.

Yeah, I think they're sloppy and kinda wrong - but it sorta works in
practice mostly because they don't have to do any refcounting around
this?

> And it looks weird that
> security_file_set_fowner() isn't called by f_modown() with the same
> locking to avoid races.

True. I imagine maybe the thought behind this design could have been
that LSMs should have their own locking, and that calling an LSM hook
with IRQs off is a little weird? But the way the LSMs actually use the
hook now, it might make sense to call the LSM with the lock held and
IRQs off...
Mickaël Salaün Aug. 9, 2024, 12:45 p.m. UTC | #13
On Fri, Aug 09, 2024 at 02:40:17PM +0200, Mickaël Salaün wrote:
> On Fri, Aug 09, 2024 at 12:59:48PM +0200, Mickaël Salaün wrote:
> > On Thu, Aug 08, 2024 at 04:42:23PM +0200, Jann Horn wrote:
> > > On Thu, Aug 8, 2024 at 4:09 PM Mickaël Salaün <mic@digikod.net> wrote:
> > > > On Thu, Aug 08, 2024 at 03:10:54AM +0200, Jann Horn wrote:
> > > > > On Thu, Aug 8, 2024 at 1:36 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> > > > > > > On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > > > > > > > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > > > > > > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > > > > > Currently, a sandbox process is not restricted to send a signal
> > > > > > > > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > > > > > > > Ability to sending a signal for a sandboxed process should be
> > > > > > > > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > > > > > > > we extend "scoped" field in a ruleset with
> > > > > > > > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > > > > > > > sending any signal from within a sandbox process to its
> > > > > > > > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > > > > > > > [...]
> > > > > > > > > > +       if (is_scoped)
> > > > > > > > > > +               return 0;
> > > > > > > > > > +
> > > > > > > > > > +       return -EPERM;
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > > > > > > > +                                   struct fown_struct *fown, int signum)
> > > > > > >
> > > > > > > I was wondering if we should handle this case, but I guess it makes
> > > > > > > sense to have a consistent policy for all kind of user-triggerable
> > > > > > > signals.
> > > > > > >
> > > > > > > > > > +{
> > > > > > > > > > +       bool is_scoped;
> > > > > > > > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > > > > > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > > > > > > > >
> > > > > > > > > I'm not an expert on how the fowner stuff works, but I think this will
> > > > > > > > > probably give you "result = NULL" if the file owner PID has already
> > > > > > > > > exited, and then the following landlock_get_task_domain() would
> > > > > > > > > probably crash? But I'm not entirely sure about how this works.
> > > > > > > > >
> > > > > > > > > I think the intended way to use this hook would be to instead use the
> > > > > > > > > "file_set_fowner" hook to record the owning domain (though the setup
> > > > > > > > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > > > > > > > definitions of that hook. Or alternatively maybe it would be even
> > > > > > > > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > > > > > > > euid and then use the domain from those credentials for this hook...
> > > > > > > > > I'm not sure which of those would be easier.
> > > > > > > >
> > > > > > > > (For what it's worth, I think the first option would probably be
> > > > > > > > easier to implement and ship for now, since you can basically copy
> > > > > > > > what Smack and SELinux are already doing in their implementations of
> > > > > > > > these hooks. I think the second option would theoretically result in
> > > > > > > > nicer code, but it might require a bit more work, and you'd have to
> > > > > > > > include the maintainers of the file locking code in the review of such
> > > > > > > > refactoring and have them approve those changes. So if you want to get
> > > > > > > > this patchset into the kernel quickly, the first option might be
> > > > > > > > better for now?)
> > > > > > > >
> > > > > > >
> > > > > > > I agree, let's extend landlock_file_security with a new "fown" pointer
> > > > > > > to a Landlock domain. We'll need to call landlock_get_ruleset() in
> > > > > > > hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> > > > > > > hook_file_free_security().
> > > > > > I think we should add a new hook (hook_file_set_owner()) to initialize
> > > > > > the "fown" pointer and call landlock_get_ruleset() in that?
> > > > >
> > > > > Yeah. Initialize the pointer in the file_set_fowner hook, and read the
> > > > > pointer in the file_send_sigiotask hook.
> > > > >
> > > > > Note that in the file_set_fowner hook, you'll probably need to use
> > > > > both landlock_get_ruleset() (to take a reference on the ruleset you're
> > > > > storing in the fown pointer) and landlock_put_ruleset() (to drop the
> > > > > reference to the ruleset that the fown pointer was pointing to
> > > > > before). And you'll need to use some kind of lock to protect the fown
> > > > > pointer - either by adding an appropriate lock next to your fown
> > > > > pointer or by using some appropriate existing lock in "struct file".
> > > > > Probably it's cleanest to have your own lock for this? (This lock will
> > > > > have to be something like a spinlock, not a mutex, since you need to
> > > > > be able to acquire it in the file_set_fowner hook, which runs inside
> > > > > an RCU read-side critical section, where sleeping is forbidden -
> > > > > acquiring a mutex can sleep and therefore is forbidden in this
> > > > > context, acquiring a spinlock can't sleep.)
> > > >
> > > > Yes, I think this should work for file_set_fowner:
> > > >
> > > > struct landlock_ruleset *prev_dom, *new_dom;
> > > >
> > > > new_dom = landlock_get_current_domain();
> > > > landlock_get_ruleset(new_dom);
> > > >
> > > > /* Cf. f_modown() */
> > > > write_lock_irq(&filp->f_owner.lock);
> > > > prev_dom = rcu_replace_pointer(&landlock_file(file)->fown_domain,
> > > >         new_dom, lockdep_is_held(&filp->f_owner.lock));
> > > > write_unlock_irq(&filp->f_owner.lock);
> > > >
> > > > landlock_put_ruleset_rcu(prev_dom);
> > > >
> > > >
> > > > With landlock_put_ruleset_rcu() define with this:
> > > >
> > > > diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
> > > > index a93bdbf52fff..897116205520 100644
> > > > --- a/security/landlock/ruleset.c
> > > > +++ b/security/landlock/ruleset.c
> > > > @@ -524,6 +524,20 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
> > > >         }
> > > >  }
> > > >
> > > > +static void free_ruleset_rcu(struct rcu_head *const head)
> > > > +{
> > > > +       struct landlock_ruleset *ruleset;
> > > > +
> > > > +       ruleset = container_of(head, struct landlock_ruleset, rcu);
> > > > +       free_ruleset(ruleset);
> > > > +}
> > > 
> > > free_ruleset() can block but RCU callbacks aren't allowed to block,
> > > that's why landlock_put_ruleset_deferred() exists.
> > 
> > Yes, but landlock_put_ruleset_deferred() doesn't wait for RCU read-side
> > critical sections.
> > 
> > > 
> > > > +
> > > > +void landlock_put_ruleset_rcu(struct landlock_ruleset *const ruleset)
> > > > +{
> > > > +       if (ruleset && refcount_dec_and_test(&ruleset->usage))
> > > > +               call_rcu(&ruleset->rcu, free_ruleset_rcu);
> > > > +}
> > > 
> > > No, this pattern of combining refcounting and RCU doesn't work.
> > > 
> > > One legal pattern is:
> > > *The entire object* is subject to RCU; any refcount decrement that
> > > drops the refcount to 0 does call_rcu().
> > > (This is the usual RCU refcounting pattern in the kernel.)
> > > 
> > > Another legal pattern is:
> > > One particular *reference* is subject to RCU; when dropping this
> > > reference, *the refcount decrement is delayed with call_rcu()*.
> > > (This is basically the RCU pattern used for stuff like the reference
> > > from "struct pid" to "struct task_struct".)
> > > 
> > > But you can't use call_rcu() depending on whether the last dropped
> > > reference happened to be a reference that required RCU; what if the
> > > refcount is 2, then you first call landlock_put_ruleset_rcu() which
> > > decrements the refcount to 1, and immediately afterwards you call
> > > landlock_put_ruleset() which drops the refcount to 0 and frees the
> > > object without waiting for an RCU grace period? Like so:
> > > 
> > > thread A         thread B
> > > ========         ========
> > > rcu_read_lock()
> > > ruleset = rcu_dereference(...->fown_domain)
> > >                  ruleset = rcu_replace_pointer(...->fown_domain, new_dom, ...)
> > >                  landlock_put_ruleset_rcu(ruleset)
> > >                  landlock_put_ruleset(ruleset)
> > >                    free_ruleset(ruleset)
> > >                      kfree(ruleset)
> > > access ruleset [UAF]
> > > rcu_read_unlock()
> > 
> > Indeed
> > 
> > > 
> > > So if you want to use RCU lifetime for this, I think you'll have to
> > > turn landlock_put_ruleset() and landlock_put_ruleset_deferred() into
> > > one common function that always, when reaching refcount 0, schedules
> > > an RCU callback which then schedules a work_struct which then does
> > > free_ruleset().
> > > 
> > > I think that would be a little ugly, and it would look nicer to just
> > > use normal locking in the file_send_sigiotask hook?
> > 
> > I don't see how we can do that without delaying the free_ruleset() call
> > to after the RCU read-side critical section in f_setown().
> > 
> > What about calling refcount_dec_and_test() in free_ruleset_rcu()?  That
> > would almost always queue this call but it looks safe.
> 
> That's the second pattern you pointed out just above.  Because we should
> only use the related struct rcu_head for this call site (which is
> protected with f_owner.lock), we should make that explicit with a
> "_fown" suffix for the function (landlock_put_ruleset_rcu_fown) and the
> ruleset's field (rcu_fown).

Hmm, this is not enough because this function site could still be called
with the same ruleset several time, which could lead to memory leak and
potential rcu_head pointers inconsistencies...

> 
> > 
> > An alternative might be to call synchronize_rcu() in free_ruleset(), but
> > it's a big ugly too.
> > 
> > BTW, I don't understand why neither SELinux nor Smack use (explicit)
> > atomic operations nor lock.  And it looks weird that
> > security_file_set_fowner() isn't called by f_modown() with the same
> > locking to avoid races.
Mickaël Salaün Aug. 9, 2024, 1:37 p.m. UTC | #14
On Fri, Aug 09, 2024 at 02:44:06PM +0200, Jann Horn wrote:
> On Fri, Aug 9, 2024 at 12:59 PM Mickaël Salaün <mic@digikod.net> wrote:
> > On Thu, Aug 08, 2024 at 04:42:23PM +0200, Jann Horn wrote:
> > > On Thu, Aug 8, 2024 at 4:09 PM Mickaël Salaün <mic@digikod.net> wrote:
> > > > On Thu, Aug 08, 2024 at 03:10:54AM +0200, Jann Horn wrote:
> > > > > On Thu, Aug 8, 2024 at 1:36 AM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > On Wed, Aug 07, 2024 at 08:16:47PM +0200, Mickaël Salaün wrote:
> > > > > > > On Tue, Aug 06, 2024 at 11:55:27PM +0200, Jann Horn wrote:
> > > > > > > > On Tue, Aug 6, 2024 at 8:56 PM Jann Horn <jannh@google.com> wrote:
> > > > > > > > > On Tue, Aug 6, 2024 at 8:11 PM Tahera Fahimi <fahimitahera@gmail.com> wrote:
> > > > > > > > > > Currently, a sandbox process is not restricted to send a signal
> > > > > > > > > > (e.g. SIGKILL) to a process outside of the sandbox environment.
> > > > > > > > > > Ability to sending a signal for a sandboxed process should be
> > > > > > > > > > scoped the same way abstract unix sockets are scoped. Therefore,
> > > > > > > > > > we extend "scoped" field in a ruleset with
> > > > > > > > > > "LANDLOCK_SCOPED_SIGNAL" to specify that a ruleset will deny
> > > > > > > > > > sending any signal from within a sandbox process to its
> > > > > > > > > > parent(i.e. any parent sandbox or non-sandboxed procsses).
> > > > > > > > [...]
> > > > > > > > > > +       if (is_scoped)
> > > > > > > > > > +               return 0;
> > > > > > > > > > +
> > > > > > > > > > +       return -EPERM;
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > > +static int hook_file_send_sigiotask(struct task_struct *tsk,
> > > > > > > > > > +                                   struct fown_struct *fown, int signum)
> > > > > > >
> > > > > > > I was wondering if we should handle this case, but I guess it makes
> > > > > > > sense to have a consistent policy for all kind of user-triggerable
> > > > > > > signals.
> > > > > > >
> > > > > > > > > > +{
> > > > > > > > > > +       bool is_scoped;
> > > > > > > > > > +       const struct landlock_ruleset *dom, *target_dom;
> > > > > > > > > > +       struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
> > > > > > > > >
> > > > > > > > > I'm not an expert on how the fowner stuff works, but I think this will
> > > > > > > > > probably give you "result = NULL" if the file owner PID has already
> > > > > > > > > exited, and then the following landlock_get_task_domain() would
> > > > > > > > > probably crash? But I'm not entirely sure about how this works.
> > > > > > > > >
> > > > > > > > > I think the intended way to use this hook would be to instead use the
> > > > > > > > > "file_set_fowner" hook to record the owning domain (though the setup
> > > > > > > > > for that is going to be kind of a pain...), see the Smack and SELinux
> > > > > > > > > definitions of that hook. Or alternatively maybe it would be even
> > > > > > > > > nicer to change the fown_struct to record a cred* instead of a uid and
> > > > > > > > > euid and then use the domain from those credentials for this hook...
> > > > > > > > > I'm not sure which of those would be easier.
> > > > > > > >
> > > > > > > > (For what it's worth, I think the first option would probably be
> > > > > > > > easier to implement and ship for now, since you can basically copy
> > > > > > > > what Smack and SELinux are already doing in their implementations of
> > > > > > > > these hooks. I think the second option would theoretically result in
> > > > > > > > nicer code, but it might require a bit more work, and you'd have to
> > > > > > > > include the maintainers of the file locking code in the review of such
> > > > > > > > refactoring and have them approve those changes. So if you want to get
> > > > > > > > this patchset into the kernel quickly, the first option might be
> > > > > > > > better for now?)
> > > > > > > >
> > > > > > >
> > > > > > > I agree, let's extend landlock_file_security with a new "fown" pointer
> > > > > > > to a Landlock domain. We'll need to call landlock_get_ruleset() in
> > > > > > > hook_file_send_sigiotask(), and landlock_put_ruleset() in a new
> > > > > > > hook_file_free_security().
> > > > > > I think we should add a new hook (hook_file_set_owner()) to initialize
> > > > > > the "fown" pointer and call landlock_get_ruleset() in that?
> > > > >
> > > > > Yeah. Initialize the pointer in the file_set_fowner hook, and read the
> > > > > pointer in the file_send_sigiotask hook.
> > > > >
> > > > > Note that in the file_set_fowner hook, you'll probably need to use
> > > > > both landlock_get_ruleset() (to take a reference on the ruleset you're
> > > > > storing in the fown pointer) and landlock_put_ruleset() (to drop the
> > > > > reference to the ruleset that the fown pointer was pointing to
> > > > > before). And you'll need to use some kind of lock to protect the fown
> > > > > pointer - either by adding an appropriate lock next to your fown
> > > > > pointer or by using some appropriate existing lock in "struct file".
> > > > > Probably it's cleanest to have your own lock for this? (This lock will
> > > > > have to be something like a spinlock, not a mutex, since you need to
> > > > > be able to acquire it in the file_set_fowner hook, which runs inside
> > > > > an RCU read-side critical section, where sleeping is forbidden -
> > > > > acquiring a mutex can sleep and therefore is forbidden in this
> > > > > context, acquiring a spinlock can't sleep.)
> > > >
> > > > Yes, I think this should work for file_set_fowner:
> > > >
> > > > struct landlock_ruleset *prev_dom, *new_dom;
> > > >
> > > > new_dom = landlock_get_current_domain();
> > > > landlock_get_ruleset(new_dom);
> > > >
> > > > /* Cf. f_modown() */
> > > > write_lock_irq(&filp->f_owner.lock);
> > > > prev_dom = rcu_replace_pointer(&landlock_file(file)->fown_domain,
> > > >         new_dom, lockdep_is_held(&filp->f_owner.lock));
> > > > write_unlock_irq(&filp->f_owner.lock);
> > > >
> > > > landlock_put_ruleset_rcu(prev_dom);
> > > >
> > > >
> > > > With landlock_put_ruleset_rcu() define with this:
> > > >
> > > > diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
> > > > index a93bdbf52fff..897116205520 100644
> > > > --- a/security/landlock/ruleset.c
> > > > +++ b/security/landlock/ruleset.c
> > > > @@ -524,6 +524,20 @@ void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
> > > >         }
> > > >  }
> > > >
> > > > +static void free_ruleset_rcu(struct rcu_head *const head)
> > > > +{
> > > > +       struct landlock_ruleset *ruleset;
> > > > +
> > > > +       ruleset = container_of(head, struct landlock_ruleset, rcu);
> > > > +       free_ruleset(ruleset);
> > > > +}
> > >
> > > free_ruleset() can block but RCU callbacks aren't allowed to block,
> > > that's why landlock_put_ruleset_deferred() exists.
> >
> > Yes, but landlock_put_ruleset_deferred() doesn't wait for RCU read-side
> > critical sections.
> 
> Ah, I phrased that badly - I didn't mean to suggest that you should
> use landlock_put_ruleset_deferred() as a replacement for call_rcu().
> 
> [...]
> > > So if you want to use RCU lifetime for this, I think you'll have to
> > > turn landlock_put_ruleset() and landlock_put_ruleset_deferred() into
> > > one common function that always, when reaching refcount 0, schedules
> > > an RCU callback which then schedules a work_struct which then does
> > > free_ruleset().
> > >
> > > I think that would be a little ugly, and it would look nicer to just
> > > use normal locking in the file_send_sigiotask hook?
> >
> > I don't see how we can do that without delaying the free_ruleset() call
> > to after the RCU read-side critical section in f_setown().
> 
> It should work if you used landlock_put_ruleset_deferred() instead of
> landlock_put_ruleset().

Calling landlock_put_ruleset_deferred() in hook_file_set_fowner() or
replacing all landlock_put_ruleset() calls?

The deferred work queue is not guarantee to run after all concurrent RCU
read-side critical sections right?  Calling synchronize_rcu() in
free_ruleset_work() should give this guarantee, but it's not nice.  We
could add a boolean in landlock_ruleset to only call synchronize_rcu()
when required (i.e. called from file_set_fowner).

> 
> > What about calling refcount_dec_and_test() in free_ruleset_rcu()?  That
> > would almost always queue this call but it looks safe.
> 
> Every queued RCU invocation needs to have its own rcu_head - I think
> the approach you're suggesting could end up queuing the same rcu_head
> multiple times?

Right

> 
> > An alternative might be to call synchronize_rcu() in free_ruleset(), but
> > it's a big ugly too.
Jann Horn Aug. 9, 2024, 1:57 p.m. UTC | #15
On Fri, Aug 9, 2024 at 3:37 PM Mickaël Salaün <mic@digikod.net> wrote:
> On Fri, Aug 09, 2024 at 02:44:06PM +0200, Jann Horn wrote:
> > On Fri, Aug 9, 2024 at 12:59 PM Mickaël Salaün <mic@digikod.net> wrote:
> > > On Thu, Aug 08, 2024 at 04:42:23PM +0200, Jann Horn wrote:
[...]
> > > > So if you want to use RCU lifetime for this, I think you'll have to
> > > > turn landlock_put_ruleset() and landlock_put_ruleset_deferred() into
> > > > one common function that always, when reaching refcount 0, schedules
> > > > an RCU callback which then schedules a work_struct which then does
> > > > free_ruleset().
> > > >
> > > > I think that would be a little ugly, and it would look nicer to just
> > > > use normal locking in the file_send_sigiotask hook?
> > >
> > > I don't see how we can do that without delaying the free_ruleset() call
> > > to after the RCU read-side critical section in f_setown().
> >
> > It should work if you used landlock_put_ruleset_deferred() instead of
> > landlock_put_ruleset().
>
> Calling landlock_put_ruleset_deferred() in hook_file_set_fowner() or
> replacing all landlock_put_ruleset() calls?

Calling landlock_put_ruleset_deferred() in hook_file_set_fowner().

> The deferred work queue is not guarantee to run after all concurrent RCU
> read-side critical sections right?

Yes, I was talking about my "it would look nicer to just use normal
locking in the file_send_sigiotask hook" suggestion - don't use any
RCU stuff, just use the same lock in file_set_fowner and
file_send_sigiotask.
diff mbox series

Patch

diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h
index ab31e9f53e55..a65fdb507d39 100644
--- a/include/uapi/linux/landlock.h
+++ b/include/uapi/linux/landlock.h
@@ -292,8 +292,11 @@  struct landlock_net_port_attr {
  *   from connecting to an abstract unix socket created by a process
  *   outside the related Landlock domain (e.g. a parent domain or a
  *   non-sandboxed process).
+ * - %LANDLOCK_SCOPED_SIGNAL: Restrict a sandboxed process from sending a signal
+ *   to another process outside sandbox domain.
  */
 /* clang-format off */
 #define LANDLOCK_SCOPED_ABSTRACT_UNIX_SOCKET		(1ULL << 0)
+#define LANDLOCK_SCOPED_SIGNAL		                (1ULL << 1)
 /* clang-format on*/
 #endif /* _UAPI_LINUX_LANDLOCK_H */
diff --git a/security/landlock/limits.h b/security/landlock/limits.h
index eb01d0fb2165..fa28f9236407 100644
--- a/security/landlock/limits.h
+++ b/security/landlock/limits.h
@@ -26,7 +26,7 @@ 
 #define LANDLOCK_MASK_ACCESS_NET	((LANDLOCK_LAST_ACCESS_NET << 1) - 1)
 #define LANDLOCK_NUM_ACCESS_NET		__const_hweight64(LANDLOCK_MASK_ACCESS_NET)
 
-#define LANDLOCK_LAST_SCOPE		LANDLOCK_SCOPED_ABSTRACT_UNIX_SOCKET
+#define LANDLOCK_LAST_SCOPE		LANDLOCK_SCOPED_SIGNAL
 #define LANDLOCK_MASK_SCOPE		((LANDLOCK_LAST_SCOPE << 1) - 1)
 #define LANDLOCK_NUM_SCOPE		__const_hweight64(LANDLOCK_MASK_SCOPE)
 /* clang-format on */
diff --git a/security/landlock/task.c b/security/landlock/task.c
index 7e8579ebae83..a73cff27bb91 100644
--- a/security/landlock/task.c
+++ b/security/landlock/task.c
@@ -261,11 +261,54 @@  static int hook_unix_may_send(struct socket *const sock,
 	return -EPERM;
 }
 
+static int hook_task_kill(struct task_struct *const p,
+			  struct kernel_siginfo *const info, const int sig,
+			  const struct cred *const cred)
+{
+	bool is_scoped;
+	const struct landlock_ruleset *target_dom;
+
+	/* rcu is already locked */
+	target_dom = landlock_get_task_domain(p);
+	if (cred)
+		/* dealing with USB IO */
+		is_scoped = domain_IPC_scope(landlock_cred(cred)->domain,
+					     target_dom,
+					     LANDLOCK_SCOPED_SIGNAL);
+	else
+		is_scoped = domain_IPC_scope(landlock_get_current_domain(),
+					     target_dom,
+					     LANDLOCK_SCOPED_SIGNAL);
+	if (is_scoped)
+		return 0;
+
+	return -EPERM;
+}
+
+static int hook_file_send_sigiotask(struct task_struct *tsk,
+				    struct fown_struct *fown, int signum)
+{
+	bool is_scoped;
+	const struct landlock_ruleset *dom, *target_dom;
+	struct task_struct *result = get_pid_task(fown->pid, fown->pid_type);
+
+	/* rcu is already locked! */
+	dom = landlock_get_task_domain(result);
+	target_dom = landlock_get_task_domain(tsk);
+	is_scoped = domain_IPC_scope(dom, target_dom, LANDLOCK_SCOPED_SIGNAL);
+	put_task_struct(result);
+	if (is_scoped)
+		return 0;
+	return -EPERM;
+}
+
 static struct security_hook_list landlock_hooks[] __ro_after_init = {
 	LSM_HOOK_INIT(ptrace_access_check, hook_ptrace_access_check),
 	LSM_HOOK_INIT(ptrace_traceme, hook_ptrace_traceme),
 	LSM_HOOK_INIT(unix_stream_connect, hook_unix_stream_connect),
 	LSM_HOOK_INIT(unix_may_send, hook_unix_may_send),
+	LSM_HOOK_INIT(task_kill, hook_task_kill),
+	LSM_HOOK_INIT(file_send_sigiotask, hook_file_send_sigiotask),
 };
 
 __init void landlock_add_task_hooks(void)