diff mbox series

[v4] proc: add config & param to block forcing mem writes

Message ID 20240730132528.1143520-1-adrian.ratiu@collabora.com (mailing list archive)
State New
Headers show
Series [v4] proc: add config & param to block forcing mem writes | expand

Commit Message

Adrian Ratiu July 30, 2024, 1:25 p.m. UTC
This adds a Kconfig option and boot param to allow removing
the FOLL_FORCE flag from /proc/pid/mem write calls because
it can be abused.

The traditional forcing behavior is kept as default because
it can break GDB and some other use cases.

Previously we tried a more sophisticated approach allowing
distributions to fine-tune /proc/pid/mem behavior, however
that got NAK-ed by Linus [1], who prefers this simpler
approach with semantics also easier to understand for users.

Link: https://lore.kernel.org/lkml/CAHk-=wiGWLChxYmUA5HrT5aopZrB7_2VTa0NLZcxORgkUe5tEQ@mail.gmail.com/ [1]
Cc: Doug Anderson <dianders@chromium.org>
Cc: Jeff Xu <jeffxu@google.com>
Cc: Jann Horn <jannh@google.com>
Cc: Kees Cook <kees@kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Christian Brauner <brauner@kernel.org>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Adrian Ratiu <adrian.ratiu@collabora.com>
---
Changes in v4:
* Fixed doc punctuation, used passive tense, improved
  wording consistency, fixed default value wording
* Made struct constant_table a static const __initconst
* Reworked proc_mem_foll_force() indentation and var
  declarations to make code clearer
* Reworked enum + struct definition so lookup_constant()
  defaults to 'always'.

Changes in v3:
* Simplified code to use shorthand ifs and a
  lookup_constant() table

Changes in v2:
* Added bootparam on top of Linus' patch
* Slightly reworded commit msg
---
 .../admin-guide/kernel-parameters.txt         | 10 ++++
 fs/proc/base.c                                | 54 ++++++++++++++++++-
 security/Kconfig                              | 32 +++++++++++
 3 files changed, 95 insertions(+), 1 deletion(-)

Comments

Jeff Xu July 30, 2024, 11:08 p.m. UTC | #1
On Tue, Jul 30, 2024 at 6:25 AM Adrian Ratiu <adrian.ratiu@collabora.com> wrote:
>
> This adds a Kconfig option and boot param to allow removing
> the FOLL_FORCE flag from /proc/pid/mem write calls because
> it can be abused.
>
> The traditional forcing behavior is kept as default because
> it can break GDB and some other use cases.
>
> Previously we tried a more sophisticated approach allowing
> distributions to fine-tune /proc/pid/mem behavior, however
> that got NAK-ed by Linus [1], who prefers this simpler
> approach with semantics also easier to understand for users.
>
> Link: https://lore.kernel.org/lkml/CAHk-=wiGWLChxYmUA5HrT5aopZrB7_2VTa0NLZcxORgkUe5tEQ@mail.gmail.com/ [1]
> Cc: Doug Anderson <dianders@chromium.org>
> Cc: Jeff Xu <jeffxu@google.com>
> Cc: Jann Horn <jannh@google.com>
> Cc: Kees Cook <kees@kernel.org>
> Cc: Ard Biesheuvel <ardb@kernel.org>
> Cc: Christian Brauner <brauner@kernel.org>
> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
> Signed-off-by: Adrian Ratiu <adrian.ratiu@collabora.com>
> ---
> Changes in v4:
> * Fixed doc punctuation, used passive tense, improved
>   wording consistency, fixed default value wording
> * Made struct constant_table a static const __initconst
> * Reworked proc_mem_foll_force() indentation and var
>   declarations to make code clearer
> * Reworked enum + struct definition so lookup_constant()
>   defaults to 'always'.
>
> Changes in v3:
> * Simplified code to use shorthand ifs and a
>   lookup_constant() table
>
> Changes in v2:
> * Added bootparam on top of Linus' patch
> * Slightly reworded commit msg
> ---
>  .../admin-guide/kernel-parameters.txt         | 10 ++++
>  fs/proc/base.c                                | 54 ++++++++++++++++++-
>  security/Kconfig                              | 32 +++++++++++
>  3 files changed, 95 insertions(+), 1 deletion(-)
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index f1384c7b59c9..8396e015aab3 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -4788,6 +4788,16 @@
>         printk.time=    Show timing data prefixed to each printk message line
>                         Format: <bool>  (1/Y/y=enable, 0/N/n=disable)
>
> +       proc_mem.force_override= [KNL]
> +                       Format: {always | ptrace | never}
> +                       Traditionally /proc/pid/mem allows memory permissions to be
> +                       overridden without restrictions. This option may be set to
> +                       restrict that. Can be one of:
> +                       - 'always': traditional behavior always allows mem overrides.
> +                       - 'ptrace': only allow mem overrides for active ptracers.
> +                       - 'never':  never allow mem overrides.
> +                       If not specified, default is the CONFIG_PROC_MEM_* choice.
> +
>         processor.max_cstate=   [HW,ACPI]
>                         Limit processor to maximum C-state
>                         max_cstate=9 overrides any DMI blacklist limit.
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index 72a1acd03675..daacb8070042 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -85,6 +85,7 @@
>  #include <linux/elf.h>
>  #include <linux/pid_namespace.h>
>  #include <linux/user_namespace.h>
> +#include <linux/fs_parser.h>
>  #include <linux/fs_struct.h>
>  #include <linux/slab.h>
>  #include <linux/sched/autogroup.h>
> @@ -117,6 +118,35 @@
>  static u8 nlink_tid __ro_after_init;
>  static u8 nlink_tgid __ro_after_init;
>
> +enum proc_mem_force {
> +       PROC_MEM_FORCE_ALWAYS,
> +       PROC_MEM_FORCE_PTRACE,
> +       PROC_MEM_FORCE_NEVER
> +};
> +
> +static enum proc_mem_force proc_mem_force_override __ro_after_init =
> +       IS_ENABLED(CONFIG_PROC_MEM_NO_FORCE) ? PROC_MEM_FORCE_NEVER :
> +       IS_ENABLED(CONFIG_PROC_MEM_FORCE_PTRACE) ? PROC_MEM_FORCE_PTRACE :
> +       PROC_MEM_FORCE_ALWAYS;
> +
> +static const struct constant_table proc_mem_force_table[] __initconst = {
> +       { "never", PROC_MEM_FORCE_NEVER },
> +       { "ptrace", PROC_MEM_FORCE_PTRACE },
> +       { }
> +};
> +
> +static int __init early_proc_mem_force_override(char *buf)
> +{
> +       if (!buf)
> +               return -EINVAL;
> +
> +       proc_mem_force_override = lookup_constant(proc_mem_force_table,
> +                                                 buf, PROC_MEM_FORCE_ALWAYS);
proc_mem_force_table has two entries, this means:
if kernel cmdline has proc_mem.force_override="invalid",
    PROC_MEM_FORCE_ALWAYS will be used.

Another option is to have 3 entries in proc_mem_force_table: adding
{"aways", PROC_MEM_FORCE_ALWAYS}

and let lookup_constant return -1 when not found, and not override
proc_mem_force_override.

This enforces the kernel cmd line must be set to one of three choices
"always|ptrace|never" to be effective.

If you choose this path: please modify kernel-parameters.txt to
"If not specified or invalid, default is the CONFIG_PROC_MEM_* choice."

or else please clarify in the kernel-parameters.text:
If not specified, default is the CONFIG_PROC_MEM_* choice
If invalid str or empty string, PROC_MEM_FORCE_ALWAYS will be used
regardless CONFIG_PROC_MEM_* choice

> +
> +       return 0;
> +}
> +early_param("proc_mem.force_override", early_proc_mem_force_override);
> +
>  struct pid_entry {
>         const char *name;
>         unsigned int len;
> @@ -835,6 +865,26 @@ static int mem_open(struct inode *inode, struct file *file)
>         return ret;
>  }
>
> +static bool proc_mem_foll_force(struct file *file, struct mm_struct *mm)
> +{
> +       struct task_struct *task;
> +       bool ptrace_active = false;
> +
> +       switch (proc_mem_force_override) {
> +       case PROC_MEM_FORCE_NEVER:
> +               return false;
> +       case PROC_MEM_FORCE_PTRACE:
> +               task = get_proc_task(file_inode(file));
> +               if (task) {
> +                       ptrace_active = task->ptrace && task->mm == mm && task->parent == current;
Do we need to call "read_lock(&tasklist_lock);" ?
see comments in ptrace_check_attach() of  kernel/ptrace.c



> +                       put_task_struct(task);
> +               }
> +               return ptrace_active;
> +       default:
> +               return true;
> +       }
> +}
> +
>  static ssize_t mem_rw(struct file *file, char __user *buf,
>                         size_t count, loff_t *ppos, int write)
>  {
> @@ -855,7 +905,9 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
>         if (!mmget_not_zero(mm))
>                 goto free;
>
> -       flags = FOLL_FORCE | (write ? FOLL_WRITE : 0);
> +       flags = write ? FOLL_WRITE : 0;
> +       if (proc_mem_foll_force(file, mm))
> +               flags |= FOLL_FORCE;
>
>         while (count > 0) {
>                 size_t this_len = min_t(size_t, count, PAGE_SIZE);
> diff --git a/security/Kconfig b/security/Kconfig
> index 412e76f1575d..a93c1a9b7c28 100644
> --- a/security/Kconfig
> +++ b/security/Kconfig
> @@ -19,6 +19,38 @@ config SECURITY_DMESG_RESTRICT
>
>           If you are unsure how to answer this question, answer N.
>
> +choice
> +       prompt "Allow /proc/pid/mem access override"
> +       default PROC_MEM_ALWAYS_FORCE
> +       help
> +         Traditionally /proc/pid/mem allows users to override memory
> +         permissions for users like ptrace, assuming they have ptrace
> +         capability.
> +
> +         This allows people to limit that - either never override, or
> +         require actual active ptrace attachment.
> +
> +         Defaults to the traditional behavior (for now)
> +
> +config PROC_MEM_ALWAYS_FORCE
> +       bool "Traditional /proc/pid/mem behavior"
> +       help
> +         This allows /proc/pid/mem accesses to override memory mapping
> +         permissions if you have ptrace access rights.
> +
> +config PROC_MEM_FORCE_PTRACE
> +       bool "Require active ptrace() use for access override"
> +       help
> +         This allows /proc/pid/mem accesses to override memory mapping
> +         permissions for active ptracers like gdb.
> +
> +config PROC_MEM_NO_FORCE
> +       bool "Never"
> +       help
> +         Never override memory mapping permissions
> +
> +endchoice
> +
>  config SECURITY
>         bool "Enable different security models"
>         depends on SYSFS
> --
> 2.44.2
>
Linus Torvalds July 30, 2024, 11:18 p.m. UTC | #2
On Tue, 30 Jul 2024 at 16:09, Jeff Xu <jeffxu@google.com> wrote:
>
> > +               task = get_proc_task(file_inode(file));
> > +               if (task) {
> > +                       ptrace_active = task->ptrace && task->mm == mm && task->parent == current;
>
> Do we need to call "read_lock(&tasklist_lock);" ?
> see comments in ptrace_check_attach() of  kernel/ptrace.c

Well, technically I guess the tasklist_lock should be taken.

Practically speaking, maybe just using READ_ONCE() for these fields
would really be sufficient.

Yes, it could "race" with the task exiting or just detaching, but the
logic would basically be "at one point we were tracing it", and since
this fundamentally a "one-point" situation (with the actual _accesses_
happening later anyway), logically that should be sufficient.

I mean - none of this is about "permissions" per se. We actually did
the proper *permission* check at open() time regardless of all this
code. This is more of a further tightening of the rules (ie it has
gone from "are we allowed to ptrace" to "are we actually actively
ptracing".

I suspect that the main difference between the two situations is
probably (a) one extra step required and (b) whatever extra system
call security things people might have which may disable an actual
ptrace() or whatever..

              Linus
Adrian Ratiu July 31, 2024, 1:15 p.m. UTC | #3
On Wednesday, July 31, 2024 02:18 EEST, Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Tue, 30 Jul 2024 at 16:09, Jeff Xu <jeffxu@google.com> wrote:
> >
> > > +               task = get_proc_task(file_inode(file));
> > > +               if (task) {
> > > +                       ptrace_active = task->ptrace && task->mm == mm && task->parent == current;
> >
> > Do we need to call "read_lock(&tasklist_lock);" ?
> > see comments in ptrace_check_attach() of  kernel/ptrace.c
> 
> Well, technically I guess the tasklist_lock should be taken.
> 
> Practically speaking, maybe just using READ_ONCE() for these fields
> would really be sufficient.
> 
> Yes, it could "race" with the task exiting or just detaching, but the
> logic would basically be "at one point we were tracing it", and since
> this fundamentally a "one-point" situation (with the actual _accesses_
> happening later anyway), logically that should be sufficient.
> 
> I mean - none of this is about "permissions" per se. We actually did
> the proper *permission* check at open() time regardless of all this
> code. This is more of a further tightening of the rules (ie it has
> gone from "are we allowed to ptrace" to "are we actually actively
> ptracing".
> 
> I suspect that the main difference between the two situations is
> probably (a) one extra step required and (b) whatever extra system
> call security things people might have which may disable an actual
> ptrace() or whatever..

Either approach is fine with me.

Will leave v4 a few days longer in case others have a stronger
opinion or to gather & address more feedback.

If no one objects by then, I'll send v5 with READ_ONCE().
Adrian Ratiu July 31, 2024, 1:48 p.m. UTC | #4
On Wednesday, July 31, 2024 02:08 EEST, Jeff Xu <jeffxu@google.com> wrote:

> On Tue, Jul 30, 2024 at 6:25 AM Adrian Ratiu <adrian.ratiu@collabora.com> wrote:
> >
> > This adds a Kconfig option and boot param to allow removing
> > the FOLL_FORCE flag from /proc/pid/mem write calls because
> > it can be abused.
> >
> > The traditional forcing behavior is kept as default because
> > it can break GDB and some other use cases.
> >
> > Previously we tried a more sophisticated approach allowing
> > distributions to fine-tune /proc/pid/mem behavior, however
> > that got NAK-ed by Linus [1], who prefers this simpler
> > approach with semantics also easier to understand for users.
> >
> > Link: https://lore.kernel.org/lkml/CAHk-=wiGWLChxYmUA5HrT5aopZrB7_2VTa0NLZcxORgkUe5tEQ@mail.gmail.com/ [1]
> > Cc: Doug Anderson <dianders@chromium.org>
> > Cc: Jeff Xu <jeffxu@google.com>
> > Cc: Jann Horn <jannh@google.com>
> > Cc: Kees Cook <kees@kernel.org>
> > Cc: Ard Biesheuvel <ardb@kernel.org>
> > Cc: Christian Brauner <brauner@kernel.org>
> > Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
> > Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
> > Signed-off-by: Adrian Ratiu <adrian.ratiu@collabora.com>
> > ---
> > Changes in v4:
> > * Fixed doc punctuation, used passive tense, improved
> >   wording consistency, fixed default value wording
> > * Made struct constant_table a static const __initconst
> > * Reworked proc_mem_foll_force() indentation and var
> >   declarations to make code clearer
> > * Reworked enum + struct definition so lookup_constant()
> >   defaults to 'always'.
> >
> > Changes in v3:
> > * Simplified code to use shorthand ifs and a
> >   lookup_constant() table
> >
> > Changes in v2:
> > * Added bootparam on top of Linus' patch
> > * Slightly reworded commit msg
> > ---
> >  .../admin-guide/kernel-parameters.txt         | 10 ++++
> >  fs/proc/base.c                                | 54 ++++++++++++++++++-
> >  security/Kconfig                              | 32 +++++++++++
> >  3 files changed, 95 insertions(+), 1 deletion(-)
> >
> > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> > index f1384c7b59c9..8396e015aab3 100644
> > --- a/Documentation/admin-guide/kernel-parameters.txt
> > +++ b/Documentation/admin-guide/kernel-parameters.txt
> > @@ -4788,6 +4788,16 @@
> >         printk.time=    Show timing data prefixed to each printk message line
> >                         Format: <bool>  (1/Y/y=enable, 0/N/n=disable)
> >
> > +       proc_mem.force_override= [KNL]
> > +                       Format: {always | ptrace | never}
> > +                       Traditionally /proc/pid/mem allows memory permissions to be
> > +                       overridden without restrictions. This option may be set to
> > +                       restrict that. Can be one of:
> > +                       - 'always': traditional behavior always allows mem overrides.
> > +                       - 'ptrace': only allow mem overrides for active ptracers.
> > +                       - 'never':  never allow mem overrides.
> > +                       If not specified, default is the CONFIG_PROC_MEM_* choice.
> > +
> >         processor.max_cstate=   [HW,ACPI]
> >                         Limit processor to maximum C-state
> >                         max_cstate=9 overrides any DMI blacklist limit.
> > diff --git a/fs/proc/base.c b/fs/proc/base.c
> > index 72a1acd03675..daacb8070042 100644
> > --- a/fs/proc/base.c
> > +++ b/fs/proc/base.c
> > @@ -85,6 +85,7 @@
> >  #include <linux/elf.h>
> >  #include <linux/pid_namespace.h>
> >  #include <linux/user_namespace.h>
> > +#include <linux/fs_parser.h>
> >  #include <linux/fs_struct.h>
> >  #include <linux/slab.h>
> >  #include <linux/sched/autogroup.h>
> > @@ -117,6 +118,35 @@
> >  static u8 nlink_tid __ro_after_init;
> >  static u8 nlink_tgid __ro_after_init;
> >
> > +enum proc_mem_force {
> > +       PROC_MEM_FORCE_ALWAYS,
> > +       PROC_MEM_FORCE_PTRACE,
> > +       PROC_MEM_FORCE_NEVER
> > +};
> > +
> > +static enum proc_mem_force proc_mem_force_override __ro_after_init =
> > +       IS_ENABLED(CONFIG_PROC_MEM_NO_FORCE) ? PROC_MEM_FORCE_NEVER :
> > +       IS_ENABLED(CONFIG_PROC_MEM_FORCE_PTRACE) ? PROC_MEM_FORCE_PTRACE :
> > +       PROC_MEM_FORCE_ALWAYS;
> > +
> > +static const struct constant_table proc_mem_force_table[] __initconst = {
> > +       { "never", PROC_MEM_FORCE_NEVER },
> > +       { "ptrace", PROC_MEM_FORCE_PTRACE },
> > +       { }
> > +};
> > +
> > +static int __init early_proc_mem_force_override(char *buf)
> > +{
> > +       if (!buf)
> > +               return -EINVAL;
> > +
> > +       proc_mem_force_override = lookup_constant(proc_mem_force_table,
> > +                                                 buf, PROC_MEM_FORCE_ALWAYS);
> proc_mem_force_table has two entries, this means:
> if kernel cmdline has proc_mem.force_override="invalid",
>     PROC_MEM_FORCE_ALWAYS will be used.
> 
> Another option is to have 3 entries in proc_mem_force_table: adding
> {"aways", PROC_MEM_FORCE_ALWAYS}
> 
> and let lookup_constant return -1 when not found, and not override
> proc_mem_force_override.

Thanks Jeff for spotting this! :)

In addition to adding all the 3 entries as you suggested, I think we
can also do the following:

proc_mem_force_override = lookup_constant(proc_mem_force_table,
                                                 buf, proc_mem_force_override);

This will ensure that if something like "invalid" gets passed, the
proc_mem_force_override value remains unchanged. In other words
it remains equal to the default choice set via Kconfig and correctly
matches the doc description.

I'll address this before sending v5 in a few days to give others time
to review.

> 
> This enforces the kernel cmd line must be set to one of three choices
> "always|ptrace|never" to be effective.
> 
> If you choose this path: please modify kernel-parameters.txt to
> "If not specified or invalid, default is the CONFIG_PROC_MEM_* choice."
> 
> or else please clarify in the kernel-parameters.text:
> If not specified, default is the CONFIG_PROC_MEM_* choice
> If invalid str or empty string, PROC_MEM_FORCE_ALWAYS will be used
> regardless CONFIG_PROC_MEM_* choice
> 
> > +
> > +       return 0;
> > +}
> > +early_param("proc_mem.force_override", early_proc_mem_force_override);
> > +
> >  struct pid_entry {
> >         const char *name;
> >         unsigned int len;
> > @@ -835,6 +865,26 @@ static int mem_open(struct inode *inode, struct file *file)
> >         return ret;
> >  }
> >
> > +static bool proc_mem_foll_force(struct file *file, struct mm_struct *mm)
> > +{
> > +       struct task_struct *task;
> > +       bool ptrace_active = false;
> > +
> > +       switch (proc_mem_force_override) {
> > +       case PROC_MEM_FORCE_NEVER:
> > +               return false;
> > +       case PROC_MEM_FORCE_PTRACE:
> > +               task = get_proc_task(file_inode(file));
> > +               if (task) {
> > +                       ptrace_active = task->ptrace && task->mm == mm && task->parent == current;
> Do we need to call "read_lock(&tasklist_lock);" ?
> see comments in ptrace_check_attach() of  kernel/ptrace.c
> 
> 
> 
> > +                       put_task_struct(task);
> > +               }
> > +               return ptrace_active;
> > +       default:
> > +               return true;
> > +       }
> > +}
> > +
> >  static ssize_t mem_rw(struct file *file, char __user *buf,
> >                         size_t count, loff_t *ppos, int write)
> >  {
> > @@ -855,7 +905,9 @@ static ssize_t mem_rw(struct file *file, char __user *buf,
> >         if (!mmget_not_zero(mm))
> >                 goto free;
> >
> > -       flags = FOLL_FORCE | (write ? FOLL_WRITE : 0);
> > +       flags = write ? FOLL_WRITE : 0;
> > +       if (proc_mem_foll_force(file, mm))
> > +               flags |= FOLL_FORCE;
> >
> >         while (count > 0) {
> >                 size_t this_len = min_t(size_t, count, PAGE_SIZE);
> > diff --git a/security/Kconfig b/security/Kconfig
> > index 412e76f1575d..a93c1a9b7c28 100644
> > --- a/security/Kconfig
> > +++ b/security/Kconfig
> > @@ -19,6 +19,38 @@ config SECURITY_DMESG_RESTRICT
> >
> >           If you are unsure how to answer this question, answer N.
> >
> > +choice
> > +       prompt "Allow /proc/pid/mem access override"
> > +       default PROC_MEM_ALWAYS_FORCE
> > +       help
> > +         Traditionally /proc/pid/mem allows users to override memory
> > +         permissions for users like ptrace, assuming they have ptrace
> > +         capability.
> > +
> > +         This allows people to limit that - either never override, or
> > +         require actual active ptrace attachment.
> > +
> > +         Defaults to the traditional behavior (for now)
> > +
> > +config PROC_MEM_ALWAYS_FORCE
> > +       bool "Traditional /proc/pid/mem behavior"
> > +       help
> > +         This allows /proc/pid/mem accesses to override memory mapping
> > +         permissions if you have ptrace access rights.
> > +
> > +config PROC_MEM_FORCE_PTRACE
> > +       bool "Require active ptrace() use for access override"
> > +       help
> > +         This allows /proc/pid/mem accesses to override memory mapping
> > +         permissions for active ptracers like gdb.
> > +
> > +config PROC_MEM_NO_FORCE
> > +       bool "Never"
> > +       help
> > +         Never override memory mapping permissions
> > +
> > +endchoice
> > +
> >  config SECURITY
> >         bool "Enable different security models"
> >         depends on SYSFS
> > --
> > 2.44.2
> >
Christian Brauner July 31, 2024, 2:39 p.m. UTC | #5
On Wed, Jul 31, 2024 at 02:15:54PM GMT, Adrian Ratiu wrote:
> On Wednesday, July 31, 2024 02:18 EEST, Linus Torvalds <torvalds@linux-foundation.org> wrote:
> 
> > On Tue, 30 Jul 2024 at 16:09, Jeff Xu <jeffxu@google.com> wrote:
> > >
> > > > +               task = get_proc_task(file_inode(file));
> > > > +               if (task) {
> > > > +                       ptrace_active = task->ptrace && task->mm == mm && task->parent == current;
> > >
> > > Do we need to call "read_lock(&tasklist_lock);" ?
> > > see comments in ptrace_check_attach() of  kernel/ptrace.c
> > 
> > Well, technically I guess the tasklist_lock should be taken.
> > 
> > Practically speaking, maybe just using READ_ONCE() for these fields
> > would really be sufficient.
> > 
> > Yes, it could "race" with the task exiting or just detaching, but the
> > logic would basically be "at one point we were tracing it", and since
> > this fundamentally a "one-point" situation (with the actual _accesses_
> > happening later anyway), logically that should be sufficient.
> > 
> > I mean - none of this is about "permissions" per se. We actually did
> > the proper *permission* check at open() time regardless of all this
> > code. This is more of a further tightening of the rules (ie it has
> > gone from "are we allowed to ptrace" to "are we actually actively
> > ptracing".
> > 
> > I suspect that the main difference between the two situations is
> > probably (a) one extra step required and (b) whatever extra system
> > call security things people might have which may disable an actual
> > ptrace() or whatever..
> 
> Either approach is fine with me.
> 
> Will leave v4 a few days longer in case others have a stronger
> opinion or to gather & address more feedback.
> 
> If no one objects by then, I'll send v5 with READ_ONCE().

I'll just change that directly. No need to resend for that thing.
diff mbox series

Patch

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f1384c7b59c9..8396e015aab3 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4788,6 +4788,16 @@ 
 	printk.time=	Show timing data prefixed to each printk message line
 			Format: <bool>  (1/Y/y=enable, 0/N/n=disable)
 
+	proc_mem.force_override= [KNL]
+			Format: {always | ptrace | never}
+			Traditionally /proc/pid/mem allows memory permissions to be
+			overridden without restrictions. This option may be set to
+			restrict that. Can be one of:
+			- 'always': traditional behavior always allows mem overrides.
+			- 'ptrace': only allow mem overrides for active ptracers.
+			- 'never':  never allow mem overrides.
+			If not specified, default is the CONFIG_PROC_MEM_* choice.
+
 	processor.max_cstate=	[HW,ACPI]
 			Limit processor to maximum C-state
 			max_cstate=9 overrides any DMI blacklist limit.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 72a1acd03675..daacb8070042 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -85,6 +85,7 @@ 
 #include <linux/elf.h>
 #include <linux/pid_namespace.h>
 #include <linux/user_namespace.h>
+#include <linux/fs_parser.h>
 #include <linux/fs_struct.h>
 #include <linux/slab.h>
 #include <linux/sched/autogroup.h>
@@ -117,6 +118,35 @@ 
 static u8 nlink_tid __ro_after_init;
 static u8 nlink_tgid __ro_after_init;
 
+enum proc_mem_force {
+	PROC_MEM_FORCE_ALWAYS,
+	PROC_MEM_FORCE_PTRACE,
+	PROC_MEM_FORCE_NEVER
+};
+
+static enum proc_mem_force proc_mem_force_override __ro_after_init =
+	IS_ENABLED(CONFIG_PROC_MEM_NO_FORCE) ? PROC_MEM_FORCE_NEVER :
+	IS_ENABLED(CONFIG_PROC_MEM_FORCE_PTRACE) ? PROC_MEM_FORCE_PTRACE :
+	PROC_MEM_FORCE_ALWAYS;
+
+static const struct constant_table proc_mem_force_table[] __initconst = {
+	{ "never", PROC_MEM_FORCE_NEVER },
+	{ "ptrace", PROC_MEM_FORCE_PTRACE },
+	{ }
+};
+
+static int __init early_proc_mem_force_override(char *buf)
+{
+	if (!buf)
+		return -EINVAL;
+
+	proc_mem_force_override = lookup_constant(proc_mem_force_table,
+						  buf, PROC_MEM_FORCE_ALWAYS);
+
+	return 0;
+}
+early_param("proc_mem.force_override", early_proc_mem_force_override);
+
 struct pid_entry {
 	const char *name;
 	unsigned int len;
@@ -835,6 +865,26 @@  static int mem_open(struct inode *inode, struct file *file)
 	return ret;
 }
 
+static bool proc_mem_foll_force(struct file *file, struct mm_struct *mm)
+{
+	struct task_struct *task;
+	bool ptrace_active = false;
+
+	switch (proc_mem_force_override) {
+	case PROC_MEM_FORCE_NEVER:
+		return false;
+	case PROC_MEM_FORCE_PTRACE:
+		task = get_proc_task(file_inode(file));
+		if (task) {
+			ptrace_active = task->ptrace && task->mm == mm && task->parent == current;
+			put_task_struct(task);
+		}
+		return ptrace_active;
+	default:
+		return true;
+	}
+}
+
 static ssize_t mem_rw(struct file *file, char __user *buf,
 			size_t count, loff_t *ppos, int write)
 {
@@ -855,7 +905,9 @@  static ssize_t mem_rw(struct file *file, char __user *buf,
 	if (!mmget_not_zero(mm))
 		goto free;
 
-	flags = FOLL_FORCE | (write ? FOLL_WRITE : 0);
+	flags = write ? FOLL_WRITE : 0;
+	if (proc_mem_foll_force(file, mm))
+		flags |= FOLL_FORCE;
 
 	while (count > 0) {
 		size_t this_len = min_t(size_t, count, PAGE_SIZE);
diff --git a/security/Kconfig b/security/Kconfig
index 412e76f1575d..a93c1a9b7c28 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -19,6 +19,38 @@  config SECURITY_DMESG_RESTRICT
 
 	  If you are unsure how to answer this question, answer N.
 
+choice
+	prompt "Allow /proc/pid/mem access override"
+	default PROC_MEM_ALWAYS_FORCE
+	help
+	  Traditionally /proc/pid/mem allows users to override memory
+	  permissions for users like ptrace, assuming they have ptrace
+	  capability.
+
+	  This allows people to limit that - either never override, or
+	  require actual active ptrace attachment.
+
+	  Defaults to the traditional behavior (for now)
+
+config PROC_MEM_ALWAYS_FORCE
+	bool "Traditional /proc/pid/mem behavior"
+	help
+	  This allows /proc/pid/mem accesses to override memory mapping
+	  permissions if you have ptrace access rights.
+
+config PROC_MEM_FORCE_PTRACE
+	bool "Require active ptrace() use for access override"
+	help
+	  This allows /proc/pid/mem accesses to override memory mapping
+	  permissions for active ptracers like gdb.
+
+config PROC_MEM_NO_FORCE
+	bool "Never"
+	help
+	  Never override memory mapping permissions
+
+endchoice
+
 config SECURITY
 	bool "Enable different security models"
 	depends on SYSFS