diff mbox series

[v2] fs: move binfmt_misc sysctl to its own file as built-in

Message ID 20220209225758.476724-1-mcgrof@kernel.org (mailing list archive)
State New, archived
Headers show
Series [v2] fs: move binfmt_misc sysctl to its own file as built-in | expand

Commit Message

Luis Chamberlain Feb. 9, 2022, 10:57 p.m. UTC
This is the second attempt to move binfmt_misc sysctl to its
own file. The issue with the first move was that we moved
the binfmt_misc sysctls to the binfmt_misc module, but the
way things work on some systems is that the binfmt_misc
module will load if the sysctl is present. If we don't force
the sysctl on, the module won't load. The proper thing to do
is to register the sysctl if the module was built or the
binfmt_misc code was built-in, we do this by using the helper
IS_ENABLED() now.

The rationale for the move:

kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
dishes, this makes it very difficult to maintain.

To help with this maintenance let's start by moving sysctls to places
where they actually belong.  The proc sysctl maintainers do not want to
know what sysctl knobs you wish to add for your own piece of code, we
just care about the core logic.

This moves the binfmt_misc sysctl to its own file to help remove clutter
from kernel/sysctl.c.

Cc: Domenico Andreoli <domenico.andreoli@linux.com>
Cc: Tong Zhang <ztong0001@gmail.com>
Reviewed-by: Tong Zhang <ztong0001@gmail.com>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
---

Andrew,

If we get tested-by from Domenico and Tong I think this is ready.

Demenico, Tong, can you please test this patch? Linus' tree
should already have all the prior work reverted as Domenico requested
so this starts fresh.

 fs/file_table.c |  2 ++
 kernel/sysctl.c | 13 -------------
 2 files changed, 2 insertions(+), 13 deletions(-)

Comments

Tong Zhang Feb. 9, 2022, 11:15 p.m. UTC | #1
On Wed, Feb 9, 2022 at 2:58 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
>
> This is the second attempt to move binfmt_misc sysctl to its
> own file. The issue with the first move was that we moved
> the binfmt_misc sysctls to the binfmt_misc module, but the
> way things work on some systems is that the binfmt_misc
> module will load if the sysctl is present. If we don't force
> the sysctl on, the module won't load. The proper thing to do
> is to register the sysctl if the module was built or the
> binfmt_misc code was built-in, we do this by using the helper
> IS_ENABLED() now.
>
> The rationale for the move:
>
> kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
> dishes, this makes it very difficult to maintain.
>
> To help with this maintenance let's start by moving sysctls to places
> where they actually belong.  The proc sysctl maintainers do not want to
> know what sysctl knobs you wish to add for your own piece of code, we
> just care about the core logic.
>
> This moves the binfmt_misc sysctl to its own file to help remove clutter
> from kernel/sysctl.c.
>
> Cc: Domenico Andreoli <domenico.andreoli@linux.com>
> Cc: Tong Zhang <ztong0001@gmail.com>
> Reviewed-by: Tong Zhang <ztong0001@gmail.com>
> Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
> ---
>
> Andrew,
>
> If we get tested-by from Domenico and Tong I think this is ready.
>
> Demenico, Tong, can you please test this patch? Linus' tree
> should already have all the prior work reverted as Domenico requested
> so this starts fresh.
>
>  fs/file_table.c |  2 ++
>  kernel/sysctl.c | 13 -------------
>  2 files changed, 2 insertions(+), 13 deletions(-)
>
> diff --git a/fs/file_table.c b/fs/file_table.c
> index 57edef16dce4..4969021fa676 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -119,6 +119,8 @@ static struct ctl_table fs_stat_sysctls[] = {
>  static int __init init_fs_stat_sysctls(void)
>  {
>         register_sysctl_init("fs", fs_stat_sysctls);
> +       if (IS_ENABLED(CONFIG_BINFMT_MISC))
> +               register_sysctl_mount_point("fs/binfmt_misc");
>         return 0;
>  }
>  fs_initcall(init_fs_stat_sysctls);
> diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> index 241cfc6bc36f..788b9a34d5ab 100644
> --- a/kernel/sysctl.c
> +++ b/kernel/sysctl.c
> @@ -2735,17 +2735,6 @@ static struct ctl_table vm_table[] = {
>         { }
>  };
>
> -static struct ctl_table fs_table[] = {
> -#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
> -       {
> -               .procname       = "binfmt_misc",
> -               .mode           = 0555,
> -               .child          = sysctl_mount_point,
> -       },
> -#endif
> -       { }
> -};
> -
>  static struct ctl_table debug_table[] = {
>  #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
>         {
> @@ -2765,7 +2754,6 @@ static struct ctl_table dev_table[] = {
>
>  DECLARE_SYSCTL_BASE(kernel, kern_table);
>  DECLARE_SYSCTL_BASE(vm, vm_table);
> -DECLARE_SYSCTL_BASE(fs, fs_table);
>  DECLARE_SYSCTL_BASE(debug, debug_table);
>  DECLARE_SYSCTL_BASE(dev, dev_table);
>
> @@ -2773,7 +2761,6 @@ int __init sysctl_init_bases(void)
>  {
>         register_sysctl_base(kernel);
>         register_sysctl_base(vm);
> -       register_sysctl_base(fs);
>         register_sysctl_base(debug);
>         register_sysctl_base(dev);
>
> --
> 2.34.1
>

Hi Luis,
Thanks for posting.
I checked the master branch just now and the fix is already in, see
commit b42bc9a3c511("Fix regression due to "fs: move binfmt_misc
sysctl to its own file"")
I have tested it yesterday on a debian machine and it appears to be ok.
- Tong
Tong Zhang Feb. 9, 2022, 11:18 p.m. UTC | #2
On Wed, Feb 9, 2022 at 3:15 PM Tong Zhang <ztong0001@gmail.com> wrote:
>
> On Wed, Feb 9, 2022 at 2:58 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
> >
> > This is the second attempt to move binfmt_misc sysctl to its
> > own file. The issue with the first move was that we moved
> > the binfmt_misc sysctls to the binfmt_misc module, but the
> > way things work on some systems is that the binfmt_misc
> > module will load if the sysctl is present. If we don't force
> > the sysctl on, the module won't load. The proper thing to do
> > is to register the sysctl if the module was built or the
> > binfmt_misc code was built-in, we do this by using the helper
> > IS_ENABLED() now.
> >
> > The rationale for the move:
> >
> > kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
> > dishes, this makes it very difficult to maintain.
> >
> > To help with this maintenance let's start by moving sysctls to places
> > where they actually belong.  The proc sysctl maintainers do not want to
> > know what sysctl knobs you wish to add for your own piece of code, we
> > just care about the core logic.
> >
> > This moves the binfmt_misc sysctl to its own file to help remove clutter
> > from kernel/sysctl.c.
> >
> > Cc: Domenico Andreoli <domenico.andreoli@linux.com>
> > Cc: Tong Zhang <ztong0001@gmail.com>
> > Reviewed-by: Tong Zhang <ztong0001@gmail.com>
> > Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
> > ---
> >
> > Andrew,
> >
> > If we get tested-by from Domenico and Tong I think this is ready.
> >
> > Demenico, Tong, can you please test this patch? Linus' tree
> > should already have all the prior work reverted as Domenico requested
> > so this starts fresh.
> >
> >  fs/file_table.c |  2 ++
> >  kernel/sysctl.c | 13 -------------
> >  2 files changed, 2 insertions(+), 13 deletions(-)
> >
> > diff --git a/fs/file_table.c b/fs/file_table.c
> > index 57edef16dce4..4969021fa676 100644
> > --- a/fs/file_table.c
> > +++ b/fs/file_table.c
> > @@ -119,6 +119,8 @@ static struct ctl_table fs_stat_sysctls[] = {
> >  static int __init init_fs_stat_sysctls(void)
> >  {
> >         register_sysctl_init("fs", fs_stat_sysctls);
> > +       if (IS_ENABLED(CONFIG_BINFMT_MISC))
> > +               register_sysctl_mount_point("fs/binfmt_misc");
> >         return 0;
> >  }
> >  fs_initcall(init_fs_stat_sysctls);
> > diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> > index 241cfc6bc36f..788b9a34d5ab 100644
> > --- a/kernel/sysctl.c
> > +++ b/kernel/sysctl.c
> > @@ -2735,17 +2735,6 @@ static struct ctl_table vm_table[] = {
> >         { }
> >  };
> >
> > -static struct ctl_table fs_table[] = {
> > -#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
> > -       {
> > -               .procname       = "binfmt_misc",
> > -               .mode           = 0555,
> > -               .child          = sysctl_mount_point,
> > -       },
> > -#endif
> > -       { }
> > -};
> > -
> >  static struct ctl_table debug_table[] = {
> >  #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
> >         {
> > @@ -2765,7 +2754,6 @@ static struct ctl_table dev_table[] = {
> >
> >  DECLARE_SYSCTL_BASE(kernel, kern_table);
> >  DECLARE_SYSCTL_BASE(vm, vm_table);
> > -DECLARE_SYSCTL_BASE(fs, fs_table);
> >  DECLARE_SYSCTL_BASE(debug, debug_table);
> >  DECLARE_SYSCTL_BASE(dev, dev_table);
> >
> > @@ -2773,7 +2761,6 @@ int __init sysctl_init_bases(void)
> >  {
> >         register_sysctl_base(kernel);
> >         register_sysctl_base(vm);
> > -       register_sysctl_base(fs);
> >         register_sysctl_base(debug);
> >         register_sysctl_base(dev);
> >
> > --
> > 2.34.1
> >
>
> Hi Luis,
> Thanks for posting.
> I checked the master branch just now and the fix is already in, see
> commit b42bc9a3c511("Fix regression due to "fs: move binfmt_misc
> sysctl to its own file"")
> I have tested it yesterday on a debian machine and it appears to be ok.
> - Tong

One thing I forget to mention is that since we removed binfmt related
stuff from kernel/sysctl.c
#include<linux/binfmts.h> is not needed anymore and can be removed.

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 5ae443b2882e..47e1696e3972 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -59,7 +59,6 @@
 #include <linux/oom.h>
 #include <linux/kmod.h>
 #include <linux/capability.h>
-#include <linux/binfmts.h>
 #include <linux/sched/sysctl.h>
 #include <linux/kexec.h>
 #include <linux/bpf.h>

- Tong
Luis Chamberlain Feb. 9, 2022, 11:29 p.m. UTC | #3
On Wed, Feb 09, 2022 at 03:15:53PM -0800, Tong Zhang wrote:
> On Wed, Feb 9, 2022 at 2:58 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
> >
> > This is the second attempt to move binfmt_misc sysctl to its
> > own file. The issue with the first move was that we moved
> > the binfmt_misc sysctls to the binfmt_misc module, but the
> > way things work on some systems is that the binfmt_misc
> > module will load if the sysctl is present. If we don't force
> > the sysctl on, the module won't load. The proper thing to do
> > is to register the sysctl if the module was built or the
> > binfmt_misc code was built-in, we do this by using the helper
> > IS_ENABLED() now.
> >
> > The rationale for the move:
> >
> > kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
> > dishes, this makes it very difficult to maintain.
> >
> > To help with this maintenance let's start by moving sysctls to places
> > where they actually belong.  The proc sysctl maintainers do not want to
> > know what sysctl knobs you wish to add for your own piece of code, we
> > just care about the core logic.
> >
> > This moves the binfmt_misc sysctl to its own file to help remove clutter
> > from kernel/sysctl.c.
> >
> > Cc: Domenico Andreoli <domenico.andreoli@linux.com>
> > Cc: Tong Zhang <ztong0001@gmail.com>
> > Reviewed-by: Tong Zhang <ztong0001@gmail.com>
> > Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
> > ---
> >
> > Andrew,
> >
> > If we get tested-by from Domenico and Tong I think this is ready.
> >
> > Demenico, Tong, can you please test this patch? Linus' tree
> > should already have all the prior work reverted as Domenico requested
> > so this starts fresh.
> >
> >  fs/file_table.c |  2 ++
> >  kernel/sysctl.c | 13 -------------
> >  2 files changed, 2 insertions(+), 13 deletions(-)
> >
> > diff --git a/fs/file_table.c b/fs/file_table.c
> > index 57edef16dce4..4969021fa676 100644
> > --- a/fs/file_table.c
> > +++ b/fs/file_table.c
> > @@ -119,6 +119,8 @@ static struct ctl_table fs_stat_sysctls[] = {
> >  static int __init init_fs_stat_sysctls(void)
> >  {
> >         register_sysctl_init("fs", fs_stat_sysctls);
> > +       if (IS_ENABLED(CONFIG_BINFMT_MISC))
> > +               register_sysctl_mount_point("fs/binfmt_misc");
> >         return 0;
> >  }
> >  fs_initcall(init_fs_stat_sysctls);
> > diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> > index 241cfc6bc36f..788b9a34d5ab 100644
> > --- a/kernel/sysctl.c
> > +++ b/kernel/sysctl.c
> > @@ -2735,17 +2735,6 @@ static struct ctl_table vm_table[] = {
> >         { }
> >  };
> >
> > -static struct ctl_table fs_table[] = {
> > -#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
> > -       {
> > -               .procname       = "binfmt_misc",
> > -               .mode           = 0555,
> > -               .child          = sysctl_mount_point,
> > -       },
> > -#endif
> > -       { }
> > -};
> > -
> >  static struct ctl_table debug_table[] = {
> >  #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
> >         {
> > @@ -2765,7 +2754,6 @@ static struct ctl_table dev_table[] = {
> >
> >  DECLARE_SYSCTL_BASE(kernel, kern_table);
> >  DECLARE_SYSCTL_BASE(vm, vm_table);
> > -DECLARE_SYSCTL_BASE(fs, fs_table);
> >  DECLARE_SYSCTL_BASE(debug, debug_table);
> >  DECLARE_SYSCTL_BASE(dev, dev_table);
> >
> > @@ -2773,7 +2761,6 @@ int __init sysctl_init_bases(void)
> >  {
> >         register_sysctl_base(kernel);
> >         register_sysctl_base(vm);
> > -       register_sysctl_base(fs);
> >         register_sysctl_base(debug);
> >         register_sysctl_base(dev);
> >
> > --
> > 2.34.1
> >
> 
> Hi Luis,
> Thanks for posting.
> I checked the master branch just now and the fix is already in, see
> commit b42bc9a3c511("Fix regression due to "fs: move binfmt_misc
> sysctl to its own file"")
> I have tested it yesterday on a debian machine and it appears to be ok.

The "fix" was to vert the original effort. This patch continues with the
effort and does it properly. As such it is a change which needs to be
tested. I'd appreciate if you can test.

 Luis
Tong Zhang Feb. 9, 2022, 11:39 p.m. UTC | #4
On Wed, Feb 9, 2022 at 3:29 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
>
> On Wed, Feb 09, 2022 at 03:15:53PM -0800, Tong Zhang wrote:
> > On Wed, Feb 9, 2022 at 2:58 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
> > >
> > > This is the second attempt to move binfmt_misc sysctl to its
> > > own file. The issue with the first move was that we moved
> > > the binfmt_misc sysctls to the binfmt_misc module, but the
> > > way things work on some systems is that the binfmt_misc
> > > module will load if the sysctl is present. If we don't force
> > > the sysctl on, the module won't load. The proper thing to do
> > > is to register the sysctl if the module was built or the
> > > binfmt_misc code was built-in, we do this by using the helper
> > > IS_ENABLED() now.
> > >
> > > The rationale for the move:
> > >
> > > kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
> > > dishes, this makes it very difficult to maintain.
> > >
> > > To help with this maintenance let's start by moving sysctls to places
> > > where they actually belong.  The proc sysctl maintainers do not want to
> > > know what sysctl knobs you wish to add for your own piece of code, we
> > > just care about the core logic.
> > >
> > > This moves the binfmt_misc sysctl to its own file to help remove clutter
> > > from kernel/sysctl.c.
> > >
> > > Cc: Domenico Andreoli <domenico.andreoli@linux.com>
> > > Cc: Tong Zhang <ztong0001@gmail.com>
> > > Reviewed-by: Tong Zhang <ztong0001@gmail.com>
> > > Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
> > > ---
> > >
> > > Andrew,
> > >
> > > If we get tested-by from Domenico and Tong I think this is ready.
> > >
> > > Demenico, Tong, can you please test this patch? Linus' tree
> > > should already have all the prior work reverted as Domenico requested
> > > so this starts fresh.
> > >
> > >  fs/file_table.c |  2 ++
> > >  kernel/sysctl.c | 13 -------------
> > >  2 files changed, 2 insertions(+), 13 deletions(-)
> > >
> > > diff --git a/fs/file_table.c b/fs/file_table.c
> > > index 57edef16dce4..4969021fa676 100644
> > > --- a/fs/file_table.c
> > > +++ b/fs/file_table.c
> > > @@ -119,6 +119,8 @@ static struct ctl_table fs_stat_sysctls[] = {
> > >  static int __init init_fs_stat_sysctls(void)
> > >  {
> > >         register_sysctl_init("fs", fs_stat_sysctls);
> > > +       if (IS_ENABLED(CONFIG_BINFMT_MISC))
> > > +               register_sysctl_mount_point("fs/binfmt_misc");
> > >         return 0;
> > >  }
> > >  fs_initcall(init_fs_stat_sysctls);
> > > diff --git a/kernel/sysctl.c b/kernel/sysctl.c
> > > index 241cfc6bc36f..788b9a34d5ab 100644
> > > --- a/kernel/sysctl.c
> > > +++ b/kernel/sysctl.c
> > > @@ -2735,17 +2735,6 @@ static struct ctl_table vm_table[] = {
> > >         { }
> > >  };
> > >
> > > -static struct ctl_table fs_table[] = {
> > > -#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
> > > -       {
> > > -               .procname       = "binfmt_misc",
> > > -               .mode           = 0555,
> > > -               .child          = sysctl_mount_point,
> > > -       },
> > > -#endif
> > > -       { }
> > > -};
> > > -
> > >  static struct ctl_table debug_table[] = {
> > >  #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
> > >         {
> > > @@ -2765,7 +2754,6 @@ static struct ctl_table dev_table[] = {
> > >
> > >  DECLARE_SYSCTL_BASE(kernel, kern_table);
> > >  DECLARE_SYSCTL_BASE(vm, vm_table);
> > > -DECLARE_SYSCTL_BASE(fs, fs_table);
> > >  DECLARE_SYSCTL_BASE(debug, debug_table);
> > >  DECLARE_SYSCTL_BASE(dev, dev_table);
> > >
> > > @@ -2773,7 +2761,6 @@ int __init sysctl_init_bases(void)
> > >  {
> > >         register_sysctl_base(kernel);
> > >         register_sysctl_base(vm);
> > > -       register_sysctl_base(fs);
> > >         register_sysctl_base(debug);
> > >         register_sysctl_base(dev);
> > >
> > > --
> > > 2.34.1
> > >
> >
> > Hi Luis,
> > Thanks for posting.
> > I checked the master branch just now and the fix is already in, see
> > commit b42bc9a3c511("Fix regression due to "fs: move binfmt_misc
> > sysctl to its own file"")
> > I have tested it yesterday on a debian machine and it appears to be ok.
>
> The "fix" was to vert the original effort. This patch continues with the
> effort and does it properly. As such it is a change which needs to be
> tested. I'd appreciate if you can test.
>
>  Luis

Tested-by: Tong Zhang<ztong0001@gmail.com>
Tong Zhang Feb. 10, 2022, 12:14 a.m. UTC | #5
On Wed, Feb 9, 2022 at 3:39 PM Tong Zhang <ztong0001@gmail.com> wrote:
>
> On Wed, Feb 9, 2022 at 3:29 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
> >
> > On Wed, Feb 09, 2022 at 03:15:53PM -0800, Tong Zhang wrote:
> > > On Wed, Feb 9, 2022 at 2:58 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
> > > >
> > > > This is the second attempt to move binfmt_misc sysctl to its
> > > > own file. The issue with the first move was that we moved
> > > > the binfmt_misc sysctls to the binfmt_misc module, but the
> > > > way things work on some systems is that the binfmt_misc
> > > > module will load if the sysctl is present. If we don't force
> > > > the sysctl on, the module won't load. The proper thing to do
> > > > is to register the sysctl if the module was built or the
> > > > binfmt_misc code was built-in, we do this by using the helper
> > > > IS_ENABLED() now.
> > > >
> > > > The rationale for the move:
> > > >
> > > > kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
> > > > dishes, this makes it very difficult to maintain.
> > > >
> > > > To help with this maintenance let's start by moving sysctls to places
> > > > where they actually belong.  The proc sysctl maintainers do not want to
> > > > know what sysctl knobs you wish to add for your own piece of code, we
> > > > just care about the core logic.
> > > >
> > > > This moves the binfmt_misc sysctl to its own file to help remove clutter
> > > > from kernel/sysctl.c.
> > > >
> > > > Cc: Domenico Andreoli <domenico.andreoli@linux.com>
> > > > Cc: Tong Zhang <ztong0001@gmail.com>
> > > > Reviewed-by: Tong Zhang <ztong0001@gmail.com>
> > > > Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
> > > > ---
> > > >
> > > > Andrew,
> > > >
> > > > If we get tested-by from Domenico and Tong I think this is ready.
> > > >
> > > > Demenico, Tong, can you please test this patch? Linus' tree
> > > > should already have all the prior work reverted as Domenico requested
> > > > so this starts fresh.
> > > >
> > > >  fs/file_table.c |  2 ++
> > > >  kernel/sysctl.c | 13 -------------
> > > >  2 files changed, 2 insertions(+), 13 deletions(-)
> > > >
> > > > diff --git a/fs/file_table.c b/fs/file_table.c
> > > > index 57edef16dce4..4969021fa676 100644
> > > > --- a/fs/file_table.c
> > > > +++ b/fs/file_table.c
> > > > @@ -119,6 +119,8 @@ static struct ctl_table fs_stat_sysctls[] = {
> > > >  static int __init init_fs_stat_sysctls(void)
> > > >  {
> > > >         register_sysctl_init("fs", fs_stat_sysctls);
> > > > +       if (IS_ENABLED(CONFIG_BINFMT_MISC))
> > > > +               register_sysctl_mount_point("fs/binfmt_misc");
                             ^^^^


I'm looking at this code again and we need to mark this return value
in kmemleak to avoid a false positive.


diff --git a/fs/file_table.c b/fs/file_table.c
index 4969021fa676..7303aa33b3fd 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -27,6 +27,7 @@
 #include <linux/task_work.h>
 #include <linux/ima.h>
 #include <linux/swap.h>
+#include <linux/kmemleak.h>

 #include <linux/atomic.h>

@@ -119,8 +120,10 @@ static struct ctl_table fs_stat_sysctls[] = {
 static int __init init_fs_stat_sysctls(void)
 {
        register_sysctl_init("fs", fs_stat_sysctls);
-       if (IS_ENABLED(CONFIG_BINFMT_MISC))
-               register_sysctl_mount_point("fs/binfmt_misc");
+       if (IS_ENABLED(CONFIG_BINFMT_MISC)) {
+               struct ctl_table_header *hdr =
register_sysctl_mount_point("fs/binfmt_misc");
+               kmemleak_not_leak(hdr);
+    }
        return 0;
 }
 fs_initcall(init_fs_stat_sysctls);
Luis Chamberlain Feb. 10, 2022, 12:23 a.m. UTC | #6
On Wed, Feb 09, 2022 at 04:14:08PM -0800, Tong Zhang wrote:
> On Wed, Feb 9, 2022 at 3:39 PM Tong Zhang <ztong0001@gmail.com> wrote:
> >
> > On Wed, Feb 9, 2022 at 3:29 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
> > >
> > > On Wed, Feb 09, 2022 at 03:15:53PM -0800, Tong Zhang wrote:
> > > > On Wed, Feb 9, 2022 at 2:58 PM Luis Chamberlain <mcgrof@kernel.org> wrote:
> > > > >
> > > > > This is the second attempt to move binfmt_misc sysctl to its
> > > > > own file. The issue with the first move was that we moved
> > > > > the binfmt_misc sysctls to the binfmt_misc module, but the
> > > > > way things work on some systems is that the binfmt_misc
> > > > > module will load if the sysctl is present. If we don't force
> > > > > the sysctl on, the module won't load. The proper thing to do
> > > > > is to register the sysctl if the module was built or the
> > > > > binfmt_misc code was built-in, we do this by using the helper
> > > > > IS_ENABLED() now.
> > > > >
> > > > > The rationale for the move:
> > > > >
> > > > > kernel/sysctl.c is a kitchen sink where everyone leaves their dirty
> > > > > dishes, this makes it very difficult to maintain.
> > > > >
> > > > > To help with this maintenance let's start by moving sysctls to places
> > > > > where they actually belong.  The proc sysctl maintainers do not want to
> > > > > know what sysctl knobs you wish to add for your own piece of code, we
> > > > > just care about the core logic.
> > > > >
> > > > > This moves the binfmt_misc sysctl to its own file to help remove clutter
> > > > > from kernel/sysctl.c.
> > > > >
> > > > > Cc: Domenico Andreoli <domenico.andreoli@linux.com>
> > > > > Cc: Tong Zhang <ztong0001@gmail.com>
> > > > > Reviewed-by: Tong Zhang <ztong0001@gmail.com>
> > > > > Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
> > > > > ---
> > > > >
> > > > > Andrew,
> > > > >
> > > > > If we get tested-by from Domenico and Tong I think this is ready.
> > > > >
> > > > > Demenico, Tong, can you please test this patch? Linus' tree
> > > > > should already have all the prior work reverted as Domenico requested
> > > > > so this starts fresh.
> > > > >
> > > > >  fs/file_table.c |  2 ++
> > > > >  kernel/sysctl.c | 13 -------------
> > > > >  2 files changed, 2 insertions(+), 13 deletions(-)
> > > > >
> > > > > diff --git a/fs/file_table.c b/fs/file_table.c
> > > > > index 57edef16dce4..4969021fa676 100644
> > > > > --- a/fs/file_table.c
> > > > > +++ b/fs/file_table.c
> > > > > @@ -119,6 +119,8 @@ static struct ctl_table fs_stat_sysctls[] = {
> > > > >  static int __init init_fs_stat_sysctls(void)
> > > > >  {
> > > > >         register_sysctl_init("fs", fs_stat_sysctls);
> > > > > +       if (IS_ENABLED(CONFIG_BINFMT_MISC))
> > > > > +               register_sysctl_mount_point("fs/binfmt_misc");
>                              ^^^^
> 
> 
> I'm looking at this code again and we need to mark this return value
> in kmemleak to avoid a false positive.
> 
> 
> diff --git a/fs/file_table.c b/fs/file_table.c
> index 4969021fa676..7303aa33b3fd 100644
> --- a/fs/file_table.c
> +++ b/fs/file_table.c
> @@ -27,6 +27,7 @@
>  #include <linux/task_work.h>
>  #include <linux/ima.h>
>  #include <linux/swap.h>
> +#include <linux/kmemleak.h>
> 
>  #include <linux/atomic.h>
> 
> @@ -119,8 +120,10 @@ static struct ctl_table fs_stat_sysctls[] = {
>  static int __init init_fs_stat_sysctls(void)
>  {
>         register_sysctl_init("fs", fs_stat_sysctls);
> -       if (IS_ENABLED(CONFIG_BINFMT_MISC))
> -               register_sysctl_mount_point("fs/binfmt_misc");
> +       if (IS_ENABLED(CONFIG_BINFMT_MISC)) {
> +               struct ctl_table_header *hdr =
> register_sysctl_mount_point("fs/binfmt_misc");
> +               kmemleak_not_leak(hdr);
> +    }

Good catch, will ammend. I'll give it a few days for others to review and test
before a new iteration.

  Luis
diff mbox series

Patch

diff --git a/fs/file_table.c b/fs/file_table.c
index 57edef16dce4..4969021fa676 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -119,6 +119,8 @@  static struct ctl_table fs_stat_sysctls[] = {
 static int __init init_fs_stat_sysctls(void)
 {
 	register_sysctl_init("fs", fs_stat_sysctls);
+	if (IS_ENABLED(CONFIG_BINFMT_MISC))
+		register_sysctl_mount_point("fs/binfmt_misc");
 	return 0;
 }
 fs_initcall(init_fs_stat_sysctls);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 241cfc6bc36f..788b9a34d5ab 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2735,17 +2735,6 @@  static struct ctl_table vm_table[] = {
 	{ }
 };
 
-static struct ctl_table fs_table[] = {
-#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
-	{
-		.procname	= "binfmt_misc",
-		.mode		= 0555,
-		.child		= sysctl_mount_point,
-	},
-#endif
-	{ }
-};
-
 static struct ctl_table debug_table[] = {
 #ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
 	{
@@ -2765,7 +2754,6 @@  static struct ctl_table dev_table[] = {
 
 DECLARE_SYSCTL_BASE(kernel, kern_table);
 DECLARE_SYSCTL_BASE(vm, vm_table);
-DECLARE_SYSCTL_BASE(fs, fs_table);
 DECLARE_SYSCTL_BASE(debug, debug_table);
 DECLARE_SYSCTL_BASE(dev, dev_table);
 
@@ -2773,7 +2761,6 @@  int __init sysctl_init_bases(void)
 {
 	register_sysctl_base(kernel);
 	register_sysctl_base(vm);
-	register_sysctl_base(fs);
 	register_sysctl_base(debug);
 	register_sysctl_base(dev);