diff mbox series

[net-next] net: Enable some sysctls for the userns root with privilege

Message ID 20211203032815.339186-1-xu.xin16@zte.com.cn (mailing list archive)
State Superseded
Headers show
Series [net-next] net: Enable some sysctls for the userns root with privilege | expand

Commit Message

CGEL Dec. 3, 2021, 3:28 a.m. UTC
From: xu xin <xu.xin16@zte.com.cn>

The enabled sysctls include the following:
1. net/ipv4/neigh/<if>/* 
2. net/ipv6/neigh/<if>/* 
3. net/ieee802154/6lowpan/* 
4. net/ipv6/route/* 
5. net/ipv4/vs/* 
6. net/unix/* 
7. net/core/xfrm_*

In practice, root users in some user namespaces need to adjust these
sysctls in their own netns, but cannot, simply because the netns is not
owned by init_user_ns, even when they have been given root privilege via
docker run --privileged.

Reported-by: xu xin <xu.xin16@zte.com.cn>
Tested-by: xu xin <xu.xin16@zte.com.cn>
Signed-off-by: xu xin <xu.xin16@zte.com.cn>
---
 net/core/neighbour.c                | 4 ----
 net/ieee802154/6lowpan/reassembly.c | 4 ----
 net/ipv6/route.c                    | 4 ----
 net/netfilter/ipvs/ip_vs_ctl.c      | 4 ----
 net/netfilter/ipvs/ip_vs_lblc.c     | 4 ----
 net/netfilter/ipvs/ip_vs_lblcr.c    | 3 ---
 net/unix/sysctl_net_unix.c          | 4 ----
 net/xfrm/xfrm_sysctl.c              | 4 ----
 8 files changed, 31 deletions(-)

Comments

Jakub Kicinski Dec. 7, 2021, 12:45 a.m. UTC | #1
On Fri,  3 Dec 2021 03:28:15 +0000 cgel.zte@gmail.com wrote:
> From: xu xin <xu.xin16@zte.com.cn>
> 
> Enabled sysctls include the followings: 
> 1. net/ipv4/neigh/<if>/* 
> 2. net/ipv6/neigh/<if>/* 
> 3. net/ieee802154/6lowpan/* 
> 4. net/ipv6/route/* 
> 5. net/ipv4/vs/* 
> 6. net/unix/* 
> 7. net/core/xfrm_*
> 
> In practical work, some userns with root privilege have needs to adjust
> these sysctls in their own netns, but limited just because they are not
> init user_ns, even if they are given root privilege by docker -privilege.

You need to justify why removing these checks is safe. It sounds like
you're only describing why having the permissions is problematic, which 
is fair but not sufficient to just remove them.

> Reported-by: xu xin <xu.xin16@zte.com.cn>
> Tested-by: xu xin <xu.xin16@zte.com.cn>

These tags are superfluous for the author of the patch.

> Signed-off-by: xu xin <xu.xin16@zte.com.cn>
> ---
>  net/core/neighbour.c                | 4 ----
>  net/ieee802154/6lowpan/reassembly.c | 4 ----
>  net/ipv6/route.c                    | 4 ----
>  net/netfilter/ipvs/ip_vs_ctl.c      | 4 ----
>  net/netfilter/ipvs/ip_vs_lblc.c     | 4 ----
>  net/netfilter/ipvs/ip_vs_lblcr.c    | 3 ---
>  net/unix/sysctl_net_unix.c          | 4 ----
>  net/xfrm/xfrm_sysctl.c              | 4 ----
>  8 files changed, 31 deletions(-)
> 
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index 0cdd4d9ad942..44d90cc341ea 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -3771,10 +3771,6 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
>  			neigh_proc_base_reachable_time;
>  	}
>  
> -	/* Don't export sysctls to unprivileged users */
> -	if (neigh_parms_net(p)->user_ns != &init_user_ns)
> -		t->neigh_vars[0].procname = NULL;
> -
>  	switch (neigh_parms_family(p)) {
>  	case AF_INET:
>  	      p_name = "ipv4";
> diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
> index be6f06adefe0..89cbad6d8368 100644
> --- a/net/ieee802154/6lowpan/reassembly.c
> +++ b/net/ieee802154/6lowpan/reassembly.c
> @@ -366,10 +366,6 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
>  				GFP_KERNEL);
>  		if (table == NULL)
>  			goto err_alloc;
> -
> -		/* Don't export sysctls to unprivileged users */
> -		if (net->user_ns != &init_user_ns)
> -			table[0].procname = NULL;
>  	}
>  
>  	table[0].data	= &ieee802154_lowpan->fqdir->high_thresh;
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index f0d29fcb2094..6a0b15d6500e 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -6409,10 +6409,6 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
>  		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
>  		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
>  		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
> -
> -		/* Don't export sysctls to unprivileged users */
> -		if (net->user_ns != &init_user_ns)
> -			table[1].procname = NULL;
>  	}
>  
>  	return table;
> diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
> index 7f645328b47f..a77c8abf2fc7 100644
> --- a/net/netfilter/ipvs/ip_vs_ctl.c
> +++ b/net/netfilter/ipvs/ip_vs_ctl.c
> @@ -4040,10 +4040,6 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
>  		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
>  		if (tbl == NULL)
>  			return -ENOMEM;
> -
> -		/* Don't export sysctls to unprivileged users */
> -		if (net->user_ns != &init_user_ns)
> -			tbl[0].procname = NULL;
>  	} else
>  		tbl = vs_vars;
>  	/* Initialize sysctl defaults */
> diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
> index 7ac7473e3804..567ba33fa5b4 100644
> --- a/net/netfilter/ipvs/ip_vs_lblc.c
> +++ b/net/netfilter/ipvs/ip_vs_lblc.c
> @@ -561,10 +561,6 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
>  		if (ipvs->lblc_ctl_table == NULL)
>  			return -ENOMEM;
>  
> -		/* Don't export sysctls to unprivileged users */
> -		if (net->user_ns != &init_user_ns)
> -			ipvs->lblc_ctl_table[0].procname = NULL;
> -
>  	} else
>  		ipvs->lblc_ctl_table = vs_vars_table;
>  	ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION;
> diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
> index 77c323c36a88..a58440a7bf9e 100644
> --- a/net/netfilter/ipvs/ip_vs_lblcr.c
> +++ b/net/netfilter/ipvs/ip_vs_lblcr.c
> @@ -747,9 +747,6 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
>  		if (ipvs->lblcr_ctl_table == NULL)
>  			return -ENOMEM;
>  
> -		/* Don't export sysctls to unprivileged users */
> -		if (net->user_ns != &init_user_ns)
> -			ipvs->lblcr_ctl_table[0].procname = NULL;
>  	} else
>  		ipvs->lblcr_ctl_table = vs_vars_table;
>  	ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
> diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
> index c09bea89151b..01d44e2598e2 100644
> --- a/net/unix/sysctl_net_unix.c
> +++ b/net/unix/sysctl_net_unix.c
> @@ -30,10 +30,6 @@ int __net_init unix_sysctl_register(struct net *net)
>  	if (table == NULL)
>  		goto err_alloc;
>  
> -	/* Don't export sysctls to unprivileged users */
> -	if (net->user_ns != &init_user_ns)
> -		table[0].procname = NULL;
> -
>  	table[0].data = &net->unx.sysctl_max_dgram_qlen;
>  	net->unx.ctl = register_net_sysctl(net, "net/unix", table);
>  	if (net->unx.ctl == NULL)
> diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c
> index 0c6c5ef65f9d..a9b7723eb88f 100644
> --- a/net/xfrm/xfrm_sysctl.c
> +++ b/net/xfrm/xfrm_sysctl.c
> @@ -55,10 +55,6 @@ int __net_init xfrm_sysctl_init(struct net *net)
>  	table[2].data = &net->xfrm.sysctl_larval_drop;
>  	table[3].data = &net->xfrm.sysctl_acq_expires;
>  
> -	/* Don't export sysctls to unprivileged users */
> -	if (net->user_ns != &init_user_ns)
> -		table[0].procname = NULL;
> -
>  	net->xfrm.sysctl_hdr = register_net_sysctl(net, "net/core", table);
>  	if (!net->xfrm.sysctl_hdr)
>  		goto out_register;
CGEL Dec. 7, 2021, 7:18 a.m. UTC | #2
On Mon, Dec 06, 2021 at 04:45:20PM -0800, Jakub Kicinski wrote:
> On Fri,  3 Dec 2021 03:28:15 +0000 cgel.zte@gmail.com wrote:
> > From: xu xin <xu.xin16@zte.com.cn>
> > 
> > Enabled sysctls include the followings: 
> > 1. net/ipv4/neigh/<if>/* 
> > 2. net/ipv6/neigh/<if>/* 
> > 3. net/ieee802154/6lowpan/* 
> > 4. net/ipv6/route/* 
> > 5. net/ipv4/vs/* 
> > 6. net/unix/* 
> > 7. net/core/xfrm_*
> > 
> > In practical work, some userns with root privilege have needs to adjust
> > these sysctls in their own netns, but limited just because they are not
> > init user_ns, even if they are given root privilege by docker -privilege.
> 
> You need to justify why removing these checks is safe. It sounds like
> you're only describing why having the permissions is problematic, which 
> is fair but not sufficient to just remove them.
> 
Hi, Jakub
My patch is a little radical. I just saw Eric's previous reply to
Alexander (https://lore.kernel.org/all/87pmsqyuqy.fsf@disp2133/).
These were disabled out of an abundance of caution.

My original intention was to enable only the neighbor-related sysctls,
which I think are safe, but I will try to figure out which of these
sysctls are safe to enable.
> > Reported-by: xu xin <xu.xin16@zte.com.cn>
> > Tested-by: xu xin <xu.xin16@zte.com.cn>
> 
> These tags are superfluous for the author of the patch.
> 
OK, thank you for correcting me.
> > Signed-off-by: xu xin <xu.xin16@zte.com.cn>
> > ---
> >  net/core/neighbour.c                | 4 ----
> >  net/ieee802154/6lowpan/reassembly.c | 4 ----
> >  net/ipv6/route.c                    | 4 ----
> >  net/netfilter/ipvs/ip_vs_ctl.c      | 4 ----
> >  net/netfilter/ipvs/ip_vs_lblc.c     | 4 ----
> >  net/netfilter/ipvs/ip_vs_lblcr.c    | 3 ---
> >  net/unix/sysctl_net_unix.c          | 4 ----
> >  net/xfrm/xfrm_sysctl.c              | 4 ----
> >  8 files changed, 31 deletions(-)
Joanne Koong Dec. 7, 2021, 10:16 p.m. UTC | #3
On 12/6/21 11:18 PM, CGEL wrote:

> On Mon, Dec 06, 2021 at 04:45:20PM -0800, Jakub Kicinski wrote:
>> On Fri,  3 Dec 2021 03:28:15 +0000 cgel.zte@gmail.com wrote:
>>> From: xu xin <xu.xin16@zte.com.cn>
>>>
>>> Enabled sysctls include the followings:
>>> 1. net/ipv4/neigh/<if>/*
>>> 2. net/ipv6/neigh/<if>/*
>>> 3. net/ieee802154/6lowpan/*
>>> 4. net/ipv6/route/*
>>> 5. net/ipv4/vs/*
>>> 6. net/unix/*
>>> 7. net/core/xfrm_*
>>>
>>> In practical work, some userns with root privilege have needs to adjust
>>> these sysctls in their own netns, but limited just because they are not
>>> init user_ns, even if they are given root privilege by docker -privilege.
>> You need to justify why removing these checks is safe. It sounds like
>> you're only describing why having the permissions is problematic, which
>> is fair but not sufficient to just remove them.
>>
> Hi, Jakub
> My patch is a little radical. I just saw Eric's previous reply to
> Alexander(https://lore.kernel.org/all/87pmsqyuqy.fsf@disp2133/).
> These were disabled because out of an abundance of caution.
>
> My original intention is to enable part of syscyls about neighbor which
> I think was safe, but I will try to figure out which of these sysctls
> are safe to be enabled.
>

A team at my company has a use case that requires setting the unix sysctls,
so I submitted a patch enabling the unix sysctl here:
https://lore.kernel.org/netdev/20211207202101.2457994-1-joannekoong@fb.com/T/#u

[...]
>>> Signed-off-by: xu xin <xu.xin16@zte.com.cn>
>>> ---
>>>   net/core/neighbour.c                | 4 ----
>>>   net/ieee802154/6lowpan/reassembly.c | 4 ----
>>>   net/ipv6/route.c                    | 4 ----
>>>   net/netfilter/ipvs/ip_vs_ctl.c      | 4 ----
>>>   net/netfilter/ipvs/ip_vs_lblc.c     | 4 ----
>>>   net/netfilter/ipvs/ip_vs_lblcr.c    | 3 ---
>>>   net/unix/sysctl_net_unix.c          | 4 ----
>>>   net/xfrm/xfrm_sysctl.c              | 4 ----
>>>   8 files changed, 31 deletions(-)
>
diff mbox series

Patch

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 0cdd4d9ad942..44d90cc341ea 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3771,10 +3771,6 @@  int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 			neigh_proc_base_reachable_time;
 	}
 
-	/* Don't export sysctls to unprivileged users */
-	if (neigh_parms_net(p)->user_ns != &init_user_ns)
-		t->neigh_vars[0].procname = NULL;
-
 	switch (neigh_parms_family(p)) {
 	case AF_INET:
 	      p_name = "ipv4";
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index be6f06adefe0..89cbad6d8368 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -366,10 +366,6 @@  static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
 				GFP_KERNEL);
 		if (table == NULL)
 			goto err_alloc;
-
-		/* Don't export sysctls to unprivileged users */
-		if (net->user_ns != &init_user_ns)
-			table[0].procname = NULL;
 	}
 
 	table[0].data	= &ieee802154_lowpan->fqdir->high_thresh;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f0d29fcb2094..6a0b15d6500e 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6409,10 +6409,6 @@  struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
 		table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
 		table[9].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
 		table[10].data = &net->ipv6.sysctl.skip_notify_on_dev_down;
-
-		/* Don't export sysctls to unprivileged users */
-		if (net->user_ns != &init_user_ns)
-			table[1].procname = NULL;
 	}
 
 	return table;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 7f645328b47f..a77c8abf2fc7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -4040,10 +4040,6 @@  static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
 		tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
 		if (tbl == NULL)
 			return -ENOMEM;
-
-		/* Don't export sysctls to unprivileged users */
-		if (net->user_ns != &init_user_ns)
-			tbl[0].procname = NULL;
 	} else
 		tbl = vs_vars;
 	/* Initialize sysctl defaults */
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 7ac7473e3804..567ba33fa5b4 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -561,10 +561,6 @@  static int __net_init __ip_vs_lblc_init(struct net *net)
 		if (ipvs->lblc_ctl_table == NULL)
 			return -ENOMEM;
 
-		/* Don't export sysctls to unprivileged users */
-		if (net->user_ns != &init_user_ns)
-			ipvs->lblc_ctl_table[0].procname = NULL;
-
 	} else
 		ipvs->lblc_ctl_table = vs_vars_table;
 	ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION;
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 77c323c36a88..a58440a7bf9e 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -747,9 +747,6 @@  static int __net_init __ip_vs_lblcr_init(struct net *net)
 		if (ipvs->lblcr_ctl_table == NULL)
 			return -ENOMEM;
 
-		/* Don't export sysctls to unprivileged users */
-		if (net->user_ns != &init_user_ns)
-			ipvs->lblcr_ctl_table[0].procname = NULL;
 	} else
 		ipvs->lblcr_ctl_table = vs_vars_table;
 	ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c
index c09bea89151b..01d44e2598e2 100644
--- a/net/unix/sysctl_net_unix.c
+++ b/net/unix/sysctl_net_unix.c
@@ -30,10 +30,6 @@  int __net_init unix_sysctl_register(struct net *net)
 	if (table == NULL)
 		goto err_alloc;
 
-	/* Don't export sysctls to unprivileged users */
-	if (net->user_ns != &init_user_ns)
-		table[0].procname = NULL;
-
 	table[0].data = &net->unx.sysctl_max_dgram_qlen;
 	net->unx.ctl = register_net_sysctl(net, "net/unix", table);
 	if (net->unx.ctl == NULL)
diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c
index 0c6c5ef65f9d..a9b7723eb88f 100644
--- a/net/xfrm/xfrm_sysctl.c
+++ b/net/xfrm/xfrm_sysctl.c
@@ -55,10 +55,6 @@  int __net_init xfrm_sysctl_init(struct net *net)
 	table[2].data = &net->xfrm.sysctl_larval_drop;
 	table[3].data = &net->xfrm.sysctl_acq_expires;
 
-	/* Don't export sysctls to unprivileged users */
-	if (net->user_ns != &init_user_ns)
-		table[0].procname = NULL;
-
 	net->xfrm.sysctl_hdr = register_net_sysctl(net, "net/core", table);
 	if (!net->xfrm.sysctl_hdr)
 		goto out_register;