diff mbox

net: don't unnecessarily load kernel modules in dev_ioctl()

Message ID 152037526427.18953.14013300464173153064.stgit@chester (mailing list archive)
State New, archived
Headers show

Commit Message

Paul Moore March 6, 2018, 10:27 p.m. UTC
From: Paul Moore <paul@paul-moore.com>

Starting with v4.16-rc1 we've been seeing a higher than usual number
of requests for the kernel to load networking modules, even on events
which shouldn't trigger a module load (e.g. ioctl(TCGETS)).  Stephen
Smalley suggested the problem may lie in commit 44c02a2c3dc5
("dev_ioctl(): move copyin/copyout to callers") which moves changes
the network dev_ioctl() function to always call dev_load(),
regardless of the requested ioctl.

This patch moves the dev_load() calls back into the individual ioctls
while preserving the rest of the original patch.

Reported-by: Dominick Grift <dac.override@gmail.com>
Suggested-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 net/core/dev_ioctl.c |    7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-security-module" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Paul Moore March 6, 2018, 10:32 p.m. UTC | #1
On Tue, Mar 6, 2018 at 5:27 PM, Paul Moore <pmoore@redhat.com> wrote:
> From: Paul Moore <paul@paul-moore.com>
>
> Starting with v4.16-rc1 we've been seeing a higher than usual number
> of requests for the kernel to load networking modules, even on events
> which shouldn't trigger a module load (e.g. ioctl(TCGETS)).  Stephen
> Smalley suggested the problem may lie in commit 44c02a2c3dc5
> ("dev_ioctl(): move copyin/copyout to callers") which moves changes
> the network dev_ioctl() function to always call dev_load(),
> regardless of the requested ioctl.
>
> This patch moves the dev_load() calls back into the individual ioctls
> while preserving the rest of the original patch.
>
> Reported-by: Dominick Grift <dac.override@gmail.com>
> Suggested-by: Stephen Smalley <sds@tycho.nsa.gov>
> Signed-off-by: Paul Moore <paul@paul-moore.com>
> ---
>  net/core/dev_ioctl.c |    7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)

In the interest of full disclosure, I've compiled this code but I
haven't booted it yet (test kernel building now).  I just wanted to
post this sooner rather than later in case the networking folks, or
Al, had a different solution they would prefer.

> diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
> index 0ab1af04296c..a04e1e88bf3a 100644
> --- a/net/core/dev_ioctl.c
> +++ b/net/core/dev_ioctl.c
> @@ -402,8 +402,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
>         if (colon)
>                 *colon = 0;
>
> -       dev_load(net, ifr->ifr_name);
> -
>         /*
>          *      See which interface the caller is talking about.
>          */
> @@ -423,6 +421,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
>         case SIOCGIFMAP:
>         case SIOCGIFINDEX:
>         case SIOCGIFTXQLEN:
> +               dev_load(net, ifr->ifr_name);
>                 rcu_read_lock();
>                 ret = dev_ifsioc_locked(net, ifr, cmd);
>                 rcu_read_unlock();
> @@ -431,6 +430,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
>                 return ret;
>
>         case SIOCETHTOOL:
> +               dev_load(net, ifr->ifr_name);
>                 rtnl_lock();
>                 ret = dev_ethtool(net, ifr);
>                 rtnl_unlock();
> @@ -447,6 +447,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
>         case SIOCGMIIPHY:
>         case SIOCGMIIREG:
>         case SIOCSIFNAME:
> +               dev_load(net, ifr->ifr_name);
>                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
>                         return -EPERM;
>                 rtnl_lock();
> @@ -494,6 +495,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
>                 /* fall through */
>         case SIOCBONDSLAVEINFOQUERY:
>         case SIOCBONDINFOQUERY:
> +               dev_load(net, ifr->ifr_name);
>                 rtnl_lock();
>                 ret = dev_ifsioc(net, ifr, cmd);
>                 rtnl_unlock();
> @@ -518,6 +520,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
>                     cmd == SIOCGHWTSTAMP ||
>                     (cmd >= SIOCDEVPRIVATE &&
>                      cmd <= SIOCDEVPRIVATE + 15)) {
> +                       dev_load(net, ifr->ifr_name);
>                         rtnl_lock();
>                         ret = dev_ifsioc(net, ifr, cmd);
>                         rtnl_unlock();
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-security-module" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stephen Hemminger March 6, 2018, 11:59 p.m. UTC | #2
On Tue, 06 Mar 2018 17:27:44 -0500
Paul Moore <pmoore@redhat.com> wrote:

> From: Paul Moore <paul@paul-moore.com>
> 
> Starting with v4.16-rc1 we've been seeing a higher than usual number
> of requests for the kernel to load networking modules, even on events
> which shouldn't trigger a module load (e.g. ioctl(TCGETS)).  Stephen
> Smalley suggested the problem may lie in commit 44c02a2c3dc5
> ("dev_ioctl(): move copyin/copyout to callers") which moves changes
> the network dev_ioctl() function to always call dev_load(),
> regardless of the requested ioctl.
> 
> This patch moves the dev_load() calls back into the individual ioctls
> while preserving the rest of the original patch.
> 
> Reported-by: Dominick Grift <dac.override@gmail.com>
> Suggested-by: Stephen Smalley <sds@tycho.nsa.gov>
> Signed-off-by: Paul Moore <paul@paul-moore.com>
> ---
>  net/core/dev_ioctl.c |    7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
> index 0ab1af04296c..a04e1e88bf3a 100644
> --- a/net/core/dev_ioctl.c
> +++ b/net/core/dev_ioctl.c
> @@ -402,8 +402,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
>  	if (colon)
>  		*colon = 0;
>  
> -	dev_load(net, ifr->ifr_name);

Actually dev_load by ethernet name is really a legacy thing that should just die,

It was kept around so that some very tunnel configuration using special names.

	# ifconfig sit0

which probably several web pages still tell users to do...
We have much better control now with ip commands so that this is just
baggage.
--
To unsubscribe from this list: send the line "unsubscribe linux-security-module" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Paul Moore March 7, 2018, 1:46 p.m. UTC | #3
On Tue, Mar 6, 2018 at 6:59 PM, Stephen Hemminger
<stephen@networkplumber.org> wrote:
> On Tue, 06 Mar 2018 17:27:44 -0500
> Paul Moore <pmoore@redhat.com> wrote:
>> From: Paul Moore <paul@paul-moore.com>
>>
>> Starting with v4.16-rc1 we've been seeing a higher than usual number
>> of requests for the kernel to load networking modules, even on events
>> which shouldn't trigger a module load (e.g. ioctl(TCGETS)).  Stephen
>> Smalley suggested the problem may lie in commit 44c02a2c3dc5
>> ("dev_ioctl(): move copyin/copyout to callers") which moves changes
>> the network dev_ioctl() function to always call dev_load(),
>> regardless of the requested ioctl.
>>
>> This patch moves the dev_load() calls back into the individual ioctls
>> while preserving the rest of the original patch.
>>
>> Reported-by: Dominick Grift <dac.override@gmail.com>
>> Suggested-by: Stephen Smalley <sds@tycho.nsa.gov>
>> Signed-off-by: Paul Moore <paul@paul-moore.com>
>> ---
>>  net/core/dev_ioctl.c |    7 +++++--
>>  1 file changed, 5 insertions(+), 2 deletions(-)
>>
>> diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
>> index 0ab1af04296c..a04e1e88bf3a 100644
>> --- a/net/core/dev_ioctl.c
>> +++ b/net/core/dev_ioctl.c
>> @@ -402,8 +402,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
>>       if (colon)
>>               *colon = 0;
>>
>> -     dev_load(net, ifr->ifr_name);
>
> Actually dev_load by ethernet name is really a legacy thing that should just die,
>
> It was kept around so that some very tunnel configuration using special names.
>
>         # ifconfig sit0
>
> which probably several web pages still tell users to do...
> We have much better control now with ip commands so that this is just
> baggage.

In an effort to get this regression fixed quickly, and not get tangled
up in a user education issue, can we at least restore the old ioctl()
behavior and worry about removing dev_load() later?
David Miller March 7, 2018, 8:13 p.m. UTC | #4
From: Paul Moore <paul@paul-moore.com>
Date: Tue, 6 Mar 2018 17:32:47 -0500

> On Tue, Mar 6, 2018 at 5:27 PM, Paul Moore <pmoore@redhat.com> wrote:
>> From: Paul Moore <paul@paul-moore.com>
>>
>> Starting with v4.16-rc1 we've been seeing a higher than usual number
>> of requests for the kernel to load networking modules, even on events
>> which shouldn't trigger a module load (e.g. ioctl(TCGETS)).  Stephen
>> Smalley suggested the problem may lie in commit 44c02a2c3dc5
>> ("dev_ioctl(): move copyin/copyout to callers") which moves changes
>> the network dev_ioctl() function to always call dev_load(),
>> regardless of the requested ioctl.
>>
>> This patch moves the dev_load() calls back into the individual ioctls
>> while preserving the rest of the original patch.
>>
>> Reported-by: Dominick Grift <dac.override@gmail.com>
>> Suggested-by: Stephen Smalley <sds@tycho.nsa.gov>
>> Signed-off-by: Paul Moore <paul@paul-moore.com>
>> ---
>>  net/core/dev_ioctl.c |    7 +++++--
>>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> In the interest of full disclosure, I've compiled this code but I
> haven't booted it yet (test kernel building now).  I just wanted to
> post this sooner rather than later in case the networking folks, or
> Al, had a different solution they would prefer.

This is definitely the right fix, so patch applied.

Taking the dev_load() out of that switch statement definitely has
side effects.
--
To unsubscribe from this list: send the line "unsubscribe linux-security-module" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller March 8, 2018, 5:34 p.m. UTC | #5
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Tue, 6 Mar 2018 15:59:20 -0800

> Actually dev_load by ethernet name is really a legacy thing that
> should just die,
> 
> It was kept around so that some very tunnel configuration using special names.
> 
> 	# ifconfig sit0
> 
> which probably several web pages still tell users to do...
> We have much better control now with ip commands so that this is just
> baggage.

As you say, some people use this stuff, so we really can't break
it at this point.
--
To unsubscribe from this list: send the line "unsubscribe linux-security-module" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet March 8, 2018, 6:05 p.m. UTC | #6
On 03/08/2018 09:34 AM, David Miller wrote:
> From: Stephen Hemminger <stephen@networkplumber.org>
> Date: Tue, 6 Mar 2018 15:59:20 -0800
> 
>> Actually dev_load by ethernet name is really a legacy thing that
>> should just die,
>>
>> It was kept around so that some very tunnel configuration using special names.
>>
>> 	# ifconfig sit0
>>
>> which probably several web pages still tell users to do...
>> We have much better control now with ip commands so that this is just
>> baggage.
> 
> As you say, some people use this stuff, so we really can't break
> it at this point.

Another problematic legacy behavior is the automatic creation of 
fallback tunnels, which hurts netns creation/deletion.

Some environments want to create a netns for every job/task, and they do 
not care if the init netns has these tunnels or not.

We have a local patch adding yet another knob to control this, since it 
saves a lot of cpu cycles (about 10ms per netns create/delete pair here)

lpk43:~# echo 0 >/proc/sys/net/core/fb_tunnels_only_for_init_net
lpk43:~# time for i in {1..1000}; do unshare -n /bin/false;done

real	0m14.939s
user	0m0.152s
sys	0m1.496s

lpk43:~# unshare -n
lpk43:~# ip link
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN mode DEFAULT group 
default qlen 1000
     link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: tunl0@NONE: <NOARP> mtu 1480 qdisc noop state DOWN mode DEFAULT group 
default qlen 1000
     link/ipip 0.0.0.0 brd 0.0.0.0
3: gre0@NONE: <NOARP> mtu 1476 qdisc noop state DOWN mode DEFAULT group 
default qlen 1000
     link/gre 0.0.0.0 brd 0.0.0.0
4: gretap0@NONE: <BROADCAST,MULTICAST> mtu 1462 qdisc noop state DOWN 
mode DEFAULT group default qlen 1000
     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
5: erspan0@NONE: <BROADCAST,MULTICAST> mtu 1450 qdisc noop state DOWN 
mode DEFAULT group default qlen 1000
     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
6: sit0@NONE: <NOARP> mtu 1480 qdisc noop state DOWN mode DEFAULT group 
default qlen 1000
     link/sit 0.0.0.0 brd 0.0.0.0
7: ip6tnl0@NONE: <NOARP> mtu 1452 qdisc noop state DOWN mode DEFAULT 
group default qlen 1000
     link/tunnel6 :: brd ::
8: ip6gre0@NONE: <NOARP> mtu 1448 qdisc noop state DOWN mode DEFAULT 
group default qlen 1000
     link/gre6 00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00 brd 
00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00
lpk43:~# exit
logout

lpk43:~# echo 1 >/proc/sys/net/core/fb_tunnels_only_for_init_net

lpk43:~# time for i in {1..1000}; do unshare -n /bin/false;done

real	0m4.169s
user	0m0.202s
sys	0m0.875s

lpk43:~# unshare -n

lpk43:~# ip link
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN mode DEFAULT group 
default qlen 1000
     link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
lpk43:~#





--
To unsubscribe from this list: send the line "unsubscribe linux-security-module" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Miller March 8, 2018, 6:11 p.m. UTC | #7
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 8 Mar 2018 10:05:12 -0800

> Another problematic legacy behavior is the automatic creation of
> fallback tunnels, which hurts netns creation/deletion.
> 
> Some environments want to create a netns for every job/task, and they
> do not care if the init netns has these tunnels or not.
> 
> We have a local patch adding yet another knob to control this, since
> it saves a lot of cpu cycles (about 10ms per netns create/delete pair
> here)

Yeah, understood.  At small scale the current behavior maybe made
sense, but these days it really doesn't.

No objections to the knob if you want to submit it.
--
To unsubscribe from this list: send the line "unsubscribe linux-security-module" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Dumazet March 8, 2018, 8:53 p.m. UTC | #8
On 03/08/2018 10:11 AM, David Miller wrote:

> Yeah, understood.  At small scale the current behavior maybe made
> sense, but these days it really doesn't.
> 
> No objections to the knob if you want to submit it.
> 

Thanks David, I have rebased my patch and sent it.

New numbers on net-next are looking very nice, thanks to Kirill work.
--
To unsubscribe from this list: send the line "unsubscribe linux-security-module" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 0ab1af04296c..a04e1e88bf3a 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -402,8 +402,6 @@  int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 	if (colon)
 		*colon = 0;
 
-	dev_load(net, ifr->ifr_name);
-
 	/*
 	 *	See which interface the caller is talking about.
 	 */
@@ -423,6 +421,7 @@  int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 	case SIOCGIFMAP:
 	case SIOCGIFINDEX:
 	case SIOCGIFTXQLEN:
+		dev_load(net, ifr->ifr_name);
 		rcu_read_lock();
 		ret = dev_ifsioc_locked(net, ifr, cmd);
 		rcu_read_unlock();
@@ -431,6 +430,7 @@  int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 		return ret;
 
 	case SIOCETHTOOL:
+		dev_load(net, ifr->ifr_name);
 		rtnl_lock();
 		ret = dev_ethtool(net, ifr);
 		rtnl_unlock();
@@ -447,6 +447,7 @@  int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSIFNAME:
+		dev_load(net, ifr->ifr_name);
 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
 		rtnl_lock();
@@ -494,6 +495,7 @@  int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 		/* fall through */
 	case SIOCBONDSLAVEINFOQUERY:
 	case SIOCBONDINFOQUERY:
+		dev_load(net, ifr->ifr_name);
 		rtnl_lock();
 		ret = dev_ifsioc(net, ifr, cmd);
 		rtnl_unlock();
@@ -518,6 +520,7 @@  int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 		    cmd == SIOCGHWTSTAMP ||
 		    (cmd >= SIOCDEVPRIVATE &&
 		     cmd <= SIOCDEVPRIVATE + 15)) {
+			dev_load(net, ifr->ifr_name);
 			rtnl_lock();
 			ret = dev_ifsioc(net, ifr, cmd);
 			rtnl_unlock();