diff mbox

net: don't unnecessarily load kernel modules in dev_ioctl()

Message ID 152037526427.18953.14013300464173153064.stgit@chester (mailing list archive)
State Accepted
Headers show

Commit Message

Paul Moore March 6, 2018, 10:27 p.m. UTC
From: Paul Moore <paul@paul-moore.com>

Starting with v4.16-rc1 we've been seeing a higher than usual number
of requests for the kernel to load networking modules, even on events
which shouldn't trigger a module load (e.g. ioctl(TCGETS)).  Stephen
Smalley suggested the problem may lie in commit 44c02a2c3dc5
("dev_ioctl(): move copyin/copyout to callers") which moves changes
the network dev_ioctl() function to always call dev_load(),
regardless of the requested ioctl.

This patch moves the dev_load() calls back into the individual ioctls
while preserving the rest of the original patch.

Reported-by: Dominick Grift <dac.override@gmail.com>
Suggested-by: Stephen Smalley <sds@tycho.nsa.gov>
Signed-off-by: Paul Moore <paul@paul-moore.com>
---
 net/core/dev_ioctl.c |    7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

Comments

Paul Moore March 6, 2018, 10:32 p.m. UTC | #1
On Tue, Mar 6, 2018 at 5:27 PM, Paul Moore <pmoore@redhat.com> wrote:
> From: Paul Moore <paul@paul-moore.com>
>
> Starting with v4.16-rc1 we've been seeing a higher than usual number
> of requests for the kernel to load networking modules, even on events
> which shouldn't trigger a module load (e.g. ioctl(TCGETS)).  Stephen
> Smalley suggested the problem may lie in commit 44c02a2c3dc5
> ("dev_ioctl(): move copyin/copyout to callers") which moves changes
> the network dev_ioctl() function to always call dev_load(),
> regardless of the requested ioctl.
>
> This patch moves the dev_load() calls back into the individual ioctls
> while preserving the rest of the original patch.
>
> Reported-by: Dominick Grift <dac.override@gmail.com>
> Suggested-by: Stephen Smalley <sds@tycho.nsa.gov>
> Signed-off-by: Paul Moore <paul@paul-moore.com>
> ---
>  net/core/dev_ioctl.c |    7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)

In the interest of full disclosure, I've compiled this code but I
haven't booted it yet (test kernel building now).  I just wanted to
post this sooner rather than later in case the networking folks, or
Al, had a different solution they would prefer.

> diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
> index 0ab1af04296c..a04e1e88bf3a 100644
> --- a/net/core/dev_ioctl.c
> +++ b/net/core/dev_ioctl.c
> @@ -402,8 +402,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
>         if (colon)
>                 *colon = 0;
>
> -       dev_load(net, ifr->ifr_name);
> -
>         /*
>          *      See which interface the caller is talking about.
>          */
> @@ -423,6 +421,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
>         case SIOCGIFMAP:
>         case SIOCGIFINDEX:
>         case SIOCGIFTXQLEN:
> +               dev_load(net, ifr->ifr_name);
>                 rcu_read_lock();
>                 ret = dev_ifsioc_locked(net, ifr, cmd);
>                 rcu_read_unlock();
> @@ -431,6 +430,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
>                 return ret;
>
>         case SIOCETHTOOL:
> +               dev_load(net, ifr->ifr_name);
>                 rtnl_lock();
>                 ret = dev_ethtool(net, ifr);
>                 rtnl_unlock();
> @@ -447,6 +447,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
>         case SIOCGMIIPHY:
>         case SIOCGMIIREG:
>         case SIOCSIFNAME:
> +               dev_load(net, ifr->ifr_name);
>                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
>                         return -EPERM;
>                 rtnl_lock();
> @@ -494,6 +495,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
>                 /* fall through */
>         case SIOCBONDSLAVEINFOQUERY:
>         case SIOCBONDINFOQUERY:
> +               dev_load(net, ifr->ifr_name);
>                 rtnl_lock();
>                 ret = dev_ifsioc(net, ifr, cmd);
>                 rtnl_unlock();
> @@ -518,6 +520,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
>                     cmd == SIOCGHWTSTAMP ||
>                     (cmd >= SIOCDEVPRIVATE &&
>                      cmd <= SIOCDEVPRIVATE + 15)) {
> +                       dev_load(net, ifr->ifr_name);
>                         rtnl_lock();
>                         ret = dev_ifsioc(net, ifr, cmd);
>                         rtnl_unlock();
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-security-module" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Stephen Hemminger March 6, 2018, 11:59 p.m. UTC | #2
On Tue, 06 Mar 2018 17:27:44 -0500
Paul Moore <pmoore@redhat.com> wrote:

> From: Paul Moore <paul@paul-moore.com>
> 
> Starting with v4.16-rc1 we've been seeing a higher than usual number
> of requests for the kernel to load networking modules, even on events
> which shouldn't trigger a module load (e.g. ioctl(TCGETS)).  Stephen
> Smalley suggested the problem may lie in commit 44c02a2c3dc5
> ("dev_ioctl(): move copyin/copyout to callers") which moves changes
> the network dev_ioctl() function to always call dev_load(),
> regardless of the requested ioctl.
> 
> This patch moves the dev_load() calls back into the individual ioctls
> while preserving the rest of the original patch.
> 
> Reported-by: Dominick Grift <dac.override@gmail.com>
> Suggested-by: Stephen Smalley <sds@tycho.nsa.gov>
> Signed-off-by: Paul Moore <paul@paul-moore.com>
> ---
>  net/core/dev_ioctl.c |    7 +++++--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
> index 0ab1af04296c..a04e1e88bf3a 100644
> --- a/net/core/dev_ioctl.c
> +++ b/net/core/dev_ioctl.c
> @@ -402,8 +402,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
>  	if (colon)
>  		*colon = 0;
>  
> -	dev_load(net, ifr->ifr_name);

Actually dev_load by ethernet name is really a legacy thing that should just die,

It was kept around so that some very tunnel configuration using special names.

	# ifconfig sit0

which probably several web pages still tell users to do...
We have much better control now with ip commands so that this is just
baggage.
Paul Moore March 7, 2018, 1:46 p.m. UTC | #3
On Tue, Mar 6, 2018 at 6:59 PM, Stephen Hemminger
<stephen@networkplumber.org> wrote:
> On Tue, 06 Mar 2018 17:27:44 -0500
> Paul Moore <pmoore@redhat.com> wrote:
>> From: Paul Moore <paul@paul-moore.com>
>>
>> Starting with v4.16-rc1 we've been seeing a higher than usual number
>> of requests for the kernel to load networking modules, even on events
>> which shouldn't trigger a module load (e.g. ioctl(TCGETS)).  Stephen
>> Smalley suggested the problem may lie in commit 44c02a2c3dc5
>> ("dev_ioctl(): move copyin/copyout to callers") which moves changes
>> the network dev_ioctl() function to always call dev_load(),
>> regardless of the requested ioctl.
>>
>> This patch moves the dev_load() calls back into the individual ioctls
>> while preserving the rest of the original patch.
>>
>> Reported-by: Dominick Grift <dac.override@gmail.com>
>> Suggested-by: Stephen Smalley <sds@tycho.nsa.gov>
>> Signed-off-by: Paul Moore <paul@paul-moore.com>
>> ---
>>  net/core/dev_ioctl.c |    7 +++++--
>>  1 file changed, 5 insertions(+), 2 deletions(-)
>>
>> diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
>> index 0ab1af04296c..a04e1e88bf3a 100644
>> --- a/net/core/dev_ioctl.c
>> +++ b/net/core/dev_ioctl.c
>> @@ -402,8 +402,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
>>       if (colon)
>>               *colon = 0;
>>
>> -     dev_load(net, ifr->ifr_name);
>
> Actually dev_load by ethernet name is really a legacy thing that should just die,
>
> It was kept around so that some very tunnel configuration using special names.
>
>         # ifconfig sit0
>
> which probably several web pages still tell users to do...
> We have much better control now with ip commands so that this is just
> baggage.

In an effort to get this regression fixed quickly, and not get tangled
up in a user education issue, can we at least restore the old ioctl()
behavior and worry about removing dev_load() later?
David Miller March 7, 2018, 8:13 p.m. UTC | #4
From: Paul Moore <paul@paul-moore.com>
Date: Tue, 6 Mar 2018 17:32:47 -0500

> On Tue, Mar 6, 2018 at 5:27 PM, Paul Moore <pmoore@redhat.com> wrote:
>> From: Paul Moore <paul@paul-moore.com>
>>
>> Starting with v4.16-rc1 we've been seeing a higher than usual number
>> of requests for the kernel to load networking modules, even on events
>> which shouldn't trigger a module load (e.g. ioctl(TCGETS)).  Stephen
>> Smalley suggested the problem may lie in commit 44c02a2c3dc5
>> ("dev_ioctl(): move copyin/copyout to callers") which moves changes
>> the network dev_ioctl() function to always call dev_load(),
>> regardless of the requested ioctl.
>>
>> This patch moves the dev_load() calls back into the individual ioctls
>> while preserving the rest of the original patch.
>>
>> Reported-by: Dominick Grift <dac.override@gmail.com>
>> Suggested-by: Stephen Smalley <sds@tycho.nsa.gov>
>> Signed-off-by: Paul Moore <paul@paul-moore.com>
>> ---
>>  net/core/dev_ioctl.c |    7 +++++--
>>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> In the interest of full disclosure, I've compiled this code but I
> haven't booted it yet (test kernel building now).  I just wanted to
> post this sooner rather than later in case the networking folks, or
> Al, had a different solution they would prefer.

This is definitely the right fix, so patch applied.

Taking the dev_load() out of that switch statement definitely has
side effects.
David Miller March 8, 2018, 5:34 p.m. UTC | #5
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Tue, 6 Mar 2018 15:59:20 -0800

> Actually dev_load by ethernet name is really a legacy thing that
> should just die,
> 
> It was kept around so that some very tunnel configuration using special names.
> 
> 	# ifconfig sit0
> 
> which probably several web pages still tell users to do...
> We have much better control now with ip commands so that this is just
> baggage.

As you say, some people use this stuff, so we really can't break
it at this point.
Eric Dumazet March 8, 2018, 6:05 p.m. UTC | #6
On 03/08/2018 09:34 AM, David Miller wrote:
> From: Stephen Hemminger <stephen@networkplumber.org>
> Date: Tue, 6 Mar 2018 15:59:20 -0800
> 
>> Actually dev_load by ethernet name is really a legacy thing that
>> should just die,
>>
>> It was kept around so that some very tunnel configuration using special names.
>>
>> 	# ifconfig sit0
>>
>> which probably several web pages still tell users to do...
>> We have much better control now with ip commands so that this is just
>> baggage.
> 
> As you say, some people use this stuff, so we really can't break
> it at this point.

Another problematic legacy behavior is the automatic creation of 
fallback tunnels, which hurts netns creation/deletion.

Some environments want to create a netns for every job/task, and they do 
not care if the init netns has these tunnels or not.

We have a local patch adding yet another knob to control this, since it 
saves a lot of cpu cycles (about 10ms per netns create/delete pair here)

lpk43:~# echo 0 >/proc/sys/net/core/fb_tunnels_only_for_init_net
lpk43:~# time for i in {1..1000}; do unshare -n /bin/false;done

real	0m14.939s
user	0m0.152s
sys	0m1.496s

lpk43:~# unshare -n
lpk43:~# ip link
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN mode DEFAULT group 
default qlen 1000
     link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: tunl0@NONE: <NOARP> mtu 1480 qdisc noop state DOWN mode DEFAULT group 
default qlen 1000
     link/ipip 0.0.0.0 brd 0.0.0.0
3: gre0@NONE: <NOARP> mtu 1476 qdisc noop state DOWN mode DEFAULT group 
default qlen 1000
     link/gre 0.0.0.0 brd 0.0.0.0
4: gretap0@NONE: <BROADCAST,MULTICAST> mtu 1462 qdisc noop state DOWN 
mode DEFAULT group default qlen 1000
     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
5: erspan0@NONE: <BROADCAST,MULTICAST> mtu 1450 qdisc noop state DOWN 
mode DEFAULT group default qlen 1000
     link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
6: sit0@NONE: <NOARP> mtu 1480 qdisc noop state DOWN mode DEFAULT group 
default qlen 1000
     link/sit 0.0.0.0 brd 0.0.0.0
7: ip6tnl0@NONE: <NOARP> mtu 1452 qdisc noop state DOWN mode DEFAULT 
group default qlen 1000
     link/tunnel6 :: brd ::
8: ip6gre0@NONE: <NOARP> mtu 1448 qdisc noop state DOWN mode DEFAULT 
group default qlen 1000
     link/gre6 00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00 brd 
00:00:00:00:00:00:00:00:00:00:00:00:00:00:00:00
lpk43:~# exit
logout

lpk43:~# echo 1 >/proc/sys/net/core/fb_tunnels_only_for_init_net

lpk43:~# time for i in {1..1000}; do unshare -n /bin/false;done

real	0m4.169s
user	0m0.202s
sys	0m0.875s

lpk43:~# unshare -n

lpk43:~# ip link
1: lo: <LOOPBACK> mtu 65536 qdisc noop state DOWN mode DEFAULT group 
default qlen 1000
     link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
lpk43:~#
David Miller March 8, 2018, 6:11 p.m. UTC | #7
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 8 Mar 2018 10:05:12 -0800

> Another problematic legacy behavior is the automatic creation of
> fallback tunnels, which hurts netns creation/deletion.
> 
> Some environments want to create a netns for every job/task, and they
> do not care if the init netns has these tunnels or not.
> 
> We have a local patch adding yet another knob to control this, since
> it saves a lot of cpu cycles (about 10ms per netns create/delete pair
> here)

Yeah, understood.  At small scale the current behavior maybe made
sense, but these days it really doesn't.

No objections to the knob if you want to submit it.
Eric Dumazet March 8, 2018, 8:53 p.m. UTC | #8
On 03/08/2018 10:11 AM, David Miller wrote:

> Yeah, understood.  At small scale the current behavior maybe made
> sense, but these days it really doesn't.
> 
> No objections to the knob if you want to submit it.
> 

Thanks David, I have rebased my patch and sent it.

New numbers on net-next are looking very nice, thanks to Kirill work.
diff mbox

Patch

diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 0ab1af04296c..a04e1e88bf3a 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -402,8 +402,6 @@  int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 	if (colon)
 		*colon = 0;
 
-	dev_load(net, ifr->ifr_name);
-
 	/*
 	 *	See which interface the caller is talking about.
 	 */
@@ -423,6 +421,7 @@  int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 	case SIOCGIFMAP:
 	case SIOCGIFINDEX:
 	case SIOCGIFTXQLEN:
+		dev_load(net, ifr->ifr_name);
 		rcu_read_lock();
 		ret = dev_ifsioc_locked(net, ifr, cmd);
 		rcu_read_unlock();
@@ -431,6 +430,7 @@  int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 		return ret;
 
 	case SIOCETHTOOL:
+		dev_load(net, ifr->ifr_name);
 		rtnl_lock();
 		ret = dev_ethtool(net, ifr);
 		rtnl_unlock();
@@ -447,6 +447,7 @@  int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSIFNAME:
+		dev_load(net, ifr->ifr_name);
 		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
 		rtnl_lock();
@@ -494,6 +495,7 @@  int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 		/* fall through */
 	case SIOCBONDSLAVEINFOQUERY:
 	case SIOCBONDINFOQUERY:
+		dev_load(net, ifr->ifr_name);
 		rtnl_lock();
 		ret = dev_ifsioc(net, ifr, cmd);
 		rtnl_unlock();
@@ -518,6 +520,7 @@  int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c
 		    cmd == SIOCGHWTSTAMP ||
 		    (cmd >= SIOCDEVPRIVATE &&
 		     cmd <= SIOCDEVPRIVATE + 15)) {
+			dev_load(net, ifr->ifr_name);
 			rtnl_lock();
 			ret = dev_ifsioc(net, ifr, cmd);
 			rtnl_unlock();