diff mbox series

[net-next,v2] net: ipvs: add sysctl_run_estimation to support disable estimation

Message ID 20210819045137.35447-1-dust.li@linux.alibaba.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net-next,v2] net: ipvs: add sysctl_run_estimation to support disable estimation | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 11 maintainers not CCed: davem@davemloft.net dsahern@kernel.org pablo@netfilter.org linux-doc@vger.kernel.org yoshfuji@linux-ipv6.org kuba@kernel.org kadlec@netfilter.org coreteam@netfilter.org fw@strlen.de netfilter-devel@vger.kernel.org corbet@lwn.net
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 1 this patch: 1
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: please, no space before tabs
netdev/build_allmodconfig_warn success Errors and warnings before: 1 this patch: 1
netdev/header_inline success Link

Commit Message

Dust Li Aug. 19, 2021, 4:51 a.m. UTC
estimation_timer will iterate the est_list to do estimation
for each ipvs stats. When there are lots of services, the
list can be very large.
We observed estimation_timer() running for more than 200ms on
a machine with 104 CPUs and 50K services.

yunhong-cgl jiang reported the same phenomenon before:
https://www.spinics.net/lists/lvs-devel/msg05426.html

In some cases (for example, a large K8S cluster with many ipvs services),
ipvs estimation may not be needed. So add a sysctl knob to allow
users to disable it completely.

Default is: 1 (enable)

Cc: yunhong-cgl jiang <xintian1976@gmail.com>
Signed-off-by: Dust Li <dust.li@linux.alibaba.com>
---
v2: Use common sysctl facilities
---
 Documentation/networking/ipvs-sysctl.rst | 17 +++++++++++++++++
 include/net/ip_vs.h                      | 12 ++++++++++++
 net/netfilter/ipvs/ip_vs_ctl.c           |  8 ++++++++
 net/netfilter/ipvs/ip_vs_est.c           |  5 +++++
 4 files changed, 42 insertions(+)

Comments

kernel test robot Aug. 19, 2021, 8:23 a.m. UTC | #1
Hi Dust,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on net-next/master]

url:    https://github.com/0day-ci/linux/commits/Dust-Li/net-ipvs-add-sysctl_run_estimation-to-support-disable-estimation/20210819-125335
base:   https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git 19b8ece42c56aaa122f7e91eb391bb3dd7e193cd
config: ia64-randconfig-r024-20210818 (attached as .config)
compiler: ia64-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/0day-ci/linux/commit/8f0f8c6b2f04fe397ca8df17353590cdd2f5a414
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Dust-Li/net-ipvs-add-sysctl_run_estimation-to-support-disable-estimation/20210819-125335
        git checkout 8f0f8c6b2f04fe397ca8df17353590cdd2f5a414
        # save the attached .config to linux build tree
        mkdir build_dir
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=ia64 SHELL=/bin/bash net/netfilter/ipvs/

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from arch/ia64/include/asm/pgtable.h:153,
                    from include/linux/pgtable.h:6,
                    from arch/ia64/include/asm/uaccess.h:40,
                    from include/linux/uaccess.h:11,
                    from include/net/checksum.h:21,
                    from include/net/ip_vs.h:23,
                    from net/netfilter/ipvs/ip_vs_lc.c:18:
   arch/ia64/include/asm/mmu_context.h: In function 'reload_context':
   arch/ia64/include/asm/mmu_context.h:127:48: warning: variable 'old_rr4' set but not used [-Wunused-but-set-variable]
     127 |         unsigned long rr0, rr1, rr2, rr3, rr4, old_rr4;
         |                                                ^~~~~~~
   In file included from net/netfilter/ipvs/ip_vs_lc.c:18:
   include/net/ip_vs.h: At top level:
>> include/net/ip_vs.h:1660:19: error: redefinition of 'sysctl_run_estimation'
    1660 | static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
         |                   ^~~~~~~~~~~~~~~~~~~~~
   include/net/ip_vs.h:1075:19: note: previous definition of 'sysctl_run_estimation' with type 'int(struct netns_ipvs *)'
    1075 | static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
         |                   ^~~~~~~~~~~~~~~~~~~~~
--
   In file included from arch/ia64/include/asm/pgtable.h:153,
                    from include/linux/pgtable.h:6,
                    from include/linux/mm.h:33,
                    from include/linux/bvec.h:14,
                    from include/linux/skbuff.h:17,
                    from include/linux/ip.h:16,
                    from net/netfilter/ipvs/ip_vs_core.c:27:
   arch/ia64/include/asm/mmu_context.h: In function 'reload_context':
   arch/ia64/include/asm/mmu_context.h:127:48: warning: variable 'old_rr4' set but not used [-Wunused-but-set-variable]
     127 |         unsigned long rr0, rr1, rr2, rr3, rr4, old_rr4;
         |                                                ^~~~~~~
   In file included from net/netfilter/ipvs/ip_vs_core.c:52:
   include/net/ip_vs.h: At top level:
>> include/net/ip_vs.h:1660:19: error: redefinition of 'sysctl_run_estimation'
    1660 | static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
         |                   ^~~~~~~~~~~~~~~~~~~~~
   include/net/ip_vs.h:1075:19: note: previous definition of 'sysctl_run_estimation' with type 'int(struct netns_ipvs *)'
    1075 | static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
         |                   ^~~~~~~~~~~~~~~~~~~~~
   net/netfilter/ipvs/ip_vs_core.c: In function 'ip_vs_in_icmp':
   net/netfilter/ipvs/ip_vs_core.c:1643:15: warning: variable 'outer_proto' set but not used [-Wunused-but-set-variable]
    1643 |         char *outer_proto = "IPIP";
         |               ^~~~~~~~~~~


vim +/sysctl_run_estimation +1660 include/net/ip_vs.h

  1659	
> 1660	static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
  1661	{
  1662		return 1;
  1663	}
  1664	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Dust Li Aug. 19, 2021, 8:41 a.m. UTC | #2
On Thu, Aug 19, 2021 at 04:23:32PM +0800, kernel test robot wrote:
>Hi Dust,
>
>Thank you for the patch! Yet something to improve:
>
>[auto build test ERROR on net-next/master]

Sorry, my fault !

The sysctl_run_estimation() stub was put in the wrong place for the
case when CONFIG_SYSCTL is not defined.

I will send a v3.

>
>url:    https://github.com/0day-ci/linux/commits/Dust-Li/net-ipvs-add-sysctl_run_estimation-to-support-disable-estimation/20210819-125335
>base:   https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git 19b8ece42c56aaa122f7e91eb391bb3dd7e193cd
>config: ia64-randconfig-r024-20210818 (attached as .config)
>compiler: ia64-linux-gcc (GCC) 11.2.0
>reproduce (this is a W=1 build):
>        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
>        chmod +x ~/bin/make.cross
>        # https://github.com/0day-ci/linux/commit/8f0f8c6b2f04fe397ca8df17353590cdd2f5a414
>        git remote add linux-review https://github.com/0day-ci/linux
>        git fetch --no-tags linux-review Dust-Li/net-ipvs-add-sysctl_run_estimation-to-support-disable-estimation/20210819-125335
>        git checkout 8f0f8c6b2f04fe397ca8df17353590cdd2f5a414
>        # save the attached .config to linux build tree
>        mkdir build_dir
>        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross O=build_dir ARCH=ia64 SHELL=/bin/bash net/netfilter/ipvs/
>
>If you fix the issue, kindly add following tag as appropriate
>Reported-by: kernel test robot <lkp@intel.com>
>
>All errors (new ones prefixed by >>):
>
>   In file included from arch/ia64/include/asm/pgtable.h:153,
>                    from include/linux/pgtable.h:6,
>                    from arch/ia64/include/asm/uaccess.h:40,
>                    from include/linux/uaccess.h:11,
>                    from include/net/checksum.h:21,
>                    from include/net/ip_vs.h:23,
>                    from net/netfilter/ipvs/ip_vs_lc.c:18:
>   arch/ia64/include/asm/mmu_context.h: In function 'reload_context':
>   arch/ia64/include/asm/mmu_context.h:127:48: warning: variable 'old_rr4' set but not used [-Wunused-but-set-variable]
>     127 |         unsigned long rr0, rr1, rr2, rr3, rr4, old_rr4;
>         |                                                ^~~~~~~
>   In file included from net/netfilter/ipvs/ip_vs_lc.c:18:
>   include/net/ip_vs.h: At top level:
>>> include/net/ip_vs.h:1660:19: error: redefinition of 'sysctl_run_estimation'
>    1660 | static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
>         |                   ^~~~~~~~~~~~~~~~~~~~~
>   include/net/ip_vs.h:1075:19: note: previous definition of 'sysctl_run_estimation' with type 'int(struct netns_ipvs *)'
>    1075 | static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
>         |                   ^~~~~~~~~~~~~~~~~~~~~
>--
>   In file included from arch/ia64/include/asm/pgtable.h:153,
>                    from include/linux/pgtable.h:6,
>                    from include/linux/mm.h:33,
>                    from include/linux/bvec.h:14,
>                    from include/linux/skbuff.h:17,
>                    from include/linux/ip.h:16,
>                    from net/netfilter/ipvs/ip_vs_core.c:27:
>   arch/ia64/include/asm/mmu_context.h: In function 'reload_context':
>   arch/ia64/include/asm/mmu_context.h:127:48: warning: variable 'old_rr4' set but not used [-Wunused-but-set-variable]
>     127 |         unsigned long rr0, rr1, rr2, rr3, rr4, old_rr4;
>         |                                                ^~~~~~~
>   In file included from net/netfilter/ipvs/ip_vs_core.c:52:
>   include/net/ip_vs.h: At top level:
>>> include/net/ip_vs.h:1660:19: error: redefinition of 'sysctl_run_estimation'
>    1660 | static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
>         |                   ^~~~~~~~~~~~~~~~~~~~~
>   include/net/ip_vs.h:1075:19: note: previous definition of 'sysctl_run_estimation' with type 'int(struct netns_ipvs *)'
>    1075 | static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
>         |                   ^~~~~~~~~~~~~~~~~~~~~
>   net/netfilter/ipvs/ip_vs_core.c: In function 'ip_vs_in_icmp':
>   net/netfilter/ipvs/ip_vs_core.c:1643:15: warning: variable 'outer_proto' set but not used [-Wunused-but-set-variable]
>    1643 |         char *outer_proto = "IPIP";
>         |               ^~~~~~~~~~~
>
>
>vim +/sysctl_run_estimation +1660 include/net/ip_vs.h
>
>  1659	
>> 1660	static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
>  1661	{
>  1662		return 1;
>  1663	}
>  1664	
>
>---
>0-DAY CI Kernel Test Service, Intel Corporation
>https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
diff mbox series

Patch

diff --git a/Documentation/networking/ipvs-sysctl.rst b/Documentation/networking/ipvs-sysctl.rst
index 2afccc63856e..e20f7a27fc85 100644
--- a/Documentation/networking/ipvs-sysctl.rst
+++ b/Documentation/networking/ipvs-sysctl.rst
@@ -300,3 +300,20 @@  sync_version - INTEGER
 
 	Kernels with this sync_version entry are able to receive messages
 	of both version 1 and version 2 of the synchronisation protocol.
+
+run_estimation - BOOLEAN
+	0 - disabled
+	not 0 - enabled (default)
+
+	If disabled, the estimation will be stopped, and you can't see
+	any update on speed estimation data.
+
+	For example
+	'Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s'
+	those data in /proc/net/ip_vs_stats will always be zero.
+	Note, this only affects the speed estimation; the total data
+	will still be updated.
+
+	You can always re-enable estimation by setting this value to 1.
+	But be careful, the first estimation after re-enabling is not
+	accurate.
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 7cb5a1aace40..dba2102ec316 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -931,6 +931,7 @@  struct netns_ipvs {
 	int			sysctl_conn_reuse_mode;
 	int			sysctl_schedule_icmp;
 	int			sysctl_ignore_tunneled;
+	int 			sysctl_run_estimation;
 
 	/* ip_vs_lblc */
 	int			sysctl_lblc_expiration;
@@ -1071,6 +1072,11 @@  static inline int sysctl_cache_bypass(struct netns_ipvs *ipvs)
 	return ipvs->sysctl_cache_bypass;
 }
 
+static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
+{
+	return ipvs->sysctl_run_estimation;
+}
+
 #else
 
 static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@@ -1650,6 +1656,12 @@  static inline int ip_vs_confirm_conntrack(struct sk_buff *skb)
 static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
 {
 }
+
+static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
+{
+	return 1;
+}
+
 #endif /* CONFIG_IP_VS_NFCT */
 
 /* Using old conntrack that can not be redirected to another real server? */
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c25097092a06..cbea5a68afb5 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2017,6 +2017,12 @@  static struct ctl_table vs_vars[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "run_estimation",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #ifdef CONFIG_IP_VS_DEBUG
 	{
 		.procname	= "debug_level",
@@ -4090,6 +4096,8 @@  static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
 	tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
 	tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
 	tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
+	ipvs->sysctl_run_estimation = 1;
+	tbl[idx++].data = &ipvs->sysctl_run_estimation;
 
 	ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
 	if (ipvs->sysctl_hdr == NULL) {
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 05b8112ffb37..9a1a7af6a186 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -100,6 +100,9 @@  static void estimation_timer(struct timer_list *t)
 	u64 rate;
 	struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer);
 
+	if (!sysctl_run_estimation(ipvs))
+		goto skip;
+
 	spin_lock(&ipvs->est_lock);
 	list_for_each_entry(e, &ipvs->est_list, list) {
 		s = container_of(e, struct ip_vs_stats, est);
@@ -131,6 +134,8 @@  static void estimation_timer(struct timer_list *t)
 		spin_unlock(&s->lock);
 	}
 	spin_unlock(&ipvs->est_lock);
+
+skip:
 	mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
 }