diff mbox series

[v2,1/3] ipv4: Namespaceify route/error_cost knob

Message ID 20220824020343.213715-1-xu.xin16@zte.com.cn (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series Namespaceify two sysctls related with route | expand

Checks

Context Check Description
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 452443 this patch: 452443
netdev/cc_maintainers warning 2 maintainers not CCed: edumazet@google.com pabeni@redhat.com
netdev/build_clang success Errors and warnings before: 1115 this patch: 1115
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 473745 this patch: 473745
netdev/checkpatch warning CHECK: From:/Signed-off-by: email comments mismatch: 'From: xu xin <xu.xin16@zte.com.cn>' != 'Signed-off-by: xu xin (CGEL ZTE) <xu.xin16@zte.com.cn>'
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

CGEL Aug. 24, 2022, 2:03 a.m. UTC
From: xu xin <xu.xin16@zte.com.cn>

Different network namespaces have different requirements for the
error_cost sysctl, which, together with error_burst, limits the maximum
rate at which ICMP_DEST_UNREACH packets are sent. Put simply, error_cost
is the minimum time interval between two consecutive ICMP_DEST_UNREACH
packets sent to the same peer during the steady state, as opposed to the
burst that is allowed after a long quiet period.

Enable error_cost to be configured per network namespace.

Signed-off-by: xu xin (CGEL ZTE) <xu.xin16@zte.com.cn>
Reviewed-by: Yunkai Zhang (CGEL ZTE) <zhang.yunkai@zte.com.cn>
---
 include/net/netns/ipv4.h |  1 +
 net/ipv4/route.c         | 23 +++++++++++++----------
 2 files changed, 14 insertions(+), 10 deletions(-)

Comments

David Ahern Aug. 25, 2022, 3:23 p.m. UTC | #1
On 8/23/22 7:03 PM, cgel.zte@gmail.com wrote:
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index 795cbe1de912..b022ae749640 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -118,7 +118,6 @@ static int ip_rt_max_size;
>  static int ip_rt_redirect_number __read_mostly	= 9;
>  static int ip_rt_redirect_load __read_mostly	= HZ / 50;
>  static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
> -static int ip_rt_error_cost __read_mostly	= HZ;
>  static int ip_rt_error_burst __read_mostly	= 5 * HZ;
>  
>  static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
> @@ -949,6 +948,7 @@ static int ip_error(struct sk_buff *skb)
>  	SKB_DR(reason);
>  	bool send;
>  	int code;
> +	int error_cost;

can be moved to below where it is needed
>  
>  	if (netif_is_l3_master(skb->dev)) {
>  		dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
> @@ -1002,11 +1002,13 @@ static int ip_error(struct sk_buff *skb)
>  	if (peer) {

to here and then name it ip_rt_error_cost, so you don't need to make
changes to the algorithm:

		int ip_rt_error_cost = READ_ONCE(net->ipv4.ip_rt_error_cost);

Also, why not ip_rt_error_burst as well? part of the same algorithm.
David Ahern Aug. 25, 2022, 3:24 p.m. UTC | #2
On 8/25/22 8:23 AM, David Ahern wrote:
> Also, why not ip_rt_error_burst as well? part of the same algorithm.
> 

Never mind, that is patch 2.
CGEL Aug. 30, 2022, 9:47 a.m. UTC | #3
On Thu, Aug 25, 2022 at 08:23:56AM -0700, David Ahern wrote:
> On 8/23/22 7:03 PM, cgel.zte@gmail.com wrote:
> > diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> > index 795cbe1de912..b022ae749640 100644
> > --- a/net/ipv4/route.c
> > +++ b/net/ipv4/route.c
> > @@ -118,7 +118,6 @@ static int ip_rt_max_size;
> >  static int ip_rt_redirect_number __read_mostly	= 9;
> >  static int ip_rt_redirect_load __read_mostly	= HZ / 50;
> >  static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
> > -static int ip_rt_error_cost __read_mostly	= HZ;
> >  static int ip_rt_error_burst __read_mostly	= 5 * HZ;
> >  
> >  static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
> > @@ -949,6 +948,7 @@ static int ip_error(struct sk_buff *skb)
> >  	SKB_DR(reason);
> >  	bool send;
> >  	int code;
> > +	int error_cost;
> 
> can be moved to below where it is needed
> >  
> >  	if (netif_is_l3_master(skb->dev)) {
> >  		dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
> > @@ -1002,11 +1002,13 @@ static int ip_error(struct sk_buff *skb)
> >  	if (peer) {
> 
> to here and then name it ip_rt_error_cost, so you don't need to make
> changes to the algorithm:
>
> 		int ip_rt_error_cost = READ_ONCE(net->ipv4.ip_rt_error_cost);

Yes, done.

> 
> Also, why not ip_rt_error_burst as well? part of the same algorithm.

done.
diff mbox series

Patch

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index c7320ef356d9..319395bbad3c 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -85,6 +85,7 @@  struct netns_ipv4 {
 	u32 ip_rt_min_pmtu;
 	int ip_rt_mtu_expires;
 	int ip_rt_min_advmss;
+	int ip_rt_error_cost;
 
 	struct local_ports ip_local_ports;
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 795cbe1de912..b022ae749640 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -118,7 +118,6 @@  static int ip_rt_max_size;
 static int ip_rt_redirect_number __read_mostly	= 9;
 static int ip_rt_redirect_load __read_mostly	= HZ / 50;
 static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
-static int ip_rt_error_cost __read_mostly	= HZ;
 static int ip_rt_error_burst __read_mostly	= 5 * HZ;
 
 static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
@@ -949,6 +948,7 @@  static int ip_error(struct sk_buff *skb)
 	SKB_DR(reason);
 	bool send;
 	int code;
+	int error_cost;
 
 	if (netif_is_l3_master(skb->dev)) {
 		dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
@@ -1002,11 +1002,13 @@  static int ip_error(struct sk_buff *skb)
 	if (peer) {
 		now = jiffies;
 		peer->rate_tokens += now - peer->rate_last;
+		error_cost = READ_ONCE(net->ipv4.ip_rt_error_cost);
+
 		if (peer->rate_tokens > ip_rt_error_burst)
 			peer->rate_tokens = ip_rt_error_burst;
 		peer->rate_last = now;
-		if (peer->rate_tokens >= ip_rt_error_cost)
-			peer->rate_tokens -= ip_rt_error_cost;
+		if (peer->rate_tokens >= error_cost)
+			peer->rate_tokens -= error_cost;
 		else
 			send = false;
 		inet_putpeer(peer);
@@ -3535,13 +3537,6 @@  static struct ctl_table ipv4_route_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "error_cost",
-		.data		= &ip_rt_error_cost,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
 	{
 		.procname	= "error_burst",
 		.data		= &ip_rt_error_burst,
@@ -3590,6 +3585,13 @@  static struct ctl_table ipv4_route_netns_table[] = {
 		.mode       = 0644,
 		.proc_handler   = proc_dointvec,
 	},
+	{
+		.procname   = "error_cost",
+		.data       = &init_net.ipv4.ip_rt_error_cost,
+		.maxlen     = sizeof(int),
+		.mode       = 0644,
+		.proc_handler   = proc_dointvec,
+	},
 	{ },
 };
 
@@ -3653,6 +3655,7 @@  static __net_init int netns_ip_rt_init(struct net *net)
 	net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU;
 	net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES;
 	net->ipv4.ip_rt_min_advmss = DEFAULT_MIN_ADVMSS;
+	net->ipv4.ip_rt_error_cost = HZ;
 	return 0;
 }