diff mbox series

[net-next] net/smc: Unbind smc control from tcp control

Message ID 20221123105830.17167-1-jaka@linux.ibm.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series [net-next] net/smc: Unbind smc control from tcp control | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 4 maintainers not CCed: linux-doc@vger.kernel.org pabeni@redhat.com corbet@lwn.net edumazet@google.com
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 50 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Jan Karcher Nov. 23, 2022, 10:58 a.m. UTC
In the past SMC used the values of tcp_{w|r}mem to create the send
buffer and RMB. We now have our own sysctl knobs to tune them without
influencing the TCP default.

This patch removes the dependency on the TCP control by providing our
own initial values which aim for a low memory footprint.

Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
---
 Documentation/networking/smc-sysctl.rst |  4 ++--
 net/smc/smc_core.h                      |  6 ++++--
 net/smc/smc_sysctl.c                    | 10 ++++++----
 3 files changed, 12 insertions(+), 8 deletions(-)

Comments

Tony Lu Nov. 23, 2022, 11:13 a.m. UTC | #1
On Wed, Nov 23, 2022 at 11:58:30AM +0100, Jan Karcher wrote:
> In the past SMC used the values of tcp_{w|r}mem to create the send
> buffer and RMB. We now have our own sysctl knobs to tune them without
> influencing the TCP default.
> 
> This patch removes the dependency on the TCP control by providing our
> own initial values which aim for a low memory footprint.

+1, before introducing sysctl knobs of SMC, we were going to get rid of
TCP and have SMC own values. Now this does it, So I very much agree with
this.

> 
> Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
> ---
>  Documentation/networking/smc-sysctl.rst |  4 ++--
>  net/smc/smc_core.h                      |  6 ++++--
>  net/smc/smc_sysctl.c                    | 10 ++++++----
>  3 files changed, 12 insertions(+), 8 deletions(-)
> 
> diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst
> index 6d8acdbe9be1..a1c634d3690a 100644
> --- a/Documentation/networking/smc-sysctl.rst
> +++ b/Documentation/networking/smc-sysctl.rst
> @@ -44,7 +44,7 @@ smcr_testlink_time - INTEGER
>  
>  wmem - INTEGER
>  	Initial size of send buffer used by SMC sockets.
> -	The default value inherits from net.ipv4.tcp_wmem[1].
> +	The default value aims for a small memory footprint and is set to 16KiB.
>  
>  	The minimum value is 16KiB and there is no hard limit for max value, but
>  	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
> @@ -53,7 +53,7 @@ wmem - INTEGER
>  
>  rmem - INTEGER
>  	Initial size of receive buffer (RMB) used by SMC sockets.
> -	The default value inherits from net.ipv4.tcp_rmem[1].
> +	The default value aims for a small memory footprint and is set to 64KiB.
>  
>  	The minimum value is 16KiB and there is no hard limit for max value, but
>  	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
> diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
> index 285f9bd8e232..67c3937f341d 100644
> --- a/net/smc/smc_core.h
> +++ b/net/smc/smc_core.h
> @@ -206,8 +206,10 @@ struct smc_rtoken {				/* address/key of remote RMB */
>  	u32			rkey;
>  };
>  
> -#define SMC_BUF_MIN_SIZE	16384	/* minimum size of an RMB */
> -#define SMC_RMBE_SIZES		16	/* number of distinct RMBE sizes */
> +#define SMC_SNDBUF_INIT_SIZE 16384 /* initial size of send buffer */
> +#define SMC_RCVBUF_INIT_SIZE 65536 /* initial size of receive buffer */
> +#define SMC_BUF_MIN_SIZE	 16384	/* minimum size of an RMB */
> +#define SMC_RMBE_SIZES		 16	/* number of distinct RMBE sizes */
>  /* theoretically, the RFC states that largest size would be 512K,
>   * i.e. compressed 5 and thus 6 sizes (0..5), despite
>   * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
> diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
> index b6f79fabb9d3..a63aa79d4856 100644
> --- a/net/smc/smc_sysctl.c
> +++ b/net/smc/smc_sysctl.c
> @@ -19,8 +19,10 @@
>  #include "smc_llc.h"
>  #include "smc_sysctl.h"
>  
> -static int min_sndbuf = SMC_BUF_MIN_SIZE;
> -static int min_rcvbuf = SMC_BUF_MIN_SIZE;
> +static int initial_sndbuf	= SMC_SNDBUF_INIT_SIZE;
> +static int initial_rcvbuf	= SMC_RCVBUF_INIT_SIZE;
> +static int min_sndbuf		= SMC_BUF_MIN_SIZE;
> +static int min_rcvbuf		= SMC_BUF_MIN_SIZE;
>  
>  static struct ctl_table smc_table[] = {
>  	{
> @@ -88,8 +90,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
>  	net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
>  	net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
>  	net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
> -	WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
> -	WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
> +	WRITE_ONCE(net->smc.sysctl_wmem, initial_sndbuf);
> +	WRITE_ONCE(net->smc.sysctl_rmem, initial_rcvbuf);

Maybe we can use SMC_{SND|RCV}BUF_INIT_SIZE macro directly, instead of
new variables.

Cheers,
Tony Lu

>  
>  	return 0;
>  
> -- 
> 2.34.1
Jan Karcher Nov. 23, 2022, 11:19 a.m. UTC | #2
On 23/11/2022 12:13, Tony Lu wrote:
> On Wed, Nov 23, 2022 at 11:58:30AM +0100, Jan Karcher wrote:
>> In the past SMC used the values of tcp_{w|r}mem to create the send
>> buffer and RMB. We now have our own sysctl knobs to tune them without
>> influencing the TCP default.
>>
>> This patch removes the dependency on the TCP control by providing our
>> own initial values which aim for a low memory footprint.
> 
> +1, before introducing sysctl knobs of SMC, we were going to get rid of
> TCP and have SMC own values. Now this does it, So I very much agree with
> this.
> 
>>
>> Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
>> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
>> ---
>>   Documentation/networking/smc-sysctl.rst |  4 ++--
>>   net/smc/smc_core.h                      |  6 ++++--
>>   net/smc/smc_sysctl.c                    | 10 ++++++----
>>   3 files changed, 12 insertions(+), 8 deletions(-)
>>
>> diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst
>> index 6d8acdbe9be1..a1c634d3690a 100644
>> --- a/Documentation/networking/smc-sysctl.rst
>> +++ b/Documentation/networking/smc-sysctl.rst
>> @@ -44,7 +44,7 @@ smcr_testlink_time - INTEGER
>>   
>>   wmem - INTEGER
>>   	Initial size of send buffer used by SMC sockets.
>> -	The default value inherits from net.ipv4.tcp_wmem[1].
>> +	The default value aims for a small memory footprint and is set to 16KiB.
>>   
>>   	The minimum value is 16KiB and there is no hard limit for max value, but
>>   	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
>> @@ -53,7 +53,7 @@ wmem - INTEGER
>>   
>>   rmem - INTEGER
>>   	Initial size of receive buffer (RMB) used by SMC sockets.
>> -	The default value inherits from net.ipv4.tcp_rmem[1].
>> +	The default value aims for a small memory footprint and is set to 64KiB.
>>   
>>   	The minimum value is 16KiB and there is no hard limit for max value, but
>>   	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
>> diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
>> index 285f9bd8e232..67c3937f341d 100644
>> --- a/net/smc/smc_core.h
>> +++ b/net/smc/smc_core.h
>> @@ -206,8 +206,10 @@ struct smc_rtoken {				/* address/key of remote RMB */
>>   	u32			rkey;
>>   };
>>   
>> -#define SMC_BUF_MIN_SIZE	16384	/* minimum size of an RMB */
>> -#define SMC_RMBE_SIZES		16	/* number of distinct RMBE sizes */
>> +#define SMC_SNDBUF_INIT_SIZE 16384 /* initial size of send buffer */
>> +#define SMC_RCVBUF_INIT_SIZE 65536 /* initial size of receive buffer */
>> +#define SMC_BUF_MIN_SIZE	 16384	/* minimum size of an RMB */
>> +#define SMC_RMBE_SIZES		 16	/* number of distinct RMBE sizes */
>>   /* theoretically, the RFC states that largest size would be 512K,
>>    * i.e. compressed 5 and thus 6 sizes (0..5), despite
>>    * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
>> diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
>> index b6f79fabb9d3..a63aa79d4856 100644
>> --- a/net/smc/smc_sysctl.c
>> +++ b/net/smc/smc_sysctl.c
>> @@ -19,8 +19,10 @@
>>   #include "smc_llc.h"
>>   #include "smc_sysctl.h"
>>   
>> -static int min_sndbuf = SMC_BUF_MIN_SIZE;
>> -static int min_rcvbuf = SMC_BUF_MIN_SIZE;
>> +static int initial_sndbuf	= SMC_SNDBUF_INIT_SIZE;
>> +static int initial_rcvbuf	= SMC_RCVBUF_INIT_SIZE;
>> +static int min_sndbuf		= SMC_BUF_MIN_SIZE;
>> +static int min_rcvbuf		= SMC_BUF_MIN_SIZE;
>>   
>>   static struct ctl_table smc_table[] = {
>>   	{
>> @@ -88,8 +90,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
>>   	net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
>>   	net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
>>   	net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
>> -	WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
>> -	WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
>> +	WRITE_ONCE(net->smc.sysctl_wmem, initial_sndbuf);
>> +	WRITE_ONCE(net->smc.sysctl_rmem, initial_rcvbuf);
> 
> Maybe we can use SMC_{SND|RCV}BUF_INIT_SIZE macro directly, instead of
> new variables.

The reason i created the new variables is that min_{snd|rcv}buf also 
have their own variables. I know it is not needed but thought it was 
cleaner.
If you have a strong opinion on using the value directly i can change 
it. Please let me know if you want it changed.

- Jan
> 
> Cheers,
> Tony Lu
> 
>>   
>>   	return 0;
>>   
>> -- 
>> 2.34.1
Tony Lu Nov. 23, 2022, 11:25 a.m. UTC | #3
On Wed, Nov 23, 2022 at 12:19:19PM +0100, Jan Karcher wrote:
> 
> 
> On 23/11/2022 12:13, Tony Lu wrote:
> > On Wed, Nov 23, 2022 at 11:58:30AM +0100, Jan Karcher wrote:
> > > In the past SMC used the values of tcp_{w|r}mem to create the send
> > > buffer and RMB. We now have our own sysctl knobs to tune them without
> > > influencing the TCP default.
> > > 
> > > This patch removes the dependency on the TCP control by providing our
> > > own initial values which aim for a low memory footprint.
> > 
> > +1, before introducing sysctl knobs of SMC, we were going to get rid of
> > TCP and have SMC own values. Now this does it, So I very much agree with
> > this.
> > 
> > > 
> > > Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
> > > Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
> > > ---
> > >   Documentation/networking/smc-sysctl.rst |  4 ++--
> > >   net/smc/smc_core.h                      |  6 ++++--
> > >   net/smc/smc_sysctl.c                    | 10 ++++++----
> > >   3 files changed, 12 insertions(+), 8 deletions(-)
> > > 
> > > diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst
> > > index 6d8acdbe9be1..a1c634d3690a 100644
> > > --- a/Documentation/networking/smc-sysctl.rst
> > > +++ b/Documentation/networking/smc-sysctl.rst
> > > @@ -44,7 +44,7 @@ smcr_testlink_time - INTEGER
> > >   wmem - INTEGER
> > >   	Initial size of send buffer used by SMC sockets.
> > > -	The default value inherits from net.ipv4.tcp_wmem[1].
> > > +	The default value aims for a small memory footprint and is set to 16KiB.
> > >   	The minimum value is 16KiB and there is no hard limit for max value, but
> > >   	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
> > > @@ -53,7 +53,7 @@ wmem - INTEGER
> > >   rmem - INTEGER
> > >   	Initial size of receive buffer (RMB) used by SMC sockets.
> > > -	The default value inherits from net.ipv4.tcp_rmem[1].
> > > +	The default value aims for a small memory footprint and is set to 64KiB.
> > >   	The minimum value is 16KiB and there is no hard limit for max value, but
> > >   	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
> > > diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
> > > index 285f9bd8e232..67c3937f341d 100644
> > > --- a/net/smc/smc_core.h
> > > +++ b/net/smc/smc_core.h
> > > @@ -206,8 +206,10 @@ struct smc_rtoken {				/* address/key of remote RMB */
> > >   	u32			rkey;
> > >   };
> > > -#define SMC_BUF_MIN_SIZE	16384	/* minimum size of an RMB */
> > > -#define SMC_RMBE_SIZES		16	/* number of distinct RMBE sizes */
> > > +#define SMC_SNDBUF_INIT_SIZE 16384 /* initial size of send buffer */
> > > +#define SMC_RCVBUF_INIT_SIZE 65536 /* initial size of receive buffer */
> > > +#define SMC_BUF_MIN_SIZE	 16384	/* minimum size of an RMB */
> > > +#define SMC_RMBE_SIZES		 16	/* number of distinct RMBE sizes */
> > >   /* theoretically, the RFC states that largest size would be 512K,
> > >    * i.e. compressed 5 and thus 6 sizes (0..5), despite
> > >    * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
> > > diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
> > > index b6f79fabb9d3..a63aa79d4856 100644
> > > --- a/net/smc/smc_sysctl.c
> > > +++ b/net/smc/smc_sysctl.c
> > > @@ -19,8 +19,10 @@
> > >   #include "smc_llc.h"
> > >   #include "smc_sysctl.h"
> > > -static int min_sndbuf = SMC_BUF_MIN_SIZE;
> > > -static int min_rcvbuf = SMC_BUF_MIN_SIZE;
> > > +static int initial_sndbuf	= SMC_SNDBUF_INIT_SIZE;
> > > +static int initial_rcvbuf	= SMC_RCVBUF_INIT_SIZE;
> > > +static int min_sndbuf		= SMC_BUF_MIN_SIZE;
> > > +static int min_rcvbuf		= SMC_BUF_MIN_SIZE;
> > >   static struct ctl_table smc_table[] = {
> > >   	{
> > > @@ -88,8 +90,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
> > >   	net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
> > >   	net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
> > >   	net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
> > > -	WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
> > > -	WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
> > > +	WRITE_ONCE(net->smc.sysctl_wmem, initial_sndbuf);
> > > +	WRITE_ONCE(net->smc.sysctl_rmem, initial_rcvbuf);
> > 
> > Maybe we can use SMC_{SND|RCV}BUF_INIT_SIZE macro directly, instead of
> > new variables.
> 
> The reason i created the new variables is that min_{snd|rcv}buf also have
> their own variables. I know it is not needed but thought it was cleaner.
> If you have a strong opinion on using the value directly i can change it.
> Please let me know if you want it changed.

Yep, it's okay for me to use variables or macros. Just let it be.

Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>

Cheers,
Tony Lu

> 
> - Jan
> > 
> > Cheers,
> > Tony Lu
> > 
> > >   	return 0;
> > > -- 
> > > 2.34.1
Tony Lu Nov. 23, 2022, 12:07 p.m. UTC | #4
On Wed, Nov 23, 2022 at 11:58:30AM +0100, Jan Karcher wrote:
> In the past SMC used the values of tcp_{w|r}mem to create the send
> buffer and RMB. We now have our own sysctl knobs to tune them without
> influencing the TCP default.
> 
> This patch removes the dependency on the TCP control by providing our
> own initial values which aim for a low memory footprint.
> 
> Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
> ---
>  Documentation/networking/smc-sysctl.rst |  4 ++--
>  net/smc/smc_core.h                      |  6 ++++--
>  net/smc/smc_sysctl.c                    | 10 ++++++----
>  3 files changed, 12 insertions(+), 8 deletions(-)
> 
> diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst
> index 6d8acdbe9be1..a1c634d3690a 100644
> --- a/Documentation/networking/smc-sysctl.rst
> +++ b/Documentation/networking/smc-sysctl.rst
> @@ -44,7 +44,7 @@ smcr_testlink_time - INTEGER
>  
>  wmem - INTEGER
>  	Initial size of send buffer used by SMC sockets.
> -	The default value inherits from net.ipv4.tcp_wmem[1].
> +	The default value aims for a small memory footprint and is set to 16KiB.
>  
>  	The minimum value is 16KiB and there is no hard limit for max value, but
>  	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
> @@ -53,7 +53,7 @@ wmem - INTEGER
>  
>  rmem - INTEGER
>  	Initial size of receive buffer (RMB) used by SMC sockets.
> -	The default value inherits from net.ipv4.tcp_rmem[1].
> +	The default value aims for a small memory footprint and is set to 64KiB.
>  
>  	The minimum value is 16KiB and there is no hard limit for max value, but
>  	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
> diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
> index 285f9bd8e232..67c3937f341d 100644
> --- a/net/smc/smc_core.h
> +++ b/net/smc/smc_core.h
> @@ -206,8 +206,10 @@ struct smc_rtoken {				/* address/key of remote RMB */
>  	u32			rkey;
>  };
>  
> -#define SMC_BUF_MIN_SIZE	16384	/* minimum size of an RMB */

Hi Jan,

This patch inspired me that the min value of RMB and sndbuffer is 16KiB,
it means that every connection costs 32KiB at least. It's still a large
size for small environments, such as virtual machines or containers.

Also we have tested some cases with smaller buffer size (4KiB, with
hacked code), it also shows good performance compared with larger buffer
size.

So I am wondering that we could reduce the min value of RMB/send buffer,
such as 4KiB.

Cheers,
Tony Lu

> -#define SMC_RMBE_SIZES		16	/* number of distinct RMBE sizes */
> +#define SMC_SNDBUF_INIT_SIZE 16384 /* initial size of send buffer */
> +#define SMC_RCVBUF_INIT_SIZE 65536 /* initial size of receive buffer */
> +#define SMC_BUF_MIN_SIZE	 16384	/* minimum size of an RMB */
> +#define SMC_RMBE_SIZES		 16	/* number of distinct RMBE sizes */
>  /* theoretically, the RFC states that largest size would be 512K,
>   * i.e. compressed 5 and thus 6 sizes (0..5), despite
>   * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
> diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
> index b6f79fabb9d3..a63aa79d4856 100644
> --- a/net/smc/smc_sysctl.c
> +++ b/net/smc/smc_sysctl.c
> @@ -19,8 +19,10 @@
>  #include "smc_llc.h"
>  #include "smc_sysctl.h"
>  
> -static int min_sndbuf = SMC_BUF_MIN_SIZE;
> -static int min_rcvbuf = SMC_BUF_MIN_SIZE;
> +static int initial_sndbuf	= SMC_SNDBUF_INIT_SIZE;
> +static int initial_rcvbuf	= SMC_RCVBUF_INIT_SIZE;
> +static int min_sndbuf		= SMC_BUF_MIN_SIZE;
> +static int min_rcvbuf		= SMC_BUF_MIN_SIZE;
>  
>  static struct ctl_table smc_table[] = {
>  	{
> @@ -88,8 +90,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
>  	net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
>  	net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
>  	net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
> -	WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
> -	WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
> +	WRITE_ONCE(net->smc.sysctl_wmem, initial_sndbuf);
> +	WRITE_ONCE(net->smc.sysctl_rmem, initial_rcvbuf);
>  
>  	return 0;
>  
> -- 
> 2.34.1
Jan Karcher Nov. 23, 2022, 1:36 p.m. UTC | #5
On 23/11/2022 13:07, Tony Lu wrote:
> On Wed, Nov 23, 2022 at 11:58:30AM +0100, Jan Karcher wrote:
>> In the past SMC used the values of tcp_{w|r}mem to create the send
>> buffer and RMB. We now have our own sysctl knobs to tune them without
>> influencing the TCP default.
>>
>> This patch removes the dependency on the TCP control by providing our
>> own initial values which aim for a low memory footprint.
>>
>> Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
>> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
>> ---
>>   Documentation/networking/smc-sysctl.rst |  4 ++--
>>   net/smc/smc_core.h                      |  6 ++++--
>>   net/smc/smc_sysctl.c                    | 10 ++++++----
>>   3 files changed, 12 insertions(+), 8 deletions(-)
>>
>> diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst
>> index 6d8acdbe9be1..a1c634d3690a 100644
>> --- a/Documentation/networking/smc-sysctl.rst
>> +++ b/Documentation/networking/smc-sysctl.rst
>> @@ -44,7 +44,7 @@ smcr_testlink_time - INTEGER
>>   
>>   wmem - INTEGER
>>   	Initial size of send buffer used by SMC sockets.
>> -	The default value inherits from net.ipv4.tcp_wmem[1].
>> +	The default value aims for a small memory footprint and is set to 16KiB.
>>   
>>   	The minimum value is 16KiB and there is no hard limit for max value, but
>>   	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
>> @@ -53,7 +53,7 @@ wmem - INTEGER
>>   
>>   rmem - INTEGER
>>   	Initial size of receive buffer (RMB) used by SMC sockets.
>> -	The default value inherits from net.ipv4.tcp_rmem[1].
>> +	The default value aims for a small memory footprint and is set to 64KiB.
>>   
>>   	The minimum value is 16KiB and there is no hard limit for max value, but
>>   	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
>> diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
>> index 285f9bd8e232..67c3937f341d 100644
>> --- a/net/smc/smc_core.h
>> +++ b/net/smc/smc_core.h
>> @@ -206,8 +206,10 @@ struct smc_rtoken {				/* address/key of remote RMB */
>>   	u32			rkey;
>>   };
>>   
>> -#define SMC_BUF_MIN_SIZE	16384	/* minimum size of an RMB */
> 
> Hi Jan,
> 
> This patch inspired me that the min value of RMB and sndbuffer is 16KiB,
> it means that every connection costs 32KiB at least. It's still a large
> size for small environments, such as virtual machines or containers.
> 
> Also we have tested some cases with smaller buffer size (4KiB, with
> hacked code), it also shows good performance compared with larger buffer
> size.
> 
> So I am wondering that we could reduce the min value of RMB/send buffer,
> such as 4KiB.

That sounds interesting.
We did not think about reducing the minimum value.
One thing I'm wondering is if other OS like z/OS have own architectural 
limits or limits in general that we would have to consider in any way.
Let us look into it if we run into any trouble with lower memory.

- Jan

> 
> Cheers,
> Tony Lu
> 
>> -#define SMC_RMBE_SIZES		16	/* number of distinct RMBE sizes */
>> +#define SMC_SNDBUF_INIT_SIZE 16384 /* initial size of send buffer */
>> +#define SMC_RCVBUF_INIT_SIZE 65536 /* initial size of receive buffer */
>> +#define SMC_BUF_MIN_SIZE	 16384	/* minimum size of an RMB */
>> +#define SMC_RMBE_SIZES		 16	/* number of distinct RMBE sizes */
>>   /* theoretically, the RFC states that largest size would be 512K,
>>    * i.e. compressed 5 and thus 6 sizes (0..5), despite
>>    * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
>> diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
>> index b6f79fabb9d3..a63aa79d4856 100644
>> --- a/net/smc/smc_sysctl.c
>> +++ b/net/smc/smc_sysctl.c
>> @@ -19,8 +19,10 @@
>>   #include "smc_llc.h"
>>   #include "smc_sysctl.h"
>>   
>> -static int min_sndbuf = SMC_BUF_MIN_SIZE;
>> -static int min_rcvbuf = SMC_BUF_MIN_SIZE;
>> +static int initial_sndbuf	= SMC_SNDBUF_INIT_SIZE;
>> +static int initial_rcvbuf	= SMC_RCVBUF_INIT_SIZE;
>> +static int min_sndbuf		= SMC_BUF_MIN_SIZE;
>> +static int min_rcvbuf		= SMC_BUF_MIN_SIZE;
>>   
>>   static struct ctl_table smc_table[] = {
>>   	{
>> @@ -88,8 +90,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
>>   	net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
>>   	net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
>>   	net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
>> -	WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
>> -	WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
>> +	WRITE_ONCE(net->smc.sysctl_wmem, initial_sndbuf);
>> +	WRITE_ONCE(net->smc.sysctl_rmem, initial_rcvbuf);
>>   
>>   	return 0;
>>   
>> -- 
>> 2.34.1
Alexandra Winter Nov. 24, 2022, 1 p.m. UTC | #6
On 23.11.22 12:25, Tony Lu wrote:
> On Wed, Nov 23, 2022 at 12:19:19PM +0100, Jan Karcher wrote:
>>
>>
>> On 23/11/2022 12:13, Tony Lu wrote:
>>> On Wed, Nov 23, 2022 at 11:58:30AM +0100, Jan Karcher wrote:
>>>> In the past SMC used the values of tcp_{w|r}mem to create the send
>>>> buffer and RMB. We now have our own sysctl knobs to tune them without
>>>> influencing the TCP default.
>>>>
>>>> This patch removes the dependency on the TCP control by providing our
>>>> own initial values which aim for a low memory footprint.
>>>
>>> +1, before introducing sysctl knobs of SMC, we were going to get rid of
>>> TCP and have SMC own values. Now this does it, So I very much agree with
>>> this.
>>>
Iiuc you are changing the default values in this a patch and your other patch:
Default values for real_buf for send and receive:

before 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable")
    real_buf=net.ipv4.tcp_{w|r}mem[1]/2   send: 8k  recv: 64k 
    
after 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable")
real_buf=net.ipv4.tcp_{w|r}mem[1]   send: 16k (16*1024) recv: 128k (131072) 

after net/smc: Fix expected buffersizes and sync logic
real_buf=net.ipv4.tcp_{w|r}mem[1]   send: 16k (16*1024) recv: 128k (131072) 

after net/smc: Unbind smc control from tcp control
real_buf=SMC_*BUF_INIT_SIZE   send: 16k (16384) recv: 64k (65536)

If my understanding is correct, then I nack this. 
Defaults should be restored to the values before 0227f058aa29.
Otherwise users will notice a change in memory usage that needs to
be avoided or announced more explicitely. (and don't change them twice)

>>>>
>>>> Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
>>>> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
>>>> ---
>>>>   Documentation/networking/smc-sysctl.rst |  4 ++--
>>>>   net/smc/smc_core.h                      |  6 ++++--
>>>>   net/smc/smc_sysctl.c                    | 10 ++++++----
>>>>   3 files changed, 12 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst
>>>> index 6d8acdbe9be1..a1c634d3690a 100644
>>>> --- a/Documentation/networking/smc-sysctl.rst
>>>> +++ b/Documentation/networking/smc-sysctl.rst
>>>> @@ -44,7 +44,7 @@ smcr_testlink_time - INTEGER
>>>>   wmem - INTEGER
>>>>   	Initial size of send buffer used by SMC sockets.
>>>> -	The default value inherits from net.ipv4.tcp_wmem[1].
>>>> +	The default value aims for a small memory footprint and is set to 16KiB.
>>>>   	The minimum value is 16KiB and there is no hard limit for max value, but
>>>>   	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
>>>> @@ -53,7 +53,7 @@ wmem - INTEGER
>>>>   rmem - INTEGER
>>>>   	Initial size of receive buffer (RMB) used by SMC sockets.
>>>> -	The default value inherits from net.ipv4.tcp_rmem[1].
>>>> +	The default value aims for a small memory footprint and is set to 64KiB.
>>>>   	The minimum value is 16KiB and there is no hard limit for max value, but
>>>>   	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
>>>> diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
>>>> index 285f9bd8e232..67c3937f341d 100644
>>>> --- a/net/smc/smc_core.h
>>>> +++ b/net/smc/smc_core.h
>>>> @@ -206,8 +206,10 @@ struct smc_rtoken {				/* address/key of remote RMB */
>>>>   	u32			rkey;
>>>>   };
>>>> -#define SMC_BUF_MIN_SIZE	16384	/* minimum size of an RMB */
>>>> -#define SMC_RMBE_SIZES		16	/* number of distinct RMBE sizes */
>>>> +#define SMC_SNDBUF_INIT_SIZE 16384 /* initial size of send buffer */
>>>> +#define SMC_RCVBUF_INIT_SIZE 65536 /* initial size of receive buffer */
>>>> +#define SMC_BUF_MIN_SIZE	 16384	/* minimum size of an RMB */
>>>> +#define SMC_RMBE_SIZES		 16	/* number of distinct RMBE sizes */
>>>>   /* theoretically, the RFC states that largest size would be 512K,
>>>>    * i.e. compressed 5 and thus 6 sizes (0..5), despite
>>>>    * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
>>>> diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
>>>> index b6f79fabb9d3..a63aa79d4856 100644
>>>> --- a/net/smc/smc_sysctl.c
>>>> +++ b/net/smc/smc_sysctl.c
>>>> @@ -19,8 +19,10 @@
>>>>   #include "smc_llc.h"
>>>>   #include "smc_sysctl.h"
>>>> -static int min_sndbuf = SMC_BUF_MIN_SIZE;
>>>> -static int min_rcvbuf = SMC_BUF_MIN_SIZE;
>>>> +static int initial_sndbuf	= SMC_SNDBUF_INIT_SIZE;
>>>> +static int initial_rcvbuf	= SMC_RCVBUF_INIT_SIZE;
>>>> +static int min_sndbuf		= SMC_BUF_MIN_SIZE;
>>>> +static int min_rcvbuf		= SMC_BUF_MIN_SIZE;
Broken formatting
>>>>   static struct ctl_table smc_table[] = {
>>>>   	{
>>>> @@ -88,8 +90,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
>>>>   	net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
>>>>   	net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
>>>>   	net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
>>>> -	WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
>>>> -	WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
>>>> +	WRITE_ONCE(net->smc.sysctl_wmem, initial_sndbuf);
>>>> +	WRITE_ONCE(net->smc.sysctl_rmem, initial_rcvbuf);
>>>
>>> Maybe we can use SMC_{SND|RCV}BUF_INIT_SIZE macro directly, instead of
>>> new variables.
>>
>> The reason i created the new variables is that min_{snd|rcv}buf also have
>> their own variables. I know it is not needed but thought it was cleaner.
>> If you have a strong opinion on using the value directly i can change it.
>> Please let me know if you want it changed.
> 
> Yep, it's okay for me to use variables or macros. Just let it be.
I think it's better coding style to use the macros instead of unneccessary variables.
At least the variables could be defined as const.
> 
> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
> 
> Cheers,
> Tony Lu
> 
>>
>> - Jan
>>>
>>> Cheers,
>>> Tony Lu
>>>
>>>>   	return 0;
>>>> -- 
>>>> 2.34.1
Alexandra Winter Nov. 24, 2022, 2:06 p.m. UTC | #7
On 24.11.22 14:00, Alexandra Winter wrote:
> 
> 
> On 23.11.22 12:25, Tony Lu wrote:
>> On Wed, Nov 23, 2022 at 12:19:19PM +0100, Jan Karcher wrote:
>>>
>>>
>>> On 23/11/2022 12:13, Tony Lu wrote:
>>>> On Wed, Nov 23, 2022 at 11:58:30AM +0100, Jan Karcher wrote:
>>>>> In the past SMC used the values of tcp_{w|r}mem to create the send
>>>>> buffer and RMB. We now have our own sysctl knobs to tune them without
>>>>> influencing the TCP default.
>>>>>
>>>>> This patch removes the dependency on the TCP control by providing our
>>>>> own initial values which aim for a low memory footprint.
>>>>
>>>> +1, before introducing sysctl knobs of SMC, we were going to get rid of
>>>> TCP and have SMC own values. Now this does it, So I very much agree with
>>>> this.
>>>>
> Iiuc you are changing the default values in this a patch and your other patch:
> Default values for real_buf for send and receive:
> 
> before 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable")
>     real_buf=net.ipv4.tcp_{w|r}mem[1]/2   send: 8k  recv: 64k 
Jan, you explained to me: "the minima is 16Kib. This is enforced in smc_compress_bufsize 
which would move any value <= 16Kib into bucket 0 - which is 16KiB "
so 					send  16k   recv: 64k
>     
> after 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable")
> real_buf=net.ipv4.tcp_{w|r}mem[1]   send: 16k (16*1024) recv: 128k (131072) 
> 
> after net/smc: Fix expected buffersizes and sync logic
> real_buf=net.ipv4.tcp_{w|r}mem[1]   send: 16k (16*1024) recv: 128k (131072) 
> 
> after net/smc: Unbind smc control from tcp control
> real_buf=SMC_*BUF_INIT_SIZE   send: 16k (16384) recv: 64k (65536)
> 
> If my understanding is correct, then I nack this. 
> Defaults should be restored to the values before 0227f058aa29.
> Otherwise users will notice a change in memory usage that needs to
> be avoided or announced more explicitely. (and don't change them twice)
See above, I stand corrected. This does restore the default receive buffer size.
It should got to net with a Fixes: 0227f058aa29 tag.
And the descrition should clarify that this restores the default recv buffer size
(and uncouples from tcp control)
Maybe the users and the distros would thank you, if you merge them into 1 patch?


> 
>>>>>
>>>>> Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
>>>>> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
>>>>> ---
>>>>>   Documentation/networking/smc-sysctl.rst |  4 ++--
>>>>>   net/smc/smc_core.h                      |  6 ++++--
>>>>>   net/smc/smc_sysctl.c                    | 10 ++++++----
>>>>>   3 files changed, 12 insertions(+), 8 deletions(-)
>>>>>
>>>>> diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst
>>>>> index 6d8acdbe9be1..a1c634d3690a 100644
>>>>> --- a/Documentation/networking/smc-sysctl.rst
>>>>> +++ b/Documentation/networking/smc-sysctl.rst
>>>>> @@ -44,7 +44,7 @@ smcr_testlink_time - INTEGER
>>>>>   wmem - INTEGER
>>>>>   	Initial size of send buffer used by SMC sockets.
>>>>> -	The default value inherits from net.ipv4.tcp_wmem[1].
>>>>> +	The default value aims for a small memory footprint and is set to 16KiB.
>>>>>   	The minimum value is 16KiB and there is no hard limit for max value, but
>>>>>   	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
>>>>> @@ -53,7 +53,7 @@ wmem - INTEGER
>>>>>   rmem - INTEGER
>>>>>   	Initial size of receive buffer (RMB) used by SMC sockets.
>>>>> -	The default value inherits from net.ipv4.tcp_rmem[1].
>>>>> +	The default value aims for a small memory footprint and is set to 64KiB.
>>>>>   	The minimum value is 16KiB and there is no hard limit for max value, but
>>>>>   	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
>>>>> diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
>>>>> index 285f9bd8e232..67c3937f341d 100644
>>>>> --- a/net/smc/smc_core.h
>>>>> +++ b/net/smc/smc_core.h
>>>>> @@ -206,8 +206,10 @@ struct smc_rtoken {				/* address/key of remote RMB */
>>>>>   	u32			rkey;
>>>>>   };
>>>>> -#define SMC_BUF_MIN_SIZE	16384	/* minimum size of an RMB */
>>>>> -#define SMC_RMBE_SIZES		16	/* number of distinct RMBE sizes */
>>>>> +#define SMC_SNDBUF_INIT_SIZE 16384 /* initial size of send buffer */
>>>>> +#define SMC_RCVBUF_INIT_SIZE 65536 /* initial size of receive buffer */
>>>>> +#define SMC_BUF_MIN_SIZE	 16384	/* minimum size of an RMB */
>>>>> +#define SMC_RMBE_SIZES		 16	/* number of distinct RMBE sizes */
There is one blank added to these lines? Why? They still don't align.
>>>>>   /* theoretically, the RFC states that largest size would be 512K,
>>>>>    * i.e. compressed 5 and thus 6 sizes (0..5), despite
>>>>>    * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
>>>>> diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
>>>>> index b6f79fabb9d3..a63aa79d4856 100644
>>>>> --- a/net/smc/smc_sysctl.c
>>>>> +++ b/net/smc/smc_sysctl.c
>>>>> @@ -19,8 +19,10 @@
>>>>>   #include "smc_llc.h"
>>>>>   #include "smc_sysctl.h"
>>>>> -static int min_sndbuf = SMC_BUF_MIN_SIZE;
>>>>> -static int min_rcvbuf = SMC_BUF_MIN_SIZE;
>>>>> +static int initial_sndbuf	= SMC_SNDBUF_INIT_SIZE;
>>>>> +static int initial_rcvbuf	= SMC_RCVBUF_INIT_SIZE;
>>>>> +static int min_sndbuf		= SMC_BUF_MIN_SIZE;
>>>>> +static int min_rcvbuf		= SMC_BUF_MIN_SIZE;
> Broken formatting
>>>>>   static struct ctl_table smc_table[] = {
>>>>>   	{
>>>>> @@ -88,8 +90,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
>>>>>   	net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
>>>>>   	net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
>>>>>   	net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
>>>>> -	WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
>>>>> -	WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
>>>>> +	WRITE_ONCE(net->smc.sysctl_wmem, initial_sndbuf);
>>>>> +	WRITE_ONCE(net->smc.sysctl_rmem, initial_rcvbuf);
>>>>
>>>> Maybe we can use SMC_{SND|RCV}BUF_INIT_SIZE macro directly, instead of
>>>> new variables.
>>>
>>> The reason i created the new variables is that min_{snd|rcv}buf also have
>>> their own variables. I know it is not needed but thought it was cleaner.
>>> If you have a strong opinion on using the value directly i can change it.
>>> Please let me know if you want it changed.
>>
>> Yep, it's okay for me to use variables or macros. Just let it be.
> I think it's better coding style to use the macros instead of unneccessary variables.
> At least the variables could be defined as const.
>>
>> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
>>
>> Cheers,
>> Tony Lu
>>
>>>
>>> - Jan
>>>>
>>>> Cheers,
>>>> Tony Lu
>>>>
>>>>>   	return 0;
>>>>> -- 
>>>>> 2.34.1
Tony Lu Nov. 24, 2022, 5:15 p.m. UTC | #8
On Thu, Nov 24, 2022 at 02:00:35PM +0100, Alexandra Winter wrote:
> 
> 
> On 23.11.22 12:25, Tony Lu wrote:
> > On Wed, Nov 23, 2022 at 12:19:19PM +0100, Jan Karcher wrote:
> >>
> >>
> >> On 23/11/2022 12:13, Tony Lu wrote:
> >>> On Wed, Nov 23, 2022 at 11:58:30AM +0100, Jan Karcher wrote:
> >>>> In the past SMC used the values of tcp_{w|r}mem to create the send
> >>>> buffer and RMB. We now have our own sysctl knobs to tune them without
> >>>> influencing the TCP default.
> >>>>
> >>>> This patch removes the dependency on the TCP control by providing our
> >>>> own initial values which aim for a low memory footprint.
> >>>
> >>> +1, before introducing sysctl knobs of SMC, we were going to get rid of
> >>> TCP and have SMC own values. Now this does it, So I very much agree with
> >>> this.
> >>>
> Iiuc you are changing the default values in this a patch and your other patch:
> Default values for real_buf for send and receive:
> 
> before 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable")
>     real_buf=net.ipv4.tcp_{w|r}mem[1]/2   send: 8k  recv: 64k 
>     
> after 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable")
> real_buf=net.ipv4.tcp_{w|r}mem[1]   send: 16k (16*1024) recv: 128k (131072) 
> 
> after net/smc: Fix expected buffersizes and sync logic
> real_buf=net.ipv4.tcp_{w|r}mem[1]   send: 16k (16*1024) recv: 128k (131072) 
> 
> after net/smc: Unbind smc control from tcp control
> real_buf=SMC_*BUF_INIT_SIZE   send: 16k (16384) recv: 64k (65536)
> 
> If my understanding is correct, then I nack this. 
> Defaults should be restored to the values before 0227f058aa29.
> Otherwise users will notice a change in memory usage that needs to
> be avoided or announced more explicitely. (and don't change them twice)

The logic of buffer size are changed indeed. I very much agree that do
not break the user space. I am wondering that the values of user-defined
configurations should be the ABI/API compatibilities.

Actually before the patch of adding sysctls of buffers, the values of
buffer size is bind to tcp_{w|r}mem[1] tightly. The people who changed
the value of tcp_{w|r}mem[1] may break the convention of SMC by
accident.

After getting rid of tcp_{w|r}mem[1], SMC have its own sysctl for
buffer size. I do think this a really good chance for us to determined
the reasonable values of buffers and document them in a place that
people are easy to learn, the logic of {set|get}sockopt in SMC are
different from socket manual. What do you think?

Cheers,
Tony Lu

> 
> >>>>
> >>>> Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
> >>>> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
> >>>> ---
> >>>>   Documentation/networking/smc-sysctl.rst |  4 ++--
> >>>>   net/smc/smc_core.h                      |  6 ++++--
> >>>>   net/smc/smc_sysctl.c                    | 10 ++++++----
> >>>>   3 files changed, 12 insertions(+), 8 deletions(-)
> >>>>
> >>>> diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst
> >>>> index 6d8acdbe9be1..a1c634d3690a 100644
> >>>> --- a/Documentation/networking/smc-sysctl.rst
> >>>> +++ b/Documentation/networking/smc-sysctl.rst
> >>>> @@ -44,7 +44,7 @@ smcr_testlink_time - INTEGER
> >>>>   wmem - INTEGER
> >>>>   	Initial size of send buffer used by SMC sockets.
> >>>> -	The default value inherits from net.ipv4.tcp_wmem[1].
> >>>> +	The default value aims for a small memory footprint and is set to 16KiB.
> >>>>   	The minimum value is 16KiB and there is no hard limit for max value, but
> >>>>   	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
> >>>> @@ -53,7 +53,7 @@ wmem - INTEGER
> >>>>   rmem - INTEGER
> >>>>   	Initial size of receive buffer (RMB) used by SMC sockets.
> >>>> -	The default value inherits from net.ipv4.tcp_rmem[1].
> >>>> +	The default value aims for a small memory footprint and is set to 64KiB.
> >>>>   	The minimum value is 16KiB and there is no hard limit for max value, but
> >>>>   	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
> >>>> diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
> >>>> index 285f9bd8e232..67c3937f341d 100644
> >>>> --- a/net/smc/smc_core.h
> >>>> +++ b/net/smc/smc_core.h
> >>>> @@ -206,8 +206,10 @@ struct smc_rtoken {				/* address/key of remote RMB */
> >>>>   	u32			rkey;
> >>>>   };
> >>>> -#define SMC_BUF_MIN_SIZE	16384	/* minimum size of an RMB */
> >>>> -#define SMC_RMBE_SIZES		16	/* number of distinct RMBE sizes */
> >>>> +#define SMC_SNDBUF_INIT_SIZE 16384 /* initial size of send buffer */
> >>>> +#define SMC_RCVBUF_INIT_SIZE 65536 /* initial size of receive buffer */
> >>>> +#define SMC_BUF_MIN_SIZE	 16384	/* minimum size of an RMB */
> >>>> +#define SMC_RMBE_SIZES		 16	/* number of distinct RMBE sizes */
> >>>>   /* theoretically, the RFC states that largest size would be 512K,
> >>>>    * i.e. compressed 5 and thus 6 sizes (0..5), despite
> >>>>    * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
> >>>> diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
> >>>> index b6f79fabb9d3..a63aa79d4856 100644
> >>>> --- a/net/smc/smc_sysctl.c
> >>>> +++ b/net/smc/smc_sysctl.c
> >>>> @@ -19,8 +19,10 @@
> >>>>   #include "smc_llc.h"
> >>>>   #include "smc_sysctl.h"
> >>>> -static int min_sndbuf = SMC_BUF_MIN_SIZE;
> >>>> -static int min_rcvbuf = SMC_BUF_MIN_SIZE;
> >>>> +static int initial_sndbuf	= SMC_SNDBUF_INIT_SIZE;
> >>>> +static int initial_rcvbuf	= SMC_RCVBUF_INIT_SIZE;
> >>>> +static int min_sndbuf		= SMC_BUF_MIN_SIZE;
> >>>> +static int min_rcvbuf		= SMC_BUF_MIN_SIZE;
> Broken formatting
> >>>>   static struct ctl_table smc_table[] = {
> >>>>   	{
> >>>> @@ -88,8 +90,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
> >>>>   	net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
> >>>>   	net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
> >>>>   	net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
> >>>> -	WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
> >>>> -	WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
> >>>> +	WRITE_ONCE(net->smc.sysctl_wmem, initial_sndbuf);
> >>>> +	WRITE_ONCE(net->smc.sysctl_rmem, initial_rcvbuf);
> >>>
> >>> Maybe we can use SMC_{SND|RCV}BUF_INIT_SIZE macro directly, instead of
> >>> new variables.
> >>
> >> The reason i created the new variables is that min_{snd|rcv}buf also have
> >> their own variables. I know it is not needed but thought it was cleaner.
> >> If you have a strong opinion on using the value directly i can change it.
> >> Please let me know if you want it changed.
> > 
> > Yep, it's okay for me to use variables or macros. Just let it be.
> I think it's better coding style to use the macros instead of unneccessary variables.
> At least the variables could be defined as const.
> > 
> > Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
> > 
> > Cheers,
> > Tony Lu
> > 
> >>
> >> - Jan
> >>>
> >>> Cheers,
> >>> Tony Lu
> >>>
> >>>>   	return 0;
> >>>> -- 
> >>>> 2.34.1
Tony Lu Nov. 24, 2022, 6:04 p.m. UTC | #9
On Thu, Nov 24, 2022 at 03:06:50PM +0100, Alexandra Winter wrote:
> 
> 
> On 24.11.22 14:00, Alexandra Winter wrote:
> > 
> > 
> > On 23.11.22 12:25, Tony Lu wrote:
> >> On Wed, Nov 23, 2022 at 12:19:19PM +0100, Jan Karcher wrote:
> >>>
> >>>
> >>> On 23/11/2022 12:13, Tony Lu wrote:
> >>>> On Wed, Nov 23, 2022 at 11:58:30AM +0100, Jan Karcher wrote:
> >>>>> In the past SMC used the values of tcp_{w|r}mem to create the send
> >>>>> buffer and RMB. We now have our own sysctl knobs to tune them without
> >>>>> influencing the TCP default.
> >>>>>
> >>>>> This patch removes the dependency on the TCP control by providing our
> >>>>> own initial values which aim for a low memory footprint.
> >>>>
> >>>> +1, before introducing sysctl knobs of SMC, we were going to get rid of
> >>>> TCP and have SMC own values. Now this does it, So I very much agree with
> >>>> this.
> >>>>
> > Iiuc you are changing the default values in this a patch and your other patch:
> > Default values for real_buf for send and receive:
> > 
> > before 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable")
> >     real_buf=net.ipv4.tcp_{w|r}mem[1]/2   send: 8k  recv: 64k 
> Jan, you explained to me: "the minima is 16Kib. This is enforced in smc_compress_bufsize 
> which would move any value <= 16Kib into bucket 0 - which is 16KiB "
> so 					send  16k   recv: 64k
> >     
> > after 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable")
> > real_buf=net.ipv4.tcp_{w|r}mem[1]   send: 16k (16*1024) recv: 128k (131072) 
> > 
> > after net/smc: Fix expected buffersizes and sync logic
> > real_buf=net.ipv4.tcp_{w|r}mem[1]   send: 16k (16*1024) recv: 128k (131072) 
> > 
> > after net/smc: Unbind smc control from tcp control
> > real_buf=SMC_*BUF_INIT_SIZE   send: 16k (16384) recv: 64k (65536)
> > 
> > If my understanding is correct, then I nack this. 
> > Defaults should be restored to the values before 0227f058aa29.
> > Otherwise users will notice a change in memory usage that needs to
> > be avoided or announced more explicitely. (and don't change them twice)
> See above, I stand corrected. This does restore the default receive buffer size.
> It should got to net with a Fixes: 0227f058aa29 tag.
> And the descrition should clarify that this restores the default recv buffer size
> (and uncouples from tcp control)
> Maybe the users and the distros would thank you, if you merge them into 1 patch?

AFAIK, updating kernel to the version of upstream will take a long time
for distros and users. Since we have sysctls, maybe we could let
applications and distros use sysctl to set their desired and default
values with documentation. It's a more common way to tweak kernel. 

Cheers,
Tony Lu

> 
> 
> > 
> >>>>>
> >>>>> Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
> >>>>> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
> >>>>> ---
> >>>>>   Documentation/networking/smc-sysctl.rst |  4 ++--
> >>>>>   net/smc/smc_core.h                      |  6 ++++--
> >>>>>   net/smc/smc_sysctl.c                    | 10 ++++++----
> >>>>>   3 files changed, 12 insertions(+), 8 deletions(-)
> >>>>>
> >>>>> diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst
> >>>>> index 6d8acdbe9be1..a1c634d3690a 100644
> >>>>> --- a/Documentation/networking/smc-sysctl.rst
> >>>>> +++ b/Documentation/networking/smc-sysctl.rst
> >>>>> @@ -44,7 +44,7 @@ smcr_testlink_time - INTEGER
> >>>>>   wmem - INTEGER
> >>>>>   	Initial size of send buffer used by SMC sockets.
> >>>>> -	The default value inherits from net.ipv4.tcp_wmem[1].
> >>>>> +	The default value aims for a small memory footprint and is set to 16KiB.
> >>>>>   	The minimum value is 16KiB and there is no hard limit for max value, but
> >>>>>   	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
> >>>>> @@ -53,7 +53,7 @@ wmem - INTEGER
> >>>>>   rmem - INTEGER
> >>>>>   	Initial size of receive buffer (RMB) used by SMC sockets.
> >>>>> -	The default value inherits from net.ipv4.tcp_rmem[1].
> >>>>> +	The default value aims for a small memory footprint and is set to 64KiB.
> >>>>>   	The minimum value is 16KiB and there is no hard limit for max value, but
> >>>>>   	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
> >>>>> diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
> >>>>> index 285f9bd8e232..67c3937f341d 100644
> >>>>> --- a/net/smc/smc_core.h
> >>>>> +++ b/net/smc/smc_core.h
> >>>>> @@ -206,8 +206,10 @@ struct smc_rtoken {				/* address/key of remote RMB */
> >>>>>   	u32			rkey;
> >>>>>   };
> >>>>> -#define SMC_BUF_MIN_SIZE	16384	/* minimum size of an RMB */
> >>>>> -#define SMC_RMBE_SIZES		16	/* number of distinct RMBE sizes */
> >>>>> +#define SMC_SNDBUF_INIT_SIZE 16384 /* initial size of send buffer */
> >>>>> +#define SMC_RCVBUF_INIT_SIZE 65536 /* initial size of receive buffer */
> >>>>> +#define SMC_BUF_MIN_SIZE	 16384	/* minimum size of an RMB */
> >>>>> +#define SMC_RMBE_SIZES		 16	/* number of distinct RMBE sizes */
> There is one blank added to these lines? Why? They still don't align.
> >>>>>   /* theoretically, the RFC states that largest size would be 512K,
> >>>>>    * i.e. compressed 5 and thus 6 sizes (0..5), despite
> >>>>>    * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
> >>>>> diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
> >>>>> index b6f79fabb9d3..a63aa79d4856 100644
> >>>>> --- a/net/smc/smc_sysctl.c
> >>>>> +++ b/net/smc/smc_sysctl.c
> >>>>> @@ -19,8 +19,10 @@
> >>>>>   #include "smc_llc.h"
> >>>>>   #include "smc_sysctl.h"
> >>>>> -static int min_sndbuf = SMC_BUF_MIN_SIZE;
> >>>>> -static int min_rcvbuf = SMC_BUF_MIN_SIZE;
> >>>>> +static int initial_sndbuf	= SMC_SNDBUF_INIT_SIZE;
> >>>>> +static int initial_rcvbuf	= SMC_RCVBUF_INIT_SIZE;
> >>>>> +static int min_sndbuf		= SMC_BUF_MIN_SIZE;
> >>>>> +static int min_rcvbuf		= SMC_BUF_MIN_SIZE;
> > Broken formatting
> >>>>>   static struct ctl_table smc_table[] = {
> >>>>>   	{
> >>>>> @@ -88,8 +90,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
> >>>>>   	net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
> >>>>>   	net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
> >>>>>   	net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
> >>>>> -	WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
> >>>>> -	WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
> >>>>> +	WRITE_ONCE(net->smc.sysctl_wmem, initial_sndbuf);
> >>>>> +	WRITE_ONCE(net->smc.sysctl_rmem, initial_rcvbuf);
> >>>>
> >>>> Maybe we can use SMC_{SND|RCV}BUF_INIT_SIZE macro directly, instead of
> >>>> new variables.
> >>>
> >>> The reason i created the new variables is that min_{snd|rcv}buf also have
> >>> their own variables. I know it is not needed but thought it was cleaner.
> >>> If you have a strong opinion on using the value directly i can change it.
> >>> Please let me know if you want it changed.
> >>
> >> Yep, it's okay for me to use variables or macros. Just let it be.
> > I think it's better coding style to use the macros instead of unneccessary variables.
> > At least the variables could be defined as const.
> >>
> >> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
> >>
> >> Cheers,
> >> Tony Lu
> >>
> >>>
> >>> - Jan
> >>>>
> >>>> Cheers,
> >>>> Tony Lu
> >>>>
> >>>>>   	return 0;
> >>>>> -- 
> >>>>> 2.34.1
Jan Karcher Nov. 25, 2022, 6:12 a.m. UTC | #10
On 24/11/2022 19:04, Tony Lu wrote:
> On Thu, Nov 24, 2022 at 03:06:50PM +0100, Alexandra Winter wrote:
>>
>>
>> On 24.11.22 14:00, Alexandra Winter wrote:
[ ... ]
>>>>>> On Wed, Nov 23, 2022 at 11:58:30AM +0100, Jan Karcher wrote:
>>>>>>> In the past SMC used the values of tcp_{w|r}mem to create the send
>>>>>>> buffer and RMB. We now have our own sysctl knobs to tune them without
>>>>>>> influencing the TCP default.
>>>>>>>
>>>>>>> This patch removes the dependency on the TCP control by providing our
>>>>>>> own initial values which aim for a low memory footprint
[ ... ]>>> Iiuc you are changing the default values in this a patch and 
your other patch:
>>> Default values for real_buf for send and receive:
>>>
>>> before 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable")
>>>      real_buf=net.ipv4.tcp_{w|r}mem[1]/2   send: 8k  recv: 64k
>> Jan, you explained to me: "the minima is 16Kib. This is enforced in smc_compress_bufsize
>> which would move any value <= 16Kib into bucket 0 - which is 16KiB "
>> so 					send  16k   recv: 64k
>>>      
>>> after 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable")
>>> real_buf=net.ipv4.tcp_{w|r}mem[1]   send: 16k (16*1024) recv: 128k (131072)
>>>
>>> after net/smc: Fix expected buffersizes and sync logic
>>> real_buf=net.ipv4.tcp_{w|r}mem[1]   send: 16k (16*1024) recv: 128k (131072)
>>>
>>> after net/smc: Unbind smc control from tcp control
>>> real_buf=SMC_*BUF_INIT_SIZE   send: 16k (16384) recv: 64k (65536)
>>>
>>> If my understanding is correct, then I nack this.
>>> Defaults should be restored to the values before 0227f058aa29.
>>> Otherwise users will notice a change in memory usage that needs to
>>> be avoided or announced more explicitely. (and don't change them twice)
>> See above, I stand corrected. This does restore the default receive buffer size.
>> It should got to net with a Fixes: 0227f058aa29 tag.
>> And the descrition should clarify that this restores the default recv buffer size
>> (and uncouples from tcp control)
>> Maybe the users and the distros would thank you, if you merge them into 1 patch? >
> AFAIK, updating kernel to the version of upstream will take a long time
> for distros and users. Since we have sysctls, maybe we could let
> applications and distros use sysctl to set their desired and default
> values with documentation. It's a more common way to tweak kernel. >
To whomever it may interest. The second patch in discussion is:
https://lore.kernel.org/netdev/20221123104907.14624-1-jaka@linux.ibm.com/
(Just in case someone stumbles on this thread without being included prior).

I decided to split those two separate in favor of the user. While the 
fix corrects specific behavior this patch does provides a new 
functionality and could be used without the fixed patch.

AFAIK the distros do not care how many patches they apply. We just need 
to tell them to backport one more patch. Which IMO is a good trade-off 
if it gives other members of the community more freedom.

> Cheers,
> Tony Lu
> 
>>
>>
>>>
>>>>>>>
>>>>>>> Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
>>>>>>> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
>>>>>>> ---
>>>>>>>    Documentation/networking/smc-sysctl.rst |  4 ++--
>>>>>>>    net/smc/smc_core.h                      |  6 ++++--
>>>>>>>    net/smc/smc_sysctl.c                    | 10 ++++++----
>>>>>>>    3 files changed, 12 insertions(+), 8 deletions(-)
>>>>>>>
>>>>>>> diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst
>>>>>>> index 6d8acdbe9be1..a1c634d3690a 100644
>>>>>>> --- a/Documentation/networking/smc-sysctl.rst
>>>>>>> +++ b/Documentation/networking/smc-sysctl.rst
>>>>>>> @@ -44,7 +44,7 @@ smcr_testlink_time - INTEGER
>>>>>>>    wmem - INTEGER
>>>>>>>    	Initial size of send buffer used by SMC sockets.
>>>>>>> -	The default value inherits from net.ipv4.tcp_wmem[1].
>>>>>>> +	The default value aims for a small memory footprint and is set to 16KiB.
>>>>>>>    	The minimum value is 16KiB and there is no hard limit for max value, but
>>>>>>>    	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
>>>>>>> @@ -53,7 +53,7 @@ wmem - INTEGER
>>>>>>>    rmem - INTEGER
>>>>>>>    	Initial size of receive buffer (RMB) used by SMC sockets.
>>>>>>> -	The default value inherits from net.ipv4.tcp_rmem[1].
>>>>>>> +	The default value aims for a small memory footprint and is set to 64KiB.
>>>>>>>    	The minimum value is 16KiB and there is no hard limit for max value, but
>>>>>>>    	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
>>>>>>> diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
>>>>>>> index 285f9bd8e232..67c3937f341d 100644
>>>>>>> --- a/net/smc/smc_core.h
>>>>>>> +++ b/net/smc/smc_core.h
>>>>>>> @@ -206,8 +206,10 @@ struct smc_rtoken {				/* address/key of remote RMB */
>>>>>>>    	u32			rkey;
>>>>>>>    };
>>>>>>> -#define SMC_BUF_MIN_SIZE	16384	/* minimum size of an RMB */
>>>>>>> -#define SMC_RMBE_SIZES		16	/* number of distinct RMBE sizes */
>>>>>>> +#define SMC_SNDBUF_INIT_SIZE 16384 /* initial size of send buffer */
>>>>>>> +#define SMC_RCVBUF_INIT_SIZE 65536 /* initial size of receive buffer */
>>>>>>> +#define SMC_BUF_MIN_SIZE	 16384	/* minimum size of an RMB */
>>>>>>> +#define SMC_RMBE_SIZES		 16	/* number of distinct RMBE sizes */
>> There is one blank added to these lines? Why? They still don't align.

Argh good catch. I think my new workstation had a different tab spacing...

>>>>>>>    /* theoretically, the RFC states that largest size would be 512K,
>>>>>>>     * i.e. compressed 5 and thus 6 sizes (0..5), despite
>>>>>>>     * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
>>>>>>> diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
>>>>>>> index b6f79fabb9d3..a63aa79d4856 100644
>>>>>>> --- a/net/smc/smc_sysctl.c
>>>>>>> +++ b/net/smc/smc_sysctl.c
>>>>>>> @@ -19,8 +19,10 @@
>>>>>>>    #include "smc_llc.h"
>>>>>>>    #include "smc_sysctl.h"
>>>>>>> -static int min_sndbuf = SMC_BUF_MIN_SIZE;
>>>>>>> -static int min_rcvbuf = SMC_BUF_MIN_SIZE;
>>>>>>> +static int initial_sndbuf	= SMC_SNDBUF_INIT_SIZE;
>>>>>>> +static int initial_rcvbuf	= SMC_RCVBUF_INIT_SIZE;
>>>>>>> +static int min_sndbuf		= SMC_BUF_MIN_SIZE;
>>>>>>> +static int min_rcvbuf		= SMC_BUF_MIN_SIZE;
>>> Broken formatting
>>>>>>>    static struct ctl_table smc_table[] = {
>>>>>>>    	{
>>>>>>> @@ -88,8 +90,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
>>>>>>>    	net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
>>>>>>>    	net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
>>>>>>>    	net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
>>>>>>> -	WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
>>>>>>> -	WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
>>>>>>> +	WRITE_ONCE(net->smc.sysctl_wmem, initial_sndbuf);
>>>>>>> +	WRITE_ONCE(net->smc.sysctl_rmem, initial_rcvbuf);
>>>>>>
>>>>>> Maybe we can use SMC_{SND|RCV}BUF_INIT_SIZE macro directly, instead of
>>>>>> new variables.
>>>>>
>>>>> The reason i created the new variables is that min_{snd|rcv}buf also have
>>>>> their own variables. I know it is not needed but thought it was cleaner.
>>>>> If you have a strong opinion on using the value directly i can change it.
>>>>> Please let me know if you want it changed.
>>>>
>>>> Yep, it's okay for me to use variables or macros. Just let it be.
>>> I think it's better coding style to use the macros instead of unneccessary variables.
>>> At least the variables could be defined as const.

I just thought about that. I'm going to fix the formatting any way in a 
version 2 so i try to clean up here as well.

Thank you for your comments
- Jan

>>>>
>>>> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
>>>>
>>>> Cheers,
>>>> Tony Lu
>>>>
>>>>>
>>>>> - Jan
>>>>>>
>>>>>> Cheers,
>>>>>> Tony Lu
>>>>>>
>>>>>>>    	return 0;
>>>>>>> -- 
>>>>>>> 2.34.1
Jan Karcher Nov. 25, 2022, 6:37 a.m. UTC | #11
On 24/11/2022 18:15, Tony Lu wrote:
> On Thu, Nov 24, 2022 at 02:00:35PM +0100, Alexandra Winter wrote:
>>
>>
>> On 23.11.22 12:25, Tony Lu wrote:
>>> On Wed, Nov 23, 2022 at 12:19:19PM +0100, Jan Karcher wrote:
>>>>
>>>>
>>>> On 23/11/2022 12:13, Tony Lu wrote:
>>>>> On Wed, Nov 23, 2022 at 11:58:30AM +0100, Jan Karcher wrote:
>>>>>> In the past SMC used the values of tcp_{w|r}mem to create the send
>>>>>> buffer and RMB. We now have our own sysctl knobs to tune them without
>>>>>> influencing the TCP default.
>>>>>>
>>>>>> This patch removes the dependency on the TCP control by providing our
>>>>>> own initial values which aim for a low memory footprint.
>>>>>
>>>>> +1, before introducing sysctl knobs of SMC, we were going to get rid of
>>>>> TCP and have SMC own values. Now this does it, So I very much agree with
>>>>> this.
>>>>>
>> Iiuc you are changing the default values in this a patch and your other patch:
>> Default values for real_buf for send and receive:
>>
>> before 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable")
>>      real_buf=net.ipv4.tcp_{w|r}mem[1]/2   send: 8k  recv: 64k
>>      
>> after 0227f058aa29 ("net/smc: Unbind r/w buffer size from clcsock and make them tunable")
>> real_buf=net.ipv4.tcp_{w|r}mem[1]   send: 16k (16*1024) recv: 128k (131072)
>>
>> after net/smc: Fix expected buffersizes and sync logic
>> real_buf=net.ipv4.tcp_{w|r}mem[1]   send: 16k (16*1024) recv: 128k (131072)
>>
>> after net/smc: Unbind smc control from tcp control
>> real_buf=SMC_*BUF_INIT_SIZE   send: 16k (16384) recv: 64k (65536)
>>
>> If my understanding is correct, then I nack this.
>> Defaults should be restored to the values before 0227f058aa29.
>> Otherwise users will notice a change in memory usage that needs to
>> be avoided or announced more explicitely. (and don't change them twice)
> 
> The logic of buffer size are changed indeed. I very much agree that do
> not break the user space. I am wondering that the values of user-defined
> configurations should be the ABI/API compatibilities.
> 
> Actually before the patch of adding sysctls of buffers, the values of
> buffer size is bind to tcp_{w|r}mem[1] tightly. The people who changed
> the value of tcp_{w|r}mem[1] may break the convention of SMC by
> accident.

That's true. I think we cannot change our buffers without risking to 
break some user configuration. Which leaves us with the question if we 
value the benefit of having SMC uncoupled higher then the breaking of 
those configurations.
For me i would answer that with a yes with the following reasoning:
We do this to be more flexible and it is a one time action. So we do not 
expect to break it again.
But we should be aware of it and communicate it clearly, which also 
includes the next point:

> 
> After getting rid of tcp_{w|r}mem[1], SMC have its own sysctl for
> buffer size. I do think this a really good chance for us to determined
> the reasonable values of buffers and document them in a place that
> people are easy to learn, the logic of {set|get}sockopt in SMC are
> different from socket manual. What do you think?

Indeed. I think this is a reasonable approach for the future. I'm 
wondering - and maybe you have some experience/opinion ther Tony - where 
we should documen such things. I mean there are RFCs relating to SMC [1] 
we have IBM documentation [2] and there is a documentation file 
regarding our control in the kernel tree [3].

Having that many different places where information is stored inevitably 
means that someone forgets something at one point which results in 
parallel evolution which i would like to prevent.
So please share your thoughts on this!

[1] https://www.rfc-editor.org/rfc/rfc7609
[2] 
https://www.ibm.com/docs/en/linux-on-systems?topic=n-smc-protocol-support
[3] 
https://kernel.googlesource.com/pub/scm/linux/kernel/git/torvalds/linux/+/refs/heads/master/Documentation/networking/smc-sysctl.rst

Thank You
- Jan
> 
> Cheers,
> Tony Lu
> 
>>
>>>>>>
>>>>>> Signed-off-by: Jan Karcher <jaka@linux.ibm.com>
>>>>>> Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
>>>>>> ---
>>>>>>    Documentation/networking/smc-sysctl.rst |  4 ++--
>>>>>>    net/smc/smc_core.h                      |  6 ++++--
>>>>>>    net/smc/smc_sysctl.c                    | 10 ++++++----
>>>>>>    3 files changed, 12 insertions(+), 8 deletions(-)
>>>>>>
>>>>>> diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst
>>>>>> index 6d8acdbe9be1..a1c634d3690a 100644
>>>>>> --- a/Documentation/networking/smc-sysctl.rst
>>>>>> +++ b/Documentation/networking/smc-sysctl.rst
>>>>>> @@ -44,7 +44,7 @@ smcr_testlink_time - INTEGER
>>>>>>    wmem - INTEGER
>>>>>>    	Initial size of send buffer used by SMC sockets.
>>>>>> -	The default value inherits from net.ipv4.tcp_wmem[1].
>>>>>> +	The default value aims for a small memory footprint and is set to 16KiB.
>>>>>>    	The minimum value is 16KiB and there is no hard limit for max value, but
>>>>>>    	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
>>>>>> @@ -53,7 +53,7 @@ wmem - INTEGER
>>>>>>    rmem - INTEGER
>>>>>>    	Initial size of receive buffer (RMB) used by SMC sockets.
>>>>>> -	The default value inherits from net.ipv4.tcp_rmem[1].
>>>>>> +	The default value aims for a small memory footprint and is set to 64KiB.
>>>>>>    	The minimum value is 16KiB and there is no hard limit for max value, but
>>>>>>    	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
>>>>>> diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
>>>>>> index 285f9bd8e232..67c3937f341d 100644
>>>>>> --- a/net/smc/smc_core.h
>>>>>> +++ b/net/smc/smc_core.h
>>>>>> @@ -206,8 +206,10 @@ struct smc_rtoken {				/* address/key of remote RMB */
>>>>>>    	u32			rkey;
>>>>>>    };
>>>>>> -#define SMC_BUF_MIN_SIZE	16384	/* minimum size of an RMB */
>>>>>> -#define SMC_RMBE_SIZES		16	/* number of distinct RMBE sizes */
>>>>>> +#define SMC_SNDBUF_INIT_SIZE 16384 /* initial size of send buffer */
>>>>>> +#define SMC_RCVBUF_INIT_SIZE 65536 /* initial size of receive buffer */
>>>>>> +#define SMC_BUF_MIN_SIZE	 16384	/* minimum size of an RMB */
>>>>>> +#define SMC_RMBE_SIZES		 16	/* number of distinct RMBE sizes */
>>>>>>    /* theoretically, the RFC states that largest size would be 512K,
>>>>>>     * i.e. compressed 5 and thus 6 sizes (0..5), despite
>>>>>>     * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
>>>>>> diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
>>>>>> index b6f79fabb9d3..a63aa79d4856 100644
>>>>>> --- a/net/smc/smc_sysctl.c
>>>>>> +++ b/net/smc/smc_sysctl.c
>>>>>> @@ -19,8 +19,10 @@
>>>>>>    #include "smc_llc.h"
>>>>>>    #include "smc_sysctl.h"
>>>>>> -static int min_sndbuf = SMC_BUF_MIN_SIZE;
>>>>>> -static int min_rcvbuf = SMC_BUF_MIN_SIZE;
>>>>>> +static int initial_sndbuf	= SMC_SNDBUF_INIT_SIZE;
>>>>>> +static int initial_rcvbuf	= SMC_RCVBUF_INIT_SIZE;
>>>>>> +static int min_sndbuf		= SMC_BUF_MIN_SIZE;
>>>>>> +static int min_rcvbuf		= SMC_BUF_MIN_SIZE;
>> Broken formatting
>>>>>>    static struct ctl_table smc_table[] = {
>>>>>>    	{
>>>>>> @@ -88,8 +90,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
>>>>>>    	net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
>>>>>>    	net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
>>>>>>    	net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
>>>>>> -	WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
>>>>>> -	WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
>>>>>> +	WRITE_ONCE(net->smc.sysctl_wmem, initial_sndbuf);
>>>>>> +	WRITE_ONCE(net->smc.sysctl_rmem, initial_rcvbuf);
>>>>>
>>>>> Maybe we can use SMC_{SND|RCV}BUF_INIT_SIZE macro directly, instead of
>>>>> new variables.
>>>>
>>>> The reason i created the new variables is that min_{snd|rcv}buf also have
>>>> their own variables. I know it is not needed but thought it was cleaner.
>>>> If you have a strong opinion on using the value directly i can change it.
>>>> Please let me know if you want it changed.
>>>
>>> Yep, it's okay for me to use variables or macros. Just let it be.
>> I think it's better coding style to use the macros instead of unneccessary variables.
>> At least the variables could be defined as const.
>>>
>>> Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
>>>
>>> Cheers,
>>> Tony Lu
>>>
>>>>
>>>> - Jan
>>>>>
>>>>> Cheers,
>>>>> Tony Lu
>>>>>
>>>>>>    	return 0;
>>>>>> -- 
>>>>>> 2.34.1
diff mbox series

Patch

diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst
index 6d8acdbe9be1..a1c634d3690a 100644
--- a/Documentation/networking/smc-sysctl.rst
+++ b/Documentation/networking/smc-sysctl.rst
@@ -44,7 +44,7 @@  smcr_testlink_time - INTEGER
 
 wmem - INTEGER
 	Initial size of send buffer used by SMC sockets.
-	The default value inherits from net.ipv4.tcp_wmem[1].
+	The default value aims for a small memory footprint and is set to 16KiB.
 
 	The minimum value is 16KiB and there is no hard limit for max value, but
 	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
@@ -53,7 +53,7 @@  wmem - INTEGER
 
 rmem - INTEGER
 	Initial size of receive buffer (RMB) used by SMC sockets.
-	The default value inherits from net.ipv4.tcp_rmem[1].
+	The default value aims for a small memory footprint and is set to 64KiB.
 
 	The minimum value is 16KiB and there is no hard limit for max value, but
 	only allowed 512KiB for SMC-R and 1MiB for SMC-D.
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 285f9bd8e232..67c3937f341d 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -206,8 +206,10 @@  struct smc_rtoken {				/* address/key of remote RMB */
 	u32			rkey;
 };
 
-#define SMC_BUF_MIN_SIZE	16384	/* minimum size of an RMB */
-#define SMC_RMBE_SIZES		16	/* number of distinct RMBE sizes */
+#define SMC_SNDBUF_INIT_SIZE 16384 /* initial size of send buffer */
+#define SMC_RCVBUF_INIT_SIZE 65536 /* initial size of receive buffer */
+#define SMC_BUF_MIN_SIZE	 16384	/* minimum size of an RMB */
+#define SMC_RMBE_SIZES		 16	/* number of distinct RMBE sizes */
 /* theoretically, the RFC states that largest size would be 512K,
  * i.e. compressed 5 and thus 6 sizes (0..5), despite
  * struct smc_clc_msg_accept_confirm.rmbe_size being a 4 bit value (0..15)
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index b6f79fabb9d3..a63aa79d4856 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -19,8 +19,10 @@ 
 #include "smc_llc.h"
 #include "smc_sysctl.h"
 
-static int min_sndbuf = SMC_BUF_MIN_SIZE;
-static int min_rcvbuf = SMC_BUF_MIN_SIZE;
+static int initial_sndbuf	= SMC_SNDBUF_INIT_SIZE;
+static int initial_rcvbuf	= SMC_RCVBUF_INIT_SIZE;
+static int min_sndbuf		= SMC_BUF_MIN_SIZE;
+static int min_rcvbuf		= SMC_BUF_MIN_SIZE;
 
 static struct ctl_table smc_table[] = {
 	{
@@ -88,8 +90,8 @@  int __net_init smc_sysctl_net_init(struct net *net)
 	net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
 	net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
 	net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
-	WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
-	WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
+	WRITE_ONCE(net->smc.sysctl_wmem, initial_sndbuf);
+	WRITE_ONCE(net->smc.sysctl_rmem, initial_rcvbuf);
 
 	return 0;