diff mbox series

[net-next,v4,3/3] net/smc: Introduce IPPROTO_SMC

Message ID 1716955147-88923-4-git-send-email-alibuda@linux.alibaba.com (mailing list archive)
State Superseded
Headers show
Series Introduce IPPROTO_SMC | expand

Commit Message

D. Wythe May 29, 2024, 3:59 a.m. UTC
From: "D. Wythe" <alibuda@linux.alibaba.com>

This patch allows creating an SMC socket via AF_INET or AF_INET6,
as in the following code:

/* create v4 smc sock */
v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);

/* create v6 smc sock */
v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);

There are several reasons why we believe it is appropriate here:

1. SMC sockets actually use IPv4 (AF_INET) or IPv6 (AF_INET6)
addresses. There is no AF_SMC address at all.

2. Creating the SMC socket in the AF_INET(6) path allows us to reuse
the infrastructure of that path, such as the common eBPF hooks.
Otherwise, SMC would have to implement it again in the AF_SMC path.

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 include/uapi/linux/in.h |   2 +
 net/smc/Makefile        |   2 +-
 net/smc/af_smc.c        |  36 ++++++++++++++++
 net/smc/inet_smc.c      | 108 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/inet_smc.h      |  34 +++++++++++++++
 5 files changed, 181 insertions(+), 1 deletion(-)
 create mode 100644 net/smc/inet_smc.c
 create mode 100644 net/smc/inet_smc.h

Comments

Dust Li May 29, 2024, 11:12 a.m. UTC | #1
On 2024-05-29 11:59:07, D. Wythe wrote:
>From: "D. Wythe" <alibuda@linux.alibaba.com>
>
>This patch allows to create smc socket via AF_INET,
>similar to the following code,
>
>/* create v4 smc sock */
>v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>
>/* create v6 smc sock */
>v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>
>There are several reasons why we believe it is appropriate here:
>
>1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>address. There is no AF_SMC address at all.
>
>2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>Otherwise, smc have to implement it again in AF_SMC path.
>
>Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>---
> include/uapi/linux/in.h |   2 +
> net/smc/Makefile        |   2 +-
> net/smc/af_smc.c        |  36 ++++++++++++++++
> net/smc/inet_smc.c      | 108 ++++++++++++++++++++++++++++++++++++++++++++++++
> net/smc/inet_smc.h      |  34 +++++++++++++++
> 5 files changed, 181 insertions(+), 1 deletion(-)
> create mode 100644 net/smc/inet_smc.c
> create mode 100644 net/smc/inet_smc.h
>
>diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
>index e682ab6..0c6322b 100644
>--- a/include/uapi/linux/in.h
>+++ b/include/uapi/linux/in.h
>@@ -83,6 +83,8 @@ enum {
> #define IPPROTO_RAW		IPPROTO_RAW
>   IPPROTO_MPTCP = 262,		/* Multipath TCP connection		*/
> #define IPPROTO_MPTCP		IPPROTO_MPTCP
>+  IPPROTO_SMC = 263,		/* Shared Memory Communications		*/
>+#define IPPROTO_SMC		IPPROTO_SMC
>   IPPROTO_MAX
> };
> #endif
>diff --git a/net/smc/Makefile b/net/smc/Makefile
>index 2c510d54..472b9ee 100644
>--- a/net/smc/Makefile
>+++ b/net/smc/Makefile
>@@ -4,6 +4,6 @@ obj-$(CONFIG_SMC)	+= smc.o
> obj-$(CONFIG_SMC_DIAG)	+= smc_diag.o
> smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
> smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
>-smc-y += smc_tracepoint.o
>+smc-y += smc_tracepoint.o inet_smc.o
> smc-$(CONFIG_SYSCTL) += smc_sysctl.o
> smc-$(CONFIG_SMC_LO) += smc_loopback.o
>diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>index 8e3ce76..320624c 100644
>--- a/net/smc/af_smc.c
>+++ b/net/smc/af_smc.c
>@@ -54,6 +54,7 @@
> #include "smc_tracepoint.h"
> #include "smc_sysctl.h"
> #include "smc_loopback.h"
>+#include "inet_smc.h"
> 
> static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
> 						 * creation on server
>@@ -3594,9 +3595,31 @@ static int __init smc_init(void)
> 		goto out_lo;
> 	}
> 
>+	rc = proto_register(&smc_inet_prot, 1);
>+	if (rc) {
>+		pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
>+		goto out_ulp;
>+	}
>+	inet_register_protosw(&smc_inet_protosw);
>+#if IS_ENABLED(CONFIG_IPV6)
>+	rc = proto_register(&smc_inet6_prot, 1);
>+	if (rc) {
>+		pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
>+		goto out_inet_prot;
>+	}
>+	inet6_register_protosw(&smc_inet6_protosw);
>+#endif
>+

What do you think of moving all of this inet initialization code into
something like smc_inet_init() and putting it in smc_inet.c?


> 	static_branch_enable(&tcp_have_smc);
> 	return 0;
> 
>+#if IS_ENABLED(CONFIG_IPV6)
>+out_inet_prot:
>+	inet_unregister_protosw(&smc_inet_protosw);
>+	proto_unregister(&smc_inet_prot);
>+#endif
>+out_ulp:
>+	tcp_unregister_ulp(&smc_ulp_ops);
> out_lo:
> 	smc_loopback_exit();
> out_ib:
>@@ -3633,6 +3656,10 @@ static int __init smc_init(void)
> static void __exit smc_exit(void)
> {
> 	static_branch_disable(&tcp_have_smc);
>+	inet_unregister_protosw(&smc_inet_protosw);
>+#if IS_ENABLED(CONFIG_IPV6)
>+	inet6_unregister_protosw(&smc_inet6_protosw);
>+#endif
> 	tcp_unregister_ulp(&smc_ulp_ops);
> 	sock_unregister(PF_SMC);
> 	smc_core_exit();
>@@ -3644,6 +3671,10 @@ static void __exit smc_exit(void)
> 	destroy_workqueue(smc_hs_wq);
> 	proto_unregister(&smc_proto6);
> 	proto_unregister(&smc_proto);
>+	proto_unregister(&smc_inet_prot);
>+#if IS_ENABLED(CONFIG_IPV6)
>+	proto_unregister(&smc_inet6_prot);
>+#endif
> 	smc_pnet_exit();
> 	smc_nl_exit();
> 	smc_clc_exit();
>@@ -3660,4 +3691,9 @@ static void __exit smc_exit(void)
> MODULE_LICENSE("GPL");
> MODULE_ALIAS_NETPROTO(PF_SMC);
> MODULE_ALIAS_TCP_ULP("smc");
>+/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
>+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
>+#if IS_ENABLED(CONFIG_IPV6)
>+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
>+#endif
> MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
>diff --git a/net/smc/inet_smc.c b/net/smc/inet_smc.c
>new file mode 100644
>index 00000000..1ba73d7
>--- /dev/null
>+++ b/net/smc/inet_smc.c
>@@ -0,0 +1,108 @@
>+// SPDX-License-Identifier: GPL-2.0-only
>+/*
>+ *  Shared Memory Communications over RDMA (SMC-R) and RoCE
>+ *
>+ *  Definitions for the IPPROTO_SMC (socket related)
>+ *
>+ *  Copyright IBM Corp. 2016, 2018
>+ *  Copyright (c) 2024, Alibaba Inc.
>+ *
>+ *  Author: D. Wythe <alibuda@linux.alibaba.com>
>+ */
>+
>+#include "inet_smc.h"
>+#include "smc.h"
>+
>+struct proto smc_inet_prot = {
>+	.name		= "INET_SMC",
>+	.owner		= THIS_MODULE,
>+	.init		= smc_inet_init_sock,
>+	.hash		= smc_hash_sk,
>+	.unhash		= smc_unhash_sk,
>+	.release_cb	= smc_release_cb,
>+	.obj_size	= sizeof(struct smc_sock),
>+	.h.smc_hash	= &smc_v4_hashinfo,
>+	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
>+};
>+
>+const struct proto_ops smc_inet_stream_ops = {
>+	.family		= PF_INET,
>+	.owner		= THIS_MODULE,
>+	.release	= smc_release,
>+	.bind		= smc_bind,
>+	.connect	= smc_connect,
>+	.socketpair	= sock_no_socketpair,
>+	.accept		= smc_accept,
>+	.getname	= smc_getname,
>+	.poll		= smc_poll,
>+	.ioctl		= smc_ioctl,
>+	.listen		= smc_listen,
>+	.shutdown	= smc_shutdown,
>+	.setsockopt	= smc_setsockopt,
>+	.getsockopt	= smc_getsockopt,
>+	.sendmsg	= smc_sendmsg,
>+	.recvmsg	= smc_recvmsg,
>+	.mmap		= sock_no_mmap,
>+	.splice_read	= smc_splice_read,
>+};
>+
>+struct inet_protosw smc_inet_protosw = {
>+	.type		= SOCK_STREAM,
>+	.protocol	= IPPROTO_SMC,
>+	.prot		= &smc_inet_prot,
>+	.ops		= &smc_inet_stream_ops,
>+	.flags		= INET_PROTOSW_ICSK,
>+};
>+
>+#if IS_ENABLED(CONFIG_IPV6)
>+struct proto smc_inet6_prot = {
>+	.name		= "INET6_SMC",
>+	.owner		= THIS_MODULE,
>+	.init		= smc_inet_init_sock,
>+	.hash		= smc_hash_sk,
>+	.unhash		= smc_unhash_sk,
>+	.release_cb	= smc_release_cb,
>+	.obj_size	= sizeof(struct smc_sock),
>+	.h.smc_hash	= &smc_v6_hashinfo,
>+	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
>+};
>+
>+const struct proto_ops smc_inet6_stream_ops = {
>+	.family		= PF_INET6,
>+	.owner		= THIS_MODULE,
>+	.release	= smc_release,
>+	.bind		= smc_bind,
>+	.connect	= smc_connect,
>+	.socketpair	= sock_no_socketpair,
>+	.accept		= smc_accept,
>+	.getname	= smc_getname,
>+	.poll		= smc_poll,
>+	.ioctl		= smc_ioctl,
>+	.listen		= smc_listen,
>+	.shutdown	= smc_shutdown,
>+	.setsockopt	= smc_setsockopt,
>+	.getsockopt	= smc_getsockopt,
>+	.sendmsg	= smc_sendmsg,
>+	.recvmsg	= smc_recvmsg,
>+	.mmap		= sock_no_mmap,
>+	.splice_read	= smc_splice_read,
>+};
>+
>+struct inet_protosw smc_inet6_protosw = {
>+	.type		= SOCK_STREAM,
>+	.protocol	= IPPROTO_SMC,
>+	.prot		= &smc_inet6_prot,
>+	.ops		= &smc_inet6_stream_ops,
>+	.flags		= INET_PROTOSW_ICSK,
>+};
>+#endif
>+
>+int smc_inet_init_sock(struct sock *sk)
>+{
>+	struct net *net = sock_net(sk);
>+
>+	/* init common smc sock */
>+	smc_sk_init(net, sk, IPPROTO_SMC);
>+	/* create clcsock */
>+	return smc_create_clcsk(net, sk, sk->sk_family);
>+}
>diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
>new file mode 100644
>index 00000000..c55345d
>--- /dev/null
>+++ b/net/smc/inet_smc.h
>@@ -0,0 +1,34 @@
>+/* SPDX-License-Identifier: GPL-2.0 */
>+/*
>+ *  Shared Memory Communications over RDMA (SMC-R) and RoCE
>+ *
>+ *  Definitions for the IPPROTO_SMC (socket related)
>+
>+ *  Copyright IBM Corp. 2016
>+ *  Copyright (c) 2024, Alibaba Inc.
>+ *
>+ *  Author: D. Wythe <alibuda@linux.alibaba.com>
>+ */
>+#ifndef __INET_SMC
>+#define __INET_SMC
>+
>+#include <net/protocol.h>
>+#include <net/sock.h>
>+#include <net/tcp.h>

Why not put those #include lines in the .c file?

>+
>+extern struct proto smc_inet_prot;
>+extern const struct proto_ops smc_inet_stream_ops;
>+extern struct inet_protosw smc_inet_protosw;
>+
>+#if IS_ENABLED(CONFIG_IPV6)
>+#include <net/ipv6.h>
>+/* MUST after net/tcp.h or warning */
>+#include <net/transp_v6.h>
>+extern struct proto smc_inet6_prot;
>+extern const struct proto_ops smc_inet6_stream_ops;
>+extern struct inet_protosw smc_inet6_protosw;
>+#endif
>+
>+int smc_inet_init_sock(struct sock *sk);

It seems smc_inet_init_sock() is only used in smc_inet.c,
so why not define it as a static function?

Best regards,
Dust

>+
>+#endif /* __INET_SMC */
>-- 
>1.8.3.1
>
Wenjia Zhang May 29, 2024, 11:58 a.m. UTC | #2
On 29.05.24 05:59, D. Wythe wrote:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
> 
> This patch allows to create smc socket via AF_INET,
> similar to the following code,
> 
> /* create v4 smc sock */
> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
> 
> /* create v6 smc sock */
> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
> 
> There are several reasons why we believe it is appropriate here:
> 
> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
> address. There is no AF_SMC address at all.
> 
> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
> Otherwise, smc have to implement it again in AF_SMC path.
> 
> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> ---
>   include/uapi/linux/in.h |   2 +
>   net/smc/Makefile        |   2 +-
>   net/smc/af_smc.c        |  36 ++++++++++++++++
>   net/smc/inet_smc.c      | 108 ++++++++++++++++++++++++++++++++++++++++++++++++
>   net/smc/inet_smc.h      |  34 +++++++++++++++
>   5 files changed, 181 insertions(+), 1 deletion(-)
>   create mode 100644 net/smc/inet_smc.c
>   create mode 100644 net/smc/inet_smc.h
> 
> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
> index e682ab6..0c6322b 100644
> --- a/include/uapi/linux/in.h
> +++ b/include/uapi/linux/in.h
> @@ -83,6 +83,8 @@ enum {
>   #define IPPROTO_RAW		IPPROTO_RAW
>     IPPROTO_MPTCP = 262,		/* Multipath TCP connection		*/
>   #define IPPROTO_MPTCP		IPPROTO_MPTCP
> +  IPPROTO_SMC = 263,		/* Shared Memory Communications		*/
> +#define IPPROTO_SMC		IPPROTO_SMC
>     IPPROTO_MAX
>   };
>   #endif
> diff --git a/net/smc/Makefile b/net/smc/Makefile
> index 2c510d54..472b9ee 100644
> --- a/net/smc/Makefile
> +++ b/net/smc/Makefile
> @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC)	+= smc.o
>   obj-$(CONFIG_SMC_DIAG)	+= smc_diag.o
>   smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
>   smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
> -smc-y += smc_tracepoint.o
> +smc-y += smc_tracepoint.o inet_smc.o
>   smc-$(CONFIG_SYSCTL) += smc_sysctl.o
>   smc-$(CONFIG_SMC_LO) += smc_loopback.o
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index 8e3ce76..320624c 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -54,6 +54,7 @@
>   #include "smc_tracepoint.h"
>   #include "smc_sysctl.h"
>   #include "smc_loopback.h"
> +#include "inet_smc.h"
>   
>   static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
>   						 * creation on server
> @@ -3594,9 +3595,31 @@ static int __init smc_init(void)
>   		goto out_lo;
>   	}
>   
> +	rc = proto_register(&smc_inet_prot, 1);
> +	if (rc) {
> +		pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
> +		goto out_ulp;
> +	}
> +	inet_register_protosw(&smc_inet_protosw);
> +#if IS_ENABLED(CONFIG_IPV6)
> +	rc = proto_register(&smc_inet6_prot, 1);
> +	if (rc) {
> +		pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
> +		goto out_inet_prot;
> +	}
> +	inet6_register_protosw(&smc_inet6_protosw);

Unlike inet_register_protosw(), inet6_register_protosw() returns an
integer. Checking that return value and doing the corresponding cleanup
right here would IMO be much cleaner.

> +#endif
> +
>   	static_branch_enable(&tcp_have_smc);
>   	return 0;
>   
> +#if IS_ENABLED(CONFIG_IPV6)
> +out_inet_prot:
> +	inet_unregister_protosw(&smc_inet_protosw);
> +	proto_unregister(&smc_inet_prot);
> +#endif
> +out_ulp:
> +	tcp_unregister_ulp(&smc_ulp_ops);
>   out_lo:
>   	smc_loopback_exit();
>   out_ib:
> @@ -3633,6 +3656,10 @@ static int __init smc_init(void)
>   static void __exit smc_exit(void)
>   {
>   	static_branch_disable(&tcp_have_smc);
> +	inet_unregister_protosw(&smc_inet_protosw);
> +#if IS_ENABLED(CONFIG_IPV6)
> +	inet6_unregister_protosw(&smc_inet6_protosw);
> +#endif
>   	tcp_unregister_ulp(&smc_ulp_ops);
>   	sock_unregister(PF_SMC);
>   	smc_core_exit();
> @@ -3644,6 +3671,10 @@ static void __exit smc_exit(void)
>   	destroy_workqueue(smc_hs_wq);
>   	proto_unregister(&smc_proto6);
>   	proto_unregister(&smc_proto);
> +	proto_unregister(&smc_inet_prot);
> +#if IS_ENABLED(CONFIG_IPV6)
> +	proto_unregister(&smc_inet6_prot);
> +#endif

Since inet_smc.c already exists, I'd recommend grouping the registration
and unregistration code into functions such as smc_inet_init() and
smc_inet_exit() in inet_smc.c.

>   	smc_pnet_exit();
>   	smc_nl_exit();
>   	smc_clc_exit();
> @@ -3660,4 +3691,9 @@ static void __exit smc_exit(void)
>   MODULE_LICENSE("GPL");
>   MODULE_ALIAS_NETPROTO(PF_SMC);
>   MODULE_ALIAS_TCP_ULP("smc");
> +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
> +#if IS_ENABLED(CONFIG_IPV6)
> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
> +#endif
>   MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
> diff --git a/net/smc/inet_smc.c b/net/smc/inet_smc.c
> new file mode 100644
> index 00000000..1ba73d7
> --- /dev/null
> +++ b/net/smc/inet_smc.c

To stay consistent with the structure and function names used in these
files, why not name them smc_inet.h and smc_inet.c instead of
inet_smc.h and inet_smc.c, respectively?

> @@ -0,0 +1,108 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + *  Shared Memory Communications over RDMA (SMC-R) and RoCE
> + *
> + *  Definitions for the IPPROTO_SMC (socket related)
> + *
> + *  Copyright IBM Corp. 2016, 2018
> + *  Copyright (c) 2024, Alibaba Inc.
> + *
> + *  Author: D. Wythe <alibuda@linux.alibaba.com>
> + */
> +
> +#include "inet_smc.h"
> +#include "smc.h"
> +
> +struct proto smc_inet_prot = {
> +	.name		= "INET_SMC",
> +	.owner		= THIS_MODULE,
> +	.init		= smc_inet_init_sock,
> +	.hash		= smc_hash_sk,
> +	.unhash		= smc_unhash_sk,
> +	.release_cb	= smc_release_cb,
> +	.obj_size	= sizeof(struct smc_sock),
> +	.h.smc_hash	= &smc_v4_hashinfo,
> +	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
> +};
> +
> +const struct proto_ops smc_inet_stream_ops = {
> +	.family		= PF_INET,
> +	.owner		= THIS_MODULE,
> +	.release	= smc_release,
> +	.bind		= smc_bind,
> +	.connect	= smc_connect,
> +	.socketpair	= sock_no_socketpair,
> +	.accept		= smc_accept,
> +	.getname	= smc_getname,
> +	.poll		= smc_poll,
> +	.ioctl		= smc_ioctl,
> +	.listen		= smc_listen,
> +	.shutdown	= smc_shutdown,
> +	.setsockopt	= smc_setsockopt,
> +	.getsockopt	= smc_getsockopt,
> +	.sendmsg	= smc_sendmsg,
> +	.recvmsg	= smc_recvmsg,
> +	.mmap		= sock_no_mmap,
> +	.splice_read	= smc_splice_read,
> +};
> +
> +struct inet_protosw smc_inet_protosw = {
> +	.type		= SOCK_STREAM,
> +	.protocol	= IPPROTO_SMC,
> +	.prot		= &smc_inet_prot,
> +	.ops		= &smc_inet_stream_ops,
> +	.flags		= INET_PROTOSW_ICSK,
> +};
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +struct proto smc_inet6_prot = {
> +	.name		= "INET6_SMC",
> +	.owner		= THIS_MODULE,
> +	.init		= smc_inet_init_sock,
> +	.hash		= smc_hash_sk,
> +	.unhash		= smc_unhash_sk,
> +	.release_cb	= smc_release_cb,
> +	.obj_size	= sizeof(struct smc_sock),
> +	.h.smc_hash	= &smc_v6_hashinfo,
> +	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
> +};
> +
> +const struct proto_ops smc_inet6_stream_ops = {
> +	.family		= PF_INET6,
> +	.owner		= THIS_MODULE,
> +	.release	= smc_release,
> +	.bind		= smc_bind,
> +	.connect	= smc_connect,
> +	.socketpair	= sock_no_socketpair,
> +	.accept		= smc_accept,
> +	.getname	= smc_getname,
> +	.poll		= smc_poll,
> +	.ioctl		= smc_ioctl,
> +	.listen		= smc_listen,
> +	.shutdown	= smc_shutdown,
> +	.setsockopt	= smc_setsockopt,
> +	.getsockopt	= smc_getsockopt,
> +	.sendmsg	= smc_sendmsg,
> +	.recvmsg	= smc_recvmsg,
> +	.mmap		= sock_no_mmap,
> +	.splice_read	= smc_splice_read,
> +};
> +
> +struct inet_protosw smc_inet6_protosw = {
> +	.type		= SOCK_STREAM,
> +	.protocol	= IPPROTO_SMC,
> +	.prot		= &smc_inet6_prot,
> +	.ops		= &smc_inet6_stream_ops,
> +	.flags		= INET_PROTOSW_ICSK,
> +};
> +#endif
> +
> +int smc_inet_init_sock(struct sock *sk)
> +{
> +	struct net *net = sock_net(sk);
> +
> +	/* init common smc sock */
> +	smc_sk_init(net, sk, IPPROTO_SMC);
> +	/* create clcsock */
> +	return smc_create_clcsk(net, sk, sk->sk_family);
> +}
> diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
> new file mode 100644
> index 00000000..c55345d
> --- /dev/null
> +++ b/net/smc/inet_smc.h
> @@ -0,0 +1,34 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + *  Shared Memory Communications over RDMA (SMC-R) and RoCE
> + *
> + *  Definitions for the IPPROTO_SMC (socket related)
> +
> + *  Copyright IBM Corp. 2016
> + *  Copyright (c) 2024, Alibaba Inc.
> + *
> + *  Author: D. Wythe <alibuda@linux.alibaba.com>
> + */
> +#ifndef __INET_SMC
> +#define __INET_SMC
> +
> +#include <net/protocol.h>
> +#include <net/sock.h>
> +#include <net/tcp.h>
> +
> +extern struct proto smc_inet_prot;
> +extern const struct proto_ops smc_inet_stream_ops;
> +extern struct inet_protosw smc_inet_protosw;
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +#include <net/ipv6.h>
> +/* MUST after net/tcp.h or warning */
> +#include <net/transp_v6.h>
> +extern struct proto smc_inet6_prot;
> +extern const struct proto_ops smc_inet6_stream_ops;
> +extern struct inet_protosw smc_inet6_protosw;
> +#endif
> +
> +int smc_inet_init_sock(struct sock *sk);
> +
> +#endif /* __INET_SMC */
Zhu Yanjun May 29, 2024, 7:55 p.m. UTC | #3
在 2024/5/29 5:59, D. Wythe 写道:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
> 
> This patch allows to create smc socket via AF_INET,
> similar to the following code,
> 
> /* create v4 smc sock */
> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
> 
> /* create v6 smc sock */
> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
> 
> There are several reasons why we believe it is appropriate here:
> 
> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
> address. There is no AF_SMC address at all.
> 
> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
> Otherwise, smc have to implement it again in AF_SMC path.
> 
> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> ---
>   include/uapi/linux/in.h |   2 +
>   net/smc/Makefile        |   2 +-
>   net/smc/af_smc.c        |  36 ++++++++++++++++
>   net/smc/inet_smc.c      | 108 ++++++++++++++++++++++++++++++++++++++++++++++++
>   net/smc/inet_smc.h      |  34 +++++++++++++++
>   5 files changed, 181 insertions(+), 1 deletion(-)
>   create mode 100644 net/smc/inet_smc.c
>   create mode 100644 net/smc/inet_smc.h
> 
> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
> index e682ab6..0c6322b 100644
> --- a/include/uapi/linux/in.h
> +++ b/include/uapi/linux/in.h
> @@ -83,6 +83,8 @@ enum {
>   #define IPPROTO_RAW		IPPROTO_RAW
>     IPPROTO_MPTCP = 262,		/* Multipath TCP connection		*/
>   #define IPPROTO_MPTCP		IPPROTO_MPTCP
> +  IPPROTO_SMC = 263,		/* Shared Memory Communications		*/
> +#define IPPROTO_SMC		IPPROTO_SMC
>     IPPROTO_MAX
>   };
>   #endif
> diff --git a/net/smc/Makefile b/net/smc/Makefile
> index 2c510d54..472b9ee 100644
> --- a/net/smc/Makefile
> +++ b/net/smc/Makefile
> @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC)	+= smc.o
>   obj-$(CONFIG_SMC_DIAG)	+= smc_diag.o
>   smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
>   smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
> -smc-y += smc_tracepoint.o
> +smc-y += smc_tracepoint.o inet_smc.o
>   smc-$(CONFIG_SYSCTL) += smc_sysctl.o
>   smc-$(CONFIG_SMC_LO) += smc_loopback.o
> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> index 8e3ce76..320624c 100644
> --- a/net/smc/af_smc.c
> +++ b/net/smc/af_smc.c
> @@ -54,6 +54,7 @@
>   #include "smc_tracepoint.h"
>   #include "smc_sysctl.h"
>   #include "smc_loopback.h"
> +#include "inet_smc.h"
>   
>   static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
>   						 * creation on server
> @@ -3594,9 +3595,31 @@ static int __init smc_init(void)
>   		goto out_lo;
>   	}
>   
> +	rc = proto_register(&smc_inet_prot, 1);
> +	if (rc) {
> +		pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
> +		goto out_ulp;
> +	}
> +	inet_register_protosw(&smc_inet_protosw);
> +#if IS_ENABLED(CONFIG_IPV6)
> +	rc = proto_register(&smc_inet6_prot, 1);
> +	if (rc) {
> +		pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
> +		goto out_inet_prot;
> +	}
> +	inet6_register_protosw(&smc_inet6_protosw);
> +#endif
> +
>   	static_branch_enable(&tcp_have_smc);
>   	return 0;
>   
> +#if IS_ENABLED(CONFIG_IPV6)
> +out_inet_prot:
> +	inet_unregister_protosw(&smc_inet_protosw);
> +	proto_unregister(&smc_inet_prot);
> +#endif
> +out_ulp:
> +	tcp_unregister_ulp(&smc_ulp_ops);
>   out_lo:
>   	smc_loopback_exit();
>   out_ib:
> @@ -3633,6 +3656,10 @@ static int __init smc_init(void)
>   static void __exit smc_exit(void)
>   {
>   	static_branch_disable(&tcp_have_smc);
> +	inet_unregister_protosw(&smc_inet_protosw);
> +#if IS_ENABLED(CONFIG_IPV6)
> +	inet6_unregister_protosw(&smc_inet6_protosw);
> +#endif
>   	tcp_unregister_ulp(&smc_ulp_ops);
>   	sock_unregister(PF_SMC);
>   	smc_core_exit();
> @@ -3644,6 +3671,10 @@ static void __exit smc_exit(void)
>   	destroy_workqueue(smc_hs_wq);
>   	proto_unregister(&smc_proto6);
>   	proto_unregister(&smc_proto);
> +	proto_unregister(&smc_inet_prot);
> +#if IS_ENABLED(CONFIG_IPV6)
> +	proto_unregister(&smc_inet6_prot);
> +#endif
>   	smc_pnet_exit();
>   	smc_nl_exit();
>   	smc_clc_exit();
> @@ -3660,4 +3691,9 @@ static void __exit smc_exit(void)
>   MODULE_LICENSE("GPL");
>   MODULE_ALIAS_NETPROTO(PF_SMC);
>   MODULE_ALIAS_TCP_ULP("smc");
> +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
> +#if IS_ENABLED(CONFIG_IPV6)
> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
> +#endif
>   MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
> diff --git a/net/smc/inet_smc.c b/net/smc/inet_smc.c
> new file mode 100644
> index 00000000..1ba73d7
> --- /dev/null
> +++ b/net/smc/inet_smc.c
> @@ -0,0 +1,108 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + *  Shared Memory Communications over RDMA (SMC-R) and RoCE
> + *
> + *  Definitions for the IPPROTO_SMC (socket related)
> + *
> + *  Copyright IBM Corp. 2016, 2018
> + *  Copyright (c) 2024, Alibaba Inc.
> + *
> + *  Author: D. Wythe <alibuda@linux.alibaba.com>
> + */
> +
> +#include "inet_smc.h"
> +#include "smc.h"
> +
> +struct proto smc_inet_prot = {
> +	.name		= "INET_SMC",
> +	.owner		= THIS_MODULE,
> +	.init		= smc_inet_init_sock,
> +	.hash		= smc_hash_sk,
> +	.unhash		= smc_unhash_sk,
> +	.release_cb	= smc_release_cb,
> +	.obj_size	= sizeof(struct smc_sock),
> +	.h.smc_hash	= &smc_v4_hashinfo,
> +	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
> +};
> +
> +const struct proto_ops smc_inet_stream_ops = {
> +	.family		= PF_INET,
> +	.owner		= THIS_MODULE,
> +	.release	= smc_release,
> +	.bind		= smc_bind,
> +	.connect	= smc_connect,
> +	.socketpair	= sock_no_socketpair,
> +	.accept		= smc_accept,
> +	.getname	= smc_getname,
> +	.poll		= smc_poll,
> +	.ioctl		= smc_ioctl,
> +	.listen		= smc_listen,
> +	.shutdown	= smc_shutdown,
> +	.setsockopt	= smc_setsockopt,
> +	.getsockopt	= smc_getsockopt,
> +	.sendmsg	= smc_sendmsg,
> +	.recvmsg	= smc_recvmsg,
> +	.mmap		= sock_no_mmap,
> +	.splice_read	= smc_splice_read,
> +};
> +
> +struct inet_protosw smc_inet_protosw = {
> +	.type		= SOCK_STREAM,
> +	.protocol	= IPPROTO_SMC,
> +	.prot		= &smc_inet_prot,
> +	.ops		= &smc_inet_stream_ops,
> +	.flags		= INET_PROTOSW_ICSK,
> +};
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +struct proto smc_inet6_prot = {
> +	.name		= "INET6_SMC",
> +	.owner		= THIS_MODULE,
> +	.init		= smc_inet_init_sock,
> +	.hash		= smc_hash_sk,
> +	.unhash		= smc_unhash_sk,
> +	.release_cb	= smc_release_cb,
> +	.obj_size	= sizeof(struct smc_sock),
> +	.h.smc_hash	= &smc_v6_hashinfo,
> +	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
> +};
> +
> +const struct proto_ops smc_inet6_stream_ops = {
> +	.family		= PF_INET6,
> +	.owner		= THIS_MODULE,
> +	.release	= smc_release,
> +	.bind		= smc_bind,
> +	.connect	= smc_connect,
> +	.socketpair	= sock_no_socketpair,
> +	.accept		= smc_accept,
> +	.getname	= smc_getname,
> +	.poll		= smc_poll,
> +	.ioctl		= smc_ioctl,
> +	.listen		= smc_listen,
> +	.shutdown	= smc_shutdown,
> +	.setsockopt	= smc_setsockopt,
> +	.getsockopt	= smc_getsockopt,
> +	.sendmsg	= smc_sendmsg,
> +	.recvmsg	= smc_recvmsg,
> +	.mmap		= sock_no_mmap,
> +	.splice_read	= smc_splice_read,
> +};
> +
> +struct inet_protosw smc_inet6_protosw = {
> +	.type		= SOCK_STREAM,
> +	.protocol	= IPPROTO_SMC,
> +	.prot		= &smc_inet6_prot,
> +	.ops		= &smc_inet6_stream_ops,
> +	.flags		= INET_PROTOSW_ICSK,
> +};
> +#endif
> +
> +int smc_inet_init_sock(struct sock *sk)
> +{
> +	struct net *net = sock_net(sk);
> +
> +	/* init common smc sock */
> +	smc_sk_init(net, sk, IPPROTO_SMC);
> +	/* create clcsock */
> +	return smc_create_clcsk(net, sk, sk->sk_family);
> +}
> diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
> new file mode 100644
> index 00000000..c55345d
> --- /dev/null
> +++ b/net/smc/inet_smc.h
> @@ -0,0 +1,34 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + *  Shared Memory Communications over RDMA (SMC-R) and RoCE
> + *
> + *  Definitions for the IPPROTO_SMC (socket related)
> +
> + *  Copyright IBM Corp. 2016
> + *  Copyright (c) 2024, Alibaba Inc.
> + *
> + *  Author: D. Wythe <alibuda@linux.alibaba.com>
> + */
> +#ifndef __INET_SMC
> +#define __INET_SMC
> +
> +#include <net/protocol.h>
> +#include <net/sock.h>
> +#include <net/tcp.h>
> +
> +extern struct proto smc_inet_prot;
> +extern const struct proto_ops smc_inet_stream_ops;
> +extern struct inet_protosw smc_inet_protosw;
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +#include <net/ipv6.h>
> +/* MUST after net/tcp.h or warning */
> +#include <net/transp_v6.h>
> +extern struct proto smc_inet6_prot;
> +extern const struct proto_ops smc_inet6_stream_ops;
> +extern struct inet_protosw smc_inet6_protosw;
> +#endif

Appending /* CONFIG_IPV6 */ to the #endif, to indicate that it closes the
CONFIG_IPV6 block, is a good habit: when browsing the source code, it is
easy to see which conditional ends there.
Just my two cents. It is a trivial issue, so feel free to ignore it,
but fixing it would make the source code more readable.

Zhu Yanjun

> +
> +int smc_inet_init_sock(struct sock *sk);
> +
> +#endif /* __INET_SMC */
D. Wythe May 30, 2024, 2:35 a.m. UTC | #4
On 5/30/24 3:55 AM, Zhu Yanjun wrote:
> 在 2024/5/29 5:59, D. Wythe 写道:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> This patch allows to create smc socket via AF_INET,
>> similar to the following code,
>>
>> /* create v4 smc sock */
>> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>>
>> /* create v6 smc sock */
>> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>>
>> There are several reasons why we believe it is appropriate here:
>>
>> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>> address. There is no AF_SMC address at all.
>>
>> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>> Otherwise, smc have to implement it again in AF_SMC path.
>>
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> ---
>>   include/uapi/linux/in.h |   2 +
>>   net/smc/Makefile        |   2 +-
>>   net/smc/af_smc.c        |  36 ++++++++++++++++
>>   net/smc/inet_smc.c      | 108 
>> ++++++++++++++++++++++++++++++++++++++++++++++++
>>   net/smc/inet_smc.h      |  34 +++++++++++++++
>>   5 files changed, 181 insertions(+), 1 deletion(-)
>>   create mode 100644 net/smc/inet_smc.c
>>   create mode 100644 net/smc/inet_smc.h
>>
>> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
>> index e682ab6..0c6322b 100644
>> --- a/include/uapi/linux/in.h
>> +++ b/include/uapi/linux/in.h
>> @@ -83,6 +83,8 @@ enum {
>>   #define IPPROTO_RAW        IPPROTO_RAW
>>     IPPROTO_MPTCP = 262,        /* Multipath TCP connection        */
>>   #define IPPROTO_MPTCP        IPPROTO_MPTCP
>> +  IPPROTO_SMC = 263,        /* Shared Memory Communications        */
>> +#define IPPROTO_SMC        IPPROTO_SMC
>>     IPPROTO_MAX
>>   };
>>   #endif
>> diff --git a/net/smc/Makefile b/net/smc/Makefile
>> index 2c510d54..472b9ee 100644
>> --- a/net/smc/Makefile
>> +++ b/net/smc/Makefile
>> @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC)    += smc.o
>>   obj-$(CONFIG_SMC_DIAG)    += smc_diag.o
>>   smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o 
>> smc_llc.o
>>   smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o 
>> smc_netlink.o smc_stats.o
>> -smc-y += smc_tracepoint.o
>> +smc-y += smc_tracepoint.o inet_smc.o
>>   smc-$(CONFIG_SYSCTL) += smc_sysctl.o
>>   smc-$(CONFIG_SMC_LO) += smc_loopback.o
>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> index 8e3ce76..320624c 100644
>> --- a/net/smc/af_smc.c
>> +++ b/net/smc/af_smc.c
>> @@ -54,6 +54,7 @@
>>   #include "smc_tracepoint.h"
>>   #include "smc_sysctl.h"
>>   #include "smc_loopback.h"
>> +#include "inet_smc.h"
>>     static DEFINE_MUTEX(smc_server_lgr_pending);    /* serialize link 
>> group
>>                            * creation on server
>> @@ -3594,9 +3595,31 @@ static int __init smc_init(void)
>>           goto out_lo;
>>       }
>>   +    rc = proto_register(&smc_inet_prot, 1);
>> +    if (rc) {
>> +        pr_err("%s: proto_register smc_inet_prot fails with %d\n", 
>> __func__, rc);
>> +        goto out_ulp;
>> +    }
>> +    inet_register_protosw(&smc_inet_protosw);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +    rc = proto_register(&smc_inet6_prot, 1);
>> +    if (rc) {
>> +        pr_err("%s: proto_register smc_inet6_prot fails with %d\n", 
>> __func__, rc);
>> +        goto out_inet_prot;
>> +    }
>> +    inet6_register_protosw(&smc_inet6_protosw);
>> +#endif
>> +
>>       static_branch_enable(&tcp_have_smc);
>>       return 0;
>>   +#if IS_ENABLED(CONFIG_IPV6)
>> +out_inet_prot:
>> +    inet_unregister_protosw(&smc_inet_protosw);
>> +    proto_unregister(&smc_inet_prot);
>> +#endif
>> +out_ulp:
>> +    tcp_unregister_ulp(&smc_ulp_ops);
>>   out_lo:
>>       smc_loopback_exit();
>>   out_ib:
>> @@ -3633,6 +3656,10 @@ static int __init smc_init(void)
>>   static void __exit smc_exit(void)
>>   {
>>       static_branch_disable(&tcp_have_smc);
>> +    inet_unregister_protosw(&smc_inet_protosw);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +    inet6_unregister_protosw(&smc_inet6_protosw);
>> +#endif
>>       tcp_unregister_ulp(&smc_ulp_ops);
>>       sock_unregister(PF_SMC);
>>       smc_core_exit();
>> @@ -3644,6 +3671,10 @@ static void __exit smc_exit(void)
>>       destroy_workqueue(smc_hs_wq);
>>       proto_unregister(&smc_proto6);
>>       proto_unregister(&smc_proto);
>> +    proto_unregister(&smc_inet_prot);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +    proto_unregister(&smc_inet6_prot);
>> +#endif
>>       smc_pnet_exit();
>>       smc_nl_exit();
>>       smc_clc_exit();
>> @@ -3660,4 +3691,9 @@ static void __exit smc_exit(void)
>>   MODULE_LICENSE("GPL");
>>   MODULE_ALIAS_NETPROTO(PF_SMC);
>>   MODULE_ALIAS_TCP_ULP("smc");
>> +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
>> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
>> +#endif
>>   MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
>> diff --git a/net/smc/inet_smc.c b/net/smc/inet_smc.c
>> new file mode 100644
>> index 00000000..1ba73d7
>> --- /dev/null
>> +++ b/net/smc/inet_smc.c
>> @@ -0,0 +1,108 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + *  Shared Memory Communications over RDMA (SMC-R) and RoCE
>> + *
>> + *  Definitions for the IPPROTO_SMC (socket related)
>> + *
>> + *  Copyright IBM Corp. 2016, 2018
>> + *  Copyright (c) 2024, Alibaba Inc.
>> + *
>> + *  Author: D. Wythe <alibuda@linux.alibaba.com>
>> + */
>> +
>> +#include "inet_smc.h"
>> +#include "smc.h"
>> +
>> +struct proto smc_inet_prot = {
>> +    .name        = "INET_SMC",
>> +    .owner        = THIS_MODULE,
>> +    .init        = smc_inet_init_sock,
>> +    .hash        = smc_hash_sk,
>> +    .unhash        = smc_unhash_sk,
>> +    .release_cb    = smc_release_cb,
>> +    .obj_size    = sizeof(struct smc_sock),
>> +    .h.smc_hash    = &smc_v4_hashinfo,
>> +    .slab_flags    = SLAB_TYPESAFE_BY_RCU,
>> +};
>> +
>> +const struct proto_ops smc_inet_stream_ops = {
>> +    .family        = PF_INET,
>> +    .owner        = THIS_MODULE,
>> +    .release    = smc_release,
>> +    .bind        = smc_bind,
>> +    .connect    = smc_connect,
>> +    .socketpair    = sock_no_socketpair,
>> +    .accept        = smc_accept,
>> +    .getname    = smc_getname,
>> +    .poll        = smc_poll,
>> +    .ioctl        = smc_ioctl,
>> +    .listen        = smc_listen,
>> +    .shutdown    = smc_shutdown,
>> +    .setsockopt    = smc_setsockopt,
>> +    .getsockopt    = smc_getsockopt,
>> +    .sendmsg    = smc_sendmsg,
>> +    .recvmsg    = smc_recvmsg,
>> +    .mmap        = sock_no_mmap,
>> +    .splice_read    = smc_splice_read,
>> +};
>> +
>> +struct inet_protosw smc_inet_protosw = {
>> +    .type        = SOCK_STREAM,
>> +    .protocol    = IPPROTO_SMC,
>> +    .prot        = &smc_inet_prot,
>> +    .ops        = &smc_inet_stream_ops,
>> +    .flags        = INET_PROTOSW_ICSK,
>> +};
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +struct proto smc_inet6_prot = {
>> +    .name        = "INET6_SMC",
>> +    .owner        = THIS_MODULE,
>> +    .init        = smc_inet_init_sock,
>> +    .hash        = smc_hash_sk,
>> +    .unhash        = smc_unhash_sk,
>> +    .release_cb    = smc_release_cb,
>> +    .obj_size    = sizeof(struct smc_sock),
>> +    .h.smc_hash    = &smc_v6_hashinfo,
>> +    .slab_flags    = SLAB_TYPESAFE_BY_RCU,
>> +};
>> +
>> +const struct proto_ops smc_inet6_stream_ops = {
>> +    .family        = PF_INET6,
>> +    .owner        = THIS_MODULE,
>> +    .release    = smc_release,
>> +    .bind        = smc_bind,
>> +    .connect    = smc_connect,
>> +    .socketpair    = sock_no_socketpair,
>> +    .accept        = smc_accept,
>> +    .getname    = smc_getname,
>> +    .poll        = smc_poll,
>> +    .ioctl        = smc_ioctl,
>> +    .listen        = smc_listen,
>> +    .shutdown    = smc_shutdown,
>> +    .setsockopt    = smc_setsockopt,
>> +    .getsockopt    = smc_getsockopt,
>> +    .sendmsg    = smc_sendmsg,
>> +    .recvmsg    = smc_recvmsg,
>> +    .mmap        = sock_no_mmap,
>> +    .splice_read    = smc_splice_read,
>> +};
>> +
>> +struct inet_protosw smc_inet6_protosw = {
>> +    .type        = SOCK_STREAM,
>> +    .protocol    = IPPROTO_SMC,
>> +    .prot        = &smc_inet6_prot,
>> +    .ops        = &smc_inet6_stream_ops,
>> +    .flags        = INET_PROTOSW_ICSK,
>> +};
>> +#endif
>> +
>> +int smc_inet_init_sock(struct sock *sk)
>> +{
>> +    struct net *net = sock_net(sk);
>> +
>> +    /* init common smc sock */
>> +    smc_sk_init(net, sk, IPPROTO_SMC);
>> +    /* create clcsock */
>> +    return smc_create_clcsk(net, sk, sk->sk_family);
>> +}
>> diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
>> new file mode 100644
>> index 00000000..c55345d
>> --- /dev/null
>> +++ b/net/smc/inet_smc.h
>> @@ -0,0 +1,34 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + *  Shared Memory Communications over RDMA (SMC-R) and RoCE
>> + *
>> + *  Definitions for the IPPROTO_SMC (socket related)
>> +
>> + *  Copyright IBM Corp. 2016
>> + *  Copyright (c) 2024, Alibaba Inc.
>> + *
>> + *  Author: D. Wythe <alibuda@linux.alibaba.com>
>> + */
>> +#ifndef __INET_SMC
>> +#define __INET_SMC
>> +
>> +#include <net/protocol.h>
>> +#include <net/sock.h>
>> +#include <net/tcp.h>
>> +
>> +extern struct proto smc_inet_prot;
>> +extern const struct proto_ops smc_inet_stream_ops;
>> +extern struct inet_protosw smc_inet_protosw;
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +#include <net/ipv6.h>
>> +/* MUST after net/tcp.h or warning */
>> +#include <net/transp_v6.h>
>> +extern struct proto smc_inet6_prot;
>> +extern const struct proto_ops smc_inet6_stream_ops;
>> +extern struct inet_protosw smc_inet6_protosw;
>> +#endif
>
> It is a good habit to append /* CONFIG_IPV6 */ to the #endif to mark it 
> as the end of the CONFIG_IPV6 block. When browsing the source code, it 
> is then easy to see which conditional is being closed.
> Just my 2 cents. It is a trivial issue, so you can ignore it.
> But fixing it would make the source code more readable.
>
> Zhu Yanjun

I really like the style you suggested; I will use it in the next version.

Best wishes,
D. Wythe

>
>> +
>> +int smc_inet_init_sock(struct sock *sk);
>> +
>> +#endif /* __INET_SMC */
D. Wythe May 30, 2024, 2:51 a.m. UTC | #5
On 5/29/24 7:58 PM, Wenjia Zhang wrote:
>
>
> On 29.05.24 05:59, D. Wythe wrote:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> This patch allows to create smc socket via AF_INET,
>> similar to the following code,
>>
>> /* create v4 smc sock */
>> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>>
>> /* create v6 smc sock */
>> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>>
>> There are several reasons why we believe it is appropriate here:
>>
>> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>> address. There is no AF_SMC address at all.
>>
>> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>> Otherwise, smc have to implement it again in AF_SMC path.
>>
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> ---
>>   include/uapi/linux/in.h |   2 +
>>   net/smc/Makefile        |   2 +-
>>   net/smc/af_smc.c        |  36 ++++++++++++++++
>>   net/smc/inet_smc.c      | 108 
>> ++++++++++++++++++++++++++++++++++++++++++++++++
>>   net/smc/inet_smc.h      |  34 +++++++++++++++
>>   5 files changed, 181 insertions(+), 1 deletion(-)
>>   create mode 100644 net/smc/inet_smc.c
>>   create mode 100644 net/smc/inet_smc.h
>>
>> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
>> index e682ab6..0c6322b 100644
>> --- a/include/uapi/linux/in.h
>> +++ b/include/uapi/linux/in.h
>> @@ -83,6 +83,8 @@ enum {
>>   #define IPPROTO_RAW        IPPROTO_RAW
>>     IPPROTO_MPTCP = 262,        /* Multipath TCP connection        */
>>   #define IPPROTO_MPTCP        IPPROTO_MPTCP
>> +  IPPROTO_SMC = 263,        /* Shared Memory Communications        */
>> +#define IPPROTO_SMC        IPPROTO_SMC
>>     IPPROTO_MAX
>>   };
>>   #endif
>> diff --git a/net/smc/Makefile b/net/smc/Makefile
>> index 2c510d54..472b9ee 100644
>> --- a/net/smc/Makefile
>> +++ b/net/smc/Makefile
>> @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC)    += smc.o
>>   obj-$(CONFIG_SMC_DIAG)    += smc_diag.o
>>   smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o 
>> smc_llc.o
>>   smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o 
>> smc_netlink.o smc_stats.o
>> -smc-y += smc_tracepoint.o
>> +smc-y += smc_tracepoint.o inet_smc.o
>>   smc-$(CONFIG_SYSCTL) += smc_sysctl.o
>>   smc-$(CONFIG_SMC_LO) += smc_loopback.o
>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> index 8e3ce76..320624c 100644
>> --- a/net/smc/af_smc.c
>> +++ b/net/smc/af_smc.c
>> @@ -54,6 +54,7 @@
>>   #include "smc_tracepoint.h"
>>   #include "smc_sysctl.h"
>>   #include "smc_loopback.h"
>> +#include "inet_smc.h"
>>     static DEFINE_MUTEX(smc_server_lgr_pending);    /* serialize link 
>> group
>>                            * creation on server
>> @@ -3594,9 +3595,31 @@ static int __init smc_init(void)
>>           goto out_lo;
>>       }
>>   +    rc = proto_register(&smc_inet_prot, 1);
>> +    if (rc) {
>> +        pr_err("%s: proto_register smc_inet_prot fails with %d\n", 
>> __func__, rc);
>> +        goto out_ulp;
>> +    }
>> +    inet_register_protosw(&smc_inet_protosw);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +    rc = proto_register(&smc_inet6_prot, 1);
>> +    if (rc) {
>> +        pr_err("%s: proto_register smc_inet6_prot fails with %d\n", 
>> __func__, rc);
>> +        goto out_inet_prot;
>> +    }
>> +    inet6_register_protosw(&smc_inet6_protosw);
>
> Compared to inet_register_protosw(), inet6_register_protosw() 
> returns an integer. Thus, checking for the error and doing the 
> corresponding cleanup right here looks IMO much cleaner.
>
Oops... I was under the impression that it had no return. In the prior 
RFC, I even commented that it had no return. Quite the oversight on my part.

>> +#endif
>> +
>>       static_branch_enable(&tcp_have_smc);
>>       return 0;
>>   +#if IS_ENABLED(CONFIG_IPV6)
>> +out_inet_prot:
>> +    inet_unregister_protosw(&smc_inet_protosw);
>> +    proto_unregister(&smc_inet_prot);
>> +#endif
>> +out_ulp:
>> +    tcp_unregister_ulp(&smc_ulp_ops);
>>   out_lo:
>>       smc_loopback_exit();
>>   out_ib:
>> @@ -3633,6 +3656,10 @@ static int __init smc_init(void)
>>   static void __exit smc_exit(void)
>>   {
>>       static_branch_disable(&tcp_have_smc);
>> +    inet_unregister_protosw(&smc_inet_protosw);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +    inet6_unregister_protosw(&smc_inet6_protosw);
>> +#endif
>>       tcp_unregister_ulp(&smc_ulp_ops);
>>       sock_unregister(PF_SMC);
>>       smc_core_exit();
>> @@ -3644,6 +3671,10 @@ static void __exit smc_exit(void)
>>       destroy_workqueue(smc_hs_wq);
>>       proto_unregister(&smc_proto6);
>>       proto_unregister(&smc_proto);
>> +    proto_unregister(&smc_inet_prot);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +    proto_unregister(&smc_inet6_prot);
>> +#end
>
> Since there is already inet_smc.c, I'd recommend grouping this 
> register and unregister logic into dedicated functions, e.g. 
> smc_inet_init() and smc_inet_exit(), in inet_smc.c.
>

Agreed, I also see similar opinions from the community, and I will 
improve it in the next version.


>>       smc_pnet_exit();
>>       smc_nl_exit();
>>       smc_clc_exit();
>> @@ -3660,4 +3691,9 @@ static void __exit smc_exit(void)
>>   MODULE_LICENSE("GPL");
>>   MODULE_ALIAS_NETPROTO(PF_SMC);
>>   MODULE_ALIAS_TCP_ULP("smc");
>> +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
>> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
>> +#endif
>>   MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
>> diff --git a/net/smc/inet_smc.c b/net/smc/inet_smc.c
>> new file mode 100644
>> index 00000000..1ba73d7
>> --- /dev/null
>> +++ b/net/smc/inet_smc.c
>
> To stay consistent with the structure and function names used in 
> these files, I'm wondering why not use smc_inet.h and smc_inet.c
> instead of inet_smc.h and inet_smc.c, respectively?

That's because I was trying to keep the naming style consistent with 
af_smc.c. But I don't insist on this; using smc_inet
is also fine with me.

Thanks,
D. Wythe

>
>> @@ -0,0 +1,108 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + *  Shared Memory Communications over RDMA (SMC-R) and RoCE
>> + *
>> + *  Definitions for the IPPROTO_SMC (socket related)
>> + *
>> + *  Copyright IBM Corp. 2016, 2018
>> + *  Copyright (c) 2024, Alibaba Inc.
>> + *
>> + *  Author: D. Wythe <alibuda@linux.alibaba.com>
>> + */
>> +
>> +#include "inet_smc.h"
>> +#include "smc.h"
>> +
>> +struct proto smc_inet_prot = {
>> +    .name        = "INET_SMC",
>> +    .owner        = THIS_MODULE,
>> +    .init        = smc_inet_init_sock,
>> +    .hash        = smc_hash_sk,
>> +    .unhash        = smc_unhash_sk,
>> +    .release_cb    = smc_release_cb,
>> +    .obj_size    = sizeof(struct smc_sock),
>> +    .h.smc_hash    = &smc_v4_hashinfo,
>> +    .slab_flags    = SLAB_TYPESAFE_BY_RCU,
>> +};
>> +
>> +const struct proto_ops smc_inet_stream_ops = {
>> +    .family        = PF_INET,
>> +    .owner        = THIS_MODULE,
>> +    .release    = smc_release,
>> +    .bind        = smc_bind,
>> +    .connect    = smc_connect,
>> +    .socketpair    = sock_no_socketpair,
>> +    .accept        = smc_accept,
>> +    .getname    = smc_getname,
>> +    .poll        = smc_poll,
>> +    .ioctl        = smc_ioctl,
>> +    .listen        = smc_listen,
>> +    .shutdown    = smc_shutdown,
>> +    .setsockopt    = smc_setsockopt,
>> +    .getsockopt    = smc_getsockopt,
>> +    .sendmsg    = smc_sendmsg,
>> +    .recvmsg    = smc_recvmsg,
>> +    .mmap        = sock_no_mmap,
>> +    .splice_read    = smc_splice_read,
>> +};
>> +
>> +struct inet_protosw smc_inet_protosw = {
>> +    .type        = SOCK_STREAM,
>> +    .protocol    = IPPROTO_SMC,
>> +    .prot        = &smc_inet_prot,
>> +    .ops        = &smc_inet_stream_ops,
>> +    .flags        = INET_PROTOSW_ICSK,
>> +};
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +struct proto smc_inet6_prot = {
>> +    .name        = "INET6_SMC",
>> +    .owner        = THIS_MODULE,
>> +    .init        = smc_inet_init_sock,
>> +    .hash        = smc_hash_sk,
>> +    .unhash        = smc_unhash_sk,
>> +    .release_cb    = smc_release_cb,
>> +    .obj_size    = sizeof(struct smc_sock),
>> +    .h.smc_hash    = &smc_v6_hashinfo,
>> +    .slab_flags    = SLAB_TYPESAFE_BY_RCU,
>> +};
>> +
>> +const struct proto_ops smc_inet6_stream_ops = {
>> +    .family        = PF_INET6,
>> +    .owner        = THIS_MODULE,
>> +    .release    = smc_release,
>> +    .bind        = smc_bind,
>> +    .connect    = smc_connect,
>> +    .socketpair    = sock_no_socketpair,
>> +    .accept        = smc_accept,
>> +    .getname    = smc_getname,
>> +    .poll        = smc_poll,
>> +    .ioctl        = smc_ioctl,
>> +    .listen        = smc_listen,
>> +    .shutdown    = smc_shutdown,
>> +    .setsockopt    = smc_setsockopt,
>> +    .getsockopt    = smc_getsockopt,
>> +    .sendmsg    = smc_sendmsg,
>> +    .recvmsg    = smc_recvmsg,
>> +    .mmap        = sock_no_mmap,
>> +    .splice_read    = smc_splice_read,
>> +};
>> +
>> +struct inet_protosw smc_inet6_protosw = {
>> +    .type        = SOCK_STREAM,
>> +    .protocol    = IPPROTO_SMC,
>> +    .prot        = &smc_inet6_prot,
>> +    .ops        = &smc_inet6_stream_ops,
>> +    .flags        = INET_PROTOSW_ICSK,
>> +};
>> +#endif
>> +
>> +int smc_inet_init_sock(struct sock *sk)
>> +{
>> +    struct net *net = sock_net(sk);
>> +
>> +    /* init common smc sock */
>> +    smc_sk_init(net, sk, IPPROTO_SMC);
>> +    /* create clcsock */
>> +    return smc_create_clcsk(net, sk, sk->sk_family);
>> +}
>> diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
>> new file mode 100644
>> index 00000000..c55345d
>> --- /dev/null
>> +++ b/net/smc/inet_smc.h
>> @@ -0,0 +1,34 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + *  Shared Memory Communications over RDMA (SMC-R) and RoCE
>> + *
>> + *  Definitions for the IPPROTO_SMC (socket related)
>> +
>> + *  Copyright IBM Corp. 2016
>> + *  Copyright (c) 2024, Alibaba Inc.
>> + *
>> + *  Author: D. Wythe <alibuda@linux.alibaba.com>
>> + */
>> +#ifndef __INET_SMC
>> +#define __INET_SMC
>> +
>> +#include <net/protocol.h>
>> +#include <net/sock.h>
>> +#include <net/tcp.h>
>> +
>> +extern struct proto smc_inet_prot;
>> +extern const struct proto_ops smc_inet_stream_ops;
>> +extern struct inet_protosw smc_inet_protosw;
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +#include <net/ipv6.h>
>> +/* MUST after net/tcp.h or warning */
>> +#include <net/transp_v6.h>
>> +extern struct proto smc_inet6_prot;
>> +extern const struct proto_ops smc_inet6_stream_ops;
>> +extern struct inet_protosw smc_inet6_protosw;
>> +#endif
>> +
>> +int smc_inet_init_sock(struct sock *sk);
>> +
>> +#endif /* __INET_SMC */
D. Wythe May 30, 2024, 3:11 a.m. UTC | #6
On 5/29/24 7:12 PM, Dust Li wrote:
> On 2024-05-29 11:59:07, D. Wythe wrote:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> This patch allows to create smc socket via AF_INET,
>> similar to the following code,
>>
>> /* create v4 smc sock */
>> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>>
>> /* create v6 smc sock */
>> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>>
>> There are several reasons why we believe it is appropriate here:
>>
>> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>> address. There is no AF_SMC address at all.
>>
>> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>> Otherwise, smc have to implement it again in AF_SMC path.
>>
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> ---
>> include/uapi/linux/in.h |   2 +
>> net/smc/Makefile        |   2 +-
>> net/smc/af_smc.c        |  36 ++++++++++++++++
>> net/smc/inet_smc.c      | 108 ++++++++++++++++++++++++++++++++++++++++++++++++
>> net/smc/inet_smc.h      |  34 +++++++++++++++
>> 5 files changed, 181 insertions(+), 1 deletion(-)
>> create mode 100644 net/smc/inet_smc.c
>> create mode 100644 net/smc/inet_smc.h
>>
>> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
>> index e682ab6..0c6322b 100644
>> --- a/include/uapi/linux/in.h
>> +++ b/include/uapi/linux/in.h
>> @@ -83,6 +83,8 @@ enum {
>> #define IPPROTO_RAW		IPPROTO_RAW
>>    IPPROTO_MPTCP = 262,		/* Multipath TCP connection		*/
>> #define IPPROTO_MPTCP		IPPROTO_MPTCP
>> +  IPPROTO_SMC = 263,		/* Shared Memory Communications		*/
>> +#define IPPROTO_SMC		IPPROTO_SMC
>>    IPPROTO_MAX
>> };
>> #endif
>> diff --git a/net/smc/Makefile b/net/smc/Makefile
>> index 2c510d54..472b9ee 100644
>> --- a/net/smc/Makefile
>> +++ b/net/smc/Makefile
>> @@ -4,6 +4,6 @@ obj-$(CONFIG_SMC)	+= smc.o
>> obj-$(CONFIG_SMC_DIAG)	+= smc_diag.o
>> smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
>> smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
>> -smc-y += smc_tracepoint.o
>> +smc-y += smc_tracepoint.o inet_smc.o
>> smc-$(CONFIG_SYSCTL) += smc_sysctl.o
>> smc-$(CONFIG_SMC_LO) += smc_loopback.o
>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
>> index 8e3ce76..320624c 100644
>> --- a/net/smc/af_smc.c
>> +++ b/net/smc/af_smc.c
>> @@ -54,6 +54,7 @@
>> #include "smc_tracepoint.h"
>> #include "smc_sysctl.h"
>> #include "smc_loopback.h"
>> +#include "inet_smc.h"
>>
>> static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
>> 						 * creation on server
>> @@ -3594,9 +3595,31 @@ static int __init smc_init(void)
>> 		goto out_lo;
>> 	}
>>
>> +	rc = proto_register(&smc_inet_prot, 1);
>> +	if (rc) {
>> +		pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
>> +		goto out_ulp;
>> +	}
>> +	inet_register_protosw(&smc_inet_protosw);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +	rc = proto_register(&smc_inet6_prot, 1);
>> +	if (rc) {
>> +		pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
>> +		goto out_inet_prot;
>> +	}
>> +	inet6_register_protosw(&smc_inet6_protosw);
>> +#endif
>> +
> What do you think of moving all of this inet initialization code into
> something like smc_inet_init() and putting it in smc_inet.c?
>
Agreed.

>> 	static_branch_enable(&tcp_have_smc);
>> 	return 0;
>>
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +out_inet_prot:
>> +	inet_unregister_protosw(&smc_inet_protosw);
>> +	proto_unregister(&smc_inet_prot);
>> +#endif
>> +out_ulp:
>> +	tcp_unregister_ulp(&smc_ulp_ops);
>> out_lo:
>> 	smc_loopback_exit();
>> out_ib:
>> @@ -3633,6 +3656,10 @@ static int __init smc_init(void)
>> static void __exit smc_exit(void)
>> {
>> 	static_branch_disable(&tcp_have_smc);
>> +	inet_unregister_protosw(&smc_inet_protosw);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +	inet6_unregister_protosw(&smc_inet6_protosw);
>> +#endif
>> 	tcp_unregister_ulp(&smc_ulp_ops);
>> 	sock_unregister(PF_SMC);
>> 	smc_core_exit();
>> @@ -3644,6 +3671,10 @@ static void __exit smc_exit(void)
>> 	destroy_workqueue(smc_hs_wq);
>> 	proto_unregister(&smc_proto6);
>> 	proto_unregister(&smc_proto);
>> +	proto_unregister(&smc_inet_prot);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +	proto_unregister(&smc_inet6_prot);
>> +#endif
>> 	smc_pnet_exit();
>> 	smc_nl_exit();
>> 	smc_clc_exit();
>> @@ -3660,4 +3691,9 @@ static void __exit smc_exit(void)
>> MODULE_LICENSE("GPL");
>> MODULE_ALIAS_NETPROTO(PF_SMC);
>> MODULE_ALIAS_TCP_ULP("smc");
>> +/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
>> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
>> +#endif
>> MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
>> diff --git a/net/smc/inet_smc.c b/net/smc/inet_smc.c
>> new file mode 100644
>> index 00000000..1ba73d7
>> --- /dev/null
>> +++ b/net/smc/inet_smc.c
>> @@ -0,0 +1,108 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/*
>> + *  Shared Memory Communications over RDMA (SMC-R) and RoCE
>> + *
>> + *  Definitions for the IPPROTO_SMC (socket related)
>> + *
>> + *  Copyright IBM Corp. 2016, 2018
>> + *  Copyright (c) 2024, Alibaba Inc.
>> + *
>> + *  Author: D. Wythe <alibuda@linux.alibaba.com>
>> + */
>> +
>> +#include "inet_smc.h"
>> +#include "smc.h"
>> +
>> +struct proto smc_inet_prot = {
>> +	.name		= "INET_SMC",
>> +	.owner		= THIS_MODULE,
>> +	.init		= smc_inet_init_sock,
>> +	.hash		= smc_hash_sk,
>> +	.unhash		= smc_unhash_sk,
>> +	.release_cb	= smc_release_cb,
>> +	.obj_size	= sizeof(struct smc_sock),
>> +	.h.smc_hash	= &smc_v4_hashinfo,
>> +	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
>> +};
>> +
>> +const struct proto_ops smc_inet_stream_ops = {
>> +	.family		= PF_INET,
>> +	.owner		= THIS_MODULE,
>> +	.release	= smc_release,
>> +	.bind		= smc_bind,
>> +	.connect	= smc_connect,
>> +	.socketpair	= sock_no_socketpair,
>> +	.accept		= smc_accept,
>> +	.getname	= smc_getname,
>> +	.poll		= smc_poll,
>> +	.ioctl		= smc_ioctl,
>> +	.listen		= smc_listen,
>> +	.shutdown	= smc_shutdown,
>> +	.setsockopt	= smc_setsockopt,
>> +	.getsockopt	= smc_getsockopt,
>> +	.sendmsg	= smc_sendmsg,
>> +	.recvmsg	= smc_recvmsg,
>> +	.mmap		= sock_no_mmap,
>> +	.splice_read	= smc_splice_read,
>> +};
>> +
>> +struct inet_protosw smc_inet_protosw = {
>> +	.type		= SOCK_STREAM,
>> +	.protocol	= IPPROTO_SMC,
>> +	.prot		= &smc_inet_prot,
>> +	.ops		= &smc_inet_stream_ops,
>> +	.flags		= INET_PROTOSW_ICSK,
>> +};
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +struct proto smc_inet6_prot = {
>> +	.name		= "INET6_SMC",
>> +	.owner		= THIS_MODULE,
>> +	.init		= smc_inet_init_sock,
>> +	.hash		= smc_hash_sk,
>> +	.unhash		= smc_unhash_sk,
>> +	.release_cb	= smc_release_cb,
>> +	.obj_size	= sizeof(struct smc_sock),
>> +	.h.smc_hash	= &smc_v6_hashinfo,
>> +	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
>> +};
>> +
>> +const struct proto_ops smc_inet6_stream_ops = {
>> +	.family		= PF_INET6,
>> +	.owner		= THIS_MODULE,
>> +	.release	= smc_release,
>> +	.bind		= smc_bind,
>> +	.connect	= smc_connect,
>> +	.socketpair	= sock_no_socketpair,
>> +	.accept		= smc_accept,
>> +	.getname	= smc_getname,
>> +	.poll		= smc_poll,
>> +	.ioctl		= smc_ioctl,
>> +	.listen		= smc_listen,
>> +	.shutdown	= smc_shutdown,
>> +	.setsockopt	= smc_setsockopt,
>> +	.getsockopt	= smc_getsockopt,
>> +	.sendmsg	= smc_sendmsg,
>> +	.recvmsg	= smc_recvmsg,
>> +	.mmap		= sock_no_mmap,
>> +	.splice_read	= smc_splice_read,
>> +};
>> +
>> +struct inet_protosw smc_inet6_protosw = {
>> +	.type		= SOCK_STREAM,
>> +	.protocol	= IPPROTO_SMC,
>> +	.prot		= &smc_inet6_prot,
>> +	.ops		= &smc_inet6_stream_ops,
>> +	.flags		= INET_PROTOSW_ICSK,
>> +};
>> +#endif
>> +
>> +int smc_inet_init_sock(struct sock *sk)
>> +{
>> +	struct net *net = sock_net(sk);
>> +
>> +	/* init common smc sock */
>> +	smc_sk_init(net, sk, IPPROTO_SMC);
>> +	/* create clcsock */
>> +	return smc_create_clcsk(net, sk, sk->sk_family);
>> +}
>> diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
>> new file mode 100644
>> index 00000000..c55345d
>> --- /dev/null
>> +++ b/net/smc/inet_smc.h
>> @@ -0,0 +1,34 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + *  Shared Memory Communications over RDMA (SMC-R) and RoCE
>> + *
>> + *  Definitions for the IPPROTO_SMC (socket related)
>> +
>> + *  Copyright IBM Corp. 2016
>> + *  Copyright (c) 2024, Alibaba Inc.
>> + *
>> + *  Author: D. Wythe <alibuda@linux.alibaba.com>
>> + */
>> +#ifndef __INET_SMC
>> +#define __INET_SMC
>> +
>> +#include <net/protocol.h>
>> +#include <net/sock.h>
>> +#include <net/tcp.h>
> Why not put those 'include's in the .c file ?

Agreed. But I think that <net/protocol.h> is still needed to ensure that 
the header file itself is self-contained.

>> +
>> +extern struct proto smc_inet_prot;
>> +extern const struct proto_ops smc_inet_stream_ops;
>> +extern struct inet_protosw smc_inet_protosw;
>> +
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +#include <net/ipv6.h>
>> +/* MUST after net/tcp.h or warning */
>> +#include <net/transp_v6.h>
>> +extern struct proto smc_inet6_prot;
>> +extern const struct proto_ops smc_inet6_stream_ops;
>> +extern struct inet_protosw smc_inet6_protosw;
>> +#endif
>> +
>> +int smc_inet_init_sock(struct sock *sk);
> Seems smc_inet_init_sock() is only used in smc_inet.c,
> so why not define it as a static function?
>
> Best regards,
> Dust

That's true, I will fix it.


Best wishes,
D. Wythe

>> +
>> +#endif /* __INET_SMC */
>> -- 
>> 1.8.3.1
>>
Simon Horman June 1, 2024, 1:06 p.m. UTC | #7
On Wed, May 29, 2024 at 11:59:07AM +0800, D. Wythe wrote:
> From: "D. Wythe" <alibuda@linux.alibaba.com>
> 
> This patch allows to create smc socket via AF_INET,
> similar to the following code,
> 
> /* create v4 smc sock */
> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
> 
> /* create v6 smc sock */
> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
> 
> There are several reasons why we believe it is appropriate here:
> 
> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
> address. There is no AF_SMC address at all.
> 
> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
> Otherwise, smc have to implement it again in AF_SMC path.
> 
> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>

...

> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c

...

> @@ -3594,9 +3595,31 @@ static int __init smc_init(void)
>  		goto out_lo;
>  	}
>  
> +	rc = proto_register(&smc_inet_prot, 1);
> +	if (rc) {
> +		pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);

Hi,

FWIW, my feeling is that if a log message includes __func__ then it should
be a debug level message, and even then I'm dubious about the value of
__func__: we do have many tools, including dynamic tracing, for pinpointing
problems.

So I would suggest rephrasing this message and dropping __func__.
Or maybe removing it entirely.
Or if not, lowering the priority of this message to debug.

If for some reason __func__ remains, please do consider wrapping
the line to 80 columns or less, which can be trivially done here
(please don't split the format string in any case).

Flagged by checkpatch.pl --max-line-length=80

> +		goto out_ulp;
> +	}
> +	inet_register_protosw(&smc_inet_protosw);
> +#if IS_ENABLED(CONFIG_IPV6)
> +	rc = proto_register(&smc_inet6_prot, 1);
> +	if (rc) {
> +		pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);

Here too.

> +		goto out_inet_prot;
> +	}
> +	inet6_register_protosw(&smc_inet6_protosw);
> +#endif

...
D. Wythe June 3, 2024, 2:57 a.m. UTC | #8
On 6/1/24 9:06 PM, Simon Horman wrote:
> On Wed, May 29, 2024 at 11:59:07AM +0800, D. Wythe wrote:
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> This patch allows to create smc socket via AF_INET,
>> similar to the following code,
>>
>> /* create v4 smc sock */
>> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>>
>> /* create v6 smc sock */
>> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>>
>> There are several reasons why we believe it is appropriate here:
>>
>> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>> address. There is no AF_SMC address at all.
>>
>> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>> Otherwise, smc have to implement it again in AF_SMC path.
>>
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> ...
>
>> diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> ...
>
>> @@ -3594,9 +3595,31 @@ static int __init smc_init(void)
>>   		goto out_lo;
>>   	}
>>   
>> +	rc = proto_register(&smc_inet_prot, 1);
>> +	if (rc) {
>> +		pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
> Hi,
>
> FWIIW, my feeling is that if a log message includes __func__ then it should
> be a debug level message, and even then I'm dubious about the value of
> __func__: we do have many tools including dynamic tracing or pinpointing
> problems.
>
> So I would suggest rephrasing this message and dropping __func__.
> Or maybe removing it entirely.
> Or if not, lowering the priority of this message to debug.
>
> If for some reason __func__ remains, please do consider wrapping
> the line to 80c columns or less, which can be trivially done here
> (please don't split the format string in any case).
>
> Flagged by checkpatch.pl --max-line-length=80


Hi Simon,

Thank you very much for your feedback.

Allow me to briefly explain the reasons for using pr_err and __func__ here.

Regarding pr_err, the failure here leads to the failure of the module 
loading, which is definitely an error-level message rather than a 
debug-level one.

As for __func__, I must admit that the purpose here is simply to align 
with the format of other error messages in smc_init(). In fact, I also 
feel that the presence of
__func__ doesn't hold significant value because this error will only 
occur within this function. It's meaningless information for both users 
and kernel developers.
Perhaps a more suitable format would be “smc: xxx: %d”.

However, if changes are needed, I think they should be made across the 
board in order to maintain a consistent style. Maybe this can be 
addressed by
submitting a new patch after this patch. @Wenjia, what do you think?

Therefore, for now, I would like to wrap this line so that it does not
exceed 80 characters and passes checkpatch.pl.
What do you think?

Best wishes,
D. Wythe

>
>> +		goto out_ulp;
>> +	}
>> +	inet_register_protosw(&smc_inet_protosw);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +	rc = proto_register(&smc_inet6_prot, 1);
>> +	if (rc) {
>> +		pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
> Here too.
>
>> +		goto out_inet_prot;
>> +	}
>> +	inet6_register_protosw(&smc_inet6_protosw);
>> +#endif
> ...
Simon Horman June 3, 2024, 7:47 a.m. UTC | #9
On Mon, Jun 03, 2024 at 10:57:55AM +0800, D. Wythe wrote:
> 
> 
> On 6/1/24 9:06 PM, Simon Horman wrote:
> > On Wed, May 29, 2024 at 11:59:07AM +0800, D. Wythe wrote:
> > > From: "D. Wythe" <alibuda@linux.alibaba.com>
> > > 
> > > This patch allows to create smc socket via AF_INET,
> > > similar to the following code,
> > > 
> > > /* create v4 smc sock */
> > > v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
> > > 
> > > /* create v6 smc sock */
> > > v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
> > > 
> > > There are several reasons why we believe it is appropriate here:
> > > 
> > > 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
> > > address. There is no AF_SMC address at all.
> > > 
> > > 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
> > > the infrastructure of AF_INET(6) path, such as common ebpf hooks.
> > > Otherwise, smc have to implement it again in AF_SMC path.
> > > 
> > > Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> > ...
> > 
> > > diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
> > ...
> > 
> > > @@ -3594,9 +3595,31 @@ static int __init smc_init(void)
> > >   		goto out_lo;
> > >   	}
> > > +	rc = proto_register(&smc_inet_prot, 1);
> > > +	if (rc) {
> > > +		pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
> > Hi,
> > 
> > FWIW, my feeling is that if a log message includes __func__ then it should
> > be a debug level message, and even then I'm dubious about the value of
> > __func__: we do have many tools including dynamic tracing for pinpointing
> > problems.
> > 
> > So I would suggest rephrasing this message and dropping __func__.
> > Or maybe removing it entirely.
> > Or if not, lowering the priority of this message to debug.
> > 
> > If for some reason __func__ remains, please do consider wrapping
> > the line to 80c columns or less, which can be trivially done here
> > (please don't split the format string in any case).
> > 
> > Flagged by checkpatch.pl --max-line-length=80
> 
> 
> Hi Simon,
> 
> Thank you very much for your feedback.
> 
> Allow me to briefly explain the reasons for using pr_err and __func__ here.
> 
> Regarding pr_err, the failure here leads to the failure of the module
> loading, which is definitely an error-level message rather than a
> debug-level one.
> 
> As for __func__, I must admit that the purpose here is simply to align with
> the format of other error messages in smc_init(). In fact, I also feel that
> the presence of
> __func__ doesn't hold significant value because this error will only occur
> within this function. It's meaningless information for both users and kernel
> developers.
> Perhaps a more suitable format would be “smc: xxx: %d”.
> 
> However, if changes are needed, I think they should be made across the board
> in order to maintain a consistent style. Maybe this can be addressed by
> submitting a new patch after this patch. @Wenjia, what do you think?
> 
> Therefore, for now, I would like to wrap this line to not exceed 80
> characters, to ensure it can pass the checkpatch.pl.
> What do you think?

Thanks, I agree with your reasoning.
And I think this is a good approach for this patch.

> 
> Best wishes,
> D. Wythe
> 
> > 
> > > +		goto out_ulp;
> > > +	}
> > > +	inet_register_protosw(&smc_inet_protosw);
> > > +#if IS_ENABLED(CONFIG_IPV6)
> > > +	rc = proto_register(&smc_inet6_prot, 1);
> > > +	if (rc) {
> > > +		pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
> > Here too.
> > 
> > > +		goto out_inet_prot;
> > > +	}
> > > +	inet6_register_protosw(&smc_inet6_protosw);
> > > +#endif
> > ...
>
diff mbox series

Patch

diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index e682ab6..0c6322b 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -83,6 +83,8 @@  enum {
 #define IPPROTO_RAW		IPPROTO_RAW
   IPPROTO_MPTCP = 262,		/* Multipath TCP connection		*/
 #define IPPROTO_MPTCP		IPPROTO_MPTCP
+  IPPROTO_SMC = 263,		/* Shared Memory Communications		*/
+#define IPPROTO_SMC		IPPROTO_SMC
   IPPROTO_MAX
 };
 #endif
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 2c510d54..472b9ee 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -4,6 +4,6 @@  obj-$(CONFIG_SMC)	+= smc.o
 obj-$(CONFIG_SMC_DIAG)	+= smc_diag.o
 smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
 smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
-smc-y += smc_tracepoint.o
+smc-y += smc_tracepoint.o inet_smc.o
 smc-$(CONFIG_SYSCTL) += smc_sysctl.o
 smc-$(CONFIG_SMC_LO) += smc_loopback.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 8e3ce76..320624c 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -54,6 +54,7 @@ 
 #include "smc_tracepoint.h"
 #include "smc_sysctl.h"
 #include "smc_loopback.h"
+#include "inet_smc.h"
 
 static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
 						 * creation on server
@@ -3594,9 +3595,31 @@  static int __init smc_init(void)
 		goto out_lo;
 	}
 
+	rc = proto_register(&smc_inet_prot, 1);
+	if (rc) {
+		pr_err("%s: proto_register smc_inet_prot fails with %d\n", __func__, rc);
+		goto out_ulp;
+	}
+	inet_register_protosw(&smc_inet_protosw);
+#if IS_ENABLED(CONFIG_IPV6)
+	rc = proto_register(&smc_inet6_prot, 1);
+	if (rc) {
+		pr_err("%s: proto_register smc_inet6_prot fails with %d\n", __func__, rc);
+		goto out_inet_prot;
+	}
+	inet6_register_protosw(&smc_inet6_protosw);
+#endif
+
 	static_branch_enable(&tcp_have_smc);
 	return 0;
 
+#if IS_ENABLED(CONFIG_IPV6)
+out_inet_prot:
+	inet_unregister_protosw(&smc_inet_protosw);
+	proto_unregister(&smc_inet_prot);
+#endif
+out_ulp:
+	tcp_unregister_ulp(&smc_ulp_ops);
 out_lo:
 	smc_loopback_exit();
 out_ib:
@@ -3633,6 +3656,10 @@  static int __init smc_init(void)
 static void __exit smc_exit(void)
 {
 	static_branch_disable(&tcp_have_smc);
+	inet_unregister_protosw(&smc_inet_protosw);
+#if IS_ENABLED(CONFIG_IPV6)
+	inet6_unregister_protosw(&smc_inet6_protosw);
+#endif
 	tcp_unregister_ulp(&smc_ulp_ops);
 	sock_unregister(PF_SMC);
 	smc_core_exit();
@@ -3644,6 +3671,10 @@  static void __exit smc_exit(void)
 	destroy_workqueue(smc_hs_wq);
 	proto_unregister(&smc_proto6);
 	proto_unregister(&smc_proto);
+	proto_unregister(&smc_inet_prot);
+#if IS_ENABLED(CONFIG_IPV6)
+	proto_unregister(&smc_inet6_prot);
+#endif
 	smc_pnet_exit();
 	smc_nl_exit();
 	smc_clc_exit();
@@ -3660,4 +3691,9 @@  static void __exit smc_exit(void)
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_NETPROTO(PF_SMC);
 MODULE_ALIAS_TCP_ULP("smc");
+/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
+#if IS_ENABLED(CONFIG_IPV6)
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
+#endif
 MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
diff --git a/net/smc/inet_smc.c b/net/smc/inet_smc.c
new file mode 100644
index 00000000..1ba73d7
--- /dev/null
+++ b/net/smc/inet_smc.c
@@ -0,0 +1,108 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ *  Definitions for the IPPROTO_SMC (socket related)
+ *
+ *  Copyright IBM Corp. 2016, 2018
+ *  Copyright (c) 2024, Alibaba Inc.
+ *
+ *  Author: D. Wythe <alibuda@linux.alibaba.com>
+ */
+
+#include "inet_smc.h"
+#include "smc.h"
+
+struct proto smc_inet_prot = {
+	.name		= "INET_SMC",
+	.owner		= THIS_MODULE,
+	.init		= smc_inet_init_sock,
+	.hash		= smc_hash_sk,
+	.unhash		= smc_unhash_sk,
+	.release_cb	= smc_release_cb,
+	.obj_size	= sizeof(struct smc_sock),
+	.h.smc_hash	= &smc_v4_hashinfo,
+	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
+};
+
+const struct proto_ops smc_inet_stream_ops = {
+	.family		= PF_INET,
+	.owner		= THIS_MODULE,
+	.release	= smc_release,
+	.bind		= smc_bind,
+	.connect	= smc_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= smc_accept,
+	.getname	= smc_getname,
+	.poll		= smc_poll,
+	.ioctl		= smc_ioctl,
+	.listen		= smc_listen,
+	.shutdown	= smc_shutdown,
+	.setsockopt	= smc_setsockopt,
+	.getsockopt	= smc_getsockopt,
+	.sendmsg	= smc_sendmsg,
+	.recvmsg	= smc_recvmsg,
+	.mmap		= sock_no_mmap,
+	.splice_read	= smc_splice_read,
+};
+
+struct inet_protosw smc_inet_protosw = {
+	.type		= SOCK_STREAM,
+	.protocol	= IPPROTO_SMC,
+	.prot		= &smc_inet_prot,
+	.ops		= &smc_inet_stream_ops,
+	.flags		= INET_PROTOSW_ICSK,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+struct proto smc_inet6_prot = {
+	.name		= "INET6_SMC",
+	.owner		= THIS_MODULE,
+	.init		= smc_inet_init_sock,
+	.hash		= smc_hash_sk,
+	.unhash		= smc_unhash_sk,
+	.release_cb	= smc_release_cb,
+	.obj_size	= sizeof(struct smc_sock),
+	.h.smc_hash	= &smc_v6_hashinfo,
+	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
+};
+
+const struct proto_ops smc_inet6_stream_ops = {
+	.family		= PF_INET6,
+	.owner		= THIS_MODULE,
+	.release	= smc_release,
+	.bind		= smc_bind,
+	.connect	= smc_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= smc_accept,
+	.getname	= smc_getname,
+	.poll		= smc_poll,
+	.ioctl		= smc_ioctl,
+	.listen		= smc_listen,
+	.shutdown	= smc_shutdown,
+	.setsockopt	= smc_setsockopt,
+	.getsockopt	= smc_getsockopt,
+	.sendmsg	= smc_sendmsg,
+	.recvmsg	= smc_recvmsg,
+	.mmap		= sock_no_mmap,
+	.splice_read	= smc_splice_read,
+};
+
+struct inet_protosw smc_inet6_protosw = {
+	.type		= SOCK_STREAM,
+	.protocol	= IPPROTO_SMC,
+	.prot		= &smc_inet6_prot,
+	.ops		= &smc_inet6_stream_ops,
+	.flags		= INET_PROTOSW_ICSK,
+};
+#endif
+
+int smc_inet_init_sock(struct sock *sk)
+{
+	struct net *net = sock_net(sk);
+
+	/* init common smc sock */
+	smc_sk_init(net, sk, IPPROTO_SMC);
+	/* create clcsock */
+	return smc_create_clcsk(net, sk, sk->sk_family);
+}
diff --git a/net/smc/inet_smc.h b/net/smc/inet_smc.h
new file mode 100644
index 00000000..c55345d
--- /dev/null
+++ b/net/smc/inet_smc.h
@@ -0,0 +1,34 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ *  Definitions for the IPPROTO_SMC (socket related)
+ *
+ *  Copyright IBM Corp. 2016
+ *  Copyright (c) 2024, Alibaba Inc.
+ *
+ *  Author: D. Wythe <alibuda@linux.alibaba.com>
+ */
+#ifndef __INET_SMC
+#define __INET_SMC
+
+#include <net/protocol.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+
+extern struct proto smc_inet_prot;
+extern const struct proto_ops smc_inet_stream_ops;
+extern struct inet_protosw smc_inet_protosw;
+
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+/* Must be included after net/tcp.h, otherwise a warning is triggered */
+#include <net/transp_v6.h>
+extern struct proto smc_inet6_prot;
+extern const struct proto_ops smc_inet6_stream_ops;
+extern struct inet_protosw smc_inet6_protosw;
+#endif
+
+int smc_inet_init_sock(struct sock *sk);
+
+#endif /* __INET_SMC */