diff mbox series

[net-next,v6,3/3] net/smc: Introduce IPPROTO_SMC

Message ID 1717592180-66181-4-git-send-email-alibuda@linux.alibaba.com (mailing list archive)
State Superseded
Headers show
Series Introduce IPPROTO_SMC | expand

Commit Message

D. Wythe June 5, 2024, 12:56 p.m. UTC
From: "D. Wythe" <alibuda@linux.alibaba.com>

This patch allows creating SMC sockets via AF_INET and AF_INET6,
as in the following code:

/* create v4 smc sock */
v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);

/* create v6 smc sock */
v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);

There are several reasons why we believe it is appropriate here:

1. SMC sockets actually use IPv4 (AF_INET) or IPv6 (AF_INET6)
addresses. There is no AF_SMC address at all.

2. Creating SMC sockets in the AF_INET(6) path allows us to reuse
the infrastructure of the AF_INET(6) path, such as the common eBPF hooks.
Otherwise, SMC would have to implement it again in the AF_SMC path.

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
Tested-by: Niklas Schnelle <schnelle@linux.ibm.com>
---
 include/uapi/linux/in.h |   2 +
 net/smc/Makefile        |   2 +-
 net/smc/af_smc.c        |  16 ++++-
 net/smc/smc_inet.c      | 169 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_inet.h      |  22 +++++++
 5 files changed, 208 insertions(+), 3 deletions(-)
 create mode 100644 net/smc/smc_inet.c
 create mode 100644 net/smc/smc_inet.h

Comments

Mat Martineau June 6, 2024, 9:22 p.m. UTC | #1
On Wed, 5 Jun 2024, D. Wythe wrote:

> From: "D. Wythe" <alibuda@linux.alibaba.com>
>
> This patch allows to create smc socket via AF_INET,
> similar to the following code,
>
> /* create v4 smc sock */
> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>
> /* create v6 smc sock */
> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>
> There are several reasons why we believe it is appropriate here:
>
> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
> address. There is no AF_SMC address at all.
>
> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
> Otherwise, smc have to implement it again in AF_SMC path.
>
> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> Tested-by: Niklas Schnelle <schnelle@linux.ibm.com>
> ---
> include/uapi/linux/in.h |   2 +
> net/smc/Makefile        |   2 +-
> net/smc/af_smc.c        |  16 ++++-
> net/smc/smc_inet.c      | 169 ++++++++++++++++++++++++++++++++++++++++++++++++
> net/smc/smc_inet.h      |  22 +++++++
> 5 files changed, 208 insertions(+), 3 deletions(-)
> create mode 100644 net/smc/smc_inet.c
> create mode 100644 net/smc/smc_inet.h
>
> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
> index e682ab6..0c6322b 100644
> --- a/include/uapi/linux/in.h
> +++ b/include/uapi/linux/in.h
> @@ -83,6 +83,8 @@ enum {
> #define IPPROTO_RAW		IPPROTO_RAW
>   IPPROTO_MPTCP = 262,		/* Multipath TCP connection		*/
> #define IPPROTO_MPTCP		IPPROTO_MPTCP
> +  IPPROTO_SMC = 263,		/* Shared Memory Communications		*/
> +#define IPPROTO_SMC		IPPROTO_SMC

Hello,

It's not required to assign IPPROTO_MPTCP+1 as your new IPPROTO_SMC value. 
Making IPPROTO_MAX larger does increase the size of the inet_diag_table. 
Values from 256 to 261 are usable for IPPROTO_SMC without increasing 
IPPROTO_MAX.

Just for background: When we added IPPROTO_MPTCP, we chose 262 because it 
is IPPROTO_TCP+0x100. The IANA reserved protocol numbers are 8 bits wide 
so we knew we would not conflict with any future additions, and in the 
case of MPTCP it was convenient that truncating the proto value to 8 bits
would match IPPROTO_TCP.

- Mat

>   IPPROTO_MAX
> };
D. Wythe June 7, 2024, 5:09 a.m. UTC | #2
On 6/7/24 5:22 AM, Mat Martineau wrote:
> On Wed, 5 Jun 2024, D. Wythe wrote:
>
>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>
>> This patch allows to create smc socket via AF_INET,
>> similar to the following code,
>>
>> /* create v4 smc sock */
>> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>>
>> /* create v6 smc sock */
>> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>>
>> There are several reasons why we believe it is appropriate here:
>>
>> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>> address. There is no AF_SMC address at all.
>>
>> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>> Otherwise, smc have to implement it again in AF_SMC path.
>>
>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>> Tested-by: Niklas Schnelle <schnelle@linux.ibm.com>
>> ---
>> include/uapi/linux/in.h |   2 +
>> net/smc/Makefile        |   2 +-
>> net/smc/af_smc.c        |  16 ++++-
>> net/smc/smc_inet.c      | 169 
>> ++++++++++++++++++++++++++++++++++++++++++++++++
>> net/smc/smc_inet.h      |  22 +++++++
>> 5 files changed, 208 insertions(+), 3 deletions(-)
>> create mode 100644 net/smc/smc_inet.c
>> create mode 100644 net/smc/smc_inet.h
>>
>> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
>> index e682ab6..0c6322b 100644
>> --- a/include/uapi/linux/in.h
>> +++ b/include/uapi/linux/in.h
>> @@ -83,6 +83,8 @@ enum {
>> #define IPPROTO_RAW        IPPROTO_RAW
>>   IPPROTO_MPTCP = 262,        /* Multipath TCP connection */
>> #define IPPROTO_MPTCP        IPPROTO_MPTCP
>> +  IPPROTO_SMC = 263,        /* Shared Memory Communications        */
>> +#define IPPROTO_SMC        IPPROTO_SMC
>
> Hello,
>
> It's not required to assign IPPROTO_MPTCP+1 as your new IPPROTO_SMC 
> value. Making IPPROTO_MAX larger does increase the size of the 
> inet_diag_table. Values from 256 to 261 are usable for IPPROTO_SMC 
> without increasing IPPROTO_MAX.
>
> Just for background: When we added IPPROTO_MPTCP, we chose 262 because 
> it is IPPROTO_TCP+0x100. The IANA reserved protocol numbers are 8 bits 
> wide so we knew we would not conflict with any future additions, and 
> in the case of MPTCP is was convenient that truncating the proto value 
> to 8 bits would match IPPROTO_TCP.
>
> - Mat
>

Hi Mat,

Thank you very much for your feedback. I have always been curious about
the origins of IPPROTO_MPTCP, and I am glad to
have learned something new.

Regarding the size of inet_diag_table, what you said does make
sense. However, we still hope to continue using 263,
although the rationale may not be fully sufficient, as this series has 
been under community evaluation for quite some time now,
and we haven't received any feedback about this value, so we’ve been 
using it in some user-space tools ... 
Matthieu Baerts (NGI0) June 7, 2024, 2:47 p.m. UTC | #3
Hi D.Wythe,

On 07/06/2024 07:09, D. Wythe wrote:
> 
> On 6/7/24 5:22 AM, Mat Martineau wrote:
>> On Wed, 5 Jun 2024, D. Wythe wrote:
>>
>>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>>
>>> This patch allows to create smc socket via AF_INET,
>>> similar to the following code,
>>>
>>> /* create v4 smc sock */
>>> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>>>
>>> /* create v6 smc sock */
>>> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>>>
>>> There are several reasons why we believe it is appropriate here:
>>>
>>> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>>> address. There is no AF_SMC address at all.
>>>
>>> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>>> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>>> Otherwise, smc have to implement it again in AF_SMC path.
>>>
>>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>>> Tested-by: Niklas Schnelle <schnelle@linux.ibm.com>
>>> ---
>>> include/uapi/linux/in.h |   2 +
>>> net/smc/Makefile        |   2 +-
>>> net/smc/af_smc.c        |  16 ++++-
>>> net/smc/smc_inet.c      | 169 +++++++++++++++++++++++++++++++++++++++
>>> +++++++++
>>> net/smc/smc_inet.h      |  22 +++++++
>>> 5 files changed, 208 insertions(+), 3 deletions(-)
>>> create mode 100644 net/smc/smc_inet.c
>>> create mode 100644 net/smc/smc_inet.h
>>>
>>> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
>>> index e682ab6..0c6322b 100644
>>> --- a/include/uapi/linux/in.h
>>> +++ b/include/uapi/linux/in.h
>>> @@ -83,6 +83,8 @@ enum {
>>> #define IPPROTO_RAW        IPPROTO_RAW
>>>   IPPROTO_MPTCP = 262,        /* Multipath TCP connection */
>>> #define IPPROTO_MPTCP        IPPROTO_MPTCP
>>> +  IPPROTO_SMC = 263,        /* Shared Memory Communications        */
>>> +#define IPPROTO_SMC        IPPROTO_SMC
>>
>> Hello,
>>
>> It's not required to assign IPPROTO_MPTCP+1 as your new IPPROTO_SMC
>> value. Making IPPROTO_MAX larger does increase the size of the
>> inet_diag_table. Values from 256 to 261 are usable for IPPROTO_SMC
>> without increasing IPPROTO_MAX.
>>
>> Just for background: When we added IPPROTO_MPTCP, we chose 262 because
>> it is IPPROTO_TCP+0x100. The IANA reserved protocol numbers are 8 bits
>> wide so we knew we would not conflict with any future additions, and
>> in the case of MPTCP is was convenient that truncating the proto value
>> to 8 bits would match IPPROTO_TCP.
>>
>> - Mat
>>
> 
> Hi Mat,
> 
> Thank you very much for your feedback, I have always been curious about
> the origins of IPPROTO_MPTCP and I am glad to
> have learned new knowledge.
> 
> Regarding the size issue of inet_diag_tables, what you said does make
> sense. However, we still hope to continue using 263,
> although the rationale may not be fully sufficient, as this series has
> been under community evaluation for quite some time now,
> and we haven't received any feedback about this value, so we’ve been
> using it in some user-space tools ... 
Mat Martineau June 7, 2024, 4:47 p.m. UTC | #4
On Fri, 7 Jun 2024, Matthieu Baerts wrote:

> Hi D.Wythe,
>
> On 07/06/2024 07:09, D. Wythe wrote:
>>
>> On 6/7/24 5:22 AM, Mat Martineau wrote:
>>> On Wed, 5 Jun 2024, D. Wythe wrote:
>>>
>>>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>>>
>>>> This patch allows to create smc socket via AF_INET,
>>>> similar to the following code,
>>>>
>>>> /* create v4 smc sock */
>>>> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>>>>
>>>> /* create v6 smc sock */
>>>> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>>>>
>>>> There are several reasons why we believe it is appropriate here:
>>>>
>>>> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>>>> address. There is no AF_SMC address at all.
>>>>
>>>> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>>>> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>>>> Otherwise, smc have to implement it again in AF_SMC path.
>>>>
>>>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>>>> Tested-by: Niklas Schnelle <schnelle@linux.ibm.com>
>>>> ---
>>>> include/uapi/linux/in.h |   2 +
>>>> net/smc/Makefile        |   2 +-
>>>> net/smc/af_smc.c        |  16 ++++-
>>>> net/smc/smc_inet.c      | 169 +++++++++++++++++++++++++++++++++++++++
>>>> +++++++++
>>>> net/smc/smc_inet.h      |  22 +++++++
>>>> 5 files changed, 208 insertions(+), 3 deletions(-)
>>>> create mode 100644 net/smc/smc_inet.c
>>>> create mode 100644 net/smc/smc_inet.h
>>>>
>>>> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
>>>> index e682ab6..0c6322b 100644
>>>> --- a/include/uapi/linux/in.h
>>>> +++ b/include/uapi/linux/in.h
>>>> @@ -83,6 +83,8 @@ enum {
>>>> #define IPPROTO_RAW        IPPROTO_RAW
>>>>   IPPROTO_MPTCP = 262,        /* Multipath TCP connection */
>>>> #define IPPROTO_MPTCP        IPPROTO_MPTCP
>>>> +  IPPROTO_SMC = 263,        /* Shared Memory Communications        */
>>>> +#define IPPROTO_SMC        IPPROTO_SMC
>>>
>>> Hello,
>>>
>>> It's not required to assign IPPROTO_MPTCP+1 as your new IPPROTO_SMC
>>> value. Making IPPROTO_MAX larger does increase the size of the
>>> inet_diag_table. Values from 256 to 261 are usable for IPPROTO_SMC
>>> without increasing IPPROTO_MAX.
>>>
>>> Just for background: When we added IPPROTO_MPTCP, we chose 262 because
>>> it is IPPROTO_TCP+0x100. The IANA reserved protocol numbers are 8 bits
>>> wide so we knew we would not conflict with any future additions, and
>>> in the case of MPTCP is was convenient that truncating the proto value
>>> to 8 bits would match IPPROTO_TCP.
>>>
>>> - Mat
>>>
>>
>> Hi Mat,
>>
>> Thank you very much for your feedback, I have always been curious about
>> the origins of IPPROTO_MPTCP and I am glad to
>> have learned new knowledge.
>>

Hi D. Wythe -

Sure, you're welcome!

>> Regarding the size issue of inet_diag_tables, what you said does make
>> sense. However, we still hope to continue using 263,
>> although the rationale may not be fully sufficient, as this series has
>> been under community evaluation for quite some time now,
>> and we haven't received any feedback about this value, so we’ve been
>> using it in some user-space tools ... 
D. Wythe June 7, 2024, 7:35 p.m. UTC | #5
On 6/8/24 12:47 AM, Mat Martineau wrote:
> On Fri, 7 Jun 2024, Matthieu Baerts wrote:
>
>> Hi D.Wythe,
>>
>> On 07/06/2024 07:09, D. Wythe wrote:
>>>
>>> On 6/7/24 5:22 AM, Mat Martineau wrote:
>>>> On Wed, 5 Jun 2024, D. Wythe wrote:
>>>>
>>>>> From: "D. Wythe" <alibuda@linux.alibaba.com>
>>>>>
>>>>> This patch allows to create smc socket via AF_INET,
>>>>> similar to the following code,
>>>>>
>>>>> /* create v4 smc sock */
>>>>> v4 = socket(AF_INET, SOCK_STREAM, IPPROTO_SMC);
>>>>>
>>>>> /* create v6 smc sock */
>>>>> v6 = socket(AF_INET6, SOCK_STREAM, IPPROTO_SMC);
>>>>>
>>>>> There are several reasons why we believe it is appropriate here:
>>>>>
>>>>> 1. For smc sockets, it actually use IPv4 (AF-INET) or IPv6 (AF-INET6)
>>>>> address. There is no AF_SMC address at all.
>>>>>
>>>>> 2. Create smc socket in the AF_INET(6) path, which allows us to reuse
>>>>> the infrastructure of AF_INET(6) path, such as common ebpf hooks.
>>>>> Otherwise, smc have to implement it again in AF_SMC path.
>>>>>
>>>>> Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>>>>> Tested-by: Niklas Schnelle <schnelle@linux.ibm.com>
>>>>> ---
>>>>> include/uapi/linux/in.h |   2 +
>>>>> net/smc/Makefile        |   2 +-
>>>>> net/smc/af_smc.c        |  16 ++++-
>>>>> net/smc/smc_inet.c      | 169 +++++++++++++++++++++++++++++++++++++++
>>>>> +++++++++
>>>>> net/smc/smc_inet.h      |  22 +++++++
>>>>> 5 files changed, 208 insertions(+), 3 deletions(-)
>>>>> create mode 100644 net/smc/smc_inet.c
>>>>> create mode 100644 net/smc/smc_inet.h
>>>>>
>>>>> diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
>>>>> index e682ab6..0c6322b 100644
>>>>> --- a/include/uapi/linux/in.h
>>>>> +++ b/include/uapi/linux/in.h
>>>>> @@ -83,6 +83,8 @@ enum {
>>>>> #define IPPROTO_RAW        IPPROTO_RAW
>>>>>   IPPROTO_MPTCP = 262,        /* Multipath TCP connection */
>>>>> #define IPPROTO_MPTCP        IPPROTO_MPTCP
>>>>> +  IPPROTO_SMC = 263,        /* Shared Memory 
>>>>> Communications        */
>>>>> +#define IPPROTO_SMC        IPPROTO_SMC
>>>>
>>>> Hello,
>>>>
>>>> It's not required to assign IPPROTO_MPTCP+1 as your new IPPROTO_SMC
>>>> value. Making IPPROTO_MAX larger does increase the size of the
>>>> inet_diag_table. Values from 256 to 261 are usable for IPPROTO_SMC
>>>> without increasing IPPROTO_MAX.
>>>>
>>>> Just for background: When we added IPPROTO_MPTCP, we chose 262 because
>>>> it is IPPROTO_TCP+0x100. The IANA reserved protocol numbers are 8 bits
>>>> wide so we knew we would not conflict with any future additions, and
>>>> in the case of MPTCP is was convenient that truncating the proto value
>>>> to 8 bits would match IPPROTO_TCP.
>>>>
>>>> - Mat
>>>>
>>>
>>> Hi Mat,
>>>
>>> Thank you very much for your feedback, I have always been curious about
>>> the origins of IPPROTO_MPTCP and I am glad to
>>> have learned new knowledge.
>>>
>
> Hi D. Whythe -
>
> Sure, you're welcome!
>
>>> Regarding the size issue of inet_diag_tables, what you said does make
>>> sense. However, we still hope to continue using 263,
>>> although the rationale may not be fully sufficient, as this series has
>>> been under community evaluation for quite some time now,
>>> and we haven't received any feedback about this value, so we’ve been
>>> using it in some user-space tools ... 
Mat Martineau June 7, 2024, 8:32 p.m. UTC | #6
On Sat, 8 Jun 2024, D. Wythe wrote:

> Hi Mat and Matthieu,
>
> Thanks very much for your feedback!  The reasons you all have provided are 
> already quite convincing.
> In fact, as I mentioned earlier, I actually don't have sufficient grounds to 
> insist on 263.  It seems it's time for a change. 
diff mbox series

Patch

diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index e682ab6..0c6322b 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -83,6 +83,8 @@  enum {
 #define IPPROTO_RAW		IPPROTO_RAW
   IPPROTO_MPTCP = 262,		/* Multipath TCP connection		*/
 #define IPPROTO_MPTCP		IPPROTO_MPTCP
+  IPPROTO_SMC = 263,		/* Shared Memory Communications		*/
+#define IPPROTO_SMC		IPPROTO_SMC
   IPPROTO_MAX
 };
 #endif
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 2c510d54..60f1c87 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -4,6 +4,6 @@  obj-$(CONFIG_SMC)	+= smc.o
 obj-$(CONFIG_SMC_DIAG)	+= smc_diag.o
 smc-y := af_smc.o smc_pnet.o smc_ib.o smc_clc.o smc_core.o smc_wr.o smc_llc.o
 smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_stats.o
-smc-y += smc_tracepoint.o
+smc-y += smc_tracepoint.o smc_inet.o
 smc-$(CONFIG_SYSCTL) += smc_sysctl.o
 smc-$(CONFIG_SMC_LO) += smc_loopback.o
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 8e3ce76..743c27e 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -54,6 +54,7 @@ 
 #include "smc_tracepoint.h"
 #include "smc_sysctl.h"
 #include "smc_loopback.h"
+#include "smc_inet.h"
 
 static DEFINE_MUTEX(smc_server_lgr_pending);	/* serialize link group
 						 * creation on server
@@ -3593,10 +3594,15 @@  static int __init smc_init(void)
 		pr_err("%s: tcp_ulp_register fails with %d\n", __func__, rc);
 		goto out_lo;
 	}
-
+	rc = smc_inet_init();
+	if (rc) {
+		pr_err("%s: smc_inet_init fails with %d\n", __func__, rc);
+		goto out_ulp;
+	}
 	static_branch_enable(&tcp_have_smc);
 	return 0;
-
+out_ulp:
+	tcp_unregister_ulp(&smc_ulp_ops);
 out_lo:
 	smc_loopback_exit();
 out_ib:
@@ -3633,6 +3639,7 @@  static int __init smc_init(void)
 static void __exit smc_exit(void)
 {
 	static_branch_disable(&tcp_have_smc);
+	smc_inet_exit();
 	tcp_unregister_ulp(&smc_ulp_ops);
 	sock_unregister(PF_SMC);
 	smc_core_exit();
@@ -3660,4 +3667,9 @@  static void __exit smc_exit(void)
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_NETPROTO(PF_SMC);
 MODULE_ALIAS_TCP_ULP("smc");
+/* 263 for IPPROTO_SMC and 1 for SOCK_STREAM */
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 263, 1);
+#if IS_ENABLED(CONFIG_IPV6)
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 263, 1);
+#endif /* CONFIG_IPV6 */
 MODULE_ALIAS_GENL_FAMILY(SMC_GENL_FAMILY_NAME);
diff --git a/net/smc/smc_inet.c b/net/smc/smc_inet.c
new file mode 100644
index 00000000..bca57ae
--- /dev/null
+++ b/net/smc/smc_inet.c
@@ -0,0 +1,169 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ *  Definitions for the IPPROTO_SMC (socket related)
+ *
+ *  Copyright IBM Corp. 2016, 2018
+ *  Copyright (c) 2024, Alibaba Inc.
+ *
+ *  Author: D. Wythe <alibuda@linux.alibaba.com>
+ */
+
+#include <net/protocol.h>
+#include <net/sock.h>
+
+#include "smc_inet.h"
+#include "smc.h"
+
+static struct proto smc_inet_prot;
+static const struct proto_ops smc_inet_stream_ops;
+static struct inet_protosw smc_inet_protosw;
+
+#if IS_ENABLED(CONFIG_IPV6)
+static struct proto smc_inet6_prot;
+static const struct proto_ops smc_inet6_stream_ops;
+static struct inet_protosw smc_inet6_protosw;
+#endif /* CONFIG_IPV6 */
+
+static int smc_inet_init_sock(struct sock *sk);
+
+static struct proto smc_inet_prot = {
+	.name		= "INET_SMC",
+	.owner		= THIS_MODULE,
+	.init		= smc_inet_init_sock,
+	.hash		= smc_hash_sk,
+	.unhash		= smc_unhash_sk,
+	.release_cb	= smc_release_cb,
+	.obj_size	= sizeof(struct smc_sock),
+	.h.smc_hash	= &smc_v4_hashinfo,
+	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
+};
+
+static const struct proto_ops smc_inet_stream_ops = {
+	.family		= PF_INET,
+	.owner		= THIS_MODULE,
+	.release	= smc_release,
+	.bind		= smc_bind,
+	.connect	= smc_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= smc_accept,
+	.getname	= smc_getname,
+	.poll		= smc_poll,
+	.ioctl		= smc_ioctl,
+	.listen		= smc_listen,
+	.shutdown	= smc_shutdown,
+	.setsockopt	= smc_setsockopt,
+	.getsockopt	= smc_getsockopt,
+	.sendmsg	= smc_sendmsg,
+	.recvmsg	= smc_recvmsg,
+	.mmap		= sock_no_mmap,
+	.splice_read	= smc_splice_read,
+};
+
+static struct inet_protosw smc_inet_protosw = {
+	.type		= SOCK_STREAM,
+	.protocol	= IPPROTO_SMC,
+	.prot		= &smc_inet_prot,
+	.ops		= &smc_inet_stream_ops,
+	.flags		= INET_PROTOSW_ICSK,
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+static struct proto smc_inet6_prot = {
+	.name		= "INET6_SMC",
+	.owner		= THIS_MODULE,
+	.init		= smc_inet_init_sock,
+	.hash		= smc_hash_sk,
+	.unhash		= smc_unhash_sk,
+	.release_cb	= smc_release_cb,
+	.obj_size	= sizeof(struct smc_sock),
+	.h.smc_hash	= &smc_v6_hashinfo,
+	.slab_flags	= SLAB_TYPESAFE_BY_RCU,
+};
+
+static const struct proto_ops smc_inet6_stream_ops = {
+	.family		= PF_INET6,
+	.owner		= THIS_MODULE,
+	.release	= smc_release,
+	.bind		= smc_bind,
+	.connect	= smc_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= smc_accept,
+	.getname	= smc_getname,
+	.poll		= smc_poll,
+	.ioctl		= smc_ioctl,
+	.listen		= smc_listen,
+	.shutdown	= smc_shutdown,
+	.setsockopt	= smc_setsockopt,
+	.getsockopt	= smc_getsockopt,
+	.sendmsg	= smc_sendmsg,
+	.recvmsg	= smc_recvmsg,
+	.mmap		= sock_no_mmap,
+	.splice_read	= smc_splice_read,
+};
+
+static struct inet_protosw smc_inet6_protosw = {
+	.type		= SOCK_STREAM,
+	.protocol	= IPPROTO_SMC,
+	.prot		= &smc_inet6_prot,
+	.ops		= &smc_inet6_stream_ops,
+	.flags		= INET_PROTOSW_ICSK,
+};
+#endif /* CONFIG_IPV6 */
+
+static int smc_inet_init_sock(struct sock *sk)
+{
+	struct net *net = sock_net(sk);
+
+	/* initialize the common smc sock state for IPPROTO_SMC */
+	smc_sk_init(net, sk, IPPROTO_SMC);
+	/* create the clcsock with the same address family as sk */
+	return smc_create_clcsk(net, sk, sk->sk_family);
+}
+
+int __init smc_inet_init(void)
+{
+	int rc;
+
+	rc = proto_register(&smc_inet_prot, 1);
+	if (rc) {
+		pr_err("%s: proto_register smc_inet_prot fails with %d\n",
+		       __func__, rc);
+		return rc;
+	}
+	/* inet_register_protosw() has no return value to check */
+	inet_register_protosw(&smc_inet_protosw);
+
+#if IS_ENABLED(CONFIG_IPV6)
+	rc = proto_register(&smc_inet6_prot, 1);
+	if (rc) {
+		pr_err("%s: proto_register smc_inet6_prot fails with %d\n",
+		       __func__, rc);
+		goto out_inet6_prot;
+	}
+	rc = inet6_register_protosw(&smc_inet6_protosw);
+	if (rc) {
+		pr_err("%s: inet6_register_protosw smc_inet6_protosw fails with %d\n",
+		       __func__, rc);
+		goto out_inet6_protosw;
+	}
+	return rc;
+out_inet6_protosw:
+	proto_unregister(&smc_inet6_prot);
+out_inet6_prot:
+	inet_unregister_protosw(&smc_inet_protosw);
+	proto_unregister(&smc_inet_prot);
+#endif /* CONFIG_IPV6 */
+	return rc;
+}
+
+void smc_inet_exit(void)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	inet6_unregister_protosw(&smc_inet6_protosw);
+	proto_unregister(&smc_inet6_prot);
+#endif /* CONFIG_IPV6 */
+	inet_unregister_protosw(&smc_inet_protosw);
+	proto_unregister(&smc_inet_prot);
+}
diff --git a/net/smc/smc_inet.h b/net/smc/smc_inet.h
new file mode 100644
index 00000000..a489c8a
--- /dev/null
+++ b/net/smc/smc_inet.h
@@ -0,0 +1,22 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ *  Definitions for the IPPROTO_SMC (socket related)
+ *
+ *  Copyright IBM Corp. 2016
+ *  Copyright (c) 2024, Alibaba Inc.
+ *
+ *  Author: D. Wythe <alibuda@linux.alibaba.com>
+ */
+#ifndef __INET_SMC
+#define __INET_SMC
+
+/* Register the IPPROTO_SMC protocol for AF_INET (and AF_INET6 if enabled).
+ * @return 0 on success, a non-zero error code otherwise
+ */
+int smc_inet_init(void);
+
+void smc_inet_exit(void);
+
+#endif /* __INET_SMC */