diff mbox series

[bpf-next,v7,3/6] net/smc: Introduce generic hook smc_ops

Message ID 20250123015942.94810-4-alibuda@linux.alibaba.com (mailing list archive)
State New
Delegated to: BPF
Headers show
Series net/smc: Introduce smc_ops | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 39 this patch: 39
netdev/build_tools success Errors and warnings before: 0 (+0) this patch: 0 (+0)
netdev/cc_maintainers warning 3 maintainers not CCed: horms@kernel.org tonylu@linux.alibaba.com dsahern@kernel.org
netdev/build_clang success Errors and warnings before: 6717 this patch: 6717
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 4256 this patch: 4260
netdev/checkpatch warning CHECK: Macro argument 'func' may be better as '(func)' to avoid precedence issues WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 1 this patch: 1
netdev/source_inline success Was 0 now: 0

Commit Message

D. Wythe Jan. 23, 2025, 1:59 a.m. UTC
The introduction of IPPROTO_SMC enables eBPF programs to determine
whether to use SMC based on the context of socket creation, such as
network namespaces, PID and comm name, etc.

As a subsequent enhancement, introduce a new generic hook that allows
deciding at runtime whether to use SMC or not, based on criteria
including but not limited to local/remote IP addresses or ports.

Moreover, in the future, we can achieve more complex extensions to the
protocol stack by extending this ops.

Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
---
 include/net/netns/smc.h |  3 ++
 include/net/smc.h       | 53 ++++++++++++++++++++++++
 net/ipv4/tcp_output.c   | 18 +++++++--
 net/smc/Kconfig         | 12 ++++++
 net/smc/Makefile        |  1 +
 net/smc/smc_ops.c       | 53 ++++++++++++++++++++++++
 net/smc/smc_ops.h       | 28 +++++++++++++
 net/smc/smc_sysctl.c    | 90 +++++++++++++++++++++++++++++++++++++++++
 8 files changed, 254 insertions(+), 4 deletions(-)
 create mode 100644 net/smc/smc_ops.c
 create mode 100644 net/smc/smc_ops.h

Comments

Dust Li Jan. 23, 2025, 7:30 a.m. UTC | #1
On 2025-01-23 09:59:39, D. Wythe wrote:
>The introduction of IPPROTO_SMC enables eBPF programs to determine
>whether to use SMC based on the context of socket creation, such as
>network namespaces, PID and comm name, etc.
>
>As a subsequent enhancement, to introduce a new generic hook that
>allows decisions on whether to use SMC or not at runtime, including
>but not limited to local/remote IP address or ports.
>
>Moreover, in the future, we can achieve more complex extensions to the
>protocol stack by extending this ops.
>
>Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
>---
> include/net/netns/smc.h |  3 ++
> include/net/smc.h       | 53 ++++++++++++++++++++++++
> net/ipv4/tcp_output.c   | 18 +++++++--
> net/smc/Kconfig         | 12 ++++++
> net/smc/Makefile        |  1 +
> net/smc/smc_ops.c       | 53 ++++++++++++++++++++++++
> net/smc/smc_ops.h       | 28 +++++++++++++
> net/smc/smc_sysctl.c    | 90 +++++++++++++++++++++++++++++++++++++++++
> 8 files changed, 254 insertions(+), 4 deletions(-)
> create mode 100644 net/smc/smc_ops.c
> create mode 100644 net/smc/smc_ops.h
>
>diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h
>index fc752a50f91b..81b3fdb39cd2 100644
>--- a/include/net/netns/smc.h
>+++ b/include/net/netns/smc.h
>@@ -17,6 +17,9 @@ struct netns_smc {
> #ifdef CONFIG_SYSCTL
> 	struct ctl_table_header		*smc_hdr;
> #endif
>+#if IS_ENABLED(CONFIG_SMC_OPS)
>+	struct smc_ops __rcu		*ops;
>+#endif /* CONFIG_SMC_OPS */
> 	unsigned int			sysctl_autocorking_size;
> 	unsigned int			sysctl_smcr_buf_type;
> 	int				sysctl_smcr_testlink_time;
>diff --git a/include/net/smc.h b/include/net/smc.h
>index db84e4e35080..844f98a6296a 100644
>--- a/include/net/smc.h
>+++ b/include/net/smc.h
>@@ -18,6 +18,8 @@
> #include "linux/ism.h"
> 
> struct sock;
>+struct tcp_sock;
>+struct inet_request_sock;
> 
> #define SMC_MAX_PNETID_LEN	16	/* Max. length of PNET id */
> 
>@@ -97,4 +99,55 @@ struct smcd_dev {
> 	u8 going_away : 1;
> };
> 
>+#define  SMC_OPS_NAME_MAX 16
>+
>+enum {
>+	/* ops can be inherit from init_net */
>+	SMC_OPS_FLAG_INHERITABLE = 0x1,
>+
>+	SMC_OPS_ALL_FLAGS = SMC_OPS_FLAG_INHERITABLE,
>+};
>+
>+struct smc_ops {
>+	/* priavte */
>+
>+	struct list_head list;
>+	struct module *owner;
>+
>+	/* public */
>+
>+	/* unique name */
>+	char name[SMC_OPS_NAME_MAX];
>+	int flags;
>+
>+	/* Invoked before computing SMC option for SYN packets.
>+	 * We can control whether to set SMC options by returning varios value.
>+	 * Return 0 to disable SMC, or return any other value to enable it.
>+	 */
>+	int (*set_option)(struct tcp_sock *tp);
>+
>+	/* Invoked before Set up SMC options for SYN-ACK packets
>+	 * We can control whether to respond SMC options by returning varios
>+	 * value. Return 0 to disable SMC, or return any other value to enable
>+	 * it.
>+	 */
>+	int (*set_option_cond)(const struct tcp_sock *tp,
>+			       struct inet_request_sock *ireq);
>+};
>+
>+#if IS_ENABLED(CONFIG_SMC_OPS)
>+#define smc_call_retops(init_val, sk, func, ...) ({	\
>+	typeof(init_val) __ret = (init_val);		\
>+	struct smc_ops *ops;				\
>+	rcu_read_lock();				\
>+	ops = READ_ONCE(sock_net(sk)->smc.ops);		\
>+	if (ops && ops->func)				\
>+		__ret = ops->func(__VA_ARGS__);		\
>+	rcu_read_unlock();				\
>+	__ret;						\
>+})
>+#else
>+#define smc_call_retops(init_val, ...) (init_val)
>+#endif /* CONFIG_SMC_OPS */
>+
> #endif	/* _SMC_H */
>diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
>index 0e5b9a654254..f62e30b4ffc8 100644
>--- a/net/ipv4/tcp_output.c
>+++ b/net/ipv4/tcp_output.c
>@@ -40,6 +40,7 @@
> #include <net/tcp.h>
> #include <net/mptcp.h>
> #include <net/proto_memory.h>
>+#include <net/smc.h>
> 
> #include <linux/compiler.h>
> #include <linux/gfp.h>
>@@ -759,14 +760,18 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp,
> 	mptcp_options_write(th, ptr, tp, opts);
> }
> 
>-static void smc_set_option(const struct tcp_sock *tp,
>+static void smc_set_option(struct tcp_sock *tp,
> 			   struct tcp_out_options *opts,
> 			   unsigned int *remaining)
> {
> #if IS_ENABLED(CONFIG_SMC)
>+	struct sock *sk = &tp->inet_conn.icsk_inet.sk;
> 	if (static_branch_unlikely(&tcp_have_smc)) {
> 		if (tp->syn_smc) {
>-			if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
>+			tp->syn_smc = !!smc_call_retops(1, sk, set_option, tp);
>+			/* re-check syn_smc */
>+			if (tp->syn_smc &&
>+			    *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
> 				opts->options |= OPTION_SMC;
> 				*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
> 			}
>@@ -776,14 +781,19 @@ static void smc_set_option(const struct tcp_sock *tp,
> }
> 
> static void smc_set_option_cond(const struct tcp_sock *tp,
>-				const struct inet_request_sock *ireq,
>+				struct inet_request_sock *ireq,
> 				struct tcp_out_options *opts,
> 				unsigned int *remaining)
> {
> #if IS_ENABLED(CONFIG_SMC)
>+	const struct sock *sk = &tp->inet_conn.icsk_inet.sk;
> 	if (static_branch_unlikely(&tcp_have_smc)) {
> 		if (tp->syn_smc && ireq->smc_ok) {
>-			if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
>+			ireq->smc_ok = !!smc_call_retops(1, sk, set_option_cond,
>+							 tp, ireq);
>+			/* re-check smc_ok */
>+			if (ireq->smc_ok &&
>+			    *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
> 				opts->options |= OPTION_SMC;
> 				*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
> 			}
>diff --git a/net/smc/Kconfig b/net/smc/Kconfig
>index ba5e6a2dd2fd..27f35064d04c 100644
>--- a/net/smc/Kconfig
>+++ b/net/smc/Kconfig
>@@ -33,3 +33,15 @@ config SMC_LO
> 	  of architecture or hardware.
> 
> 	  if unsure, say N.
>+
>+config SMC_OPS
>+	bool "Generic hook for SMC subsystem"
>+	depends on SMC && BPF_SYSCALL
>+	default n
>+	help
>+	  SMC_OPS enables support to register generic hook via eBPF programs
>+	  for SMC subsystem. eBPF programs offer much greater flexibility
>+	  in modifying the behavior of the SMC protocol stack compared
>+	  to a complete kernel-based approach.
>+
>+	  if unsure, say N.

I'm still not completely satisfied with the name smc_ops. Since this
will be the API for our users, we need to be careful with the name.

It seems like you're aiming to define a common set of operations, but
the implementation appears to be intertwined with BPF. If this is
intended to be a common interface, and if we are using another operation,
there shouldn’t be a need to hold a BPF reference.

As your 'help' text suggests, what about smc_hook?

Best regards,
Dust
Gerd Bayer Jan. 23, 2025, 10:15 a.m. UTC | #2
On Thu, 2025-01-23 at 15:30 +0800, Dust Li wrote:
> On 2025-01-23 09:59:39, D. Wythe wrote:
> > The introduction of IPPROTO_SMC enables eBPF programs to determine
> > whether to use SMC based on the context of socket creation, such as
> > network namespaces, PID and comm name, etc.
> > 
> > As a subsequent enhancement, to introduce a new generic hook that
> > allows decisions on whether to use SMC or not at runtime, including
> > but not limited to local/remote IP address or ports.
> > 
> > Moreover, in the future, we can achieve more complex extensions to the
> > protocol stack by extending this ops.
> > 
> > Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
> > ---
> > include/net/netns/smc.h |  3 ++
> > include/net/smc.h       | 53 ++++++++++++++++++++++++
> > net/ipv4/tcp_output.c   | 18 +++++++--
> > net/smc/Kconfig         | 12 ++++++
> > net/smc/Makefile        |  1 +
> > net/smc/smc_ops.c       | 53 ++++++++++++++++++++++++
> > net/smc/smc_ops.h       | 28 +++++++++++++
> > net/smc/smc_sysctl.c    | 90 +++++++++++++++++++++++++++++++++++++++++
> > 8 files changed, 254 insertions(+), 4 deletions(-)
> > create mode 100644 net/smc/smc_ops.c
> > create mode 100644 net/smc/smc_ops.h
> > 
> > diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h
> > index fc752a50f91b..81b3fdb39cd2 100644
> > --- a/include/net/netns/smc.h
> > +++ b/include/net/netns/smc.h
> > @@ -17,6 +17,9 @@ struct netns_smc {
> > #ifdef CONFIG_SYSCTL
> > 	struct ctl_table_header		*smc_hdr;
> > #endif
> > +#if IS_ENABLED(CONFIG_SMC_OPS)
> > +	struct smc_ops __rcu		*ops;
> > +#endif /* CONFIG_SMC_OPS */
> > 	unsigned int			sysctl_autocorking_size;
> > 	unsigned int			sysctl_smcr_buf_type;
> > 	int				sysctl_smcr_testlink_time;
> > diff --git a/include/net/smc.h b/include/net/smc.h
> > index db84e4e35080..844f98a6296a 100644
> > --- a/include/net/smc.h
> > +++ b/include/net/smc.h
> > @@ -18,6 +18,8 @@
> > #include "linux/ism.h"
> > 
> > struct sock;
> > +struct tcp_sock;
> > +struct inet_request_sock;
> > 
> > #define SMC_MAX_PNETID_LEN	16	/* Max. length of PNET id */
> > 
> > @@ -97,4 +99,55 @@ struct smcd_dev {
> > 	u8 going_away : 1;
> > };
> > 
> > +#define  SMC_OPS_NAME_MAX 16
> > +
> > +enum {
> > +	/* ops can be inherit from init_net */
> > +	SMC_OPS_FLAG_INHERITABLE = 0x1,
> > +
> > +	SMC_OPS_ALL_FLAGS = SMC_OPS_FLAG_INHERITABLE,
> > +};
> > +
> > +struct smc_ops {
> > +	/* priavte */
> > +
> > +	struct list_head list;
> > +	struct module *owner;
> > +
> > +	/* public */
> > +
> > +	/* unique name */
> > +	char name[SMC_OPS_NAME_MAX];
> > +	int flags;
> > +
> > +	/* Invoked before computing SMC option for SYN packets.
> > +	 * We can control whether to set SMC options by returning varios value.
> > +	 * Return 0 to disable SMC, or return any other value to enable it.
> > +	 */
> > +	int (*set_option)(struct tcp_sock *tp);
> > +
> > +	/* Invoked before Set up SMC options for SYN-ACK packets
> > +	 * We can control whether to respond SMC options by returning varios
> > +	 * value. Return 0 to disable SMC, or return any other value to enable
> > +	 * it.
> > +	 */
> > +	int (*set_option_cond)(const struct tcp_sock *tp,
> > +			       struct inet_request_sock *ireq);
> > +};
> > +
> > +#if IS_ENABLED(CONFIG_SMC_OPS)
> > +#define smc_call_retops(init_val, sk, func, ...) ({	\
> > +	typeof(init_val) __ret = (init_val);		\
> > +	struct smc_ops *ops;				\
> > +	rcu_read_lock();				\
> > +	ops = READ_ONCE(sock_net(sk)->smc.ops);		\
> > +	if (ops && ops->func)				\
> > +		__ret = ops->func(__VA_ARGS__);		\
> > +	rcu_read_unlock();				\
> > +	__ret;						\
> > +})
> > +#else
> > +#define smc_call_retops(init_val, ...) (init_val)
> > +#endif /* CONFIG_SMC_OPS */
> > +
> > #endif	/* _SMC_H */
> > diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> > index 0e5b9a654254..f62e30b4ffc8 100644
> > --- a/net/ipv4/tcp_output.c
> > +++ b/net/ipv4/tcp_output.c
> > @@ -40,6 +40,7 @@
> > #include <net/tcp.h>
> > #include <net/mptcp.h>
> > #include <net/proto_memory.h>
> > +#include <net/smc.h>
> > 
> > #include <linux/compiler.h>
> > #include <linux/gfp.h>
> > @@ -759,14 +760,18 @@ static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp,
> > 	mptcp_options_write(th, ptr, tp, opts);
> > }
> > 
> > -static void smc_set_option(const struct tcp_sock *tp,
> > +static void smc_set_option(struct tcp_sock *tp,
> > 			   struct tcp_out_options *opts,
> > 			   unsigned int *remaining)
> > {
> > #if IS_ENABLED(CONFIG_SMC)
> > +	struct sock *sk = &tp->inet_conn.icsk_inet.sk;
> > 	if (static_branch_unlikely(&tcp_have_smc)) {
> > 		if (tp->syn_smc) {
> > -			if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
> > +			tp->syn_smc = !!smc_call_retops(1, sk, set_option, tp);
> > +			/* re-check syn_smc */
> > +			if (tp->syn_smc &&
> > +			    *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
> > 				opts->options |= OPTION_SMC;
> > 				*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
> > 			}
> > @@ -776,14 +781,19 @@ static void smc_set_option(const struct tcp_sock *tp,
> > }
> > 
> > static void smc_set_option_cond(const struct tcp_sock *tp,
> > -				const struct inet_request_sock *ireq,
> > +				struct inet_request_sock *ireq,
> > 				struct tcp_out_options *opts,
> > 				unsigned int *remaining)
> > {
> > #if IS_ENABLED(CONFIG_SMC)
> > +	const struct sock *sk = &tp->inet_conn.icsk_inet.sk;
> > 	if (static_branch_unlikely(&tcp_have_smc)) {
> > 		if (tp->syn_smc && ireq->smc_ok) {
> > -			if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
> > +			ireq->smc_ok = !!smc_call_retops(1, sk, set_option_cond,
> > +							 tp, ireq);
> > +			/* re-check smc_ok */
> > +			if (ireq->smc_ok &&
> > +			    *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
> > 				opts->options |= OPTION_SMC;
> > 				*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
> > 			}
> > diff --git a/net/smc/Kconfig b/net/smc/Kconfig
> > index ba5e6a2dd2fd..27f35064d04c 100644
> > --- a/net/smc/Kconfig
> > +++ b/net/smc/Kconfig
> > @@ -33,3 +33,15 @@ config SMC_LO
> > 	  of architecture or hardware.
> > 
> > 	  if unsure, say N.
> > +
> > +config SMC_OPS
> > +	bool "Generic hook for SMC subsystem"
> > +	depends on SMC && BPF_SYSCALL
> > +	default n
> > +	help
> > +	  SMC_OPS enables support to register generic hook via eBPF programs
> > +	  for SMC subsystem. eBPF programs offer much greater flexibility
> > +	  in modifying the behavior of the SMC protocol stack compared
> > +	  to a complete kernel-based approach.
> > +
> > +	  if unsure, say N.
> 
> I'm still not completely satisfied with the name smc_ops. Since this
> will be the API for our users, we need to be carefull on the name.

If I may jump in with a suggestion here:
At first glance, I'd expect SMC_OPS to offer OPS as a general API.
The description, however, suggests that this adds "control points" or hooks
in the SMC code that eBPF programs can use to tweak the protocol's
behavior. Exclusively eBPF programs, it seems.

So how about naming this SMC_EBPF_HOOKS or SMC_EBPF_SUPPORT?

Just my 2ct,
Gerd

> 
> It seems like you're aiming to define a common set of operations, but
> the implementation appears to be intertwined with BPF. If this is
> intended to be a common interface, and if we are using another operation,
> there shouldn’t be a need to hold a BPF reference.
> 
> As your 'help' sugguest, What about smc_hook ?
> 
> Best regards,
> Dust
> 
>
diff mbox series

Patch

diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h
index fc752a50f91b..81b3fdb39cd2 100644
--- a/include/net/netns/smc.h
+++ b/include/net/netns/smc.h
@@ -17,6 +17,9 @@  struct netns_smc {
 #ifdef CONFIG_SYSCTL
 	struct ctl_table_header		*smc_hdr;
 #endif
+#if IS_ENABLED(CONFIG_SMC_OPS)
+	struct smc_ops __rcu		*ops;
+#endif /* CONFIG_SMC_OPS */
 	unsigned int			sysctl_autocorking_size;
 	unsigned int			sysctl_smcr_buf_type;
 	int				sysctl_smcr_testlink_time;
diff --git a/include/net/smc.h b/include/net/smc.h
index db84e4e35080..844f98a6296a 100644
--- a/include/net/smc.h
+++ b/include/net/smc.h
@@ -18,6 +18,8 @@ 
 #include "linux/ism.h"
 
 struct sock;
+struct tcp_sock;
+struct inet_request_sock;
 
 #define SMC_MAX_PNETID_LEN	16	/* Max. length of PNET id */
 
@@ -97,4 +99,55 @@  struct smcd_dev {
 	u8 going_away : 1;
 };
 
+#define  SMC_OPS_NAME_MAX 16
+
+enum {
+	/* ops can be inherited from init_net by a newly created netns */
+	SMC_OPS_FLAG_INHERITABLE = 0x1,
+
+	SMC_OPS_ALL_FLAGS = SMC_OPS_FLAG_INHERITABLE, /* mask of all flags */
+};
+
+struct smc_ops {
+	/* private: registry linkage and owning module */
+
+	struct list_head list;
+	struct module *owner;
+
+	/* public */
+
+	/* unique name; looked up via smc_ops_find_by_name() */
+	char name[SMC_OPS_NAME_MAX];
+	int flags;	/* SMC_OPS_FLAG_* bits */
+
+	/* Invoked before computing SMC option for SYN packets.
+	 * The return value controls whether the SMC option is set.
+	 * Return 0 to disable SMC, or return any other value to enable it.
+	 */
+	int (*set_option)(struct tcp_sock *tp);
+
+	/* Invoked before setting up SMC options for SYN-ACK packets.
+	 * The return value controls whether we respond with SMC options.
+	 * Return 0 to disable SMC, or return any other value to enable
+	 * it.
+	 */
+	int (*set_option_cond)(const struct tcp_sock *tp,
+			       struct inet_request_sock *ireq);
+};
+
+#if IS_ENABLED(CONFIG_SMC_OPS)
+#define smc_call_retops(init_val, sk, func, ...) ({	\
+	typeof(init_val) __ret = (init_val);		\
+	struct smc_ops *__ops; /* __ prefix: no clash with caller args */ \
+	rcu_read_lock();				\
+	__ops = rcu_dereference(sock_net(sk)->smc.ops);	\
+	if (__ops && __ops->func)			\
+		__ret = __ops->func(__VA_ARGS__);	\
+	rcu_read_unlock();				\
+	__ret; /* init_val unless the hook exists and ran */ \
+})
+#else /* no hooks: yield init_val, but still consume sk (no unused warning) */
+#define smc_call_retops(init_val, sk, ...) ({ (void)(sk); (init_val); })
+#endif /* CONFIG_SMC_OPS */
+
 #endif	/* _SMC_H */
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0e5b9a654254..f62e30b4ffc8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -40,6 +40,7 @@ 
 #include <net/tcp.h>
 #include <net/mptcp.h>
 #include <net/proto_memory.h>
+#include <net/smc.h>
 
 #include <linux/compiler.h>
 #include <linux/gfp.h>
@@ -759,14 +760,18 @@  static void tcp_options_write(struct tcphdr *th, struct tcp_sock *tp,
 	mptcp_options_write(th, ptr, tp, opts);
 }
 
-static void smc_set_option(const struct tcp_sock *tp,
+static void smc_set_option(struct tcp_sock *tp,
 			   struct tcp_out_options *opts,
 			   unsigned int *remaining)
 {
 #if IS_ENABLED(CONFIG_SMC)
+	struct sock *sk = &tp->inet_conn.icsk_inet.sk;
 	if (static_branch_unlikely(&tcp_have_smc)) {
 		if (tp->syn_smc) {
-			if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+			tp->syn_smc = !!smc_call_retops(1, sk, set_option, tp);
+			/* re-check syn_smc */
+			if (tp->syn_smc &&
+			    *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
 				opts->options |= OPTION_SMC;
 				*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
 			}
@@ -776,14 +781,19 @@  static void smc_set_option(const struct tcp_sock *tp,
 }
 
 static void smc_set_option_cond(const struct tcp_sock *tp,
-				const struct inet_request_sock *ireq,
+				struct inet_request_sock *ireq,
 				struct tcp_out_options *opts,
 				unsigned int *remaining)
 {
 #if IS_ENABLED(CONFIG_SMC)
+	const struct sock *sk = &tp->inet_conn.icsk_inet.sk;
 	if (static_branch_unlikely(&tcp_have_smc)) {
 		if (tp->syn_smc && ireq->smc_ok) {
-			if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
+			ireq->smc_ok = !!smc_call_retops(1, sk, set_option_cond,
+							 tp, ireq);
+			/* re-check smc_ok */
+			if (ireq->smc_ok &&
+			    *remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) {
 				opts->options |= OPTION_SMC;
 				*remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED;
 			}
diff --git a/net/smc/Kconfig b/net/smc/Kconfig
index ba5e6a2dd2fd..27f35064d04c 100644
--- a/net/smc/Kconfig
+++ b/net/smc/Kconfig
@@ -33,3 +33,15 @@  config SMC_LO
 	  of architecture or hardware.
 
 	  if unsure, say N.
+
+config SMC_OPS
+	bool "Generic hook for SMC subsystem"
+	depends on SMC && BPF_SYSCALL
+	default n
+	help
+	  SMC_OPS enables support to register generic hook via eBPF programs
+	  for SMC subsystem. eBPF programs offer much greater flexibility
+	  in modifying the behavior of the SMC protocol stack compared
+	  to a complete kernel-based approach.
+
+	  if unsure, say N.
diff --git a/net/smc/Makefile b/net/smc/Makefile
index 60f1c87d5212..5dd706b2927a 100644
--- a/net/smc/Makefile
+++ b/net/smc/Makefile
@@ -7,3 +7,4 @@  smc-y += smc_cdc.o smc_tx.o smc_rx.o smc_close.o smc_ism.o smc_netlink.o smc_sta
 smc-y += smc_tracepoint.o smc_inet.o
 smc-$(CONFIG_SYSCTL) += smc_sysctl.o
 smc-$(CONFIG_SMC_LO) += smc_loopback.o
+smc-$(CONFIG_SMC_OPS) += smc_ops.o
\ No newline at end of file
diff --git a/net/smc/smc_ops.c b/net/smc/smc_ops.c
new file mode 100644
index 000000000000..86c71f6c5ea6
--- /dev/null
+++ b/net/smc/smc_ops.c
@@ -0,0 +1,53 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ *  Generic hook for SMC subsystem.
+ *
+ *  Copyright IBM Corp. 2016
+ *  Copyright (c) 2024, Alibaba Inc.
+ *
+ *  Author: D. Wythe <alibuda@linux.alibaba.com>
+ */
+
+#include <linux/rculist.h>
+
+#include "smc_ops.h"
+
+static DEFINE_SPINLOCK(smc_ops_list_lock);
+static LIST_HEAD(smc_ops_list);
+
+int smc_ops_reg(struct smc_ops *ops)
+{
+	int rc = -EEXIST;
+
+	spin_lock(&smc_ops_list_lock);
+	/* names are unique: link ops in only if no entry carries this name */
+	if (!smc_ops_find_by_name(ops->name)) {
+		list_add_tail_rcu(&ops->list, &smc_ops_list);
+		rc = 0;
+	}
+	spin_unlock(&smc_ops_list_lock);
+	return rc;
+}
+
+void smc_ops_unreg(struct smc_ops *ops)
+{
+	spin_lock(&smc_ops_list_lock);
+	list_del_rcu(&ops->list);	/* unlink; list writers are serialized */
+	spin_unlock(&smc_ops_list_lock);
+
+	/* Wait for all in-flight RCU readers to complete before returning */
+	synchronize_rcu();
+}
+
+struct smc_ops *smc_ops_find_by_name(const char *name)
+{
+	struct smc_ops *cur;
+
+	list_for_each_entry_rcu(cur, &smc_ops_list, list) {
+		if (!strcmp(cur->name, name))	/* exact string match */
+			return cur;
+	}
+	return NULL;	/* no registered ops carries that name */
+}
diff --git a/net/smc/smc_ops.h b/net/smc/smc_ops.h
new file mode 100644
index 000000000000..24f094464b45
--- /dev/null
+++ b/net/smc/smc_ops.h
@@ -0,0 +1,28 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  Shared Memory Communications over RDMA (SMC-R) and RoCE
+ *
+ *  Generic hook for SMC subsystem.
+ *
+ *  Copyright IBM Corp. 2016
+ *  Copyright (c) 2024, Alibaba Inc.
+ *
+ *  Author: D. Wythe <alibuda@linux.alibaba.com>
+ */
+
+#ifndef __SMC_OPS
+#define __SMC_OPS
+
+#include <net/smc.h>
+
+int smc_ops_reg(struct smc_ops *ops);	/* 0, or -EEXIST on duplicate name */
+void smc_ops_unreg(struct smc_ops *ops);	/* unlink, then synchronize_rcu() */
+
+/* Find ops by the target name, which is required to be a C string.
+ * Return NULL if no such ops was found; otherwise, return a valid ops.
+ *
+ * Note: Caller MUST ensure it is invoked under rcu_read_lock.
+ */
+struct smc_ops *smc_ops_find_by_name(const char *name);
+
+#endif /* __SMC_OPS */
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index 2fab6456f765..539058992adc 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -18,6 +18,7 @@ 
 #include "smc_core.h"
 #include "smc_llc.h"
 #include "smc_sysctl.h"
+#include "smc_ops.h"
 
 static int min_sndbuf = SMC_BUF_MIN_SIZE;
 static int min_rcvbuf = SMC_BUF_MIN_SIZE;
@@ -30,6 +31,69 @@  static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX;
 static int conns_per_lgr_min = SMC_CONN_PER_LGR_MIN;
 static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX;
 
+#if IS_ENABLED(CONFIG_SMC_OPS)
+/* Swap @net's smc ops for the one named @name; NULL or "" clears them. */
+static int smc_net_replace_smc_ops(struct net *net, const char *name)
+{
+	struct smc_ops *ops = NULL;
+	int ret = 0;
+
+	rcu_read_lock();
+	/* NULL or empty name asks to clear the current ops */
+	if (name && name[0]) {
+		ops = smc_ops_find_by_name(name);
+		if (!ops) {
+			ret = -EINVAL;
+			goto out;
+		}
+		/* already installed: nothing to swap, take no extra ref */
+		if (ops == rcu_dereference(net->smc.ops))
+			goto out;
+		if (!bpf_try_module_get(ops, ops->owner)) {
+			ret = -EBUSY;
+			goto out;
+		}
+	}
+	/* xchg old ops with the new one atomically; the slot is __rcu */
+	ops = unrcu_pointer(xchg(&net->smc.ops, RCU_INITIALIZER(ops)));
+	/* release the reference that was held for the old ops, if any */
+	if (ops)
+		bpf_module_put(ops, ops->owner);
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
+/* sysctl "ops" handler: report the netns' current ops name, or switch it */
+static int proc_smc_ops(const struct ctl_table *ctl, int write,
+			void *buffer, size_t *lenp, loff_t *ppos)
+{
+	/* ctl->data is the address of a netns' smc.ops; map it to the net */
+	struct net *net = container_of(ctl->data, struct net, smc.ops);
+	char val[SMC_OPS_NAME_MAX];
+	const struct ctl_table tbl = {	/* lets proc_dostring() act on val */
+		.maxlen = SMC_OPS_NAME_MAX,
+		.data = val,
+	};
+	struct smc_ops *cur;
+	int rc;
+
+	/* seed val with the active name; empty string when no ops is set */
+	rcu_read_lock();
+	cur = rcu_dereference(net->smc.ops);
+	if (!cur)
+		val[0] = '\0';
+	else
+		memcpy(val, cur->name, sizeof(cur->name));
+	rcu_read_unlock();
+
+	rc = proc_dostring(&tbl, write, buffer, lenp, ppos);
+	if (rc || !write)
+		return rc;
+	return smc_net_replace_smc_ops(net, val);
+}
+#endif /* CONFIG_SMC_OPS */
+
 static struct ctl_table smc_table[] = {
 	{
 		.procname       = "autocorking_size",
@@ -99,6 +163,15 @@  static struct ctl_table smc_table[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
+#if IS_ENABLED(CONFIG_SMC_OPS)
+	{
+		.procname	= "ops",
+		.data		= &init_net.smc.ops,
+		.mode		= 0644,
+		.maxlen		= SMC_OPS_NAME_MAX,
+		.proc_handler	= proc_smc_ops,
+	},
+#endif /* CONFIG_SMC_OPS */
 };
 
 int __net_init smc_sysctl_net_init(struct net *net)
@@ -109,6 +182,16 @@  int __net_init smc_sysctl_net_init(struct net *net)
 	table = smc_table;
 	if (!net_eq(net, &init_net)) {
 		int i;
+#if IS_ENABLED(CONFIG_SMC_OPS)
+		struct smc_ops *ops;
+
+		rcu_read_lock();
+		ops = rcu_dereference(init_net.smc.ops);
+		if (ops && ops->flags & SMC_OPS_FLAG_INHERITABLE &&
+		    bpf_try_module_get(ops, ops->owner))
+			rcu_assign_pointer(net->smc.ops, ops);
+		rcu_read_unlock();
+#endif /* CONFIG_SMC_OPS */
 
 		table = kmemdup(table, sizeof(smc_table), GFP_KERNEL);
 		if (!table)
@@ -139,6 +222,9 @@  int __net_init smc_sysctl_net_init(struct net *net)
 	if (!net_eq(net, &init_net))
 		kfree(table);
 err_alloc:
+#if IS_ENABLED(CONFIG_SMC_OPS)
+	smc_net_replace_smc_ops(net, NULL);
+#endif /* CONFIG_SMC_OPS */
 	return -ENOMEM;
 }
 
@@ -148,6 +234,10 @@  void __net_exit smc_sysctl_net_exit(struct net *net)
 
 	table = net->smc.smc_hdr->ctl_table_arg;
 	unregister_net_sysctl_table(net->smc.smc_hdr);
+#if IS_ENABLED(CONFIG_SMC_OPS)
+	smc_net_replace_smc_ops(net, NULL);
+#endif /* CONFIG_SMC_OPS */
+
 	if (!net_eq(net, &init_net))
 		kfree(table);
 }