diff mbox series

[RFC,v3,iproute2-next] devlink: Add shared memory pool eswitch attribute

Message ID 20240306232922.8249-1-witu@nvidia.com (mailing list archive)
State RFC
Delegated to: David Ahern
Headers show
Series [RFC,v3,iproute2-next] devlink: Add shared memory pool eswitch attribute | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

William Tu March 6, 2024, 11:29 p.m. UTC
Add eswitch attribute spool_size for shared memory pool size.

When using switchdev mode, the representor ports handles the slow path
traffic, the traffic that can't be offloaded will be redirected to the
representor port for processing. Memory consumption of the representor
port's rx buffer can grow to several GB when scaling to 1k VFs reps.
For example, in mlx5 driver, each RQ, with a typical 1K descriptors,
consumes 3MB of DMA memory for packet buffer in WQEs, and with four
channels, it consumes 4 * 3MB * 1024 = 12GB of memory. And since rep
ports are for slow path traffic, most of these rx DMA memory are idle.

Add spool_size configuration, allowing multiple representor ports
to share a rx memory buffer pool. When enabled, individual representor
doesn't need to allocate its dedicated rx buffer, but just pointing
its rq to the memory pool. This could make the memory being better
utilized. The spool_size represents the number of bytes of the memory
pool. Users can adjust it based on how many reps, total system
memory, or performance expectation.

An example use case:
$ devlink dev eswitch set pci/0000:08:00.0 mode switchdev \
  spool-size 4096000
$ devlink dev eswitch show pci/0000:08:00.0
  pci/0000:08:00.0: mode legacy inline-mode none encap-mode basic \
  spool-size 4096000

Disable the shared memory pool by setting spool_size to 0.

Signed-off-by: William Tu <witu@nvidia.com>
---
v3:
- change to 1 attributes and rename to spool-size

v2: feedback from Stephen
- add man page, send to iproute2-next
---
 devlink/devlink.c            | 25 +++++++++++++++++++++++--
 include/uapi/linux/devlink.h |  1 +
 man/man8/devlink-dev.8       |  6 ++++++
 3 files changed, 30 insertions(+), 2 deletions(-)

Comments

Jiri Pirko March 7, 2024, 8:36 a.m. UTC | #1
Thu, Mar 07, 2024 at 12:29:22AM CET, witu@nvidia.com wrote:
>Add eswitch attribute spool_size for shared memory pool size.
>
>When using switchdev mode, the representor ports handles the slow path
>traffic, the traffic that can't be offloaded will be redirected to the
>representor port for processing. Memory consumption of the representor
>port's rx buffer can grow to several GB when scaling to 1k VFs reps.
>For example, in mlx5 driver, each RQ, with a typical 1K descriptors,
>consumes 3MB of DMA memory for packet buffer in WQEs, and with four
>channels, it consumes 4 * 3MB * 1024 = 12GB of memory. And since rep
>ports are for slow path traffic, most of these rx DMA memory are idle.
>
>Add spool_size configuration, allowing multiple representor ports
>to share a rx memory buffer pool. When enabled, individual representor
>doesn't need to allocate its dedicated rx buffer, but just pointing
>its rq to the memory pool. This could make the memory being better
>utilized. The spool_size represents the number of bytes of the memory
>pool. Users can adjust it based on how many reps, total system
>memory, or performance expectation.
>
>An example use case:
>$ devlink dev eswitch set pci/0000:08:00.0 mode switchdev \
>  spool-size 4096000
>$ devlink dev eswitch show pci/0000:08:00.0
>  pci/0000:08:00.0: mode legacy inline-mode none encap-mode basic \
>  spool-size 4096000
>
>Disable the shared memory pool by setting spool_size to 0.
>
>Signed-off-by: William Tu <witu@nvidia.com>
>---
>v3:
>- change to 1 attributes and rename to spool-size
>
>v2: feedback from Stephen
>- add man page, send to iproute2-next
>---
> devlink/devlink.c            | 25 +++++++++++++++++++++++--
> include/uapi/linux/devlink.h |  1 +
> man/man8/devlink-dev.8       |  6 ++++++
> 3 files changed, 30 insertions(+), 2 deletions(-)
>
>diff --git a/devlink/devlink.c b/devlink/devlink.c
>index dbeb6e397e8e..5ad789caa934 100644
>--- a/devlink/devlink.c
>+++ b/devlink/devlink.c
>@@ -309,6 +309,7 @@ static int ifname_map_update(struct ifname_map *ifname_map, const char *ifname)
> #define DL_OPT_PORT_FN_RATE_TX_PRIORITY	BIT(55)
> #define DL_OPT_PORT_FN_RATE_TX_WEIGHT	BIT(56)
> #define DL_OPT_PORT_FN_CAPS	BIT(57)
>+#define DL_OPT_ESWITCH_SPOOL_SIZE	BIT(58)
> 
> struct dl_opts {
> 	uint64_t present; /* flags of present items */
>@@ -375,6 +376,7 @@ struct dl_opts {
> 	const char *linecard_type;
> 	bool selftests_opt[DEVLINK_ATTR_SELFTEST_ID_MAX + 1];
> 	struct nla_bitfield32 port_fn_caps;
>+	uint32_t eswitch_spool_size;
> };
> 
> struct dl {
>@@ -630,6 +632,7 @@ static const enum mnl_attr_data_type devlink_policy[DEVLINK_ATTR_MAX + 1] = {
> 	[DEVLINK_ATTR_ESWITCH_MODE] = MNL_TYPE_U16,
> 	[DEVLINK_ATTR_ESWITCH_INLINE_MODE] = MNL_TYPE_U8,
> 	[DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = MNL_TYPE_U8,
>+	[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE] = MNL_TYPE_U32,
> 	[DEVLINK_ATTR_DPIPE_TABLES] = MNL_TYPE_NESTED,
> 	[DEVLINK_ATTR_DPIPE_TABLE] = MNL_TYPE_NESTED,
> 	[DEVLINK_ATTR_DPIPE_TABLE_NAME] = MNL_TYPE_STRING,
>@@ -1672,6 +1675,7 @@ static const struct dl_args_metadata dl_args_required[] = {
> 	{DL_OPT_LINECARD,	      "Linecard index expected."},
> 	{DL_OPT_LINECARD_TYPE,	      "Linecard type expected."},
> 	{DL_OPT_SELFTESTS,            "Test name is expected"},
>+	{DL_OPT_ESWITCH_SPOOL_SIZE,   "E-Switch shared memory pool size expected."},
> };
> 
> static int dl_args_finding_required_validate(uint64_t o_required,
>@@ -1895,6 +1899,13 @@ static int dl_argv_parse(struct dl *dl, uint64_t o_required,
> 			if (err)
> 				return err;
> 			o_found |= DL_OPT_ESWITCH_ENCAP_MODE;
>+		} else if (dl_argv_match(dl, "spool-size") &&
>+			   (o_all & DL_OPT_ESWITCH_SPOOL_SIZE)) {
>+			dl_arg_inc(dl);
>+			err = dl_argv_uint32_t(dl, &opts->eswitch_spool_size);
>+			if (err)
>+				return err;
>+			o_found |= DL_OPT_ESWITCH_SPOOL_SIZE;
> 		} else if (dl_argv_match(dl, "path") &&
> 			   (o_all & DL_OPT_RESOURCE_PATH)) {
> 			dl_arg_inc(dl);
>@@ -2547,6 +2558,9 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl)
> 	if (opts->present & DL_OPT_ESWITCH_ENCAP_MODE)
> 		mnl_attr_put_u8(nlh, DEVLINK_ATTR_ESWITCH_ENCAP_MODE,
> 				opts->eswitch_encap_mode);
>+	if (opts->present & DL_OPT_ESWITCH_SPOOL_SIZE)
>+		mnl_attr_put_u32(nlh, DEVLINK_ATTR_ESWITCH_SPOOL_SIZE,
>+				 opts->eswitch_spool_size);
> 	if ((opts->present & DL_OPT_RESOURCE_PATH) && opts->resource_id_valid)
> 		mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_ID,
> 				 opts->resource_id);
>@@ -2707,6 +2721,7 @@ static void cmd_dev_help(void)
> 	pr_err("       devlink dev eswitch set DEV [ mode { legacy | switchdev } ]\n");
> 	pr_err("                               [ inline-mode { none | link | network | transport } ]\n");
> 	pr_err("                               [ encap-mode { none | basic } ]\n");
>+	pr_err("                               [ spool-size { SIZE } ]\n");
> 	pr_err("       devlink dev eswitch show DEV\n");
> 	pr_err("       devlink dev param set DEV name PARAMETER value VALUE cmode { permanent | driverinit | runtime }\n");
> 	pr_err("       devlink dev param show [DEV name PARAMETER]\n");
>@@ -3194,7 +3209,12 @@ static void pr_out_eswitch(struct dl *dl, struct nlattr **tb)
> 			     eswitch_encap_mode_name(mnl_attr_get_u8(
> 				    tb[DEVLINK_ATTR_ESWITCH_ENCAP_MODE])));
> 	}
>-
>+	if (tb[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE]) {
>+		check_indent_newline(dl);
>+		print_uint(PRINT_ANY, "spool-size", "spool-size %u",
>+			   mnl_attr_get_u32(
>+				    tb[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE]));
>+	}
> 	pr_out_handle_end(dl);
> }
> 
>@@ -3239,7 +3259,8 @@ static int cmd_dev_eswitch_set(struct dl *dl)
> 	err = dl_argv_parse(dl, DL_OPT_HANDLE,
> 			    DL_OPT_ESWITCH_MODE |
> 			    DL_OPT_ESWITCH_INLINE_MODE |
>-			    DL_OPT_ESWITCH_ENCAP_MODE);
>+			    DL_OPT_ESWITCH_ENCAP_MODE |
>+			    DL_OPT_ESWITCH_SPOOL_SIZE);
> 	if (err)
> 		return err;
> 
>diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
>index e77170199815..c750e29a1c5c 100644
>--- a/include/uapi/linux/devlink.h
>+++ b/include/uapi/linux/devlink.h
>@@ -614,6 +614,7 @@ enum devlink_attr {
> 
> 	DEVLINK_ATTR_REGION_DIRECT,		/* flag */
> 
>+	DEVLINK_ATTR_ESWITCH_SPOOL_SIZE,	/* u32 */
> 	/* add new attributes above here, update the policy in devlink.c */
> 
> 	__DEVLINK_ATTR_MAX,
>diff --git a/man/man8/devlink-dev.8 b/man/man8/devlink-dev.8
>index e9d091df48d8..081cc8740f8b 100644
>--- a/man/man8/devlink-dev.8
>+++ b/man/man8/devlink-dev.8
>@@ -34,6 +34,8 @@ devlink-dev \- devlink device configuration
> .BR inline-mode " { " none " | " link " | " network " | " transport " } "
> ] [
> .BR encap-mode " { " none " | " basic " } "
>+] [
>+.BR spool-size " { SIZE } "
> ]
> 
> .ti -8
>@@ -151,6 +153,10 @@ Set eswitch encapsulation support
> .I basic
> - Enable encapsulation support
> 
>+.TP
>+.BR spool-size " SIZE"
>+Set the rx shared memory pool size in bytes.

Be more verbose please. Describe the cmd line option properly.


>+
> .SS devlink dev param set  - set new value to devlink device configuration parameter
> 
> .TP
>-- 
>2.38.1
>
>
Jiri Pirko March 7, 2024, 8:37 a.m. UTC | #2
Thu, Mar 07, 2024 at 12:29:22AM CET, witu@nvidia.com wrote:
>Add eswitch attribute spool_size for shared memory pool size.
>
>When using switchdev mode, the representor ports handles the slow path
>traffic, the traffic that can't be offloaded will be redirected to the
>representor port for processing. Memory consumption of the representor
>port's rx buffer can grow to several GB when scaling to 1k VFs reps.
>For example, in mlx5 driver, each RQ, with a typical 1K descriptors,
>consumes 3MB of DMA memory for packet buffer in WQEs, and with four
>channels, it consumes 4 * 3MB * 1024 = 12GB of memory. And since rep
>ports are for slow path traffic, most of these rx DMA memory are idle.
>
>Add spool_size configuration, allowing multiple representor ports
>to share a rx memory buffer pool. When enabled, individual representor
>doesn't need to allocate its dedicated rx buffer, but just pointing
>its rq to the memory pool. This could make the memory being better
>utilized. The spool_size represents the number of bytes of the memory
>pool. Users can adjust it based on how many reps, total system
>memory, or performance expectation.
>
>An example use case:
>$ devlink dev eswitch set pci/0000:08:00.0 mode switchdev \
>  spool-size 4096000
>$ devlink dev eswitch show pci/0000:08:00.0
>  pci/0000:08:00.0: mode legacy inline-mode none encap-mode basic \
>  spool-size 4096000
>
>Disable the shared memory pool by setting spool_size to 0.
>
>Signed-off-by: William Tu <witu@nvidia.com>
>---
>v3:
>- change to 1 attributes and rename to spool-size
>
>v2: feedback from Stephen
>- add man page, send to iproute2-next
>---
> devlink/devlink.c            | 25 +++++++++++++++++++++++--
> include/uapi/linux/devlink.h |  1 +
> man/man8/devlink-dev.8       |  6 ++++++
> 3 files changed, 30 insertions(+), 2 deletions(-)
>
>diff --git a/devlink/devlink.c b/devlink/devlink.c
>index dbeb6e397e8e..5ad789caa934 100644
>--- a/devlink/devlink.c
>+++ b/devlink/devlink.c
>@@ -309,6 +309,7 @@ static int ifname_map_update(struct ifname_map *ifname_map, const char *ifname)
> #define DL_OPT_PORT_FN_RATE_TX_PRIORITY	BIT(55)
> #define DL_OPT_PORT_FN_RATE_TX_WEIGHT	BIT(56)
> #define DL_OPT_PORT_FN_CAPS	BIT(57)
>+#define DL_OPT_ESWITCH_SPOOL_SIZE	BIT(58)
> 
> struct dl_opts {
> 	uint64_t present; /* flags of present items */
>@@ -375,6 +376,7 @@ struct dl_opts {
> 	const char *linecard_type;
> 	bool selftests_opt[DEVLINK_ATTR_SELFTEST_ID_MAX + 1];
> 	struct nla_bitfield32 port_fn_caps;
>+	uint32_t eswitch_spool_size;
> };
> 
> struct dl {
>@@ -630,6 +632,7 @@ static const enum mnl_attr_data_type devlink_policy[DEVLINK_ATTR_MAX + 1] = {
> 	[DEVLINK_ATTR_ESWITCH_MODE] = MNL_TYPE_U16,
> 	[DEVLINK_ATTR_ESWITCH_INLINE_MODE] = MNL_TYPE_U8,
> 	[DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = MNL_TYPE_U8,
>+	[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE] = MNL_TYPE_U32,
> 	[DEVLINK_ATTR_DPIPE_TABLES] = MNL_TYPE_NESTED,
> 	[DEVLINK_ATTR_DPIPE_TABLE] = MNL_TYPE_NESTED,
> 	[DEVLINK_ATTR_DPIPE_TABLE_NAME] = MNL_TYPE_STRING,
>@@ -1672,6 +1675,7 @@ static const struct dl_args_metadata dl_args_required[] = {
> 	{DL_OPT_LINECARD,	      "Linecard index expected."},
> 	{DL_OPT_LINECARD_TYPE,	      "Linecard type expected."},
> 	{DL_OPT_SELFTESTS,            "Test name is expected"},
>+	{DL_OPT_ESWITCH_SPOOL_SIZE,   "E-Switch shared memory pool size expected."},
> };
> 
> static int dl_args_finding_required_validate(uint64_t o_required,
>@@ -1895,6 +1899,13 @@ static int dl_argv_parse(struct dl *dl, uint64_t o_required,
> 			if (err)
> 				return err;
> 			o_found |= DL_OPT_ESWITCH_ENCAP_MODE;
>+		} else if (dl_argv_match(dl, "spool-size") &&
>+			   (o_all & DL_OPT_ESWITCH_SPOOL_SIZE)) {
>+			dl_arg_inc(dl);
>+			err = dl_argv_uint32_t(dl, &opts->eswitch_spool_size);
>+			if (err)
>+				return err;
>+			o_found |= DL_OPT_ESWITCH_SPOOL_SIZE;
> 		} else if (dl_argv_match(dl, "path") &&
> 			   (o_all & DL_OPT_RESOURCE_PATH)) {
> 			dl_arg_inc(dl);
>@@ -2547,6 +2558,9 @@ static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl)
> 	if (opts->present & DL_OPT_ESWITCH_ENCAP_MODE)
> 		mnl_attr_put_u8(nlh, DEVLINK_ATTR_ESWITCH_ENCAP_MODE,
> 				opts->eswitch_encap_mode);
>+	if (opts->present & DL_OPT_ESWITCH_SPOOL_SIZE)
>+		mnl_attr_put_u32(nlh, DEVLINK_ATTR_ESWITCH_SPOOL_SIZE,
>+				 opts->eswitch_spool_size);
> 	if ((opts->present & DL_OPT_RESOURCE_PATH) && opts->resource_id_valid)
> 		mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_ID,
> 				 opts->resource_id);
>@@ -2707,6 +2721,7 @@ static void cmd_dev_help(void)
> 	pr_err("       devlink dev eswitch set DEV [ mode { legacy | switchdev } ]\n");
> 	pr_err("                               [ inline-mode { none | link | network | transport } ]\n");
> 	pr_err("                               [ encap-mode { none | basic } ]\n");
>+	pr_err("                               [ spool-size { SIZE } ]\n");


SIZE should not be in brackets, it's not an enum selection:
	pr_err("                               [ spool-size SIZE ]\n");
Same in man



> 	pr_err("       devlink dev eswitch show DEV\n");
> 	pr_err("       devlink dev param set DEV name PARAMETER value VALUE cmode { permanent | driverinit | runtime }\n");
> 	pr_err("       devlink dev param show [DEV name PARAMETER]\n");
>@@ -3194,7 +3209,12 @@ static void pr_out_eswitch(struct dl *dl, struct nlattr **tb)
> 			     eswitch_encap_mode_name(mnl_attr_get_u8(
> 				    tb[DEVLINK_ATTR_ESWITCH_ENCAP_MODE])));
> 	}
>-
>+	if (tb[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE]) {
>+		check_indent_newline(dl);
>+		print_uint(PRINT_ANY, "spool-size", "spool-size %u",
>+			   mnl_attr_get_u32(
>+				    tb[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE]));
>+	}
> 	pr_out_handle_end(dl);
> }
> 
>@@ -3239,7 +3259,8 @@ static int cmd_dev_eswitch_set(struct dl *dl)
> 	err = dl_argv_parse(dl, DL_OPT_HANDLE,
> 			    DL_OPT_ESWITCH_MODE |
> 			    DL_OPT_ESWITCH_INLINE_MODE |
>-			    DL_OPT_ESWITCH_ENCAP_MODE);
>+			    DL_OPT_ESWITCH_ENCAP_MODE |
>+			    DL_OPT_ESWITCH_SPOOL_SIZE);
> 	if (err)
> 		return err;
> 
>diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
>index e77170199815..c750e29a1c5c 100644
>--- a/include/uapi/linux/devlink.h
>+++ b/include/uapi/linux/devlink.h
>@@ -614,6 +614,7 @@ enum devlink_attr {
> 
> 	DEVLINK_ATTR_REGION_DIRECT,		/* flag */
> 
>+	DEVLINK_ATTR_ESWITCH_SPOOL_SIZE,	/* u32 */
> 	/* add new attributes above here, update the policy in devlink.c */
> 
> 	__DEVLINK_ATTR_MAX,
>diff --git a/man/man8/devlink-dev.8 b/man/man8/devlink-dev.8
>index e9d091df48d8..081cc8740f8b 100644
>--- a/man/man8/devlink-dev.8
>+++ b/man/man8/devlink-dev.8
>@@ -34,6 +34,8 @@ devlink-dev \- devlink device configuration
> .BR inline-mode " { " none " | " link " | " network " | " transport " } "
> ] [
> .BR encap-mode " { " none " | " basic " } "
>+] [
>+.BR spool-size " { SIZE } "
> ]
> 
> .ti -8
>@@ -151,6 +153,10 @@ Set eswitch encapsulation support
> .I basic
> - Enable encapsulation support
> 
>+.TP
>+.BR spool-size " SIZE"
>+Set the rx shared memory pool size in bytes.
>+
> .SS devlink dev param set  - set new value to devlink device configuration parameter
> 
> .TP
>-- 
>2.38.1
>
>
diff mbox series

Patch

diff --git a/devlink/devlink.c b/devlink/devlink.c
index dbeb6e397e8e..5ad789caa934 100644
--- a/devlink/devlink.c
+++ b/devlink/devlink.c
@@ -309,6 +309,7 @@  static int ifname_map_update(struct ifname_map *ifname_map, const char *ifname)
 #define DL_OPT_PORT_FN_RATE_TX_PRIORITY	BIT(55)
 #define DL_OPT_PORT_FN_RATE_TX_WEIGHT	BIT(56)
 #define DL_OPT_PORT_FN_CAPS	BIT(57)
+#define DL_OPT_ESWITCH_SPOOL_SIZE	BIT(58)
 
 struct dl_opts {
 	uint64_t present; /* flags of present items */
@@ -375,6 +376,7 @@  struct dl_opts {
 	const char *linecard_type;
 	bool selftests_opt[DEVLINK_ATTR_SELFTEST_ID_MAX + 1];
 	struct nla_bitfield32 port_fn_caps;
+	uint32_t eswitch_spool_size;
 };
 
 struct dl {
@@ -630,6 +632,7 @@  static const enum mnl_attr_data_type devlink_policy[DEVLINK_ATTR_MAX + 1] = {
 	[DEVLINK_ATTR_ESWITCH_MODE] = MNL_TYPE_U16,
 	[DEVLINK_ATTR_ESWITCH_INLINE_MODE] = MNL_TYPE_U8,
 	[DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = MNL_TYPE_U8,
+	[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE] = MNL_TYPE_U32,
 	[DEVLINK_ATTR_DPIPE_TABLES] = MNL_TYPE_NESTED,
 	[DEVLINK_ATTR_DPIPE_TABLE] = MNL_TYPE_NESTED,
 	[DEVLINK_ATTR_DPIPE_TABLE_NAME] = MNL_TYPE_STRING,
@@ -1672,6 +1675,7 @@  static const struct dl_args_metadata dl_args_required[] = {
 	{DL_OPT_LINECARD,	      "Linecard index expected."},
 	{DL_OPT_LINECARD_TYPE,	      "Linecard type expected."},
 	{DL_OPT_SELFTESTS,            "Test name is expected"},
+	{DL_OPT_ESWITCH_SPOOL_SIZE,   "E-Switch shared memory pool size expected."},
 };
 
 static int dl_args_finding_required_validate(uint64_t o_required,
@@ -1895,6 +1899,13 @@  static int dl_argv_parse(struct dl *dl, uint64_t o_required,
 			if (err)
 				return err;
 			o_found |= DL_OPT_ESWITCH_ENCAP_MODE;
+		} else if (dl_argv_match(dl, "spool-size") &&
+			   (o_all & DL_OPT_ESWITCH_SPOOL_SIZE)) {
+			dl_arg_inc(dl);
+			err = dl_argv_uint32_t(dl, &opts->eswitch_spool_size);
+			if (err)
+				return err;
+			o_found |= DL_OPT_ESWITCH_SPOOL_SIZE;
 		} else if (dl_argv_match(dl, "path") &&
 			   (o_all & DL_OPT_RESOURCE_PATH)) {
 			dl_arg_inc(dl);
@@ -2547,6 +2558,9 @@  static void dl_opts_put(struct nlmsghdr *nlh, struct dl *dl)
 	if (opts->present & DL_OPT_ESWITCH_ENCAP_MODE)
 		mnl_attr_put_u8(nlh, DEVLINK_ATTR_ESWITCH_ENCAP_MODE,
 				opts->eswitch_encap_mode);
+	if (opts->present & DL_OPT_ESWITCH_SPOOL_SIZE)
+		mnl_attr_put_u32(nlh, DEVLINK_ATTR_ESWITCH_SPOOL_SIZE,
+				 opts->eswitch_spool_size);
 	if ((opts->present & DL_OPT_RESOURCE_PATH) && opts->resource_id_valid)
 		mnl_attr_put_u64(nlh, DEVLINK_ATTR_RESOURCE_ID,
 				 opts->resource_id);
@@ -2707,6 +2721,7 @@  static void cmd_dev_help(void)
 	pr_err("       devlink dev eswitch set DEV [ mode { legacy | switchdev } ]\n");
 	pr_err("                               [ inline-mode { none | link | network | transport } ]\n");
 	pr_err("                               [ encap-mode { none | basic } ]\n");
+	pr_err("                               [ spool-size { SIZE } ]\n");
 	pr_err("       devlink dev eswitch show DEV\n");
 	pr_err("       devlink dev param set DEV name PARAMETER value VALUE cmode { permanent | driverinit | runtime }\n");
 	pr_err("       devlink dev param show [DEV name PARAMETER]\n");
@@ -3194,7 +3209,12 @@  static void pr_out_eswitch(struct dl *dl, struct nlattr **tb)
 			     eswitch_encap_mode_name(mnl_attr_get_u8(
 				    tb[DEVLINK_ATTR_ESWITCH_ENCAP_MODE])));
 	}
-
+	if (tb[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE]) {
+		check_indent_newline(dl);
+		print_uint(PRINT_ANY, "spool-size", "spool-size %u",
+			   mnl_attr_get_u32(
+				    tb[DEVLINK_ATTR_ESWITCH_SPOOL_SIZE]));
+	}
 	pr_out_handle_end(dl);
 }
 
@@ -3239,7 +3259,8 @@  static int cmd_dev_eswitch_set(struct dl *dl)
 	err = dl_argv_parse(dl, DL_OPT_HANDLE,
 			    DL_OPT_ESWITCH_MODE |
 			    DL_OPT_ESWITCH_INLINE_MODE |
-			    DL_OPT_ESWITCH_ENCAP_MODE);
+			    DL_OPT_ESWITCH_ENCAP_MODE |
+			    DL_OPT_ESWITCH_SPOOL_SIZE);
 	if (err)
 		return err;
 
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index e77170199815..c750e29a1c5c 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -614,6 +614,7 @@  enum devlink_attr {
 
 	DEVLINK_ATTR_REGION_DIRECT,		/* flag */
 
+	DEVLINK_ATTR_ESWITCH_SPOOL_SIZE,	/* u32 */
 	/* add new attributes above here, update the policy in devlink.c */
 
 	__DEVLINK_ATTR_MAX,
diff --git a/man/man8/devlink-dev.8 b/man/man8/devlink-dev.8
index e9d091df48d8..081cc8740f8b 100644
--- a/man/man8/devlink-dev.8
+++ b/man/man8/devlink-dev.8
@@ -34,6 +34,8 @@  devlink-dev \- devlink device configuration
 .BR inline-mode " { " none " | " link " | " network " | " transport " } "
 ] [
 .BR encap-mode " { " none " | " basic " } "
+] [
+.BR spool-size " { SIZE } "
 ]
 
 .ti -8
@@ -151,6 +153,10 @@  Set eswitch encapsulation support
 .I basic
 - Enable encapsulation support
 
+.TP
+.BR spool-size " SIZE"
+Set the rx shared memory pool size in bytes.
+
 .SS devlink dev param set  - set new value to devlink device configuration parameter
 
 .TP