diff mbox series

[net-next,V5,07/11] devlink: Extend devlink rate API with traffic classes bandwidth management

Message ID 20241204220931.254964-8-tariqt@nvidia.com (mailing list archive)
State Superseded
Headers show
Series net/mlx5: ConnectX-8 SW Steering + Rate management on traffic classes | expand

Commit Message

Tariq Toukan Dec. 4, 2024, 10:09 p.m. UTC
From: Carolina Jubran <cjubran@nvidia.com>

Introduce support for specifying bandwidth proportions between traffic
classes (TC) in the devlink-rate API. This new option allows users to
allocate bandwidth across multiple traffic classes in a single command.

This feature provides more granular control over traffic management,
especially for scenarios requiring Enhanced Transmission Selection.

Users can now define a specific bandwidth share for each traffic class,
such as allocating 20% for TC0 (TCP/UDP) and 80% for TC5 (RoCE).

Example:
DEV=pci/0000:08:00.0

$ devlink port function rate add $DEV/vfs_group tx_share 10Gbit \
  tx_max 50Gbit tc-bw 0:20 1:0 2:0 3:0 4:0 5:80 6:0 7:0

$ devlink port function rate set $DEV/vfs_group \
  tc-bw 0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0

Example usage with ynl:

./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml \
  --do rate-set --json '{
  "bus-name": "pci",
  "dev-name": "0000:08:00.0",
  "port-index": 1,
  "rate-tc-bws": [
    {"rate-tc-index": 0, "rate-tc-bw": 50},
    {"rate-tc-index": 1, "rate-tc-bw": 50},
    {"rate-tc-index": 2, "rate-tc-bw": 0},
    {"rate-tc-index": 3, "rate-tc-bw": 0},
    {"rate-tc-index": 4, "rate-tc-bw": 0},
    {"rate-tc-index": 5, "rate-tc-bw": 0},
    {"rate-tc-index": 6, "rate-tc-bw": 0},
    {"rate-tc-index": 7, "rate-tc-bw": 0}
  ]
}'

./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml \
  --do rate-get --json '{
  "bus-name": "pci",
  "dev-name": "0000:08:00.0",
  "port-index": 1
}'

output for rate-get:
{'bus-name': 'pci',
 'dev-name': '0000:08:00.0',
 'port-index': 1,
 'rate-tc-bws': [{'rate-tc-bw': 50, 'rate-tc-index': 0},
                 {'rate-tc-bw': 50, 'rate-tc-index': 1},
                 {'rate-tc-bw': 0, 'rate-tc-index': 2},
                 {'rate-tc-bw': 0, 'rate-tc-index': 3},
                 {'rate-tc-bw': 0, 'rate-tc-index': 4},
                 {'rate-tc-bw': 0, 'rate-tc-index': 5},
                 {'rate-tc-bw': 0, 'rate-tc-index': 6},
                 {'rate-tc-bw': 0, 'rate-tc-index': 7}],
 'rate-tx-max': 0,
 'rate-tx-priority': 0,
 'rate-tx-share': 0,
 'rate-tx-weight': 0,
 'rate-type': 'leaf'}

Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
 Documentation/netlink/specs/devlink.yaml |  28 ++++-
 include/net/devlink.h                    |   7 ++
 include/uapi/linux/devlink.h             |   4 +
 net/devlink/netlink_gen.c                |  15 ++-
 net/devlink/netlink_gen.h                |   1 +
 net/devlink/rate.c                       | 124 +++++++++++++++++++++++
 6 files changed, 174 insertions(+), 5 deletions(-)

Comments

Jakub Kicinski Dec. 7, 2024, 2:10 a.m. UTC | #1
On Thu, 5 Dec 2024 00:09:27 +0200 Tariq Toukan wrote:
> +          min: 0
> +          max: 100

Are full percentage points sufficient granularity?

> +	if (!tb[DEVLINK_ATTR_RATE_TC_INDEX]) {

NL_SET_ERR_ATTR_MISS()

Please limit the string messages where error can be expressed in
machine readable form.

> +		NL_SET_ERR_MSG(extack, "Traffic class index is expected");
> +		return -EINVAL;
> +	}
> +
> +	tc_index = nla_get_u8(tb[DEVLINK_ATTR_RATE_TC_INDEX]);
> +
> +	if (tc_index >= IEEE_8021QAZ_MAX_TCS) {

This can't be enforced by the policy?

> +		NL_SET_ERR_MSG_FMT(extack,
> +				   "Provided traffic class index (%u) exceeds the maximum allowed value (%u)",
> +				   tc_index, IEEE_8021QAZ_MAX_TCS - 1);
> +		return -EINVAL;
> +	}
> +
> +	if (!tb[DEVLINK_ATTR_RATE_TC_BW]) {
> +		NL_SET_ERR_MSG(extack, "Traffic class bandwidth is expected");
> +		return -EINVAL;
> +	}
> +
> +	if (test_and_set_bit(tc_index, bitmap)) {
> +		NL_SET_ERR_MSG(extack, "Duplicate traffic class index specified");

always try to point to attr that caused the issue

> +		return -EINVAL;
> +	}
Tariq Toukan Dec. 9, 2024, 9:03 p.m. UTC | #2
On 07/12/2024 4:10, Jakub Kicinski wrote:
> On Thu, 5 Dec 2024 00:09:27 +0200 Tariq Toukan wrote:
>> +          min: 0
>> +          max: 100
> 
> Are full percentage points sufficient granularity?
> 

Yes, we think so.

>> +	if (!tb[DEVLINK_ATTR_RATE_TC_INDEX]) {
> 
> NL_SET_ERR_ATTR_MISS()
> 
> Please limit the string messages where error can be expressed in
> machine readable form.
> 

I'll fix.

>> +		NL_SET_ERR_MSG(extack, "Traffic class index is expected");
>> +		return -EINVAL;
>> +	}
>> +
>> +	tc_index = nla_get_u8(tb[DEVLINK_ATTR_RATE_TC_INDEX]);
>> +
>> +	if (tc_index >= IEEE_8021QAZ_MAX_TCS) {
> 
> This can't be enforced by the policy?
> 

If we enforce by policy we need to use the constant 7, not the macro 
IEEE_8021QAZ_MAX_TCS-1.
I'll keep it.

>> +		NL_SET_ERR_MSG_FMT(extack,
>> +				   "Provided traffic class index (%u) exceeds the maximum allowed value (%u)",
>> +				   tc_index, IEEE_8021QAZ_MAX_TCS - 1);
>> +		return -EINVAL;
>> +	}
>> +
>> +	if (!tb[DEVLINK_ATTR_RATE_TC_BW]) {
>> +		NL_SET_ERR_MSG(extack, "Traffic class bandwidth is expected");
>> +		return -EINVAL;
>> +	}
>> +
>> +	if (test_and_set_bit(tc_index, bitmap)) {
>> +		NL_SET_ERR_MSG(extack, "Duplicate traffic class index specified");
> 
> always try to point to attr that caused the issue
> 

I'll fix.

>> +		return -EINVAL;
>> +	}
Jakub Kicinski Dec. 9, 2024, 9:27 p.m. UTC | #3
On Mon, 9 Dec 2024 23:03:04 +0200 Tariq Toukan wrote:
> >> +	tc_index = nla_get_u8(tb[DEVLINK_ATTR_RATE_TC_INDEX]);
> >> +
> >> +	if (tc_index >= IEEE_8021QAZ_MAX_TCS) {  
> > 
> > This can't be enforced by the policy?
> >   
> 
> If we enforce by policy we need to use the constant 7, not the macro 
> IEEE_8021QAZ_MAX_TCS-1.
> I'll keep it.

The spec should support using "foreign constants"
Off the top of my head - you can define the ieee-8021qaz-max-tcs constant
as if you were defining a devlink constant, then add a header:
attribute. This will tell C codegen to include that header instead of
generating the definition.
Carolina Jubran Jan. 20, 2025, 11:55 a.m. UTC | #4
On 09/12/2024 23:27, Jakub Kicinski wrote:
> On Mon, 9 Dec 2024 23:03:04 +0200 Tariq Toukan wrote:
>>>> +	tc_index = nla_get_u8(tb[DEVLINK_ATTR_RATE_TC_INDEX]);
>>>> +
>>>> +	if (tc_index >= IEEE_8021QAZ_MAX_TCS) {
>>>
>>> This can't be enforced by the policy?
>>>    
>>
>> If we enforce by policy we need to use the constant 7, not the macro
>> IEEE_8021QAZ_MAX_TCS-1.
>> I'll keep it.
> 
> The spec should support using "foreign constants"
> Off the top of my head - you can define the ieee-8021qaz-max-tcs contant
> as if you were defining a devlink constant, then add a header:
> attribute. This will tell C codegen to include that header instead of
> generating the definition.
> 

Hi Jakub,

I tried implementing this as you suggested, but it seems that the only 
supported definition types are ['const', 'enum', 'flags', 'struct'], 
while the max value in checks only accepts patterns matching 
^[su](8|16|32|64)-(min|max)$.

From what I see, it doesn’t currently support using a const value for 
the max or min checks. Let me know if I’m missing something or if 
there’s an alternative way to achieve this.

Thanks,
Carolina
Jakub Kicinski Jan. 20, 2025, 6:14 p.m. UTC | #5
On Mon, 20 Jan 2025 13:55:58 +0200 Carolina Jubran wrote:
> On 09/12/2024 23:27, Jakub Kicinski wrote:
> > On Mon, 9 Dec 2024 23:03:04 +0200 Tariq Toukan wrote:  
> >> If we enforce by policy we need to use the constant 7, not the macro
> >> IEEE_8021QAZ_MAX_TCS-1.
> >> I'll keep it.  
> > 
> > The spec should support using "foreign constants"
> > Off the top of my head - you can define the ieee-8021qaz-max-tcs contant
> > as if you were defining a devlink constant, then add a header:
> > attribute. This will tell C codegen to include that header instead of
> > generating the definition.
> >   
> 
> Hi Jakub,
> 
> I tried implementing this as you suggested, but it seems that the only 
> supported definition types are ['const', 'enum', 'flags', 'struct'], 
> while the max value in checks only accepts patterns matching 
> ^[su](8|16|32|64)-(min|max)$.
> 
>  From what I see, it doesn’t currently support using a const value for 
> the max or min checks. Let me know if I’m missing something or if 
> there’s an alternative way to achieve this.

Ah, I thought we already implemented this, sorry.
Can you try the two patches from the top of this branch?

https://github.com/kuba-moo/linux/tree/ynl-limits
Carolina Jubran Jan. 21, 2025, 12:36 p.m. UTC | #6
On 20/01/2025 20:14, Jakub Kicinski wrote:
> On Mon, 20 Jan 2025 13:55:58 +0200 Carolina Jubran wrote:
>> On 09/12/2024 23:27, Jakub Kicinski wrote:
>>> On Mon, 9 Dec 2024 23:03:04 +0200 Tariq Toukan wrote:
>>>> If we enforce by policy we need to use the constant 7, not the macro
>>>> IEEE_8021QAZ_MAX_TCS-1.
>>>> I'll keep it.
>>>
>>> The spec should support using "foreign constants"
>>> Off the top of my head - you can define the ieee-8021qaz-max-tcs contant
>>> as if you were defining a devlink constant, then add a header:
>>> attribute. This will tell C codegen to include that header instead of
>>> generating the definition.
>>>    
>>
>> Hi Jakub,
>>
>> I tried implementing this as you suggested, but it seems that the only
>> supported definition types are ['const', 'enum', 'flags', 'struct'],
>> while the max value in checks only accepts patterns matching
>> ^[su](8|16|32|64)-(min|max)$.
>>
>>   From what I see, it doesn’t currently support using a const value for
>> the max or min checks. Let me know if I’m missing something or if
>> there’s an alternative way to achieve this.
> 
> Ah, I thought we already implemented this, sorry.
> Can you try the two patches from the top of this branch?
> 
> https://github.com/kuba-moo/linux/tree/ynl-limits

Yes, it worked after applying the pattern changes to
genetlink-legacy.yaml, as devlink uses it.

Thank you!

Carolina
Carolina Jubran Jan. 22, 2025, 12:48 p.m. UTC | #7
On 21/01/2025 14:36, Carolina Jubran wrote:
> 
> 
> On 20/01/2025 20:14, Jakub Kicinski wrote:
>> On Mon, 20 Jan 2025 13:55:58 +0200 Carolina Jubran wrote:
>>> On 09/12/2024 23:27, Jakub Kicinski wrote:
>>>> On Mon, 9 Dec 2024 23:03:04 +0200 Tariq Toukan wrote:
>>>>> If we enforce by policy we need to use the constant 7, not the macro
>>>>> IEEE_8021QAZ_MAX_TCS-1.
>>>>> I'll keep it.
>>>>
>>>> The spec should support using "foreign constants"
>>>> Off the top of my head - you can define the ieee-8021qaz-max-tcs 
>>>> contant
>>>> as if you were defining a devlink constant, then add a header:
>>>> attribute. This will tell C codegen to include that header instead of
>>>> generating the definition.
>>>
>>> Hi Jakub,
>>>
>>> I tried implementing this as you suggested, but it seems that the only
>>> supported definition types are ['const', 'enum', 'flags', 'struct'],
>>> while the max value in checks only accepts patterns matching
>>> ^[su](8|16|32|64)-(min|max)$.
>>>
>>>   From what I see, it doesn’t currently support using a const value for
>>> the max or min checks. Let me know if I’m missing something or if
>>> there’s an alternative way to achieve this.
>>
>> Ah, I thought we already implemented this, sorry.
>> Can you try the two patches from the top of this branch?
>>
>> https://github.com/kuba-moo/linux/tree/ynl-limits
> 
> Yes, it worked after applying the pattern changes to the
> genetlink-legacy.yaml , as devlink uses it.
> 
> Thank you!
> 
> Carolina
> 

Since this worked and the devlink patch now depends on it, would it be 
possible to include the top two patches from
https://github.com/kuba-moo/linux/tree/ynl-limits in the next submission 
of the devlink and mlx5 patches?

Thanks!
Carolina
Jakub Kicinski Jan. 22, 2025, 2:30 p.m. UTC | #8
On Wed, 22 Jan 2025 14:48:55 +0200 Carolina Jubran wrote:
> Since this worked and the devlink patch now depends on it, would it be 
> possible to include the top two patches
> https://github.com/kuba-moo/linux/tree/ynl-limits in the next submission 
> of the devlink and mlx5 patches?

I'll post the two patches right after the merge window.
They stand on their own, and we can keep your series short-ish.
Tariq Toukan Feb. 5, 2025, 6:22 a.m. UTC | #9
On 22/01/2025 16:30, Jakub Kicinski wrote:
> On Wed, 22 Jan 2025 14:48:55 +0200 Carolina Jubran wrote:
>> Since this worked and the devlink patch now depends on it, would it be
>> possible to include the top two patches
>> https://github.com/kuba-moo/linux/tree/ynl-limits in the next submission
>> of the devlink and mlx5 patches?
> 
> I'll post the two patches right after the merge window.
> They stand on their own, and we can keep your series short-ish.

Hi Jakub,

A kind reminder, as we have dependency on these.

Regards,
Tariq
Gal Pressman Feb. 5, 2025, 6:56 a.m. UTC | #10
On 05/02/2025 8:22, Tariq Toukan wrote:
> 
> 
> On 22/01/2025 16:30, Jakub Kicinski wrote:
>> On Wed, 22 Jan 2025 14:48:55 +0200 Carolina Jubran wrote:
>>> Since this worked and the devlink patch now depends on it, would it be
>>> possible to include the top two patches
>>> https://github.com/kuba-moo/linux/tree/ynl-limits in the next submission
>>> of the devlink and mlx5 patches?
>>
>> I'll post the two patches right after the merge window.
>> They stand on their own, and we can keep your series short-ish.
> 
> Hi Jakub,
> 
> A kind reminder, as we have dependency on these.

They're submitted already:
https://lore.kernel.org/netdev/20250203215510.1288728-2-kuba@kernel.org/
Tariq Toukan Feb. 5, 2025, 8:02 a.m. UTC | #11
On 05/02/2025 8:56, Gal Pressman wrote:
> On 05/02/2025 8:22, Tariq Toukan wrote:
>>
>>
>> On 22/01/2025 16:30, Jakub Kicinski wrote:
>>> On Wed, 22 Jan 2025 14:48:55 +0200 Carolina Jubran wrote:
>>>> Since this worked and the devlink patch now depends on it, would it be
>>>> possible to include the top two patches
>>>> https://github.com/kuba-moo/linux/tree/ynl-limits in the next submission
>>>> of the devlink and mlx5 patches?
>>>
>>> I'll post the two patches right after the merge window.
>>> They stand on their own, and we can keep your series short-ish.
>>
>> Hi Jakub,
>>
>> A kind reminder, as we have dependency on these.
> 
> They're submitted already:
> https://lore.kernel.org/netdev/20250203215510.1288728-2-kuba@kernel.org/

I see. Patch was renamed.
I searched for the old name "don't output foreign constants".

Great!

Thanks,
Tariq
diff mbox series

Patch

diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml
index 09fbb4c03fc8..7fceb8fdc73b 100644
--- a/Documentation/netlink/specs/devlink.yaml
+++ b/Documentation/netlink/specs/devlink.yaml
@@ -820,7 +820,23 @@  attribute-sets:
       -
         name: region-direct
         type: flag
-
+      -
+        name: rate-tc-bws
+        type: nest
+        multi-attr: true
+        nested-attributes: dl-rate-tc-bws
+      -
+        name: rate-tc-index
+        type: u8
+      -
+        name: rate-tc-bw
+        type: u32
+        doc: |
+             Specifies the bandwidth allocation for the Traffic Class as a
+             percentage.
+        checks:
+          min: 0
+          max: 100
   -
     name: dl-dev-stats
     subset-of: devlink
@@ -1225,6 +1241,14 @@  attribute-sets:
       -
         name: flash
         type: flag
+  -
+    name: dl-rate-tc-bws
+    subset-of: devlink
+    attributes:
+      -
+       name: rate-tc-index
+      -
+       name: rate-tc-bw
 
 operations:
   enum-model: directional
@@ -2149,6 +2173,7 @@  operations:
             - rate-tx-priority
             - rate-tx-weight
             - rate-parent-node-name
+            - rate-tc-bws
 
     -
       name: rate-new
@@ -2169,6 +2194,7 @@  operations:
             - rate-tx-priority
             - rate-tx-weight
             - rate-parent-node-name
+            - rate-tc-bws
 
     -
       name: rate-del
diff --git a/include/net/devlink.h b/include/net/devlink.h
index fbb9a2668e24..277b826cdd60 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -20,6 +20,7 @@ 
 #include <uapi/linux/devlink.h>
 #include <linux/xarray.h>
 #include <linux/firmware.h>
+#include <linux/dcbnl.h>
 
 struct devlink;
 struct devlink_linecard;
@@ -117,6 +118,8 @@  struct devlink_rate {
 
 	u32 tx_priority;
 	u32 tx_weight;
+
+	u32 tc_bw[IEEE_8021QAZ_MAX_TCS];
 };
 
 struct devlink_port {
@@ -1469,6 +1472,8 @@  struct devlink_ops {
 					 u32 tx_priority, struct netlink_ext_ack *extack);
 	int (*rate_leaf_tx_weight_set)(struct devlink_rate *devlink_rate, void *priv,
 				       u32 tx_weight, struct netlink_ext_ack *extack);
+	int (*rate_leaf_tc_bw_set)(struct devlink_rate *devlink_rate, void *priv,
+				   u32 *tc_bw, struct netlink_ext_ack *extack);
 	int (*rate_node_tx_share_set)(struct devlink_rate *devlink_rate, void *priv,
 				      u64 tx_share, struct netlink_ext_ack *extack);
 	int (*rate_node_tx_max_set)(struct devlink_rate *devlink_rate, void *priv,
@@ -1477,6 +1482,8 @@  struct devlink_ops {
 					 u32 tx_priority, struct netlink_ext_ack *extack);
 	int (*rate_node_tx_weight_set)(struct devlink_rate *devlink_rate, void *priv,
 				       u32 tx_weight, struct netlink_ext_ack *extack);
+	int (*rate_node_tc_bw_set)(struct devlink_rate *devlink_rate, void *priv,
+				   u32 *tc_bw, struct netlink_ext_ack *extack);
 	int (*rate_node_new)(struct devlink_rate *rate_node, void **priv,
 			     struct netlink_ext_ack *extack);
 	int (*rate_node_del)(struct devlink_rate *rate_node, void *priv,
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 9401aa343673..b3b538c67c34 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -614,6 +614,10 @@  enum devlink_attr {
 
 	DEVLINK_ATTR_REGION_DIRECT,		/* flag */
 
+	DEVLINK_ATTR_RATE_TC_BWS,		/* nested */
+	DEVLINK_ATTR_RATE_TC_INDEX,		/* u8 */
+	DEVLINK_ATTR_RATE_TC_BW,		/* u32 */
+
 	/* Add new attributes above here, update the spec in
 	 * Documentation/netlink/specs/devlink.yaml and re-generate
 	 * net/devlink/netlink_gen.c.
diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c
index f9786d51f68f..b7b7829175dc 100644
--- a/net/devlink/netlink_gen.c
+++ b/net/devlink/netlink_gen.c
@@ -18,6 +18,11 @@  const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_
 	[DEVLINK_PORT_FN_ATTR_CAPS] = NLA_POLICY_BITFIELD32(15),
 };
 
+const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_ATTR_RATE_TC_BW + 1] = {
+	[DEVLINK_ATTR_RATE_TC_INDEX] = { .type = NLA_U8, },
+	[DEVLINK_ATTR_RATE_TC_BW] = NLA_POLICY_RANGE(NLA_U32, 0, 100),
+};
+
 const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1] = {
 	[DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG, },
 };
@@ -496,7 +501,7 @@  static const struct nla_policy devlink_rate_get_dump_nl_policy[DEVLINK_ATTR_DEV_
 };
 
 /* DEVLINK_CMD_RATE_SET - do */
-static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = {
+static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TC_BWS + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
 	[DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, },
@@ -505,10 +510,11 @@  static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TX_W
 	[DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, },
 	[DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, },
 	[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, },
+	[DEVLINK_ATTR_RATE_TC_BWS] = NLA_POLICY_NESTED(devlink_dl_rate_tc_bws_nl_policy),
 };
 
 /* DEVLINK_CMD_RATE_NEW - do */
-static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TX_WEIGHT + 1] = {
+static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TC_BWS + 1] = {
 	[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, },
 	[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, },
 	[DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, },
@@ -517,6 +523,7 @@  static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TX_W
 	[DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32, },
 	[DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32, },
 	[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING, },
+	[DEVLINK_ATTR_RATE_TC_BWS] = NLA_POLICY_NESTED(devlink_dl_rate_tc_bws_nl_policy),
 };
 
 /* DEVLINK_CMD_RATE_DEL - do */
@@ -1164,7 +1171,7 @@  const struct genl_split_ops devlink_nl_ops[74] = {
 		.doit		= devlink_nl_rate_set_doit,
 		.post_doit	= devlink_nl_post_doit,
 		.policy		= devlink_rate_set_nl_policy,
-		.maxattr	= DEVLINK_ATTR_RATE_TX_WEIGHT,
+		.maxattr	= DEVLINK_ATTR_RATE_TC_BWS,
 		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
 	},
 	{
@@ -1174,7 +1181,7 @@  const struct genl_split_ops devlink_nl_ops[74] = {
 		.doit		= devlink_nl_rate_new_doit,
 		.post_doit	= devlink_nl_post_doit,
 		.policy		= devlink_rate_new_nl_policy,
-		.maxattr	= DEVLINK_ATTR_RATE_TX_WEIGHT,
+		.maxattr	= DEVLINK_ATTR_RATE_TC_BWS,
 		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
 	},
 	{
diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h
index 8f2bd50ddf5e..fb733b5d4ff1 100644
--- a/net/devlink/netlink_gen.h
+++ b/net/devlink/netlink_gen.h
@@ -13,6 +13,7 @@ 
 
 /* Common nested types */
 extern const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_FN_ATTR_CAPS + 1];
+extern const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_ATTR_RATE_TC_BW + 1];
 extern const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1];
 
 /* Ops table for devlink */
diff --git a/net/devlink/rate.c b/net/devlink/rate.c
index 8828ffaf6cbc..4bee2481115c 100644
--- a/net/devlink/rate.c
+++ b/net/devlink/rate.c
@@ -80,6 +80,29 @@  devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
 		return ERR_PTR(-EINVAL);
 }
 
+static int devlink_rate_put_tc_bws(struct sk_buff *msg, u32 *tc_bw)
+{
+	struct nlattr *nla_tc_bw;
+	int i;
+
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		nla_tc_bw = nla_nest_start(msg, DEVLINK_ATTR_RATE_TC_BWS);
+		if (!nla_tc_bw)
+			return -EMSGSIZE;
+
+		if (nla_put_u8(msg, DEVLINK_ATTR_RATE_TC_INDEX, i) ||
+		    nla_put_u32(msg, DEVLINK_ATTR_RATE_TC_BW, tc_bw[i]))
+			goto nla_put_failure;
+
+		nla_nest_end(msg, nla_tc_bw);
+	}
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, nla_tc_bw);
+	return -EMSGSIZE;
+}
+
 static int devlink_nl_rate_fill(struct sk_buff *msg,
 				struct devlink_rate *devlink_rate,
 				enum devlink_command cmd, u32 portid, u32 seq,
@@ -129,6 +152,9 @@  static int devlink_nl_rate_fill(struct sk_buff *msg,
 				   devlink_rate->parent->name))
 			goto nla_put_failure;
 
+	if (devlink_rate_put_tc_bws(msg, devlink_rate->tc_bw))
+		goto nla_put_failure;
+
 	genlmsg_end(msg, hdr);
 	return 0;
 
@@ -316,6 +342,86 @@  devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
 	return 0;
 }
 
+static int devlink_nl_rate_tc_bw_parse(struct nlattr *parent_nest, u32 *tc_bw,
+				       unsigned long *bitmap, struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[DEVLINK_ATTR_MAX + 1];
+	u8 tc_index;
+
+	nla_parse_nested(tb, DEVLINK_ATTR_MAX, parent_nest, devlink_dl_rate_tc_bws_nl_policy,
+			 extack);
+	if (!tb[DEVLINK_ATTR_RATE_TC_INDEX]) {
+		NL_SET_ERR_MSG(extack, "Traffic class index is expected");
+		return -EINVAL;
+	}
+
+	tc_index = nla_get_u8(tb[DEVLINK_ATTR_RATE_TC_INDEX]);
+
+	if (tc_index >= IEEE_8021QAZ_MAX_TCS) {
+		NL_SET_ERR_MSG_FMT(extack,
+				   "Provided traffic class index (%u) exceeds the maximum allowed value (%u)",
+				   tc_index, IEEE_8021QAZ_MAX_TCS - 1);
+		return -EINVAL;
+	}
+
+	if (!tb[DEVLINK_ATTR_RATE_TC_BW]) {
+		NL_SET_ERR_MSG(extack, "Traffic class bandwidth is expected");
+		return -EINVAL;
+	}
+
+	if (test_and_set_bit(tc_index, bitmap)) {
+		NL_SET_ERR_MSG(extack, "Duplicate traffic class index specified");
+		return -EINVAL;
+	}
+
+	tc_bw[tc_index] = nla_get_u32(tb[DEVLINK_ATTR_RATE_TC_BW]);
+
+	return 0;
+}
+
+static int devlink_nl_rate_tc_bw_set(struct devlink_rate *devlink_rate,
+				     struct genl_info *info)
+{
+	DECLARE_BITMAP(bitmap, IEEE_8021QAZ_MAX_TCS) = {};
+	struct devlink *devlink = devlink_rate->devlink;
+	const struct devlink_ops *ops = devlink->ops;
+	u32 tc_bw[IEEE_8021QAZ_MAX_TCS] = {};
+	int rem, err = -EOPNOTSUPP, i;
+	struct nlattr *attr;
+
+	nla_for_each_attr(attr, genlmsg_data(info->genlhdr),
+			  genlmsg_len(info->genlhdr), rem) {
+		if (nla_type(attr) == DEVLINK_ATTR_RATE_TC_BWS) {
+			err = devlink_nl_rate_tc_bw_parse(attr, tc_bw, bitmap, info->extack);
+			if (err)
+				return err;
+		}
+	}
+
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		if (!test_bit(i, bitmap)) {
+			NL_SET_ERR_MSG_FMT(info->extack,
+					   "Bandwidth values must be specified for all %u traffic classes",
+					   IEEE_8021QAZ_MAX_TCS);
+			return -EINVAL;
+		}
+	}
+
+	if (devlink_rate_is_leaf(devlink_rate))
+		err = ops->rate_leaf_tc_bw_set(devlink_rate, devlink_rate->priv, tc_bw,
+					       info->extack);
+	else if (devlink_rate_is_node(devlink_rate))
+		err = ops->rate_node_tc_bw_set(devlink_rate, devlink_rate->priv, tc_bw,
+					       info->extack);
+
+	if (err)
+		return err;
+
+	memcpy(devlink_rate->tc_bw, tc_bw, sizeof(tc_bw));
+
+	return 0;
+}
+
 static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
 			       const struct devlink_ops *ops,
 			       struct genl_info *info)
@@ -388,6 +494,12 @@  static int devlink_nl_rate_set(struct devlink_rate *devlink_rate,
 			return err;
 	}
 
+	if (attrs[DEVLINK_ATTR_RATE_TC_BWS]) {
+		err = devlink_nl_rate_tc_bw_set(devlink_rate, info);
+		if (err)
+			return err;
+	}
+
 	return 0;
 }
 
@@ -423,6 +535,12 @@  static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
 					    "TX weight set isn't supported for the leafs");
 			return false;
 		}
+		if (attrs[DEVLINK_ATTR_RATE_TC_BWS] && !ops->rate_leaf_tc_bw_set) {
+			NL_SET_ERR_MSG_ATTR(info->extack,
+					    attrs[DEVLINK_ATTR_RATE_TC_BWS],
+					    "TC bandwidth set isn't supported for the leafs");
+			return false;
+		}
 	} else if (type == DEVLINK_RATE_TYPE_NODE) {
 		if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
 			NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the nodes");
@@ -449,6 +567,12 @@  static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
 					    "TX weight set isn't supported for the nodes");
 			return false;
 		}
+		if (attrs[DEVLINK_ATTR_RATE_TC_BWS] && !ops->rate_node_tc_bw_set) {
+			NL_SET_ERR_MSG_ATTR(info->extack,
+					    attrs[DEVLINK_ATTR_RATE_TC_BWS],
+					    "TC bandwidth set isn't supported for the nodes");
+			return false;
+		}
 	} else {
 		WARN(1, "Unknown type of rate object");
 		return false;