diff mbox series

[net-next,v11,2/2] net: sched: support hash selecting tx queue

Message ID 20220323021447.34800-3-xiangxia.m.yue@gmail.com (mailing list archive)
State Deferred
Delegated to: Netdev Maintainers
Headers show
Series net: sched: allow user to select txqueue | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 9 this patch: 9
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 18 this patch: 18
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 14 this patch: 14
netdev/checkpatch warning WARNING: line length of 103 exceeds 80 columns WARNING: line length of 112 exceeds 80 columns WARNING: line length of 81 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Tonghao Zhang March 23, 2022, 2:14 a.m. UTC
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>

This patch allows users to pick a queue_mapping range, from A
to B, so that packets can be load balanced across tx queues A
through B. A and B are unsigned 16-bit values in decimal
format.

$ tc filter ... action skbedit queue_mapping skbhash A B

"skbedit queue_mapping QUEUE_MAPPING" (from "man 8 tc-skbedit")
is enhanced with a new flag: SKBEDIT_F_TXQ_SKBHASH

  +----+      +----+      +----+
  | P1 |      | P2 |      | Pn |
  +----+      +----+      +----+
    |           |           |
    +-----------+-----------+
                |
                | clsact/skbedit
                |      MQ
                v
    +-----------+-----------+
    | q0        | qn        | qm
    v           v           v
  HTB/FQ       FIFO   ...  FIFO

For example:
If P1 sends packets to different Pods on another host, and
we want to distribute the flows across qn - qm, then we can use
skb->hash as the hash.

setup commands:
$ NETDEV=eth0
$ ip netns add n1
$ ip link add ipv1 link $NETDEV type ipvlan mode l2
$ ip link set ipv1 netns n1
$ ip netns exec n1 ifconfig ipv1 2.2.2.100/24 up

$ tc qdisc add dev $NETDEV clsact
$ tc filter add dev $NETDEV egress protocol ip prio 1 \
        flower skip_hw src_ip 2.2.2.100 action skbedit queue_mapping skbhash 2 6
$ tc qdisc add dev $NETDEV handle 1: root mq
$ tc qdisc add dev $NETDEV parent 1:1 handle 2: htb
$ tc class add dev $NETDEV parent 2: classid 2:1 htb rate 100kbit
$ tc class add dev $NETDEV parent 2: classid 2:2 htb rate 200kbit
$ tc qdisc add dev $NETDEV parent 1:2 tbf rate 100mbit burst 100mb latency 1
$ tc qdisc add dev $NETDEV parent 1:3 pfifo
$ tc qdisc add dev $NETDEV parent 1:4 pfifo
$ tc qdisc add dev $NETDEV parent 1:5 pfifo
$ tc qdisc add dev $NETDEV parent 1:6 pfifo
$ tc qdisc add dev $NETDEV parent 1:7 pfifo

$ ip netns exec n1 iperf3 -c 2.2.2.1 -i 1 -t 10 -P 10

pick txqueue from 2 - 6:
$ ethtool -S $NETDEV | grep -i tx_queue_[0-9]_bytes
     tx_queue_0_bytes: 42
     tx_queue_1_bytes: 0
     tx_queue_2_bytes: 11442586444
     tx_queue_3_bytes: 7383615334
     tx_queue_4_bytes: 3981365579
     tx_queue_5_bytes: 3983235051
     tx_queue_6_bytes: 6706236461
     tx_queue_7_bytes: 42
     tx_queue_8_bytes: 0
     tx_queue_9_bytes: 0

txqueues 2 - 6 are mapped to classid 1:3 - 1:7
$ tc -s class show dev $NETDEV
...
class mq 1:3 root leaf 8002:
 Sent 11949133672 bytes 7929798 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
class mq 1:4 root leaf 8003:
 Sent 7710449050 bytes 5117279 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
class mq 1:5 root leaf 8004:
 Sent 4157648675 bytes 2758990 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
class mq 1:6 root leaf 8005:
 Sent 4159632195 bytes 2759990 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
class mq 1:7 root leaf 8006:
 Sent 7003169603 bytes 4646912 pkt (dropped 0, overlimits 0 requeues 0)
 backlog 0b 0p requeues 0
...

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Alexander Lobakin <alobakin@pm.me>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Talal Ahmad <talalahmad@google.com>
Cc: Kevin Hao <haokexin@gmail.com>
Cc: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Antoine Tenart <atenart@kernel.org>
Cc: Wei Wang <weiwan@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 include/net/tc_act/tc_skbedit.h        |  1 +
 include/uapi/linux/tc_act/tc_skbedit.h |  2 ++
 net/sched/act_skbedit.c                | 49 ++++++++++++++++++++++++--
 3 files changed, 50 insertions(+), 2 deletions(-)

Comments

Dave Taht March 23, 2022, 6:10 p.m. UTC | #1
On Wed, Mar 23, 2022 at 11:45 AM <xiangxia.m.yue@gmail.com> wrote:
>
> From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
>
> This patch allows users to pick queue_mapping, range
> from A to B. Then we can load balance packets from A
> to B tx queue. The range is an unsigned 16bit value
> in decimal format.
>
> $ tc filter ... action skbedit queue_mapping skbhash A B
>
> "skbedit queue_mapping QUEUE_MAPPING" (from "man 8 tc-skbedit")
> is enhanced with flags: SKBEDIT_F_TXQ_SKBHASH
>
>   +----+      +----+      +----+
>   | P1 |      | P2 |      | Pn |
>   +----+      +----+      +----+
>     |           |           |
>     +-----------+-----------+
>                 |
>                 | clsact/skbedit
>                 |      MQ
>                 v
>     +-----------+-----------+
>     | q0        | qn        | qm
>     v           v           v
>   HTB/FQ       FIFO   ...  FIFO
>
> For example:
> If P1 sends out packets to different Pods on other host, and
> we want distribute flows from qn - qm. Then we can use skb->hash
> as hash.
>
> setup commands:
> $ NETDEV=eth0
> $ ip netns add n1
> $ ip link add ipv1 link $NETDEV type ipvlan mode l2
> $ ip link set ipv1 netns n1
> $ ip netns exec n1 ifconfig ipv1 2.2.2.100/24 up
>
> $ tc qdisc add dev $NETDEV clsact
> $ tc filter add dev $NETDEV egress protocol ip prio 1 \
>         flower skip_hw src_ip 2.2.2.100 action skbedit queue_mapping skbhash 2 6
> $ tc qdisc add dev $NETDEV handle 1: root mq
> $ tc qdisc add dev $NETDEV parent 1:1 handle 2: htb
> $ tc class add dev $NETDEV parent 2: classid 2:1 htb rate 100kbit
> $ tc class add dev $NETDEV parent 2: classid 2:2 htb rate 200kbit
> $ tc qdisc add dev $NETDEV parent 1:2 tbf rate 100mbit burst 100mb latency 1

Aside from the utility of this patch, the above example settings for
burst and latency are a bit awkward.

> $ tc qdisc add dev $NETDEV parent 1:3 pfifo
> $ tc qdisc add dev $NETDEV parent 1:4 pfifo
> $ tc qdisc add dev $NETDEV parent 1:5 pfifo
> $ tc qdisc add dev $NETDEV parent 1:6 pfifo
> $ tc qdisc add dev $NETDEV parent 1:7 pfifo

pfifo's default packet limit is 1000 packets; 5 queues like this could
create about 650ms of latency with tbf, and more with htb & gso.

> $ ip netns exec n1 iperf3 -c 2.2.2.1 -i 1 -t 10 -P 10

Given the structure of this test, you are probably regulated more by
TSQ than by pfifo; however, a packet capture of
the actual TCP RTT induced would be interesting. Also, try substituting
fq_codel for pfifo.

It's great to send lots of packets over lots of queues, but not so
great to have seconds of data outstanding in them.
>
> pick txqueue from 2 - 6:
> $ ethtool -S $NETDEV | grep -i tx_queue_[0-9]_bytes
>      tx_queue_0_bytes: 42
>      tx_queue_1_bytes: 0
>      tx_queue_2_bytes: 11442586444
>      tx_queue_3_bytes: 7383615334
>      tx_queue_4_bytes: 3981365579
>      tx_queue_5_bytes: 3983235051
>      tx_queue_6_bytes: 6706236461
>      tx_queue_7_bytes: 42
>      tx_queue_8_bytes: 0
>      tx_queue_9_bytes: 0
>
> txqueues 2 - 6 are mapped to classid 1:3 - 1:7
> $ tc -s class show dev $NETDEV
> ...
> class mq 1:3 root leaf 8002:
>  Sent 11949133672 bytes 7929798 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0
> class mq 1:4 root leaf 8003:
>  Sent 7710449050 bytes 5117279 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0
> class mq 1:5 root leaf 8004:
>  Sent 4157648675 bytes 2758990 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0
> class mq 1:6 root leaf 8005:
>  Sent 4159632195 bytes 2759990 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0
> class mq 1:7 root leaf 8006:
>  Sent 7003169603 bytes 4646912 pkt (dropped 0, overlimits 0 requeues 0)
>  backlog 0b 0p requeues 0
> ...
>
> Cc: Jamal Hadi Salim <jhs@mojatatu.com>
> Cc: Cong Wang <xiyou.wangcong@gmail.com>
> Cc: Jiri Pirko <jiri@resnulli.us>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Jakub Kicinski <kuba@kernel.org>
> Cc: Jonathan Lemon <jonathan.lemon@gmail.com>
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: Alexander Lobakin <alobakin@pm.me>
> Cc: Paolo Abeni <pabeni@redhat.com>
> Cc: Talal Ahmad <talalahmad@google.com>
> Cc: Kevin Hao <haokexin@gmail.com>
> Cc: Ilias Apalodimas <ilias.apalodimas@linaro.org>
> Cc: Kees Cook <keescook@chromium.org>
> Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
> Cc: Antoine Tenart <atenart@kernel.org>
> Cc: Wei Wang <weiwan@google.com>
> Cc: Arnd Bergmann <arnd@arndb.de>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
> ---
>  include/net/tc_act/tc_skbedit.h        |  1 +
>  include/uapi/linux/tc_act/tc_skbedit.h |  2 ++
>  net/sched/act_skbedit.c                | 49 ++++++++++++++++++++++++--
>  3 files changed, 50 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
> index 00bfee70609e..ee96e0fa6566 100644
> --- a/include/net/tc_act/tc_skbedit.h
> +++ b/include/net/tc_act/tc_skbedit.h
> @@ -17,6 +17,7 @@ struct tcf_skbedit_params {
>         u32 mark;
>         u32 mask;
>         u16 queue_mapping;
> +       u16 mapping_mod;
>         u16 ptype;
>         struct rcu_head rcu;
>  };
> diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h
> index 800e93377218..6cb6101208d0 100644
> --- a/include/uapi/linux/tc_act/tc_skbedit.h
> +++ b/include/uapi/linux/tc_act/tc_skbedit.h
> @@ -29,6 +29,7 @@
>  #define SKBEDIT_F_PTYPE                        0x8
>  #define SKBEDIT_F_MASK                 0x10
>  #define SKBEDIT_F_INHERITDSFIELD       0x20
> +#define SKBEDIT_F_TXQ_SKBHASH          0x40
>
>  struct tc_skbedit {
>         tc_gen;
> @@ -45,6 +46,7 @@ enum {
>         TCA_SKBEDIT_PTYPE,
>         TCA_SKBEDIT_MASK,
>         TCA_SKBEDIT_FLAGS,
> +       TCA_SKBEDIT_QUEUE_MAPPING_MAX,
>         __TCA_SKBEDIT_MAX
>  };
>  #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1)
> diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
> index d5799b4fc499..2634c725bc75 100644
> --- a/net/sched/act_skbedit.c
> +++ b/net/sched/act_skbedit.c
> @@ -23,6 +23,20 @@
>  static unsigned int skbedit_net_id;
>  static struct tc_action_ops act_skbedit_ops;
>
> +static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params,
> +                           struct sk_buff *skb)
> +{
> +       u16 queue_mapping = params->queue_mapping;
> +
> +       if (params->flags & SKBEDIT_F_TXQ_SKBHASH) {
> +               u32 hash = skb_get_hash(skb);
> +
> +               queue_mapping += hash % params->mapping_mod;
> +       }
> +
> +       return netdev_cap_txqueue(skb->dev, queue_mapping);
> +}
> +
>  static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
>                            struct tcf_result *res)
>  {
> @@ -62,7 +76,7 @@ static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
>  #ifdef CONFIG_NET_EGRESS
>                 netdev_xmit_skip_txqueue(true);
>  #endif
> -               skb_set_queue_mapping(skb, params->queue_mapping);
> +               skb_set_queue_mapping(skb, tcf_skbedit_hash(params, skb));
>         }
>         if (params->flags & SKBEDIT_F_MARK) {
>                 skb->mark &= ~params->mask;
> @@ -96,6 +110,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
>         [TCA_SKBEDIT_PTYPE]             = { .len = sizeof(u16) },
>         [TCA_SKBEDIT_MASK]              = { .len = sizeof(u32) },
>         [TCA_SKBEDIT_FLAGS]             = { .len = sizeof(u64) },
> +       [TCA_SKBEDIT_QUEUE_MAPPING_MAX] = { .len = sizeof(u16) },
>  };
>
>  static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
> @@ -112,6 +127,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
>         struct tcf_skbedit *d;
>         u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
>         u16 *queue_mapping = NULL, *ptype = NULL;
> +       u16 mapping_mod = 1;
>         bool exists = false;
>         int ret = 0, err;
>         u32 index;
> @@ -157,6 +173,25 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
>         if (tb[TCA_SKBEDIT_FLAGS] != NULL) {
>                 u64 *pure_flags = nla_data(tb[TCA_SKBEDIT_FLAGS]);
>
> +               if (*pure_flags & SKBEDIT_F_TXQ_SKBHASH) {
> +                       u16 *queue_mapping_max;
> +
> +                       if (!tb[TCA_SKBEDIT_QUEUE_MAPPING] ||
> +                           !tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]) {
> +                               NL_SET_ERR_MSG_MOD(extack, "Missing required range of queue_mapping.");
> +                               return -EINVAL;
> +                       }
> +
> +                       queue_mapping_max =
> +                               nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]);
> +                       if (*queue_mapping_max < *queue_mapping) {
> +                               NL_SET_ERR_MSG_MOD(extack, "The range of queue_mapping is invalid, max < min.");
> +                               return -EINVAL;
> +                       }
> +
> +                       mapping_mod = *queue_mapping_max - *queue_mapping + 1;
> +                       flags |= SKBEDIT_F_TXQ_SKBHASH;
> +               }
>                 if (*pure_flags & SKBEDIT_F_INHERITDSFIELD)
>                         flags |= SKBEDIT_F_INHERITDSFIELD;
>         }
> @@ -208,8 +243,10 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
>         params_new->flags = flags;
>         if (flags & SKBEDIT_F_PRIORITY)
>                 params_new->priority = *priority;
> -       if (flags & SKBEDIT_F_QUEUE_MAPPING)
> +       if (flags & SKBEDIT_F_QUEUE_MAPPING) {
>                 params_new->queue_mapping = *queue_mapping;
> +               params_new->mapping_mod = mapping_mod;
> +       }
>         if (flags & SKBEDIT_F_MARK)
>                 params_new->mark = *mark;
>         if (flags & SKBEDIT_F_PTYPE)
> @@ -276,6 +313,13 @@ static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
>                 goto nla_put_failure;
>         if (params->flags & SKBEDIT_F_INHERITDSFIELD)
>                 pure_flags |= SKBEDIT_F_INHERITDSFIELD;
> +       if (params->flags & SKBEDIT_F_TXQ_SKBHASH) {
> +               if (nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING_MAX,
> +                               params->queue_mapping + params->mapping_mod - 1))
> +                       goto nla_put_failure;
> +
> +               pure_flags |= SKBEDIT_F_TXQ_SKBHASH;
> +       }
>         if (pure_flags != 0 &&
>             nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags))
>                 goto nla_put_failure;
> @@ -325,6 +369,7 @@ static size_t tcf_skbedit_get_fill_size(const struct tc_action *act)
>         return nla_total_size(sizeof(struct tc_skbedit))
>                 + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_PRIORITY */
>                 + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING */
> +               + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING_MAX */
>                 + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MARK */
>                 + nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_PTYPE */
>                 + nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MASK */
> --
> 2.27.0
>
diff mbox series

Patch

diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index 00bfee70609e..ee96e0fa6566 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -17,6 +17,7 @@  struct tcf_skbedit_params {
 	u32 mark;
 	u32 mask;
 	u16 queue_mapping;
+	u16 mapping_mod;
 	u16 ptype;
 	struct rcu_head rcu;
 };
diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h
index 800e93377218..6cb6101208d0 100644
--- a/include/uapi/linux/tc_act/tc_skbedit.h
+++ b/include/uapi/linux/tc_act/tc_skbedit.h
@@ -29,6 +29,7 @@ 
 #define SKBEDIT_F_PTYPE			0x8
 #define SKBEDIT_F_MASK			0x10
 #define SKBEDIT_F_INHERITDSFIELD	0x20
+#define SKBEDIT_F_TXQ_SKBHASH		0x40
 
 struct tc_skbedit {
 	tc_gen;
@@ -45,6 +46,7 @@  enum {
 	TCA_SKBEDIT_PTYPE,
 	TCA_SKBEDIT_MASK,
 	TCA_SKBEDIT_FLAGS,
+	TCA_SKBEDIT_QUEUE_MAPPING_MAX,
 	__TCA_SKBEDIT_MAX
 };
 #define TCA_SKBEDIT_MAX (__TCA_SKBEDIT_MAX - 1)
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index d5799b4fc499..2634c725bc75 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -23,6 +23,20 @@ 
 static unsigned int skbedit_net_id;
 static struct tc_action_ops act_skbedit_ops;
 
+static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params,
+			    struct sk_buff *skb)
+{
+	u16 queue_mapping = params->queue_mapping;
+
+	if (params->flags & SKBEDIT_F_TXQ_SKBHASH) {
+		u32 hash = skb_get_hash(skb);
+
+		queue_mapping += hash % params->mapping_mod;
+	}
+
+	return netdev_cap_txqueue(skb->dev, queue_mapping);
+}
+
 static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
 			   struct tcf_result *res)
 {
@@ -62,7 +76,7 @@  static int tcf_skbedit_act(struct sk_buff *skb, const struct tc_action *a,
 #ifdef CONFIG_NET_EGRESS
 		netdev_xmit_skip_txqueue(true);
 #endif
-		skb_set_queue_mapping(skb, params->queue_mapping);
+		skb_set_queue_mapping(skb, tcf_skbedit_hash(params, skb));
 	}
 	if (params->flags & SKBEDIT_F_MARK) {
 		skb->mark &= ~params->mask;
@@ -96,6 +110,7 @@  static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 	[TCA_SKBEDIT_PTYPE]		= { .len = sizeof(u16) },
 	[TCA_SKBEDIT_MASK]		= { .len = sizeof(u32) },
 	[TCA_SKBEDIT_FLAGS]		= { .len = sizeof(u64) },
+	[TCA_SKBEDIT_QUEUE_MAPPING_MAX]	= { .len = sizeof(u16) },
 };
 
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
@@ -112,6 +127,7 @@  static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 	struct tcf_skbedit *d;
 	u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
 	u16 *queue_mapping = NULL, *ptype = NULL;
+	u16 mapping_mod = 1;
 	bool exists = false;
 	int ret = 0, err;
 	u32 index;
@@ -157,6 +173,25 @@  static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 	if (tb[TCA_SKBEDIT_FLAGS] != NULL) {
 		u64 *pure_flags = nla_data(tb[TCA_SKBEDIT_FLAGS]);
 
+		if (*pure_flags & SKBEDIT_F_TXQ_SKBHASH) {
+			u16 *queue_mapping_max;
+
+			if (!tb[TCA_SKBEDIT_QUEUE_MAPPING] ||
+			    !tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]) {
+				NL_SET_ERR_MSG_MOD(extack, "Missing required range of queue_mapping.");
+				return -EINVAL;
+			}
+
+			queue_mapping_max =
+				nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING_MAX]);
+			if (*queue_mapping_max < *queue_mapping) {
+				NL_SET_ERR_MSG_MOD(extack, "The range of queue_mapping is invalid, max < min.");
+				return -EINVAL;
+			}
+
+			mapping_mod = *queue_mapping_max - *queue_mapping + 1;
+			flags |= SKBEDIT_F_TXQ_SKBHASH;
+		}
 		if (*pure_flags & SKBEDIT_F_INHERITDSFIELD)
 			flags |= SKBEDIT_F_INHERITDSFIELD;
 	}
@@ -208,8 +243,10 @@  static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 	params_new->flags = flags;
 	if (flags & SKBEDIT_F_PRIORITY)
 		params_new->priority = *priority;
-	if (flags & SKBEDIT_F_QUEUE_MAPPING)
+	if (flags & SKBEDIT_F_QUEUE_MAPPING) {
 		params_new->queue_mapping = *queue_mapping;
+		params_new->mapping_mod = mapping_mod;
+	}
 	if (flags & SKBEDIT_F_MARK)
 		params_new->mark = *mark;
 	if (flags & SKBEDIT_F_PTYPE)
@@ -276,6 +313,13 @@  static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
 		goto nla_put_failure;
 	if (params->flags & SKBEDIT_F_INHERITDSFIELD)
 		pure_flags |= SKBEDIT_F_INHERITDSFIELD;
+	if (params->flags & SKBEDIT_F_TXQ_SKBHASH) {
+		if (nla_put_u16(skb, TCA_SKBEDIT_QUEUE_MAPPING_MAX,
+				params->queue_mapping + params->mapping_mod - 1))
+			goto nla_put_failure;
+
+		pure_flags |= SKBEDIT_F_TXQ_SKBHASH;
+	}
 	if (pure_flags != 0 &&
 	    nla_put(skb, TCA_SKBEDIT_FLAGS, sizeof(pure_flags), &pure_flags))
 		goto nla_put_failure;
@@ -325,6 +369,7 @@  static size_t tcf_skbedit_get_fill_size(const struct tc_action *act)
 	return nla_total_size(sizeof(struct tc_skbedit))
 		+ nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_PRIORITY */
 		+ nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING */
+		+ nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_QUEUE_MAPPING_MAX */
 		+ nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MARK */
 		+ nla_total_size(sizeof(u16)) /* TCA_SKBEDIT_PTYPE */
 		+ nla_total_size(sizeof(u32)) /* TCA_SKBEDIT_MASK */