diff mbox series

[net-next,v1,2/5] net: stmmac: support fp parameter of tc-mqprio

Message ID df005cc6b2f97e7ea373dcc356fb6a693f33263a.1722421644.git.0x1207@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net: stmmac: FPE via ethtool + tc | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 42 this patch: 42
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 9 of 9 maintainers
netdev/build_clang success Errors and warnings before: 43 this patch: 43
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 53 this patch: 53
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 38 this patch: 38
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-08-01--06-00 (tests: 707)

Commit Message

Furong Xu July 31, 2024, 10:43 a.m. UTC
tc-mqprio can select whether traffic classes are express or preemptible.

Tested on DWMAC CORE 5.10a

Signed-off-by: Furong Xu <0x1207@gmail.com>
---
 .../net/ethernet/stmicro/stmmac/dwmac4_core.c |  2 +
 drivers/net/ethernet/stmicro/stmmac/dwmac5.c  | 12 ++++
 drivers/net/ethernet/stmicro/stmmac/dwmac5.h  |  2 +
 drivers/net/ethernet/stmicro/stmmac/hwif.h    |  8 +++
 .../net/ethernet/stmicro/stmmac/stmmac_main.c |  2 +
 .../net/ethernet/stmicro/stmmac/stmmac_tc.c   | 61 +++++++++++++++++++
 6 files changed, 87 insertions(+)

Comments

Vladimir Oltean Aug. 1, 2024, 11:07 p.m. UTC | #1
On Wed, Jul 31, 2024 at 06:43:13PM +0800, Furong Xu wrote:
> tc-mqprio can select whether traffic classes are express or preemptible.
> 
> Tested on DWMAC CORE 5.10a
> 
> Signed-off-by: Furong Xu <0x1207@gmail.com>
> ---
>  .../net/ethernet/stmicro/stmmac/dwmac4_core.c |  2 +
>  drivers/net/ethernet/stmicro/stmmac/dwmac5.c  | 12 ++++
>  drivers/net/ethernet/stmicro/stmmac/dwmac5.h  |  2 +
>  drivers/net/ethernet/stmicro/stmmac/hwif.h    |  8 +++
>  .../net/ethernet/stmicro/stmmac/stmmac_main.c |  2 +
>  .../net/ethernet/stmicro/stmmac/stmmac_tc.c   | 61 +++++++++++++++++++
>  6 files changed, 87 insertions(+)
> 
> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
> index 5d132bada3fe..068859284691 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
> @@ -655,3 +655,15 @@ void dwmac5_fpe_set_add_frag_size(void __iomem *ioaddr, u32 add_frag_size)
>  
>  	writel(value, ioaddr + MTL_FPE_CTRL_STS);
>  }
> +
> +void dwmac5_fpe_set_preemptible_tcs(void __iomem *ioaddr, unsigned long tcs)
> +{
> +	u32 value;
> +
> +	value = readl(ioaddr + MTL_FPE_CTRL_STS);
> +
> +	value &= ~PEC;
> +	value |= FIELD_PREP(PEC, tcs);
> +
> +	writel(value, ioaddr + MTL_FPE_CTRL_STS);
> +}

Watch out here. I think the MTL_FPE_CTRL_STS[PEC] field is per TXQ, but
input from user space is per TC. There's a difference between the 2, that I'll
try to clarify below. But even ignoring the case of multiple TXQs per TC,
there's also the case of reverse TC:TXQ mappings: "num_tc 4 map 0 1 2 3
queues 1@3 1@2 1@1 1@0". When the user then says he wants TC 0 to be
preemptible, he really means TXQ 3. That's how this should be treated.

> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index 9b1cf81c50ea..a5e3316bc410 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -6256,6 +6256,8 @@ static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type,
>  	switch (type) {
>  	case TC_QUERY_CAPS:
>  		return stmmac_tc_query_caps(priv, priv, type_data);
> +	case TC_SETUP_QDISC_MQPRIO:
> +		return stmmac_tc_setup_mqprio(priv, priv, type_data);
>  	case TC_SETUP_BLOCK:
>  		return flow_block_cb_setup_simple(type_data,
>  						  &stmmac_block_cb_list,
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
> index 996f2bcd07a2..494fe2f68300 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
> @@ -1198,6 +1198,13 @@ static int tc_query_caps(struct stmmac_priv *priv,
>  			 struct tc_query_caps_base *base)
>  {
>  	switch (base->type) {
> +	case TC_SETUP_QDISC_MQPRIO: {
> +		struct tc_mqprio_caps *caps = base->caps;
> +
> +		caps->validate_queue_counts = true;
> +
> +		return 0;
> +	}
>  	case TC_SETUP_QDISC_TAPRIO: {
>  		struct tc_taprio_caps *caps = base->caps;
>  
> @@ -1214,6 +1221,59 @@ static int tc_query_caps(struct stmmac_priv *priv,
>  	}
>  }
>  
> +static void stmmac_reset_tc_mqprio(struct net_device *ndev)
> +{
> +	struct stmmac_priv *priv = netdev_priv(ndev);
> +
> +	netdev_reset_tc(ndev);
> +	netif_set_real_num_tx_queues(ndev, priv->plat->tx_queues_to_use);
> +
> +	stmmac_fpe_set_preemptible_tcs(priv, priv->ioaddr, 0);
> +}
> +
> +static int tc_setup_mqprio(struct stmmac_priv *priv,
> +			   struct tc_mqprio_qopt_offload *mqprio)
> +{
> +	struct tc_mqprio_qopt *qopt = &mqprio->qopt;
> +	struct net_device *ndev = priv->dev;
> +	int num_stack_tx_queues = 0;
> +	int num_tc = qopt->num_tc;
> +	int offset, count;
> +	int tc, err;
> +
> +	if (!num_tc) {
> +		stmmac_reset_tc_mqprio(ndev);
> +		return 0;
> +	}
> +
> +	err = netdev_set_num_tc(ndev, num_tc);
> +	if (err)
> +		return err;
> +
> +	for (tc = 0; tc < num_tc; tc++) {
> +		offset = qopt->offset[tc];
> +		count = qopt->count[tc];
> +		num_stack_tx_queues += count;
> +
> +		err = netdev_set_tc_queue(ndev, tc, count, offset);
> +		if (err)
> +			goto err_reset_tc;
> +	}

We might have a problem here, and I don't know if I'm well enough
equipped with DWMAC knowledge to help you solve it.

The way I understand mqprio is that it groups TX queues into traffic
classes. All traffic classes are in strict priority relative to each
other (TC 0 having the smallest priority). If multiple TX queues go to
the same traffic class, it is expected that the NIC performs round robin
dequeuing out of them. Then there's a prio_tc_map[], which maps
skb->priority values to traffic classes. On xmit, netdev_pick_tx()
chooses a random TX queue out of those assigned to the computed traffic
class for the packet. This skb_tx_hash() is the software enqueue
counterpart to what the NIC is expected to do in terms of scheduling.
Much of this is said in newer versions of "man tc-mqprio".
https://man7.org/linux/man-pages/man8/tc-mqprio.8.html

Where I was trying to get is that you aren't programming the TC to TXQ
mapping to hardware in any way, and you are accepting any mapping that
the user requests. This isn't okay.

I believe that the DWMAC TX scheduling algorithm is strict priority by
default, with plat->tx_sched_algorithm being configurable through device
tree properties to other values. Then, individual TX queues have the
"snps,priority" device tree property for configuring their scheduling
priority. All of that can go out of sync with what the user thinks he
configures through tc-mqprio, and badly.

Consider "num_tc 2 queues 3@0 2@3". The stack will think that TXQs 0, 1, 2
have one priority, and TXQs 3 and 4 another. But in reality, each TXQ
(say by default) has a priority equal to its index. netdev_pick_tx()
will think it's okay, for a packet belonging to TC0, to select a TXQ
based on hashing between indices 0, 1, 2. But in hardware, the packets
will get sent through TXQs with different priorities, based on nothing
more than pure chance. That is a disaster, and especially noticeable
when the mqprio mapping is applied through taprio, and there is a Qbv
schedule on the port. Some packets will be scheduled on the right time
slots, and some won't.

So the idea here is that you'll either have to experiment with
reprogramming the scheduling algorithm and TXQ priorities on mqprio
offload, or refuse offloading anything that doesn't match what the
hardware was pre-programmed with (through device tree, likely).

> +
> +	err = netif_set_real_num_tx_queues(ndev, num_stack_tx_queues);
> +	if (err)
> +		goto err_reset_tc;
> +
> +	stmmac_fpe_set_preemptible_tcs(priv, priv->ioaddr, mqprio->preemptible_tcs);
> +
> +	return 0;
> +
> +err_reset_tc:
> +	stmmac_reset_tc_mqprio(ndev);
> +
> +	return err;
> +}
diff mbox series

Patch

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index af871cce767e..68cdfd9e7c8c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -1270,6 +1270,7 @@  const struct stmmac_ops dwmac410_ops = {
 	.fpe_irq_status = dwmac5_fpe_irq_status,
 	.fpe_get_add_frag_size = dwmac5_fpe_get_add_frag_size,
 	.fpe_set_add_frag_size = dwmac5_fpe_set_add_frag_size,
+	.fpe_set_preemptible_tcs = dwmac5_fpe_set_preemptible_tcs,
 	.add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr,
 	.del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr,
 	.restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr,
@@ -1324,6 +1325,7 @@  const struct stmmac_ops dwmac510_ops = {
 	.fpe_irq_status = dwmac5_fpe_irq_status,
 	.fpe_get_add_frag_size = dwmac5_fpe_get_add_frag_size,
 	.fpe_set_add_frag_size = dwmac5_fpe_set_add_frag_size,
+	.fpe_set_preemptible_tcs = dwmac5_fpe_set_preemptible_tcs,
 	.add_hw_vlan_rx_fltr = dwmac4_add_hw_vlan_rx_fltr,
 	.del_hw_vlan_rx_fltr = dwmac4_del_hw_vlan_rx_fltr,
 	.restore_hw_vlan_rx_fltr = dwmac4_restore_hw_vlan_rx_fltr,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
index 5d132bada3fe..068859284691 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -655,3 +655,15 @@  void dwmac5_fpe_set_add_frag_size(void __iomem *ioaddr, u32 add_frag_size)
 
 	writel(value, ioaddr + MTL_FPE_CTRL_STS);
 }
+
+void dwmac5_fpe_set_preemptible_tcs(void __iomem *ioaddr, unsigned long tcs)
+{
+	u32 value;
+
+	value = readl(ioaddr + MTL_FPE_CTRL_STS);
+
+	value &= ~PEC;
+	value |= FIELD_PREP(PEC, tcs);
+
+	writel(value, ioaddr + MTL_FPE_CTRL_STS);
+}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
index 6b1c9d2da308..bce7e88418ac 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -40,6 +40,7 @@ 
 #define MAC_PPSx_WIDTH(x)		(0x00000b8c + ((x) * 0x10))
 
 #define MTL_FPE_CTRL_STS		0x00000c90
+#define PEC				GENMASK(15, 8)
 #define AFSZ				GENMASK(1, 0)
 
 #define MTL_RXP_CONTROL_STATUS		0x00000ca0
@@ -114,5 +115,6 @@  void dwmac5_fpe_send_mpacket(void __iomem *ioaddr,
 int dwmac5_fpe_irq_status(void __iomem *ioaddr, struct net_device *dev);
 int dwmac5_fpe_get_add_frag_size(void __iomem *ioaddr);
 void dwmac5_fpe_set_add_frag_size(void __iomem *ioaddr, u32 add_frag_size);
+void dwmac5_fpe_set_preemptible_tcs(void __iomem *ioaddr, unsigned long tcs);
 
 #endif /* __DWMAC5_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index f868ffc3e64f..109699d3df0a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -7,6 +7,7 @@ 
 
 #include <linux/netdevice.h>
 #include <linux/stmmac.h>
+#include <net/pkt_cls.h>
 
 #define stmmac_do_void_callback(__priv, __module, __cname,  __arg0, __args...) \
 ({ \
@@ -427,6 +428,7 @@  struct stmmac_ops {
 	int (*fpe_irq_status)(void __iomem *ioaddr, struct net_device *dev);
 	int (*fpe_get_add_frag_size)(void __iomem *ioaddr);
 	void (*fpe_set_add_frag_size)(void __iomem *ioaddr, u32 add_frag_size);
+	void (*fpe_set_preemptible_tcs)(void __iomem *ioaddr, unsigned long tcs);
 };
 
 #define stmmac_core_init(__priv, __args...) \
@@ -535,6 +537,8 @@  struct stmmac_ops {
 	stmmac_do_callback(__priv, mac, fpe_get_add_frag_size, __args)
 #define stmmac_fpe_set_add_frag_size(__priv, __args...) \
 	stmmac_do_void_callback(__priv, mac, fpe_set_add_frag_size, __args)
+#define stmmac_fpe_set_preemptible_tcs(__priv, __args...) \
+	stmmac_do_void_callback(__priv, mac, fpe_set_preemptible_tcs, __args)
 
 /* PTP and HW Timer helpers */
 struct stmmac_hwtimestamp {
@@ -622,6 +626,8 @@  struct stmmac_tc_ops {
 			 struct tc_etf_qopt_offload *qopt);
 	int (*query_caps)(struct stmmac_priv *priv,
 			  struct tc_query_caps_base *base);
+	int (*setup_mqprio)(struct stmmac_priv *priv,
+			    struct tc_mqprio_qopt_offload *qopt);
 };
 
 #define stmmac_tc_init(__priv, __args...) \
@@ -638,6 +644,8 @@  struct stmmac_tc_ops {
 	stmmac_do_callback(__priv, tc, setup_etf, __args)
 #define stmmac_tc_query_caps(__priv, __args...) \
 	stmmac_do_callback(__priv, tc, query_caps, __args)
+#define stmmac_tc_setup_mqprio(__priv, __args...) \
+	stmmac_do_callback(__priv, tc, setup_mqprio, __args)
 
 struct stmmac_counters;
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 9b1cf81c50ea..a5e3316bc410 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -6256,6 +6256,8 @@  static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type,
 	switch (type) {
 	case TC_QUERY_CAPS:
 		return stmmac_tc_query_caps(priv, priv, type_data);
+	case TC_SETUP_QDISC_MQPRIO:
+		return stmmac_tc_setup_mqprio(priv, priv, type_data);
 	case TC_SETUP_BLOCK:
 		return flow_block_cb_setup_simple(type_data,
 						  &stmmac_block_cb_list,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 996f2bcd07a2..494fe2f68300 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -1198,6 +1198,13 @@  static int tc_query_caps(struct stmmac_priv *priv,
 			 struct tc_query_caps_base *base)
 {
 	switch (base->type) {
+	case TC_SETUP_QDISC_MQPRIO: {
+		struct tc_mqprio_caps *caps = base->caps;
+
+		caps->validate_queue_counts = true;
+
+		return 0;
+	}
 	case TC_SETUP_QDISC_TAPRIO: {
 		struct tc_taprio_caps *caps = base->caps;
 
@@ -1214,6 +1221,59 @@  static int tc_query_caps(struct stmmac_priv *priv,
 	}
 }
 
+static void stmmac_reset_tc_mqprio(struct net_device *ndev)
+{
+	struct stmmac_priv *priv = netdev_priv(ndev);
+
+	netdev_reset_tc(ndev);
+	netif_set_real_num_tx_queues(ndev, priv->plat->tx_queues_to_use);
+
+	stmmac_fpe_set_preemptible_tcs(priv, priv->ioaddr, 0);
+}
+
+static int tc_setup_mqprio(struct stmmac_priv *priv,
+			   struct tc_mqprio_qopt_offload *mqprio)
+{
+	struct tc_mqprio_qopt *qopt = &mqprio->qopt;
+	struct net_device *ndev = priv->dev;
+	int num_stack_tx_queues = 0;
+	int num_tc = qopt->num_tc;
+	int offset, count;
+	int tc, err;
+
+	if (!num_tc) {
+		stmmac_reset_tc_mqprio(ndev);
+		return 0;
+	}
+
+	err = netdev_set_num_tc(ndev, num_tc);
+	if (err)
+		return err;
+
+	for (tc = 0; tc < num_tc; tc++) {
+		offset = qopt->offset[tc];
+		count = qopt->count[tc];
+		num_stack_tx_queues += count;
+
+		err = netdev_set_tc_queue(ndev, tc, count, offset);
+		if (err)
+			goto err_reset_tc;
+	}
+
+	err = netif_set_real_num_tx_queues(ndev, num_stack_tx_queues);
+	if (err)
+		goto err_reset_tc;
+
+	stmmac_fpe_set_preemptible_tcs(priv, priv->ioaddr, mqprio->preemptible_tcs);
+
+	return 0;
+
+err_reset_tc:
+	stmmac_reset_tc_mqprio(ndev);
+
+	return err;
+}
+
 const struct stmmac_tc_ops dwmac510_tc_ops = {
 	.init = tc_init,
 	.setup_cls_u32 = tc_setup_cls_u32,
@@ -1222,4 +1282,5 @@  const struct stmmac_tc_ops dwmac510_tc_ops = {
 	.setup_taprio = tc_setup_taprio,
 	.setup_etf = tc_setup_etf,
 	.query_caps = tc_query_caps,
+	.setup_mqprio = tc_setup_mqprio,
 };