
[net-next,2/5] net: dpaa: eliminate NR_CPUS dependency in egress_fqs[] and conf_fqs[]

Message ID 20240710230025.46487-3-vladimir.oltean@nxp.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series Eliminate CONFIG_NR_CPUS dependency in dpaa-eth and enable COMPILE_TEST in fsl_qbman

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 8 this patch: 8
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 158 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Vladimir Oltean July 10, 2024, 11 p.m. UTC
The driver uses the DPAA_TC_TXQ_NUM and DPAA_ETH_TXQ_NUM macros for TX
queue handling, and they depend on CONFIG_NR_CPUS.

In generic .config files, these can take very large values (8096 CPUs),
far beyond the 1-24 CPUs of the systems that DPAA1 is integrated in. As
a result, we allocate a lot of resources that will never be used. Those
are:
- system memory
- system memory
- QMan FQIDs as managed by qman_alloc_fqid_range(), as quantified below.
  This is especially painful since currently, when booting with
  CONFIG_NR_CPUS=8096, an LS1046A-RDB system will only manage to probe
  3 of its 6 interfaces. The rest will run out of FQD space
  ("/reserved-memory/qman-fqd" in the device tree) and fail at the
  qman_create_fq() stage of the probing process.
- netdev queues as alloc_etherdev_mq() argument. The high queue indices
  are simply hidden from the network stack after the call to
  netif_set_real_num_tx_queues().
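
As a rough estimate of the FQID waste (based on DPAA_TC_NUM = 4 from
dpaa_eth.h and the two dpaa_fq_alloc() calls in this patch): with
CONFIG_NR_CPUS=8096, DPAA_ETH_TXQ_NUM = 4 * 8096 = 32384, and each
interface allocates that many FQs twice (FQ_TYPE_TX plus
FQ_TYPE_TX_CONF_MQ), i.e. around 64768 FQIDs per interface, versus at
most 2 * 4 * 24 = 192 on a 24-core SoC.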

With just a tiny bit more effort, we can replace the NR_CPUS
compile-time constant with the num_possible_cpus() run-time value and
dynamically allocate the egress_fqs[] and conf_fqs[] arrays. Even on a
system with a high CONFIG_NR_CPUS, num_possible_cpus() remains bounded
by the number of cores actually present on the SoC.

The replacement is as follows:
- DPAA_TC_TXQ_NUM -> dpaa_num_txqs_per_tc()
- DPAA_ETH_TXQ_NUM -> dpaa_max_num_txqs()
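
For example, on a 4-core LS1046A (assuming all 4 cores are possible
CPUs), dpaa_num_txqs_per_tc() returns 4 and dpaa_max_num_txqs() returns
4 * 4 = 16, independent of CONFIG_NR_CPUS.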

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
---
 .../net/ethernet/freescale/dpaa/dpaa_eth.c    | 43 +++++++++++++------
 .../net/ethernet/freescale/dpaa/dpaa_eth.h    | 20 ++++++---
 2 files changed, 43 insertions(+), 20 deletions(-)

Comments

Jakub Kicinski July 13, 2024, 10:35 p.m. UTC | #1
On Thu, 11 Jul 2024 02:00:22 +0300 Vladimir Oltean wrote:
> +	priv->egress_fqs = devm_kcalloc(dev, dpaa_max_num_txqs(),
> +					sizeof(*priv->egress_fqs),
> +					GFP_KERNEL);
> +	if (!priv->egress_fqs)
> +		goto free_netdev;
> +
> +	priv->conf_fqs = devm_kcalloc(dev, dpaa_max_num_txqs(),
> +				      sizeof(*priv->conf_fqs),
> +				      GFP_KERNEL);
> +	if (!priv->conf_fqs)
> +		goto free_netdev;

Gotta set err before jumping
Vladimir Oltean July 13, 2024, 10:37 p.m. UTC | #2
On Sat, Jul 13, 2024 at 03:35:32PM -0700, Jakub Kicinski wrote:
> On Thu, 11 Jul 2024 02:00:22 +0300 Vladimir Oltean wrote:
> > +	priv->egress_fqs = devm_kcalloc(dev, dpaa_max_num_txqs(),
> > +					sizeof(*priv->egress_fqs),
> > +					GFP_KERNEL);
> > +	if (!priv->egress_fqs)
> > +		goto free_netdev;
> > +
> > +	priv->conf_fqs = devm_kcalloc(dev, dpaa_max_num_txqs(),
> > +				      sizeof(*priv->conf_fqs),
> > +				      GFP_KERNEL);
> > +	if (!priv->conf_fqs)
> > +		goto free_netdev;
> 
> Gotta set err before jumping
> -- 
> pw-bot: cr

Good point. Thanks for the review.
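
For reference, a minimal sketch of the requested change (assuming the
existing err variable and free_netdev label in dpaa_eth_probe(); the
actual follow-up may differ):

	/* Set err before jumping to the error path, per review */
	priv->egress_fqs = devm_kcalloc(dev, dpaa_max_num_txqs(),
					sizeof(*priv->egress_fqs),
					GFP_KERNEL);
	if (!priv->egress_fqs) {
		err = -ENOMEM;
		goto free_netdev;
	}

	priv->conf_fqs = devm_kcalloc(dev, dpaa_max_num_txqs(),
				      sizeof(*priv->conf_fqs),
				      GFP_KERNEL);
	if (!priv->conf_fqs) {
		err = -ENOMEM;
		goto free_netdev;
	}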

Patch

diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index c856b556929d..7b0317020c89 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -371,6 +371,7 @@  static int dpaa_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
 			 void *type_data)
 {
 	struct dpaa_priv *priv = netdev_priv(net_dev);
+	int num_txqs_per_tc = dpaa_num_txqs_per_tc();
 	struct tc_mqprio_qopt *mqprio = type_data;
 	u8 num_tc;
 	int i;
@@ -398,12 +399,12 @@  static int dpaa_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
 	netdev_set_num_tc(net_dev, num_tc);
 
 	for (i = 0; i < num_tc; i++)
-		netdev_set_tc_queue(net_dev, i, DPAA_TC_TXQ_NUM,
-				    i * DPAA_TC_TXQ_NUM);
+		netdev_set_tc_queue(net_dev, i, num_txqs_per_tc,
+				    i * num_txqs_per_tc);
 
 out:
 	priv->num_tc = num_tc ? : 1;
-	netif_set_real_num_tx_queues(net_dev, priv->num_tc * DPAA_TC_TXQ_NUM);
+	netif_set_real_num_tx_queues(net_dev, priv->num_tc * num_txqs_per_tc);
 	return 0;
 }
 
@@ -649,7 +650,7 @@  static inline void dpaa_assign_wq(struct dpaa_fq *fq, int idx)
 		fq->wq = 6;
 		break;
 	case FQ_TYPE_TX:
-		switch (idx / DPAA_TC_TXQ_NUM) {
+		switch (idx / dpaa_num_txqs_per_tc()) {
 		case 0:
 			/* Low priority (best effort) */
 			fq->wq = 6;
@@ -667,8 +668,8 @@  static inline void dpaa_assign_wq(struct dpaa_fq *fq, int idx)
 			fq->wq = 0;
 			break;
 		default:
-			WARN(1, "Too many TX FQs: more than %d!\n",
-			     DPAA_ETH_TXQ_NUM);
+			WARN(1, "Too many TX FQs: more than %zu!\n",
+			     dpaa_max_num_txqs());
 		}
 		break;
 	default:
@@ -740,7 +741,8 @@  static int dpaa_alloc_all_fqs(struct device *dev, struct list_head *list,
 
 	port_fqs->rx_pcdq = &dpaa_fq[0];
 
-	if (!dpaa_fq_alloc(dev, 0, DPAA_ETH_TXQ_NUM, list, FQ_TYPE_TX_CONF_MQ))
+	if (!dpaa_fq_alloc(dev, 0, dpaa_max_num_txqs(), list,
+			   FQ_TYPE_TX_CONF_MQ))
 		goto fq_alloc_failed;
 
 	dpaa_fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_TX_ERROR);
@@ -755,7 +757,7 @@  static int dpaa_alloc_all_fqs(struct device *dev, struct list_head *list,
 
 	port_fqs->tx_defq = &dpaa_fq[0];
 
-	if (!dpaa_fq_alloc(dev, 0, DPAA_ETH_TXQ_NUM, list, FQ_TYPE_TX))
+	if (!dpaa_fq_alloc(dev, 0, dpaa_max_num_txqs(), list, FQ_TYPE_TX))
 		goto fq_alloc_failed;
 
 	return 0;
@@ -972,7 +974,7 @@  static int dpaa_fq_setup(struct dpaa_priv *priv,
 			/* If we have more Tx queues than the number of cores,
 			 * just ignore the extra ones.
 			 */
-			if (egress_cnt < DPAA_ETH_TXQ_NUM)
+			if (egress_cnt < dpaa_max_num_txqs())
 				priv->egress_fqs[egress_cnt++] = &fq->fq_base;
 			break;
 		case FQ_TYPE_TX_CONF_MQ:
@@ -992,12 +994,12 @@  static int dpaa_fq_setup(struct dpaa_priv *priv,
 	}
 
 	 /* Make sure all CPUs receive a corresponding Tx queue. */
-	while (egress_cnt < DPAA_ETH_TXQ_NUM) {
+	while (egress_cnt < dpaa_max_num_txqs()) {
 		list_for_each_entry(fq, &priv->dpaa_fq_list, list) {
 			if (fq->fq_type != FQ_TYPE_TX)
 				continue;
 			priv->egress_fqs[egress_cnt++] = &fq->fq_base;
-			if (egress_cnt == DPAA_ETH_TXQ_NUM)
+			if (egress_cnt == dpaa_max_num_txqs())
 				break;
 		}
 	}
@@ -1012,7 +1014,7 @@  static inline int dpaa_tx_fq_to_id(const struct dpaa_priv *priv,
 {
 	int i;
 
-	for (i = 0; i < DPAA_ETH_TXQ_NUM; i++)
+	for (i = 0; i < dpaa_max_num_txqs(); i++)
 		if (priv->egress_fqs[i] == tx_fq)
 			return i;
 
@@ -3332,7 +3334,7 @@  static int dpaa_eth_probe(struct platform_device *pdev)
 	/* Allocate this early, so we can store relevant information in
 	 * the private area
 	 */
-	net_dev = alloc_etherdev_mq(sizeof(*priv), DPAA_ETH_TXQ_NUM);
+	net_dev = alloc_etherdev_mq(sizeof(*priv), dpaa_max_num_txqs());
 	if (!net_dev) {
 		dev_err(dev, "alloc_etherdev_mq() failed\n");
 		return -ENOMEM;
@@ -3347,6 +3349,18 @@  static int dpaa_eth_probe(struct platform_device *pdev)
 
 	priv->msg_enable = netif_msg_init(debug, DPAA_MSG_DEFAULT);
 
+	priv->egress_fqs = devm_kcalloc(dev, dpaa_max_num_txqs(),
+					sizeof(*priv->egress_fqs),
+					GFP_KERNEL);
+	if (!priv->egress_fqs)
+		goto free_netdev;
+
+	priv->conf_fqs = devm_kcalloc(dev, dpaa_max_num_txqs(),
+				      sizeof(*priv->conf_fqs),
+				      GFP_KERNEL);
+	if (!priv->conf_fqs)
+		goto free_netdev;
+
 	mac_dev = dpaa_mac_dev_get(pdev);
 	if (IS_ERR(mac_dev)) {
 		netdev_err(net_dev, "dpaa_mac_dev_get() failed\n");
@@ -3472,7 +3486,8 @@  static int dpaa_eth_probe(struct platform_device *pdev)
 	}
 
 	priv->num_tc = 1;
-	netif_set_real_num_tx_queues(net_dev, priv->num_tc * DPAA_TC_TXQ_NUM);
+	netif_set_real_num_tx_queues(net_dev,
+				     priv->num_tc * dpaa_num_txqs_per_tc());
 
 	/* Initialize NAPI */
 	err = dpaa_napi_add(net_dev);
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
index ac3c8ed57bbe..7ed659eb08de 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
@@ -18,10 +18,6 @@ 
 
 /* Number of prioritised traffic classes */
 #define DPAA_TC_NUM		4
-/* Number of Tx queues per traffic class */
-#define DPAA_TC_TXQ_NUM		NR_CPUS
-/* Total number of Tx queues */
-#define DPAA_ETH_TXQ_NUM	(DPAA_TC_NUM * DPAA_TC_TXQ_NUM)
 
 /* More detailed FQ types - used for fine-grained WQ assignments */
 enum dpaa_fq_type {
@@ -142,8 +138,8 @@  struct dpaa_priv {
 	struct mac_device *mac_dev;
 	struct device *rx_dma_dev;
 	struct device *tx_dma_dev;
-	struct qman_fq *egress_fqs[DPAA_ETH_TXQ_NUM];
-	struct qman_fq *conf_fqs[DPAA_ETH_TXQ_NUM];
+	struct qman_fq **egress_fqs;
+	struct qman_fq **conf_fqs;
 
 	u16 channel;
 	struct list_head dpaa_fq_list;
@@ -185,4 +181,16 @@  extern const struct ethtool_ops dpaa_ethtool_ops;
 /* from dpaa_eth_sysfs.c */
 void dpaa_eth_sysfs_remove(struct device *dev);
 void dpaa_eth_sysfs_init(struct device *dev);
+
+static inline size_t dpaa_num_txqs_per_tc(void)
+{
+	return num_possible_cpus();
+}
+
+/* Total number of Tx queues */
+static inline size_t dpaa_max_num_txqs(void)
+{
+	return DPAA_TC_NUM * dpaa_num_txqs_per_tc();
+}
+
 #endif	/* __DPAA_H */