diff mbox series

[V2] octeontx2-pf: Add egress PFC support

Message ID 20220824060006.1430587-1-sumang@marvell.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [V2] octeontx2-pf: Add egress PFC support | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Suman Ghosh Aug. 24, 2022, 6 a.m. UTC
As of now all transmit queues transmit packets out of same scheduler
queue hierarchy. Due to this PFC frames sent by peer are not handled
properly, either all transmit queues are backpressured or none.
To fix this when user enables PFC for a given priority map relavant
transmit queue to a different scheduler queue hierarcy, so that
backpressure is applied only to the traffic egressing out of that TXQ.

Signed-off-by: Suman Ghosh <sumang@marvell.com>
---
Changes since v1:
- Re-structured code to minimize the use of compile time macro CONFIG_DCB

 .../ethernet/marvell/octeontx2/nic/cn10k.c    |   3 +-
 .../marvell/octeontx2/nic/otx2_common.c       |  57 +++-
 .../marvell/octeontx2/nic/otx2_common.h       |  28 +-
 .../marvell/octeontx2/nic/otx2_dcbnl.c        | 302 ++++++++++++++++++
 .../ethernet/marvell/octeontx2/nic/otx2_pf.c  |  56 +++-
 5 files changed, 429 insertions(+), 17 deletions(-)

Comments

Jakub Kicinski Aug. 25, 2022, 2:37 a.m. UTC | #1
On Wed, 24 Aug 2022 11:30:06 +0530 Suman Ghosh wrote:
> As of now all transmit queues transmit packets out of same scheduler
> queue hierarchy. Due to this PFC frames sent by peer are not handled
> properly, either all transmit queues are backpressured or none.
> To fix this when user enables PFC for a given priority map relavant
> transmit queue to a different scheduler queue hierarcy, so that
> backpressure is applied only to the traffic egressing out of that TXQ.

clang says:

drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c:129:6: warning: variable 'err' set but not used [-Wunused-but-set-variable]
        int err;
            ^
Suman Ghosh Aug. 25, 2022, 4:42 a.m. UTC | #2
>External Email
>
>----------------------------------------------------------------------
>On Wed, 24 Aug 2022 11:30:06 +0530 Suman Ghosh wrote:
>> As of now all transmit queues transmit packets out of same scheduler
>> queue hierarchy. Due to this PFC frames sent by peer are not handled
>> properly, either all transmit queues are backpressured or none.
>> To fix this when user enables PFC for a given priority map relavant
>> transmit queue to a different scheduler queue hierarcy, so that
>> backpressure is applied only to the traffic egressing out of that TXQ.
>
>clang says:
>
>drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c:129:6: warning:
>variable 'err' set but not used [-Wunused-but-set-variable]
>        int err;
>            ^
[Suman] Fixed in patch V3
diff mbox series

Patch

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
index fd4f083c699e..826f691de259 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c
@@ -86,8 +86,7 @@  int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
 	aq->sq.max_sqe_size = NIX_MAXSQESZ_W16; /* 128 byte */
 	aq->sq.cq_ena = 1;
 	aq->sq.ena = 1;
-	/* Only one SMQ is allocated, map all SQ's to that SMQ  */
-	aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
+	aq->sq.smq = otx2_get_smq_idx(pfvf, qidx);
 	aq->sq.smq_rr_weight = mtu_to_dwrr_weight(pfvf, pfvf->tx_max_pktlen);
 	aq->sq.default_chan = pfvf->hw.tx_chan_base;
 	aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index d686c7b6252f..84eb8ce7d1bd 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -586,8 +586,9 @@  void otx2_get_mac_from_af(struct net_device *netdev)
 }
 EXPORT_SYMBOL(otx2_get_mac_from_af);
 
-int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
+int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool txschq_for_pfc)
 {
+	u16 (*schq_list)[MAX_TXSCHQ_PER_FUNC];
 	struct otx2_hw *hw = &pfvf->hw;
 	struct nix_txschq_config *req;
 	u64 schq, parent;
@@ -602,7 +603,13 @@  int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
 	req->lvl = lvl;
 	req->num_regs = 1;
 
-	schq = hw->txschq_list[lvl][0];
+	schq_list = hw->txschq_list;
+#ifdef CONFIG_DCB
+	if (txschq_for_pfc)
+		schq_list = pfvf->pfc_schq_list;
+#endif
+
+	schq = schq_list[lvl][prio];
 	/* Set topology e.t.c configuration */
 	if (lvl == NIX_TXSCH_LVL_SMQ) {
 		req->reg[0] = NIX_AF_SMQX_CFG(schq);
@@ -611,7 +618,7 @@  int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
 				  (0x2ULL << 36);
 		req->num_regs++;
 		/* MDQ config */
-		parent =  hw->txschq_list[NIX_TXSCH_LVL_TL4][0];
+		parent = schq_list[NIX_TXSCH_LVL_TL4][prio];
 		req->reg[1] = NIX_AF_MDQX_PARENT(schq);
 		req->regval[1] = parent << 16;
 		req->num_regs++;
@@ -619,14 +626,14 @@  int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
 		req->reg[2] = NIX_AF_MDQX_SCHEDULE(schq);
 		req->regval[2] =  dwrr_val;
 	} else if (lvl == NIX_TXSCH_LVL_TL4) {
-		parent =  hw->txschq_list[NIX_TXSCH_LVL_TL3][0];
+		parent = schq_list[NIX_TXSCH_LVL_TL3][prio];
 		req->reg[0] = NIX_AF_TL4X_PARENT(schq);
 		req->regval[0] = parent << 16;
 		req->num_regs++;
 		req->reg[1] = NIX_AF_TL4X_SCHEDULE(schq);
 		req->regval[1] = dwrr_val;
 	} else if (lvl == NIX_TXSCH_LVL_TL3) {
-		parent = hw->txschq_list[NIX_TXSCH_LVL_TL2][0];
+		parent = schq_list[NIX_TXSCH_LVL_TL2][prio];
 		req->reg[0] = NIX_AF_TL3X_PARENT(schq);
 		req->regval[0] = parent << 16;
 		req->num_regs++;
@@ -635,11 +642,13 @@  int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
 		if (lvl == hw->txschq_link_cfg_lvl) {
 			req->num_regs++;
 			req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq, hw->tx_link);
-			/* Enable this queue and backpressure */
-			req->regval[2] = BIT_ULL(13) | BIT_ULL(12);
+			/* Enable this queue and backpressure
+			 * and set relative channel
+			 */
+			req->regval[2] = BIT_ULL(13) | BIT_ULL(12) | prio;
 		}
 	} else if (lvl == NIX_TXSCH_LVL_TL2) {
-		parent =  hw->txschq_list[NIX_TXSCH_LVL_TL1][0];
+		parent = schq_list[NIX_TXSCH_LVL_TL1][prio];
 		req->reg[0] = NIX_AF_TL2X_PARENT(schq);
 		req->regval[0] = parent << 16;
 
@@ -650,8 +659,10 @@  int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
 		if (lvl == hw->txschq_link_cfg_lvl) {
 			req->num_regs++;
 			req->reg[2] = NIX_AF_TL3_TL2X_LINKX_CFG(schq, hw->tx_link);
-			/* Enable this queue and backpressure */
-			req->regval[2] = BIT_ULL(13) | BIT_ULL(12);
+			/* Enable this queue and backpressure
+			 * and set relative channel
+			 */
+			req->regval[2] = BIT_ULL(13) | BIT_ULL(12) | prio;
 		}
 	} else if (lvl == NIX_TXSCH_LVL_TL1) {
 		/* Default config for TL1.
@@ -677,6 +688,29 @@  int otx2_txschq_config(struct otx2_nic *pfvf, int lvl)
 	return otx2_sync_mbox_msg(&pfvf->mbox);
 }
 
+int otx2_smq_flush(struct otx2_nic *pfvf, int smq)
+{
+	struct nix_txschq_config *req;
+	int rc;
+
+	mutex_lock(&pfvf->mbox.lock);
+
+	req = otx2_mbox_alloc_msg_nix_txschq_cfg(&pfvf->mbox);
+	if (!req) {
+		mutex_unlock(&pfvf->mbox.lock);
+		return -ENOMEM;
+	}
+
+	req->lvl = NIX_TXSCH_LVL_SMQ;
+	req->reg[0] = NIX_AF_SMQX_CFG(smq);
+	req->regval[0] |= BIT_ULL(49);
+	req->num_regs++;
+
+	rc = otx2_sync_mbox_msg(&pfvf->mbox);
+	mutex_unlock(&pfvf->mbox.lock);
+	return rc;
+}
+
 int otx2_txsch_alloc(struct otx2_nic *pfvf)
 {
 	struct nix_txsch_alloc_req *req;
@@ -806,8 +840,7 @@  int otx2_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura)
 	aq->sq.max_sqe_size = NIX_MAXSQESZ_W16; /* 128 byte */
 	aq->sq.cq_ena = 1;
 	aq->sq.ena = 1;
-	/* Only one SMQ is allocated, map all SQ's to that SMQ  */
-	aq->sq.smq = pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
+	aq->sq.smq = otx2_get_smq_idx(pfvf, qidx);
 	aq->sq.smq_rr_quantum = mtu_to_dwrr_weight(pfvf, pfvf->tx_max_pktlen);
 	aq->sq.default_chan = pfvf->hw.tx_chan_base;
 	aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index b28029cc4316..23948626b1ef 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -40,6 +40,11 @@ 
 
 #define NAME_SIZE                               32
 
+#ifdef CONFIG_DCB
+/* Max priority supported for PFC */
+#define NIX_PF_PFC_PRIO_MAX			8
+#endif
+
 enum arua_mapped_qtypes {
 	AURA_NIX_RQ,
 	AURA_NIX_SQ,
@@ -196,7 +201,7 @@  struct otx2_hw {
 
 	/* NIX */
 	u8			txschq_link_cfg_lvl;
-	u16		txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
+	u16			txschq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
 	u16			matchall_ipolicer;
 	u32			dwrr_mtu;
 
@@ -415,6 +420,8 @@  struct otx2_nic {
 	/* PFC */
 	u8			pfc_en;
 	u8			*queue_to_pfc_map;
+	u16			pfc_schq_list[NIX_TXSCH_LVL_CNT][MAX_TXSCHQ_PER_FUNC];
+	bool			pfc_alloc_status[NIX_PF_PFC_PRIO_MAX];
 #endif
 
 	/* napi event count. It is needed for adaptive irq coalescing. */
@@ -785,6 +792,16 @@  static inline void otx2_dma_unmap_page(struct otx2_nic *pfvf,
 			     dir, DMA_ATTR_SKIP_CPU_SYNC);
 }
 
+static inline u16 otx2_get_smq_idx(struct otx2_nic *pfvf, u16 qidx)
+{
+#ifdef CONFIG_DCB
+	if (pfvf->pfc_alloc_status[qidx])
+		return pfvf->pfc_schq_list[NIX_TXSCH_LVL_SMQ][qidx];
+#endif
+
+	return pfvf->hw.txschq_list[NIX_TXSCH_LVL_SMQ][0];
+}
+
 /* MSI-X APIs */
 void otx2_free_cints(struct otx2_nic *pfvf, int n);
 void otx2_set_cints_affinity(struct otx2_nic *pfvf);
@@ -807,7 +824,7 @@  void otx2_free_aura_ptr(struct otx2_nic *pfvf, int type);
 void otx2_sq_free_sqbs(struct otx2_nic *pfvf);
 int otx2_config_nix(struct otx2_nic *pfvf);
 int otx2_config_nix_queues(struct otx2_nic *pfvf);
-int otx2_txschq_config(struct otx2_nic *pfvf, int lvl);
+int otx2_txschq_config(struct otx2_nic *pfvf, int lvl, int prio, bool pfc_en);
 int otx2_txsch_alloc(struct otx2_nic *pfvf);
 int otx2_txschq_stop(struct otx2_nic *pfvf);
 void otx2_sqb_flush(struct otx2_nic *pfvf);
@@ -888,6 +905,8 @@  bool otx2_xdp_sq_append_pkt(struct otx2_nic *pfvf, u64 iova, int len, u16 qidx);
 u16 otx2_get_max_mtu(struct otx2_nic *pfvf);
 int otx2_handle_ntuple_tc_features(struct net_device *netdev,
 				   netdev_features_t features);
+int otx2_smq_flush(struct otx2_nic *pfvf, int smq);
+
 /* tc support */
 int otx2_init_tc(struct otx2_nic *nic);
 void otx2_shutdown_tc(struct otx2_nic *nic);
@@ -907,5 +926,10 @@  void otx2_dmacflt_update_pfmac_flow(struct otx2_nic *pfvf);
 void otx2_update_bpid_in_rqctx(struct otx2_nic *pfvf, int vlan_prio, int qidx, bool pfc_enable);
 int otx2_config_priority_flow_ctrl(struct otx2_nic *pfvf);
 int otx2_dcbnl_set_ops(struct net_device *dev);
+/* PFC support */
+int otx2_pfc_txschq_config(struct otx2_nic *pfvf);
+int otx2_pfc_txschq_alloc(struct otx2_nic *pfvf);
+int otx2_pfc_txschq_update(struct otx2_nic *pfvf);
+int otx2_pfc_txschq_stop(struct otx2_nic *pfvf);
 #endif
 #endif /* OTX2_COMMON_H */
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
index 723d2506d309..82bd8cd496c5 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_dcbnl.c
@@ -7,6 +7,291 @@ 
 
 #include "otx2_common.h"
 
+static int otx2_check_pfc_config(struct otx2_nic *pfvf)
+{
+	u8 tx_queues = pfvf->hw.tx_queues, prio;
+	u8 pfc_en = pfvf->pfc_en;
+
+	for (prio = 0; prio < NIX_PF_PFC_PRIO_MAX; prio++) {
+		if ((pfc_en & (1 << prio)) &&
+		    prio > tx_queues - 1) {
+			dev_warn(pfvf->dev,
+				 "Increase number of tx queues from %d to %d to support PFC.\n",
+				 tx_queues, prio + 1);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+int otx2_pfc_txschq_config(struct otx2_nic *pfvf)
+{
+	u8 pfc_en, pfc_bit_set;
+	int prio, lvl, err;
+
+	pfc_en = pfvf->pfc_en;
+	for (prio = 0; prio < NIX_PF_PFC_PRIO_MAX; prio++) {
+		pfc_bit_set = pfc_en & (1 << prio);
+
+		/* Either PFC bit is not set
+		 * or tx scheduler is not allocated for the priority
+		 */
+		if (!pfc_bit_set || !pfvf->pfc_alloc_status[prio])
+			continue;
+
+		/* configure the scheduler for the tls*/
+		for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
+			err = otx2_txschq_config(pfvf, lvl, prio, true);
+			if (err) {
+				dev_err(pfvf->dev,
+					"%s configure PFC tx schq for lvl:%d, prio:%d failed!\n",
+					__func__, lvl, prio);
+				return err;
+			}
+		}
+	}
+
+	return 0;
+}
+
+static int otx2_pfc_txschq_alloc_one(struct otx2_nic *pfvf, u8 prio)
+{
+	struct nix_txsch_alloc_req *req;
+	struct nix_txsch_alloc_rsp *rsp;
+	int lvl, rc;
+
+	/* Get memory to put this msg */
+	req = otx2_mbox_alloc_msg_nix_txsch_alloc(&pfvf->mbox);
+	if (!req)
+		return -ENOMEM;
+
+	/* Request one schq per level upto max level as configured
+	 * link config level. These rest of the scheduler can be
+	 * same as hw.txschq_list.
+	 */
+	for (lvl = 0; lvl < pfvf->hw.txschq_link_cfg_lvl; lvl++)
+		req->schq[lvl] = 1;
+
+	rc = otx2_sync_mbox_msg(&pfvf->mbox);
+	if (rc)
+		return rc;
+
+	rsp = (struct nix_txsch_alloc_rsp *)
+	      otx2_mbox_get_rsp(&pfvf->mbox.mbox, 0, &req->hdr);
+	if (IS_ERR(rsp))
+		return PTR_ERR(rsp);
+
+	/* Setup transmit scheduler list */
+	for (lvl = 0; lvl < pfvf->hw.txschq_link_cfg_lvl; lvl++) {
+		if (!rsp->schq[lvl])
+			return -ENOSPC;
+
+		pfvf->pfc_schq_list[lvl][prio] = rsp->schq_list[lvl][0];
+	}
+
+	/* Set the Tx schedulers for rest of the levels same as
+	 * hw.txschq_list as those will be common for all.
+	 */
+	for (; lvl < NIX_TXSCH_LVL_CNT; lvl++)
+		pfvf->pfc_schq_list[lvl][prio] = pfvf->hw.txschq_list[lvl][0];
+
+	pfvf->pfc_alloc_status[prio] = true;
+	return 0;
+}
+
+int otx2_pfc_txschq_alloc(struct otx2_nic *pfvf)
+{
+	u8 pfc_en = pfvf->pfc_en;
+	u8 pfc_bit_set;
+	int err, prio;
+
+	for (prio = 0; prio < NIX_PF_PFC_PRIO_MAX; prio++) {
+		pfc_bit_set = pfc_en & (1 << prio);
+
+		if (!pfc_bit_set || pfvf->pfc_alloc_status[prio])
+			continue;
+
+		/* Add new scheduler to the priority */
+		err = otx2_pfc_txschq_alloc_one(pfvf, prio);
+		if (err) {
+			dev_err(pfvf->dev, "%s failed to allocate PFC TX schedulers\n", __func__);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int otx2_pfc_txschq_stop_one(struct otx2_nic *pfvf, u8 prio)
+{
+	struct nix_txsch_free_req *free_req;
+	int err;
+
+	mutex_lock(&pfvf->mbox.lock);
+	/* free PFC TLx nodes */
+	free_req = otx2_mbox_alloc_msg_nix_txsch_free(&pfvf->mbox);
+	if (!free_req) {
+		mutex_unlock(&pfvf->mbox.lock);
+		return -ENOMEM;
+	}
+
+	free_req->flags = TXSCHQ_FREE_ALL;
+	err = otx2_sync_mbox_msg(&pfvf->mbox);
+	mutex_unlock(&pfvf->mbox.lock);
+
+	pfvf->pfc_alloc_status[prio] = false;
+	return 0;
+}
+
+static int otx2_pfc_update_sq_smq_mapping(struct otx2_nic *pfvf, int prio)
+{
+	struct nix_cn10k_aq_enq_req *cn10k_sq_aq;
+	struct net_device *dev = pfvf->netdev;
+	bool if_up = netif_running(dev);
+	struct nix_aq_enq_req *sq_aq;
+
+	if (if_up) {
+		if (pfvf->pfc_alloc_status[prio])
+			netif_tx_stop_all_queues(pfvf->netdev);
+		else
+			netif_tx_stop_queue(netdev_get_tx_queue(dev, prio));
+	}
+
+	if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) {
+		cn10k_sq_aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox);
+		if (!cn10k_sq_aq)
+			return -ENOMEM;
+
+		/* Fill AQ info */
+		cn10k_sq_aq->qidx = prio;
+		cn10k_sq_aq->ctype = NIX_AQ_CTYPE_SQ;
+		cn10k_sq_aq->op = NIX_AQ_INSTOP_WRITE;
+
+		/* Fill fields to update */
+		cn10k_sq_aq->sq.ena = 1;
+		cn10k_sq_aq->sq_mask.ena = 1;
+		cn10k_sq_aq->sq_mask.smq = GENMASK(9, 0);
+		cn10k_sq_aq->sq.smq = otx2_get_smq_idx(pfvf, prio);
+	} else {
+		sq_aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox);
+		if (!sq_aq)
+			return -ENOMEM;
+
+		/* Fill AQ info */
+		sq_aq->qidx = prio;
+		sq_aq->ctype = NIX_AQ_CTYPE_SQ;
+		sq_aq->op = NIX_AQ_INSTOP_WRITE;
+
+		/* Fill fields to update */
+		sq_aq->sq.ena = 1;
+		sq_aq->sq_mask.ena = 1;
+		sq_aq->sq_mask.smq = GENMASK(8, 0);
+		sq_aq->sq.smq = otx2_get_smq_idx(pfvf, prio);
+	}
+
+	otx2_sync_mbox_msg(&pfvf->mbox);
+
+	if (if_up) {
+		if (pfvf->pfc_alloc_status[prio])
+			netif_tx_start_all_queues(pfvf->netdev);
+		else
+			netif_tx_start_queue(netdev_get_tx_queue(dev, prio));
+	}
+
+	return 0;
+}
+
+int otx2_pfc_txschq_update(struct otx2_nic *pfvf)
+{
+	u8 pfc_en = pfvf->pfc_en, pfc_bit_set;
+	struct mbox *mbox = &pfvf->mbox;
+	bool if_up = netif_running(pfvf->netdev);
+	int err, prio;
+
+	mutex_lock(&mbox->lock);
+	for (prio = 0; prio < NIX_PF_PFC_PRIO_MAX; prio++) {
+		pfc_bit_set = pfc_en & (1 << prio);
+
+		/* tx scheduler was created but user wants to disable now */
+		if (!pfc_bit_set && pfvf->pfc_alloc_status[prio]) {
+			mutex_unlock(&mbox->lock);
+			if (if_up)
+				netif_tx_stop_all_queues(pfvf->netdev);
+
+			otx2_smq_flush(pfvf, pfvf->pfc_schq_list[NIX_TXSCH_LVL_SMQ][prio]);
+			if (if_up)
+				netif_tx_start_all_queues(pfvf->netdev);
+
+			/* delete the schq */
+			err = otx2_pfc_txschq_stop_one(pfvf, prio);
+			if (err) {
+				dev_err(pfvf->dev,
+					"%s failed to stop PFC tx schedulers for priority: %d\n",
+					__func__, prio);
+				return err;
+			}
+
+			mutex_lock(&mbox->lock);
+			goto update_sq_smq_map;
+		}
+
+		/* Either PFC bit is not set
+		 * or Tx scheduler is already mapped for the priority
+		 */
+		if (!pfc_bit_set || pfvf->pfc_alloc_status[prio])
+			continue;
+
+		/* Add new scheduler to the priority */
+		err = otx2_pfc_txschq_alloc_one(pfvf, prio);
+		if (err) {
+			mutex_unlock(&mbox->lock);
+			dev_err(pfvf->dev,
+				"%s failed to allocate PFC tx schedulers for priority: %d\n",
+				__func__, prio);
+			return err;
+		}
+
+update_sq_smq_map:
+		err = otx2_pfc_update_sq_smq_mapping(pfvf, prio);
+		if (err) {
+			mutex_unlock(&mbox->lock);
+			dev_err(pfvf->dev, "%s failed PFC Tx schq sq:%d mapping", __func__, prio);
+			return err;
+		}
+	}
+
+	err = otx2_pfc_txschq_config(pfvf);
+	mutex_unlock(&mbox->lock);
+	if (err)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL(otx2_pfc_txschq_update);
+
+int otx2_pfc_txschq_stop(struct otx2_nic *pfvf)
+{
+	u8 pfc_en, pfc_bit_set;
+	int prio, err;
+
+	pfc_en = pfvf->pfc_en;
+	for (prio = 0; prio < NIX_PF_PFC_PRIO_MAX; prio++) {
+		pfc_bit_set = pfc_en & (1 << prio);
+		if (!pfc_bit_set || !pfvf->pfc_alloc_status[prio])
+			continue;
+
+		/* Delete the existing scheduler */
+		err = otx2_pfc_txschq_stop_one(pfvf, prio);
+		if (err) {
+			dev_err(pfvf->dev, "%s failed to stop PFC TX schedulers\n", __func__);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
 int otx2_config_priority_flow_ctrl(struct otx2_nic *pfvf)
 {
 	struct cgx_pfc_cfg *req;
@@ -128,6 +413,17 @@  static int otx2_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc)
 	/* Save PFC configuration to interface */
 	pfvf->pfc_en = pfc->pfc_en;
 
+	if (pfvf->hw.tx_queues >= NIX_PF_PFC_PRIO_MAX)
+		goto process_pfc;
+
+	/* Check if the PFC configuration can be
+	 * supported by the tx queue configuration
+	 */
+	err = otx2_check_pfc_config(pfvf);
+	if (err)
+		return err;
+
+process_pfc:
 	err = otx2_config_priority_flow_ctrl(pfvf);
 	if (err)
 		return err;
@@ -136,6 +432,12 @@  static int otx2_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc)
 	if (pfc->pfc_en)
 		otx2_nix_config_bp(pfvf, true);
 
+	err = otx2_pfc_txschq_update(pfvf);
+	if (err) {
+		dev_err(pfvf->dev, "%s failed to update TX schedulers\n", __func__);
+		return err;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 9376d0e62914..a365be5f99a5 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1389,18 +1389,40 @@  static int otx2_init_hw_resources(struct otx2_nic *pf)
 		goto err_free_sq_ptrs;
 	}
 
+#ifdef CONFIG_DCB
+	if (pf->pfc_en) {
+		err = otx2_pfc_txschq_alloc(pf);
+		if (err) {
+			mutex_unlock(&mbox->lock);
+			goto err_free_sq_ptrs;
+		}
+	}
+#endif
+
 	err = otx2_config_nix_queues(pf);
 	if (err) {
 		mutex_unlock(&mbox->lock);
 		goto err_free_txsch;
 	}
+
 	for (lvl = 0; lvl < NIX_TXSCH_LVL_CNT; lvl++) {
-		err = otx2_txschq_config(pf, lvl);
+		err = otx2_txschq_config(pf, lvl, 0, false);
+		if (err) {
+			mutex_unlock(&mbox->lock);
+			goto err_free_nix_queues;
+		}
+	}
+
+#ifdef CONFIG_DCB
+	if (pf->pfc_en) {
+		err = otx2_pfc_txschq_config(pf);
 		if (err) {
 			mutex_unlock(&mbox->lock);
 			goto err_free_nix_queues;
 		}
 	}
+#endif
+
 	mutex_unlock(&mbox->lock);
 	return err;
 
@@ -1455,6 +1477,11 @@  static void otx2_free_hw_resources(struct otx2_nic *pf)
 	if (err)
 		dev_err(pf->dev, "RVUPF: Failed to stop/free TX schedulers\n");
 
+#ifdef CONFIG_DCB
+	if (pf->pfc_en)
+		otx2_pfc_txschq_stop(pf);
+#endif
+
 	mutex_lock(&mbox->lock);
 	/* Disable backpressure */
 	if (!(pf->pcifunc & RVU_PFVF_FUNC_MASK))
@@ -1853,6 +1880,32 @@  static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev)
 	return NETDEV_TX_OK;
 }
 
+static u16 otx2_select_queue(struct net_device *netdev, struct sk_buff *skb,
+			     struct net_device *sb_dev)
+{
+	struct otx2_nic *pf = netdev_priv(netdev);
+#ifdef CONFIG_DCB
+	u8 vlan_prio;
+#endif
+	int txq;
+
+#ifdef CONFIG_DCB
+	if (!skb->vlan_present)
+		goto pick_tx;
+
+	vlan_prio = skb->vlan_tci >> 13;
+	if ((vlan_prio > pf->hw.tx_queues - 1) ||
+	    !pf->pfc_alloc_status[vlan_prio])
+		goto pick_tx;
+
+	return vlan_prio;
+
+pick_tx:
+#endif
+	txq = netdev_pick_tx(netdev, skb, NULL);
+	return txq;
+}
+
 static netdev_features_t otx2_fix_features(struct net_device *dev,
 					   netdev_features_t features)
 {
@@ -2447,6 +2500,7 @@  static const struct net_device_ops otx2_netdev_ops = {
 	.ndo_open		= otx2_open,
 	.ndo_stop		= otx2_stop,
 	.ndo_start_xmit		= otx2_xmit,
+	.ndo_select_queue	= otx2_select_queue,
 	.ndo_fix_features	= otx2_fix_features,
 	.ndo_set_mac_address    = otx2_set_mac_address,
 	.ndo_change_mtu		= otx2_change_mtu,