diff mbox series

[V1,1/2] bnxt_en: Add TPH support in BNXT driver

Message ID 20241115200412.1340286-2-wei.huang2@amd.com (mailing list archive)
State New
Headers show
Series Enable TPH support in BNXT driver | expand

Commit Message

Wei Huang Nov. 15, 2024, 8:04 p.m. UTC
From: Manoj Panicker <manoj.panicker2@amd.com>

Add TPH support to the Broadcom BNXT device driver. This allows the
driver to utilize TPH functions for retrieving and configuring Steering
Tags when changing interrupt affinity. With compatible NIC firmware,
network traffic will be tagged correctly with Steering Tags, resulting
in significant memory bandwidth savings and other advantages as
demonstrated by real network benchmarks on TPH-capable platforms.

Co-developed-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Co-developed-by: Wei Huang <wei.huang2@amd.com>
Signed-off-by: Wei Huang <wei.huang2@amd.com>
Signed-off-by: Manoj Panicker <manoj.panicker2@amd.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 103 ++++++++++++++++++++++
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |   7 ++
 net/core/netdev_rx_queue.c                |   1 +
 3 files changed, 111 insertions(+)

Comments

Jakub Kicinski Nov. 15, 2024, 10:04 p.m. UTC | #1
On Fri, 15 Nov 2024 14:04:11 -0600 Wei Huang wrote:
> +static void bnxt_irq_affinity_release(struct kref __always_unused *ref)

unused? you're using it now

> +{
> +	struct irq_affinity_notify *notify =
> +		(struct irq_affinity_notify *)
> +		container_of(ref, struct irq_affinity_notify, kref);

this is ugly, and cast is unnecessary.

> +	struct bnxt_irq *irq;
> +
> +	irq = container_of(notify, struct bnxt_irq, affinity_notify);

since you init irq out of line you can as well init notify here

> +	if (pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, 0)) {

You checked this function can sleep, right? Because rtnl_lock()
will sleep.


Bjorn, do you have a strong preference to have a user of the TPH code
merged as part of 6.13?  We're very close to the merge window, I'm not
sure build bots etc. will have enough time to hammer this code.
My weak preference would be to punt these driver changes to 6.14
avoid all the conflicts and risks (unless Linus gives us another week.)
Michael Chan Nov. 15, 2024, 10:19 p.m. UTC | #2
On Fri, Nov 15, 2024 at 2:04 PM Jakub Kicinski <kuba@kernel.org> wrote:
>
> Bjorn, do you have a strong preference to have a user of the TPH code
> merged as part of 6.13?  We're very close to the merge window, I'm not
> sure build bots etc. will have enough time to hammer this code.
> My weak preference would be to punt these driver changes to 6.14
> avoid all the conflicts and risks (unless Linus gives us another week.)

Driver changes going in through net-next for 6.14 sounds good to us.
Bjorn Helgaas Nov. 15, 2024, 10:20 p.m. UTC | #3
On Fri, Nov 15, 2024 at 02:04:34PM -0800, Jakub Kicinski wrote:
> ...
> Bjorn, do you have a strong preference to have a user of the TPH code
> merged as part of 6.13?  We're very close to the merge window, I'm not
> sure build bots etc. will have enough time to hammer this code.
> My weak preference would be to punt these driver changes to 6.14
> avoid all the conflicts and risks (unless Linus gives us another week.)

I do not have a preference.  The PCI core changes are queued for
v6.13, so driver changes will be able to go the normal netdev route
for v6.14.

I agree it seems late to add significant things for v6.13.

Bjorn
Andy Gospodarek Nov. 15, 2024, 10:28 p.m. UTC | #4
On Fri, Nov 15, 2024 at 04:20:38PM -0600, Bjorn Helgaas wrote:
> On Fri, Nov 15, 2024 at 02:04:34PM -0800, Jakub Kicinski wrote:
> > ...
> > Bjorn, do you have a strong preference to have a user of the TPH code
> > merged as part of 6.13?  We're very close to the merge window, I'm not
> > sure build bots etc. will have enough time to hammer this code.
> > My weak preference would be to punt these driver changes to 6.14
> > avoid all the conflicts and risks (unless Linus gives us another week.)
> 
> I do not have a preference.  The PCI core changes are queued for
> v6.13, so driver changes will be able to go the normal netdev route
> for v6.14.
> 
> I agree it seems late to add significant things for v6.13.
> 

Excellent.  Thank you!
diff mbox series

Patch

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 6e422e24750a..beabc4b4a913 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -55,6 +55,8 @@ 
 #include <net/page_pool/helpers.h>
 #include <linux/align.h>
 #include <net/netdev_queues.h>
+#include <net/netdev_rx_queue.h>
+#include <linux/pci-tph.h>
 
 #include "bnxt_hsi.h"
 #include "bnxt.h"
@@ -10865,6 +10867,81 @@  int bnxt_reserve_rings(struct bnxt *bp, bool irq_re_init)
 	return 0;
 }
 
+/* IRQ affinity changed: retag the MSI-X entry, then restart its RX queue. */
+static void bnxt_irq_affinity_notify(struct irq_affinity_notify *notify,
+				     const cpumask_t *mask)
+{
+	struct bnxt_irq *irq =
+		container_of(notify, struct bnxt_irq, affinity_notify);
+	struct bnxt *bp = irq->bp;
+	u16 tag;
+	int err;
+
+	if (!bp->tph_mode)
+		return;
+
+	cpumask_copy(irq->cpu_mask, mask);
+
+	/* Look up the tag for the first CPU in the new mask and program it. */
+	if (pcie_tph_get_cpu_st(bp->pdev, TPH_MEM_TYPE_VM,
+				cpumask_first(irq->cpu_mask), &tag) ||
+	    pcie_tph_set_st_entry(bp->pdev, irq->msix_nr, tag))
+		return;
+
+	rtnl_lock();
+	if (netif_running(bp->dev)) {
+		err = netdev_rx_queue_restart(bp->dev, irq->ring_nr);
+		if (err)
+			netdev_err(bp->dev,
+				   "RX queue restart failed: err=%d\n", err);
+	}
+	rtnl_unlock();
+}
+
+/* Release callback of the affinity notifier: reset the entry's ST to 0. */
+static void bnxt_irq_affinity_release(struct kref *ref)
+{
+	struct irq_affinity_notify *notify;
+	struct bnxt_irq *irq;
+
+	/* Walk back from the embedded kref to the owning bnxt_irq. */
+	notify = container_of(ref, struct irq_affinity_notify, kref);
+	irq = container_of(notify, struct bnxt_irq, affinity_notify);
+
+	if (!irq->bp->tph_mode)
+		return;
+
+	/* A failed reset is only logged; there is nothing else to undo here. */
+	if (pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, 0)) {
+		netdev_err(irq->bp->dev,
+			   "Setting ST=0 for MSIX entry %d failed\n",
+			   irq->msix_nr);
+	}
+}
+
+static void bnxt_release_irq_notifier(struct bnxt_irq *irq)
+{
+	irq_set_affinity_notifier(irq->vector, NULL); /* NULL disables it */
+}
+
+/* Bind @irq to @bp and, if TPH is enabled, hook its affinity notifier. */
+static void bnxt_register_irq_notifier(struct bnxt *bp, struct bnxt_irq *irq)
+{
+	struct irq_affinity_notify *notify = &irq->affinity_notify;
+
+	/* The back-pointer is recorded even when TPH is off. */
+	irq->bp = bp;
+
+	if (!bp->tph_mode)
+		return;
+
+	/* Fill in the notifier for this vector before registering it. */
+	notify->irq = irq->vector;
+	notify->notify = bnxt_irq_affinity_notify;
+	notify->release = bnxt_irq_affinity_release;
+
+	irq_set_affinity_notifier(irq->vector, notify);
+}
+
 static void bnxt_free_irq(struct bnxt *bp)
 {
 	struct bnxt_irq *irq;
@@ -10887,11 +10964,18 @@  static void bnxt_free_irq(struct bnxt *bp)
 				free_cpumask_var(irq->cpu_mask);
 				irq->have_cpumask = 0;
 			}
+
+			bnxt_release_irq_notifier(irq);
+
 			free_irq(irq->vector, bp->bnapi[i]);
 		}
 
 		irq->requested = 0;
 	}
+
+	/* Disable TPH support */
+	pcie_disable_tph(bp->pdev);
+	bp->tph_mode = 0;
 }
 
 static int bnxt_request_irq(struct bnxt *bp)
@@ -10911,6 +10995,12 @@  static int bnxt_request_irq(struct bnxt *bp)
 #ifdef CONFIG_RFS_ACCEL
 	rmap = bp->dev->rx_cpu_rmap;
 #endif
+
+	/* Enable TPH support as part of IRQ request */
+	rc = pcie_enable_tph(bp->pdev, PCI_TPH_ST_IV_MODE);
+	if (!rc)
+		bp->tph_mode = PCI_TPH_ST_IV_MODE;
+
 	for (i = 0, j = 0; i < bp->cp_nr_rings; i++) {
 		int map_idx = bnxt_cp_num_to_irq_num(bp, i);
 		struct bnxt_irq *irq = &bp->irq_tbl[map_idx];
@@ -10934,8 +11024,11 @@  static int bnxt_request_irq(struct bnxt *bp)
 
 		if (zalloc_cpumask_var(&irq->cpu_mask, GFP_KERNEL)) {
 			int numa_node = dev_to_node(&bp->pdev->dev);
+			u16 tag;
 
 			irq->have_cpumask = 1;
+			irq->msix_nr = map_idx;
+			irq->ring_nr = i;
 			cpumask_set_cpu(cpumask_local_spread(i, numa_node),
 					irq->cpu_mask);
 			rc = irq_set_affinity_hint(irq->vector, irq->cpu_mask);
@@ -10945,6 +11038,16 @@  static int bnxt_request_irq(struct bnxt *bp)
 					    irq->vector);
 				break;
 			}
+
+			bnxt_register_irq_notifier(bp, irq);
+
+			/* Init ST table entry */
+			if (pcie_tph_get_cpu_st(irq->bp->pdev, TPH_MEM_TYPE_VM,
+						cpumask_first(irq->cpu_mask),
+						&tag))
+				continue;
+
+			pcie_tph_set_st_entry(irq->bp->pdev, irq->msix_nr, tag);
 		}
 	}
 	return rc;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 69231e85140b..641d25646367 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1227,6 +1227,11 @@  struct bnxt_irq {
 	u8		have_cpumask:1;
 	char		name[IFNAMSIZ + BNXT_IRQ_NAME_EXTRA];
 	cpumask_var_t	cpu_mask;
+
+	struct bnxt	*bp;
+	int		msix_nr;
+	int		ring_nr;
+	struct irq_affinity_notify affinity_notify;
 };
 
 #define HWRM_RING_ALLOC_TX	0x1
@@ -2183,6 +2188,8 @@  struct bnxt {
 	struct net_device	*dev;
 	struct pci_dev		*pdev;
 
+	u8			tph_mode;
+
 	atomic_t		intr_sem;
 
 	u32			flags;
diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c
index e217a5838c87..10e95d7b6892 100644
--- a/net/core/netdev_rx_queue.c
+++ b/net/core/netdev_rx_queue.c
@@ -79,3 +79,4 @@  int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
 
 	return err;
 }
+EXPORT_SYMBOL_GPL(netdev_rx_queue_restart);