diff mbox series

[net-next,v7,5/6] net: ngbe: add sriov function support

Message ID 20250206103750.36064-6-mengyuanlou@net-swift.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series add sriov support for wangxun NICs | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 6 maintainers not CCed: andrew+netdev@lunn.ch linux@armlinux.org.uk edumazet@google.com jacob.e.keller@intel.com horms@kernel.org pabeni@redhat.com
netdev/build_clang success Errors and warnings before: 17 this patch: 17
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 224 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 8 this patch: 8
netdev/source_inline success Was 0 now: 0

Commit Message

mengyuanlou@net-swift.com Feb. 6, 2025, 10:37 a.m. UTC
From: Mengyuan Lou <mengyuanlou@net-swift.com>

Add sriov_configure for driver ops.
Add mailbox handler wx_msg_task for ngbe in
the interrupt handler.
Add the notification flow when the vfs exist.

Signed-off-by: Mengyuan Lou <mengyuanlou@net-swift.com>
---
 drivers/net/ethernet/wangxun/libwx/wx_sriov.c | 31 ++++++++++
 drivers/net/ethernet/wangxun/libwx/wx_sriov.h |  2 +
 drivers/net/ethernet/wangxun/libwx/wx_type.h  |  2 +
 drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 62 +++++++++++++++++--
 drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c | 10 +++
 drivers/net/ethernet/wangxun/ngbe/ngbe_type.h |  3 +
 6 files changed, 106 insertions(+), 4 deletions(-)

Comments

Jakub Kicinski Feb. 8, 2025, 1:19 a.m. UTC | #1
On Thu,  6 Feb 2025 18:37:49 +0800 mengyuanlou wrote:
>  static irqreturn_t ngbe_msix_other(int __always_unused irq, void *data)
>  {
> -	struct wx *wx = data;
> +	struct wx_q_vector *q_vector;
> +	struct wx *wx  = data;
> +	u32 eicr;
>  
> -	/* re-enable the original interrupt state, no lsc, no queues */
> -	if (netif_running(wx->netdev))
> -		ngbe_irq_enable(wx, false);
> +	q_vector = wx->q_vector[0];
> +
> +	eicr = wx_misc_isb(wx, WX_ISB_MISC);
> +
> +	if (eicr & NGBE_PX_MISC_IC_VF_MBOX)
> +		wx_msg_task(wx);
> +
> +	if (wx->num_vfs == 7) {
> +		napi_schedule_irqoff(&q_vector->napi);
> +		ngbe_irq_enable(wx, true);
> +	} else {
> +		/* re-enable the original interrupt state, no lsc, no queues */
> +		if (netif_running(wx->netdev))
> +			ngbe_irq_enable(wx, false);
> +	}
>  
>  	return IRQ_HANDLED;
>  }

You need to explain in the commit message what's happening here.

IDK why the num_vfs == 7 is special.
Also why are you now scheduling NAPI from a misc MSI-X handler?
mengyuanlou@net-swift.com Feb. 11, 2025, 11:14 a.m. UTC | #2
> 2025年2月8日 09:19,Jakub Kicinski <kuba@kernel.org> 写道:
> 
> On Thu,  6 Feb 2025 18:37:49 +0800 mengyuanlou wrote:
>> static irqreturn_t ngbe_msix_other(int __always_unused irq, void *data)
>> {
>> - struct wx *wx = data;
>> + struct wx_q_vector *q_vector;
>> + struct wx *wx  = data;
>> + u32 eicr;
>> 
>> - /* re-enable the original interrupt state, no lsc, no queues */
>> - if (netif_running(wx->netdev))
>> - ngbe_irq_enable(wx, false);
>> + q_vector = wx->q_vector[0];
>> +
>> + eicr = wx_misc_isb(wx, WX_ISB_MISC);
>> +
>> + if (eicr & NGBE_PX_MISC_IC_VF_MBOX)
>> + wx_msg_task(wx);
>> +
>> + if (wx->num_vfs == 7) {
>> + napi_schedule_irqoff(&q_vector->napi);
>> + ngbe_irq_enable(wx, true);
>> + } else {
>> + /* re-enable the original interrupt state, no lsc, no queues */
>> + if (netif_running(wx->netdev))
>> + ngbe_irq_enable(wx, false);
>> + }
>> 
>> return IRQ_HANDLED;
>> }
> 
> You need to explain in the commit message what's happening here.

> 
> IDK why the num_vfs == 7 is special.

Due to hardware design, when 6 vfs are assigned. 
+------------------------------------------------------------+
|        | pf | pf  | vf5 | vf4 | vf3 | vf2 | vf1 | vf0 | pf |
|--------|----|-----|-----|-----|-----|-----|-----|-----|----|
| vector | 0  | 1   | 2   | 3   | 4   | 5   | 6   | 7   | 8  |
+------------------------------------------------------------+

When 7 vfs are assigned. 
+------------------------------------------------------------+
|        | pf | vf6 | vf5 | vf4 | vf3 | vf2 | vf1 | vf0 | pf |
|--------|----|-----|-----|-----|-----|-----|-----|-----|----|
| vector | 0  | 1   | 2   | 3   | 4   | 5   | 6   | 7   | 8  |
+------------------------------------------------------------+

When num_vfs < 7, pf can use 0 for misc and 1 for queue.
But when num_vfs == 7, vector 1 is assigned to vf6.
1. Alloc 9 irq vectors, but only request_irq for 0 and 8. 
2. Reuse interrupt vector 0.


> Also why are you now scheduling NAPI from a misc MSI-X handler?
The code used the second. Misc and queue reuse interrupt vector 0.


> -- 
> pw-bot: cr
>
Jakub Kicinski Feb. 11, 2025, 10:06 p.m. UTC | #3
On Tue, 11 Feb 2025 19:14:54 +0800 mengyuanlou@net-swift.com wrote:
> Due to hardware design, when 6 vfs are assigned. 
> +------------------------------------------------------------+
> |        | pf | pf  | vf5 | vf4 | vf3 | vf2 | vf1 | vf0 | pf |
> |--------|----|-----|-----|-----|-----|-----|-----|-----|----|
> | vector | 0  | 1   | 2   | 3   | 4   | 5   | 6   | 7   | 8  |
> +------------------------------------------------------------+
> 
> When 7 vfs are assigned. 
> +------------------------------------------------------------+
> |        | pf | vf6 | vf5 | vf4 | vf3 | vf2 | vf1 | vf0 | pf |
> |--------|----|-----|-----|-----|-----|-----|-----|-----|----|
> | vector | 0  | 1   | 2   | 3   | 4   | 5   | 6   | 7   | 8  |
> +------------------------------------------------------------+
> 
> When num_vfs < 7, pf can use 0 for misc and 1 for queue.
> But when num_vfs == 7, vector 1 is assigned to vf6.
> 1. Alloc 9 irq vectors, but only request_irq for 0 and 8. 
> 2. Reuse interrupt vector 0.

Do you have proper synchronization in place to make sure IRQs
don't get mis-routed when SR-IOV is enabled?
The goal should be to make sure the right handler is register
for the IRQ, or at least do the muxing earlier in a safe fashion.
Not decide that it was a packet IRQ half way thru a function called
ngbe_msix_other
mengyuanlou@net-swift.com Feb. 12, 2025, 11:06 a.m. UTC | #4
> 2025年2月12日 06:06,Jakub Kicinski <kuba@kernel.org> 写道:
> 
> On Tue, 11 Feb 2025 19:14:54 +0800 mengyuanlou@net-swift.com wrote:
>> Due to hardware design, when 6 vfs are assigned. 
>> +------------------------------------------------------------+
>> |        | pf | pf  | vf5 | vf4 | vf3 | vf2 | vf1 | vf0 | pf |
>> |--------|----|-----|-----|-----|-----|-----|-----|-----|----|
>> | vector | 0  | 1   | 2   | 3   | 4   | 5   | 6   | 7   | 8  |
>> +------------------------------------------------------------+
>> 
>> When 7 vfs are assigned. 
>> +------------------------------------------------------------+
>> |        | pf | vf6 | vf5 | vf4 | vf3 | vf2 | vf1 | vf0 | pf |
>> |--------|----|-----|-----|-----|-----|-----|-----|-----|----|
>> | vector | 0  | 1   | 2   | 3   | 4   | 5   | 6   | 7   | 8  |
>> +------------------------------------------------------------+
>> 
>> When num_vfs < 7, pf can use 0 for misc and 1 for queue.
>> But when num_vfs == 7, vector 1 is assigned to vf6.
>> 1. Alloc 9 irq vectors, but only request_irq for 0 and 8. 
>> 2. Reuse interrupt vector 0.
> 
> Do you have proper synchronization in place to make sure IRQs
> don't get mis-routed when SR-IOV is enabled?

+ 	q_vector = wx->q_vector[0];
+
+ 	eicr = wx_misc_isb(wx, WX_ISB_MISC);
+
+ 	if (eicr & NGBE_PX_MISC_IC_VF_MBOX)
+ 	wx_msg_task(wx);
+
+ 	if (wx->num_vfs == 7) { 
+ 		napi_schedule_irqoff(&q_vector->napi);
+ 		ngbe_irq_enable(wx, true);
+ 	} else {
+ 		/* re-enable the original interrupt state, no lsc, no queues */
+ 		if (netif_running(wx->netdev))
+ 			ngbe_irq_enable(wx, false);
+ 	}

 if (wx->num_vfs == 7) { 
	if (!eicr) {
  		napi_schedule_irqoff(&q_vector->napi);
  		ngbe_irq_enable(wx, true);
	} else {
  		if (netif_running(wx->netdev))
  			ngbe_irq_enable(wx, false);
	}
 } else {
  	/* re-enable the original interrupt state, no lsc, no queues */
 	if (netif_running(wx->netdev))
 		ngbe_irq_enable(wx, false);
 }

Use eicr to determine whether it is an msic interrupt or a queue interrupt.

> The goal should be to make sure the right handler is register
> for the IRQ, or at least do the muxing earlier in a safe fashion.
> Not decide that it was a packet IRQ half way thru a function called
> ngbe_msix_other

Whether the first way(Alloc 9 irq vectors, but only request_irq for 0 and 8) can be better
than reuse vector 0 in the real?

>
Jakub Kicinski Feb. 13, 2025, 2:09 a.m. UTC | #5
On Wed, 12 Feb 2025 19:06:52 +0800 mengyuanlou@net-swift.com wrote:
> > The goal should be to make sure the right handler is register
> > for the IRQ, or at least do the muxing earlier in a safe fashion.
> > Not decide that it was a packet IRQ half way thru a function called
> > ngbe_msix_other  
> 
> Whether the first way(Alloc 9 irq vectors, but only request_irq for 0
> and 8) can be better than reuse vector 0 in the real?

I don't understand, sorry.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c
index 2abac0d75939..744692fb53fa 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c
@@ -865,3 +865,34 @@  void wx_msg_task(struct wx *wx)
 	}
 }
 EXPORT_SYMBOL(wx_msg_task);
+
+void wx_disable_vf_rx_tx(struct wx *wx)
+{
+	wr32(wx, WX_TDM_VFTE_CLR(0), 0);
+	wr32(wx, WX_RDM_VFRE_CLR(0), 0);
+	if (wx->mac.type == wx_mac_sp) {
+		wr32(wx, WX_TDM_VFTE_CLR(1), 0);
+		wr32(wx, WX_RDM_VFRE_CLR(1), 0);
+	}
+}
+EXPORT_SYMBOL(wx_disable_vf_rx_tx);
+
+void wx_ping_all_vfs_with_link_status(struct wx *wx, bool link_up)
+{
+	u32 msgbuf[2] = {0, 0};
+	u16 i;
+
+	if (!wx->num_vfs)
+		return;
+	msgbuf[0] = WX_PF_NOFITY_VF_LINK_STATUS | WX_PF_CONTROL_MSG;
+	if (link_up)
+		msgbuf[1] = (wx->speed << 1) | link_up;
+	if (wx->notify_down)
+		msgbuf[1] |= WX_PF_NOFITY_VF_NET_NOT_RUNNING;
+	for (i = 0 ; i < wx->num_vfs; i++) {
+		if (wx->vfinfo[i].clear_to_send)
+			msgbuf[0] |= WX_VT_MSGTYPE_CTS;
+		wx_write_mbx_pf(wx, msgbuf, 2, i);
+	}
+}
+EXPORT_SYMBOL(wx_ping_all_vfs_with_link_status);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.h b/drivers/net/ethernet/wangxun/libwx/wx_sriov.h
index 5d1486f92dee..3cbec7fb51bc 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_sriov.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.h
@@ -7,5 +7,7 @@ 
 void wx_disable_sriov(struct wx *wx);
 int wx_pci_sriov_configure(struct pci_dev *pdev, int num_vfs);
 void wx_msg_task(struct wx *wx);
+void wx_disable_vf_rx_tx(struct wx *wx);
+void wx_ping_all_vfs_with_link_status(struct wx *wx, bool link_up);
 
 #endif /* _WX_SRIOV_H_ */
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 84dabedb7e78..7d085b1ffd94 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -90,6 +90,7 @@ 
 /************************* Port Registers ************************************/
 /* port cfg Registers */
 #define WX_CFG_PORT_CTL              0x14400
+#define WX_CFG_PORT_CTL_PFRSTD       BIT(14)
 #define WX_CFG_PORT_CTL_DRV_LOAD     BIT(3)
 #define WX_CFG_PORT_CTL_QINQ         BIT(2)
 #define WX_CFG_PORT_CTL_D_VLAN       BIT(0) /* double vlan*/
@@ -1145,6 +1146,7 @@  struct wx {
 	enum wx_reset_type reset_type;
 
 	/* PHY stuff */
+	bool notify_down;
 	unsigned int link;
 	int speed;
 	int duplex;
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index 53aeae2f884b..5233558cba51 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -14,6 +14,8 @@ 
 #include "../libwx/wx_type.h"
 #include "../libwx/wx_hw.h"
 #include "../libwx/wx_lib.h"
+#include "../libwx/wx_mbx.h"
+#include "../libwx/wx_sriov.h"
 #include "ngbe_type.h"
 #include "ngbe_mdio.h"
 #include "ngbe_hw.h"
@@ -128,6 +130,10 @@  static int ngbe_sw_init(struct wx *wx)
 	wx->tx_work_limit = NGBE_DEFAULT_TX_WORK;
 	wx->rx_work_limit = NGBE_DEFAULT_RX_WORK;
 
+	wx->mbx.size = WX_VXMAILBOX_SIZE;
+	wx->setup_tc = ngbe_setup_tc;
+	set_bit(0, &wx->fwd_bitmask);
+
 	return 0;
 }
 
@@ -197,11 +203,25 @@  static irqreturn_t ngbe_intr(int __always_unused irq, void *data)
 
 static irqreturn_t ngbe_msix_other(int __always_unused irq, void *data)
 {
-	struct wx *wx = data;
+	struct wx_q_vector *q_vector;
+	struct wx *wx  = data;
+	u32 eicr;
 
-	/* re-enable the original interrupt state, no lsc, no queues */
-	if (netif_running(wx->netdev))
-		ngbe_irq_enable(wx, false);
+	q_vector = wx->q_vector[0];
+
+	eicr = wx_misc_isb(wx, WX_ISB_MISC);
+
+	if (eicr & NGBE_PX_MISC_IC_VF_MBOX)
+		wx_msg_task(wx);
+
+	if (wx->num_vfs == 7) {
+		napi_schedule_irqoff(&q_vector->napi);
+		ngbe_irq_enable(wx, true);
+	} else {
+		/* re-enable the original interrupt state, no lsc, no queues */
+		if (netif_running(wx->netdev))
+			ngbe_irq_enable(wx, false);
+	}
 
 	return IRQ_HANDLED;
 }
@@ -291,6 +311,22 @@  static void ngbe_disable_device(struct wx *wx)
 	struct net_device *netdev = wx->netdev;
 	u32 i;
 
+	if (wx->num_vfs) {
+		/* Clear EITR Select mapping */
+		wr32(wx, WX_PX_ITRSEL, 0);
+
+		/* Mark all the VFs as inactive */
+		for (i = 0 ; i < wx->num_vfs; i++)
+			wx->vfinfo[i].clear_to_send = 0;
+		wx->notify_down = true;
+		/* ping all the active vfs to let them know we are going down */
+		wx_ping_all_vfs_with_link_status(wx, false);
+		wx->notify_down = false;
+
+		/* Disable all VFTE/VFRE TX/RX */
+		wx_disable_vf_rx_tx(wx);
+	}
+
 	/* disable all enabled rx queues */
 	for (i = 0; i < wx->num_rx_queues; i++)
 		/* this call also flushes the previous write */
@@ -313,10 +349,17 @@  static void ngbe_disable_device(struct wx *wx)
 	wx_update_stats(wx);
 }
 
+static void ngbe_reset(struct wx *wx)
+{
+	wx_flush_sw_mac_table(wx);
+	wx_mac_set_default_filter(wx, wx->mac.addr);
+}
+
 void ngbe_down(struct wx *wx)
 {
 	phylink_stop(wx->phylink);
 	ngbe_disable_device(wx);
+	ngbe_reset(wx);
 	wx_clean_all_tx_rings(wx);
 	wx_clean_all_rx_rings(wx);
 }
@@ -339,6 +382,11 @@  void ngbe_up(struct wx *wx)
 		ngbe_sfp_modules_txrx_powerctl(wx, true);
 
 	phylink_start(wx->phylink);
+	/* Set PF Reset Done bit so PF/VF Mail Ops can work */
+	wr32m(wx, WX_CFG_PORT_CTL,
+	      WX_CFG_PORT_CTL_PFRSTD, WX_CFG_PORT_CTL_PFRSTD);
+	if (wx->num_vfs)
+		wx_ping_all_vfs_with_link_status(wx, false);
 }
 
 /**
@@ -578,6 +626,10 @@  static int ngbe_probe(struct pci_dev *pdev,
 		goto err_pci_release_regions;
 	}
 
+	/* The emerald supports up to 8 VFs per pf, but physical
+	 * function also need one pool for basic networking.
+	 */
+	pci_sriov_set_totalvfs(pdev, NGBE_MAX_VFS_DRV_LIMIT);
 	wx->driver_name = ngbe_driver_name;
 	ngbe_set_ethtool_ops(netdev);
 	netdev->netdev_ops = &ngbe_netdev_ops;
@@ -725,6 +777,7 @@  static void ngbe_remove(struct pci_dev *pdev)
 	struct net_device *netdev;
 
 	netdev = wx->netdev;
+	wx_disable_sriov(wx);
 	unregister_netdev(netdev);
 	phylink_destroy(wx->phylink);
 	pci_release_selected_regions(pdev,
@@ -784,6 +837,7 @@  static struct pci_driver ngbe_driver = {
 	.suspend  = ngbe_suspend,
 	.resume   = ngbe_resume,
 	.shutdown = ngbe_shutdown,
+	.sriov_configure = wx_pci_sriov_configure,
 };
 
 module_pci_driver(ngbe_driver);
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c
index a5e9b779c44d..d44204f7e12a 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c
@@ -8,6 +8,7 @@ 
 
 #include "../libwx/wx_type.h"
 #include "../libwx/wx_hw.h"
+#include "../libwx/wx_sriov.h"
 #include "ngbe_type.h"
 #include "ngbe_mdio.h"
 
@@ -64,6 +65,11 @@  static void ngbe_mac_config(struct phylink_config *config, unsigned int mode,
 static void ngbe_mac_link_down(struct phylink_config *config,
 			       unsigned int mode, phy_interface_t interface)
 {
+	struct wx *wx = phylink_to_wx(config);
+
+	wx->speed = 0;
+	/* ping all the active vfs to let them know we are going down */
+	wx_ping_all_vfs_with_link_status(wx, false);
 }
 
 static void ngbe_mac_link_up(struct phylink_config *config,
@@ -103,6 +109,10 @@  static void ngbe_mac_link_up(struct phylink_config *config,
 	wr32(wx, WX_MAC_PKT_FLT, WX_MAC_PKT_FLT_PR);
 	reg = rd32(wx, WX_MAC_WDG_TIMEOUT);
 	wr32(wx, WX_MAC_WDG_TIMEOUT, reg);
+
+	wx->speed = speed;
+	/* ping all the active vfs to let them know we are going up */
+	wx_ping_all_vfs_with_link_status(wx, true);
 }
 
 static const struct phylink_mac_ops ngbe_mac_ops = {
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
index f48ed7fc1805..2b1302c73a16 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
@@ -72,11 +72,13 @@ 
 #define NGBE_PX_MISC_IEN_DEV_RST		BIT(10)
 #define NGBE_PX_MISC_IEN_ETH_LK			BIT(18)
 #define NGBE_PX_MISC_IEN_INT_ERR		BIT(20)
+#define NGBE_PX_MISC_IC_VF_MBOX			BIT(23)
 #define NGBE_PX_MISC_IEN_GPIO			BIT(26)
 #define NGBE_PX_MISC_IEN_MASK ( \
 				NGBE_PX_MISC_IEN_DEV_RST | \
 				NGBE_PX_MISC_IEN_ETH_LK | \
 				NGBE_PX_MISC_IEN_INT_ERR | \
+				NGBE_PX_MISC_IC_VF_MBOX | \
 				NGBE_PX_MISC_IEN_GPIO)
 
 #define NGBE_INTR_ALL				0x1FF
@@ -129,6 +131,7 @@ 
 #define NGBE_MAX_RXD				8192
 #define NGBE_MIN_RXD				128
 
+#define NGBE_MAX_VFS_DRV_LIMIT			7
 extern char ngbe_driver_name[];
 
 void ngbe_down(struct wx *wx);