Message ID | 20250206103750.36064-6-mengyuanlou@net-swift.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | add sriov support for wangxun NICs | expand |
On Thu, 6 Feb 2025 18:37:49 +0800 mengyuanlou wrote: > static irqreturn_t ngbe_msix_other(int __always_unused irq, void *data) > { > - struct wx *wx = data; > + struct wx_q_vector *q_vector; > + struct wx *wx = data; > + u32 eicr; > > - /* re-enable the original interrupt state, no lsc, no queues */ > - if (netif_running(wx->netdev)) > - ngbe_irq_enable(wx, false); > + q_vector = wx->q_vector[0]; > + > + eicr = wx_misc_isb(wx, WX_ISB_MISC); > + > + if (eicr & NGBE_PX_MISC_IC_VF_MBOX) > + wx_msg_task(wx); > + > + if (wx->num_vfs == 7) { > + napi_schedule_irqoff(&q_vector->napi); > + ngbe_irq_enable(wx, true); > + } else { > + /* re-enable the original interrupt state, no lsc, no queues */ > + if (netif_running(wx->netdev)) > + ngbe_irq_enable(wx, false); > + } > > return IRQ_HANDLED; > } You need to explain in the commit message what's happening here. IDK why the num_vfs == 7 is special. Also why are you now scheduling NAPI from a misc MSI-X handler?
> 2025年2月8日 09:19,Jakub Kicinski <kuba@kernel.org> 写道: > > On Thu, 6 Feb 2025 18:37:49 +0800 mengyuanlou wrote: >> static irqreturn_t ngbe_msix_other(int __always_unused irq, void *data) >> { >> - struct wx *wx = data; >> + struct wx_q_vector *q_vector; >> + struct wx *wx = data; >> + u32 eicr; >> >> - /* re-enable the original interrupt state, no lsc, no queues */ >> - if (netif_running(wx->netdev)) >> - ngbe_irq_enable(wx, false); >> + q_vector = wx->q_vector[0]; >> + >> + eicr = wx_misc_isb(wx, WX_ISB_MISC); >> + >> + if (eicr & NGBE_PX_MISC_IC_VF_MBOX) >> + wx_msg_task(wx); >> + >> + if (wx->num_vfs == 7) { >> + napi_schedule_irqoff(&q_vector->napi); >> + ngbe_irq_enable(wx, true); >> + } else { >> + /* re-enable the original interrupt state, no lsc, no queues */ >> + if (netif_running(wx->netdev)) >> + ngbe_irq_enable(wx, false); >> + } >> >> return IRQ_HANDLED; >> } > > You need to explain in the commit message what's happening here. > > IDK why the num_vfs == 7 is special. Due to hardware design, when 6 vfs are assigned. +------------------------------------------------------------+ | | pf | pf | vf5 | vf4 | vf3 | vf2 | vf1 | vf0 | pf | |--------|----|-----|-----|-----|-----|-----|-----|-----|----| | vector | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | +------------------------------------------------------------+ When 7 vfs are assigned. +------------------------------------------------------------+ | | pf | vf6 | vf5 | vf4 | vf3 | vf2 | vf1 | vf0 | pf | |--------|----|-----|-----|-----|-----|-----|-----|-----|----| | vector | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | +------------------------------------------------------------+ When num_vfs < 7, pf can use 0 for misc and 1 for queue. But when num_vfs == 7, vector 1 is assigned to vf6. 1. Alloc 9 irq vectors, but only request_irq for 0 and 8. 2. Reuse interrupt vector 0. > Also why are you now scheduling NAPI from a misc MSI-X handler? The code used the second. Misc and queue reuse interrupt vector 0. > -- > pw-bot: cr >
On Tue, 11 Feb 2025 19:14:54 +0800 mengyuanlou@net-swift.com wrote: > Due to hardware design, when 6 vfs are assigned. > +------------------------------------------------------------+ > | | pf | pf | vf5 | vf4 | vf3 | vf2 | vf1 | vf0 | pf | > |--------|----|-----|-----|-----|-----|-----|-----|-----|----| > | vector | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | > +------------------------------------------------------------+ > > When 7 vfs are assigned. > +------------------------------------------------------------+ > | | pf | vf6 | vf5 | vf4 | vf3 | vf2 | vf1 | vf0 | pf | > |--------|----|-----|-----|-----|-----|-----|-----|-----|----| > | vector | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | > +------------------------------------------------------------+ > > When num_vfs < 7, pf can use 0 for misc and 1 for queue. > But when num_vfs == 7, vector 1 is assigned to vf6. > 1. Alloc 9 irq vectors, but only request_irq for 0 and 8. > 2. Reuse interrupt vector 0. Do you have proper synchronization in place to make sure IRQs don't get mis-routed when SR-IOV is enabled? The goal should be to make sure the right handler is register for the IRQ, or at least do the muxing earlier in a safe fashion. Not decide that it was a packet IRQ half way thru a function called ngbe_msix_other
> 2025年2月12日 06:06,Jakub Kicinski <kuba@kernel.org> 写道: > > On Tue, 11 Feb 2025 19:14:54 +0800 mengyuanlou@net-swift.com wrote: >> Due to hardware design, when 6 vfs are assigned. >> +------------------------------------------------------------+ >> | | pf | pf | vf5 | vf4 | vf3 | vf2 | vf1 | vf0 | pf | >> |--------|----|-----|-----|-----|-----|-----|-----|-----|----| >> | vector | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | >> +------------------------------------------------------------+ >> >> When 7 vfs are assigned. >> +------------------------------------------------------------+ >> | | pf | vf6 | vf5 | vf4 | vf3 | vf2 | vf1 | vf0 | pf | >> |--------|----|-----|-----|-----|-----|-----|-----|-----|----| >> | vector | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | >> +------------------------------------------------------------+ >> >> When num_vfs < 7, pf can use 0 for misc and 1 for queue. >> But when num_vfs == 7, vector 1 is assigned to vf6. >> 1. Alloc 9 irq vectors, but only request_irq for 0 and 8. >> 2. Reuse interrupt vector 0. > > Do you have proper synchronization in place to make sure IRQs > don't get mis-routed when SR-IOV is enabled? + q_vector = wx->q_vector[0]; + + eicr = wx_misc_isb(wx, WX_ISB_MISC); + + if (eicr & NGBE_PX_MISC_IC_VF_MBOX) + wx_msg_task(wx); + + if (wx->num_vfs == 7) { + napi_schedule_irqoff(&q_vector->napi); + ngbe_irq_enable(wx, true); + } else { + /* re-enable the original interrupt state, no lsc, no queues */ + if (netif_running(wx->netdev)) + ngbe_irq_enable(wx, false); + } if (wx->num_vfs == 7) { if (!eicr) { napi_schedule_irqoff(&q_vector->napi); ngbe_irq_enable(wx, true); } else { if (netif_running(wx->netdev)) ngbe_irq_enable(wx, false); } } else { /* re-enable the original interrupt state, no lsc, no queues */ if (netif_running(wx->netdev)) ngbe_irq_enable(wx, false); } Use eicr to determine whether it is an msic interrupt or a queue interrupt. > The goal should be to make sure the right handler is register > for the IRQ, or at least do the muxing earlier in a safe fashion. > Not decide that it was a packet IRQ half way thru a function called > ngbe_msix_other Whether the first way(Alloc 9 irq vectors, but only request_irq for 0 and 8) can be better than reuse vector 0 in the real? >
On Wed, 12 Feb 2025 19:06:52 +0800 mengyuanlou@net-swift.com wrote: > > The goal should be to make sure the right handler is register > > for the IRQ, or at least do the muxing earlier in a safe fashion. > > Not decide that it was a packet IRQ half way thru a function called > > ngbe_msix_other > > Whether the first way(Alloc 9 irq vectors, but only request_irq for 0 > and 8) can be better than reuse vector 0 in the real? I don't understand, sorry.
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c index 2abac0d75939..744692fb53fa 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c @@ -865,3 +865,34 @@ void wx_msg_task(struct wx *wx) } } EXPORT_SYMBOL(wx_msg_task); + +void wx_disable_vf_rx_tx(struct wx *wx) +{ + wr32(wx, WX_TDM_VFTE_CLR(0), 0); + wr32(wx, WX_RDM_VFRE_CLR(0), 0); + if (wx->mac.type == wx_mac_sp) { + wr32(wx, WX_TDM_VFTE_CLR(1), 0); + wr32(wx, WX_RDM_VFRE_CLR(1), 0); + } +} +EXPORT_SYMBOL(wx_disable_vf_rx_tx); + +void wx_ping_all_vfs_with_link_status(struct wx *wx, bool link_up) +{ + u32 msgbuf[2] = {0, 0}; + u16 i; + + if (!wx->num_vfs) + return; + msgbuf[0] = WX_PF_NOFITY_VF_LINK_STATUS | WX_PF_CONTROL_MSG; + if (link_up) + msgbuf[1] = (wx->speed << 1) | link_up; + if (wx->notify_down) + msgbuf[1] |= WX_PF_NOFITY_VF_NET_NOT_RUNNING; + for (i = 0 ; i < wx->num_vfs; i++) { + if (wx->vfinfo[i].clear_to_send) + msgbuf[0] |= WX_VT_MSGTYPE_CTS; + wx_write_mbx_pf(wx, msgbuf, 2, i); + } +} +EXPORT_SYMBOL(wx_ping_all_vfs_with_link_status); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.h b/drivers/net/ethernet/wangxun/libwx/wx_sriov.h index 5d1486f92dee..3cbec7fb51bc 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_sriov.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.h @@ -7,5 +7,7 @@ void wx_disable_sriov(struct wx *wx); int wx_pci_sriov_configure(struct pci_dev *pdev, int num_vfs); void wx_msg_task(struct wx *wx); +void wx_disable_vf_rx_tx(struct wx *wx); +void wx_ping_all_vfs_with_link_status(struct wx *wx, bool link_up); #endif /* _WX_SRIOV_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 84dabedb7e78..7d085b1ffd94 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -90,6 +90,7 @@ /************************* Port Registers ************************************/ /* port cfg Registers */ #define WX_CFG_PORT_CTL 0x14400 +#define WX_CFG_PORT_CTL_PFRSTD BIT(14) #define WX_CFG_PORT_CTL_DRV_LOAD BIT(3) #define WX_CFG_PORT_CTL_QINQ BIT(2) #define WX_CFG_PORT_CTL_D_VLAN BIT(0) /* double vlan*/ @@ -1145,6 +1146,7 @@ struct wx { enum wx_reset_type reset_type; /* PHY stuff */ + bool notify_down; unsigned int link; int speed; int duplex; diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index 53aeae2f884b..5233558cba51 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -14,6 +14,8 @@ #include "../libwx/wx_type.h" #include "../libwx/wx_hw.h" #include "../libwx/wx_lib.h" +#include "../libwx/wx_mbx.h" +#include "../libwx/wx_sriov.h" #include "ngbe_type.h" #include "ngbe_mdio.h" #include "ngbe_hw.h" @@ -128,6 +130,10 @@ static int ngbe_sw_init(struct wx *wx) wx->tx_work_limit = NGBE_DEFAULT_TX_WORK; wx->rx_work_limit = NGBE_DEFAULT_RX_WORK; + wx->mbx.size = WX_VXMAILBOX_SIZE; + wx->setup_tc = ngbe_setup_tc; + set_bit(0, &wx->fwd_bitmask); + return 0; } @@ -197,11 +203,25 @@ static irqreturn_t ngbe_intr(int __always_unused irq, void *data) static irqreturn_t ngbe_msix_other(int __always_unused irq, void *data) { - struct wx *wx = data; + struct wx_q_vector *q_vector; + struct wx *wx = data; + u32 eicr; - /* re-enable the original interrupt state, no lsc, no queues */ - if (netif_running(wx->netdev)) - ngbe_irq_enable(wx, false); + q_vector = wx->q_vector[0]; + + eicr = wx_misc_isb(wx, WX_ISB_MISC); + + if (eicr & NGBE_PX_MISC_IC_VF_MBOX) + wx_msg_task(wx); + + if (wx->num_vfs == 7) { + napi_schedule_irqoff(&q_vector->napi); + ngbe_irq_enable(wx, true); + } else { + /* re-enable the original interrupt state, no lsc, no queues */ + if (netif_running(wx->netdev)) + ngbe_irq_enable(wx, false); + } return IRQ_HANDLED; } @@ -291,6 +311,22 @@ static void ngbe_disable_device(struct wx *wx) struct net_device *netdev = wx->netdev; u32 i; + if (wx->num_vfs) { + /* Clear EITR Select mapping */ + wr32(wx, WX_PX_ITRSEL, 0); + + /* Mark all the VFs as inactive */ + for (i = 0 ; i < wx->num_vfs; i++) + wx->vfinfo[i].clear_to_send = 0; + wx->notify_down = true; + /* ping all the active vfs to let them know we are going down */ + wx_ping_all_vfs_with_link_status(wx, false); + wx->notify_down = false; + + /* Disable all VFTE/VFRE TX/RX */ + wx_disable_vf_rx_tx(wx); + } + /* disable all enabled rx queues */ for (i = 0; i < wx->num_rx_queues; i++) /* this call also flushes the previous write */ @@ -313,10 +349,17 @@ static void ngbe_disable_device(struct wx *wx) wx_update_stats(wx); } +static void ngbe_reset(struct wx *wx) +{ + wx_flush_sw_mac_table(wx); + wx_mac_set_default_filter(wx, wx->mac.addr); +} + void ngbe_down(struct wx *wx) { phylink_stop(wx->phylink); ngbe_disable_device(wx); + ngbe_reset(wx); wx_clean_all_tx_rings(wx); wx_clean_all_rx_rings(wx); } @@ -339,6 +382,11 @@ void ngbe_up(struct wx *wx) ngbe_sfp_modules_txrx_powerctl(wx, true); phylink_start(wx->phylink); + /* Set PF Reset Done bit so PF/VF Mail Ops can work */ + wr32m(wx, WX_CFG_PORT_CTL, + WX_CFG_PORT_CTL_PFRSTD, WX_CFG_PORT_CTL_PFRSTD); + if (wx->num_vfs) + wx_ping_all_vfs_with_link_status(wx, false); } /** @@ -578,6 +626,10 @@ static int ngbe_probe(struct pci_dev *pdev, goto err_pci_release_regions; } + /* The emerald supports up to 8 VFs per pf, but physical + * function also need one pool for basic networking. + */ + pci_sriov_set_totalvfs(pdev, NGBE_MAX_VFS_DRV_LIMIT); wx->driver_name = ngbe_driver_name; ngbe_set_ethtool_ops(netdev); netdev->netdev_ops = &ngbe_netdev_ops; @@ -725,6 +777,7 @@ static void ngbe_remove(struct pci_dev *pdev) struct net_device *netdev; netdev = wx->netdev; + wx_disable_sriov(wx); unregister_netdev(netdev); phylink_destroy(wx->phylink); pci_release_selected_regions(pdev, @@ -784,6 +837,7 @@ static struct pci_driver ngbe_driver = { .suspend = ngbe_suspend, .resume = ngbe_resume, .shutdown = ngbe_shutdown, + .sriov_configure = wx_pci_sriov_configure, }; module_pci_driver(ngbe_driver); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c index a5e9b779c44d..d44204f7e12a 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c @@ -8,6 +8,7 @@ #include "../libwx/wx_type.h" #include "../libwx/wx_hw.h" +#include "../libwx/wx_sriov.h" #include "ngbe_type.h" #include "ngbe_mdio.h" @@ -64,6 +65,11 @@ static void ngbe_mac_config(struct phylink_config *config, unsigned int mode, static void ngbe_mac_link_down(struct phylink_config *config, unsigned int mode, phy_interface_t interface) { + struct wx *wx = phylink_to_wx(config); + + wx->speed = 0; + /* ping all the active vfs to let them know we are going down */ + wx_ping_all_vfs_with_link_status(wx, false); } static void ngbe_mac_link_up(struct phylink_config *config, @@ -103,6 +109,10 @@ static void ngbe_mac_link_up(struct phylink_config *config, wr32(wx, WX_MAC_PKT_FLT, WX_MAC_PKT_FLT_PR); reg = rd32(wx, WX_MAC_WDG_TIMEOUT); wr32(wx, WX_MAC_WDG_TIMEOUT, reg); + + wx->speed = speed; + /* ping all the active vfs to let them know we are going up */ + wx_ping_all_vfs_with_link_status(wx, true); } static const struct phylink_mac_ops ngbe_mac_ops = { diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h index f48ed7fc1805..2b1302c73a16 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h @@ -72,11 +72,13 @@ #define NGBE_PX_MISC_IEN_DEV_RST BIT(10) #define NGBE_PX_MISC_IEN_ETH_LK BIT(18) #define NGBE_PX_MISC_IEN_INT_ERR BIT(20) +#define NGBE_PX_MISC_IC_VF_MBOX BIT(23) #define NGBE_PX_MISC_IEN_GPIO BIT(26) #define NGBE_PX_MISC_IEN_MASK ( \ NGBE_PX_MISC_IEN_DEV_RST | \ NGBE_PX_MISC_IEN_ETH_LK | \ NGBE_PX_MISC_IEN_INT_ERR | \ + NGBE_PX_MISC_IC_VF_MBOX | \ NGBE_PX_MISC_IEN_GPIO) #define NGBE_INTR_ALL 0x1FF @@ -129,6 +131,7 @@ #define NGBE_MAX_RXD 8192 #define NGBE_MIN_RXD 128 +#define NGBE_MAX_VFS_DRV_LIMIT 7 extern char ngbe_driver_name[]; void ngbe_down(struct wx *wx);