Message ID   | 20240711-b4-igb_zero_copy-v6-4-4bfb68773b18@linutronix.de
State        | Awaiting Upstream
Delegated to | Netdev Maintainers
Series       | igb: Add support for AF_XDP zero-copy
On Fri, Aug 16, 2024 at 11:24:03AM +0200, Kurt Kanzenbach wrote:
> From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
>
> Add the following ring flag:
> - IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)
>
> Add a xdp_buff array for use with XSK receive batch API, and a pointer
> to xsk_pool in igb_adapter.
>
> Add enable/disable functions for TX and RX rings.
> Add enable/disable functions for XSK pool.
> Add xsk wakeup function.
>
> None of the above functionality will be active until
> NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev->xdp_features.
>
> Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

Sriram's mail bounces unfortunately, is it possible to grab his current
address?

You could also update the copyright date in igb_xsk.c.

Besides:
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>

> [Kurt: Add READ/WRITE_ONCE(), synchronize_net(),
>        remove IGB_RING_FLAG_AF_XDP_ZC]
> Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
> ---
>  drivers/net/ethernet/intel/igb/Makefile   |   2 +-
>  drivers/net/ethernet/intel/igb/igb.h      |  13 +-
>  drivers/net/ethernet/intel/igb/igb_main.c |   9 ++
>  drivers/net/ethernet/intel/igb/igb_xsk.c  | 207 ++++++++++++++++++++++++++++++
>  4 files changed, 229 insertions(+), 2 deletions(-)
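For context on the last paragraph of the commit message: drivers advertise AF_XDP
zero-copy support through the xdp_features bits, typically with the
xdp_set_features_flag() helper or by writing netdev->xdp_features directly. The
sketch below only illustrates that pattern; the helper name and the exact flag mix
are assumptions, and the actual advertisement for igb happens in a later patch of
this series, not in this one.

/* Illustrative sketch only (not from this patch): the usual way a driver
 * advertises AF_XDP zero-copy once its XSK paths are in place.
 */
#include <linux/netdevice.h>
#include <net/xdp.h>

static void igb_set_xdp_features(struct net_device *netdev) /* hypothetical helper */
{
	/* The flag mix is an assumption; the series decides the final set. */
	xdp_set_features_flag(netdev, NETDEV_XDP_ACT_BASIC |
				      NETDEV_XDP_ACT_REDIRECT |
				      NETDEV_XDP_ACT_XSK_ZEROCOPY);
}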
On Mon Aug 19 2024, Maciej Fijalkowski wrote:
> On Fri, Aug 16, 2024 at 11:24:03AM +0200, Kurt Kanzenbach wrote:
>> From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
>>
>> Add the following ring flag:
>> - IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)
>>
>> Add a xdp_buff array for use with XSK receive batch API, and a pointer
>> to xsk_pool in igb_adapter.
>>
>> Add enable/disable functions for TX and RX rings.
>> Add enable/disable functions for XSK pool.
>> Add xsk wakeup function.
>>
>> None of the above functionality will be active until
>> NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev->xdp_features.
>>
>> Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
>
> Sriram's mail bounces unfortunately, is it possible to grab his current
> address?

His current email address is in the Cc list. However, I wasn't sure if
it's okay to update the From and SoB of these patches?

>
> You could also update the copyright date in igb_xsk.c.

Ditto for the copyright. It probably has to be something like
Copyright(c) 2023 Ericsson?

Thanks,
Kurt
On Mon, Aug 19, 2024 at 03:41:20PM +0200, Kurt Kanzenbach wrote:
> On Mon Aug 19 2024, Maciej Fijalkowski wrote:
> > On Fri, Aug 16, 2024 at 11:24:03AM +0200, Kurt Kanzenbach wrote:
> >> From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> >>
> >> Add the following ring flag:
> >> - IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)
> >>
> >> Add a xdp_buff array for use with XSK receive batch API, and a pointer
> >> to xsk_pool in igb_adapter.
> >>
> >> Add enable/disable functions for TX and RX rings.
> >> Add enable/disable functions for XSK pool.
> >> Add xsk wakeup function.
> >>
> >> None of the above functionality will be active until
> >> NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev->xdp_features.
> >>
> >> Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> >
> > Sriram's mail bounces unfortunately, is it possible to grab his current
> > address?
>
> His current email address is in the Cc list. However, I wasn't sure if
> it's okay to update the From and SoB of these patches?

Okay. Then I believe Sriram should provide a mailmap entry to map his old
mail to a new one.

> >
> > You could also update the copyright date in igb_xsk.c.
>
> Ditto for the copyright. It probably has to be something like
> Copyright(c) 2023 Ericsson?

It says 2018 Intel. I don't think Sriram was working under E/// employment
as he said he was forbidden to work on this further and that's why you
picked it up, right?

My intent was not to stir up the copyright pot, though. It can be left
as-is or have something of a Linutronix/Sriram Yagnaraman mix :P

>
> Thanks,
> Kurt
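As a side note on the suggestion above: a .mailmap entry is a single line in the
kernel's top-level .mailmap file that maps the address recorded in old commits to a
current one. A sketch of the format (the new address below is only a placeholder;
none has been named at this point in the thread):

Sriram Yagnaraman <current.address@example.com> <sriram.yagnaraman@est.tech>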
On Mon Aug 19 2024, Maciej Fijalkowski wrote:
> On Mon, Aug 19, 2024 at 03:41:20PM +0200, Kurt Kanzenbach wrote:
>> On Mon Aug 19 2024, Maciej Fijalkowski wrote:
>> > On Fri, Aug 16, 2024 at 11:24:03AM +0200, Kurt Kanzenbach wrote:
>> >> From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
>> >>
>> >> Add the following ring flag:
>> >> - IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)
>> >>
>> >> Add a xdp_buff array for use with XSK receive batch API, and a pointer
>> >> to xsk_pool in igb_adapter.
>> >>
>> >> Add enable/disable functions for TX and RX rings.
>> >> Add enable/disable functions for XSK pool.
>> >> Add xsk wakeup function.
>> >>
>> >> None of the above functionality will be active until
>> >> NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev->xdp_features.
>> >>
>> >> Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
>> >
>> > Sriram's mail bounces unfortunately, is it possible to grab his current
>> > address?
>>
>> His current email address is in the Cc list. However, I wasn't sure if
>> it's okay to update the From and SoB of these patches?
>
> Okay. Then I believe Sriram should provide a mailmap entry to map his old
> mail to a new one.
>
>>
>> >
>> > You could also update the copyright date in igb_xsk.c.
>>
>> Ditto for the copyright. It probably has to be something like
>> Copyright(c) 2023 Ericsson?
>
> It says 2018 Intel. I don't think Sriram was working under E/// employment
> as he said he was forbidden to work on this further and that's why you
> picked it up, right?
>
> My intent was not to stir up the copyright pot, though. It can be left
> as-is or have something of a Linutronix/Sriram Yagnaraman mix :P

Let's see if Sriram has something to say about the copyright. If not,
I'll just leave this as-is.

Thanks,
Kurt
> -----Original Message-----
> From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> Sent: Monday, 19 August 2024 16:11
> To: Kurt Kanzenbach <kurt@linutronix.de>
> Cc: Tony Nguyen <anthony.l.nguyen@intel.com>; Przemek Kitszel
> <przemyslaw.kitszel@intel.com>; David S. Miller <davem@davemloft.net>;
> Eric Dumazet <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>;
> Paolo Abeni <pabeni@redhat.com>; Alexei Starovoitov <ast@kernel.org>;
> Daniel Borkmann <daniel@iogearbox.net>; Jesper Dangaard Brouer
> <hawk@kernel.org>; John Fastabend <john.fastabend@gmail.com>; Richard
> Cochran <richardcochran@gmail.com>; Sriram Yagnaraman
> <sriram.yagnaraman@ericsson.com>; Benjamin Steinke
> <benjamin.steinke@woks-audio.com>; Sebastian Andrzej Siewior
> <bigeasy@linutronix.de>; intel-wired-lan@lists.osuosl.org;
> netdev@vger.kernel.org; bpf@vger.kernel.org; Sriram Yagnaraman
> <sriram.yagnaraman@est.tech>
> Subject: Re: [PATCH iwl-next v6 4/6] igb: Introduce XSK data structures and
> helpers
>
> On Mon, Aug 19, 2024 at 03:41:20PM +0200, Kurt Kanzenbach wrote:
> > On Mon Aug 19 2024, Maciej Fijalkowski wrote:
> > > On Fri, Aug 16, 2024 at 11:24:03AM +0200, Kurt Kanzenbach wrote:
> > >> From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> > >>
> > >> Add the following ring flag:
> > >> - IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)
> > >>
> > >> Add a xdp_buff array for use with XSK receive batch API, and a
> > >> pointer to xsk_pool in igb_adapter.
> > >>
> > >> Add enable/disable functions for TX and RX rings.
> > >> Add enable/disable functions for XSK pool.
> > >> Add xsk wakeup function.
> > >>
> > >> None of the above functionality will be active until
> > >> NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev->xdp_features.
> > >>
> > >> Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> > >
> > > Sriram's mail bounces unfortunately, is it possible to grab his
> > > current address?
> >
> > His current email address is in the Cc list. However, I wasn't sure if
> > it's okay to update the From and SoB of these patches?
>
> Okay. Then I believe Sriram should provide a mailmap entry to map his old
> mail to a new one.

Please feel free to remove my "est.tech" address from From: and
Signed-off-by:

I am just glad that my work has not gone to waste. Thank you for that.

I will check with my company's *lawyers* to see if I can provide a
mailmap to my current address :(

> >
> > > You could also update the copyright date in igb_xsk.c.
> >
> > Ditto for the copyright. It probably has to be something like
> > Copyright(c) 2023 Ericsson?
>
> It says 2018 Intel. I don't think Sriram was working under E/// employment
> as he said he was forbidden to work on this further and that's why you
> picked it up, right?
>
> My intent was not to stir up the copyright pot, though. It can be left
> as-is or have something of a Linutronix/Sriram Yagnaraman mix :P
>
> > Thanks,
> > Kurt
Hi Sriram,

On Mon Aug 19 2024, Sriram Yagnaraman wrote:
>> -----Original Message-----
>> From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
>> Sent: Monday, 19 August 2024 16:11
>> To: Kurt Kanzenbach <kurt@linutronix.de>
>> Cc: Tony Nguyen <anthony.l.nguyen@intel.com>; Przemek Kitszel
>> <przemyslaw.kitszel@intel.com>; David S. Miller <davem@davemloft.net>;
>> Eric Dumazet <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>;
>> Paolo Abeni <pabeni@redhat.com>; Alexei Starovoitov <ast@kernel.org>;
>> Daniel Borkmann <daniel@iogearbox.net>; Jesper Dangaard Brouer
>> <hawk@kernel.org>; John Fastabend <john.fastabend@gmail.com>; Richard
>> Cochran <richardcochran@gmail.com>; Sriram Yagnaraman
>> <sriram.yagnaraman@ericsson.com>; Benjamin Steinke
>> <benjamin.steinke@woks-audio.com>; Sebastian Andrzej Siewior
>> <bigeasy@linutronix.de>; intel-wired-lan@lists.osuosl.org;
>> netdev@vger.kernel.org; bpf@vger.kernel.org; Sriram Yagnaraman
>> <sriram.yagnaraman@est.tech>
>> Subject: Re: [PATCH iwl-next v6 4/6] igb: Introduce XSK data structures and
>> helpers
>>
>> On Mon, Aug 19, 2024 at 03:41:20PM +0200, Kurt Kanzenbach wrote:
>> > On Mon Aug 19 2024, Maciej Fijalkowski wrote:
>> > > On Fri, Aug 16, 2024 at 11:24:03AM +0200, Kurt Kanzenbach wrote:
>> > >> From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
>> > >>
>> > >> Add the following ring flag:
>> > >> - IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)
>> > >>
>> > >> Add a xdp_buff array for use with XSK receive batch API, and a
>> > >> pointer to xsk_pool in igb_adapter.
>> > >>
>> > >> Add enable/disable functions for TX and RX rings.
>> > >> Add enable/disable functions for XSK pool.
>> > >> Add xsk wakeup function.
>> > >>
>> > >> None of the above functionality will be active until
>> > >> NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev->xdp_features.
>> > >>
>> > >> Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
>> > >
>> > > Sriram's mail bounces unfortunately, is it possible to grab his
>> > > current address?
>> >
>> > His current email address is in the Cc list. However, I wasn't sure if
>> > it's okay to update the From and SoB of these patches?
>>
>> Okay. Then I believe Sriram should provide a mailmap entry to map his old
>> mail to a new one.
>
> Please feel free to remove my "est.tech" address from From: and
> Signed-off-by:

Ok, I'll replace your est.tech email address with your ericsson one in
all patches. Or do you have a personal address (like gmail), which you
prefer?

What about the copyright in igb_xsk.c? Does it belong to you, or Intel
or to your previous employer?

> I am just glad that my work has not gone to waste. Thank you for that.

You're welcome.

> I will check with my company's *lawyers* to see if I can provide a
> mailmap to my current address :(

Good luck with that :-).

Thanks,
Kurt
> Hi Sriram,
>
> On Mon Aug 19 2024, Sriram Yagnaraman wrote:
> >> -----Original Message-----
> >> From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> >> Sent: Monday, 19 August 2024 16:11
> >> To: Kurt Kanzenbach <kurt@linutronix.de>
> >> Cc: Tony Nguyen <anthony.l.nguyen@intel.com>; Przemek Kitszel
> >> <przemyslaw.kitszel@intel.com>; David S. Miller <davem@davemloft.net>;
> >> Eric Dumazet <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>;
> >> Paolo Abeni <pabeni@redhat.com>; Alexei Starovoitov <ast@kernel.org>;
> >> Daniel Borkmann <daniel@iogearbox.net>; Jesper Dangaard Brouer
> >> <hawk@kernel.org>; John Fastabend <john.fastabend@gmail.com>; Richard
> >> Cochran <richardcochran@gmail.com>; Sriram Yagnaraman
> >> <sriram.yagnaraman@ericsson.com>; Benjamin Steinke
> >> <benjamin.steinke@woks-audio.com>; Sebastian Andrzej Siewior
> >> <bigeasy@linutronix.de>; intel-wired-lan@lists.osuosl.org;
> >> netdev@vger.kernel.org; bpf@vger.kernel.org; Sriram Yagnaraman
> >> <sriram.yagnaraman@est.tech>
> >> Subject: Re: [PATCH iwl-next v6 4/6] igb: Introduce XSK data structures and
> >> helpers
> >>
> >> On Mon, Aug 19, 2024 at 03:41:20PM +0200, Kurt Kanzenbach wrote:
> >> > On Mon Aug 19 2024, Maciej Fijalkowski wrote:
> >> > > On Fri, Aug 16, 2024 at 11:24:03AM +0200, Kurt Kanzenbach wrote:
> >> > >> From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> >> > >>
> >> > >> Add the following ring flag:
> >> > >> - IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)
> >> > >>
> >> > >> Add a xdp_buff array for use with XSK receive batch API, and a
> >> > >> pointer to xsk_pool in igb_adapter.
> >> > >>
> >> > >> Add enable/disable functions for TX and RX rings.
> >> > >> Add enable/disable functions for XSK pool.
> >> > >> Add xsk wakeup function.
> >> > >>
> >> > >> None of the above functionality will be active until
> >> > >> NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev->xdp_features.
> >> > >>
> >> > >> Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> >> > >
> >> > > Sriram's mail bounces unfortunately, is it possible to grab his
> >> > > current address?
> >> >
> >> > His current email address is in the Cc list. However, I wasn't sure if
> >> > it's okay to update the From and SoB of these patches?
> >>
> >> Okay. Then I believe Sriram should provide a mailmap entry to map his old
> >> mail to a new one.
> >
> > Please feel free to remove my "est.tech" address from From: and
> > Signed-off-by:
>
> Ok, I'll replace your est.tech email address with your ericsson one in
> all patches. Or do you have a personal address (like gmail), which you
> prefer?

Personal mail is probably the best choice.

>
> What about the copyright in igb_xsk.c? Does it belong to you, or Intel
> or to your previous employer?
>
> > I am just glad that my work has not gone to waste. Thank you for that.
>
> You're welcome.
>
> > I will check with my company's *lawyers* to see if I can provide a
> > mailmap to my current address :(
>
> Good luck with that :-).
>
> Thanks,
> Kurt
diff --git a/drivers/net/ethernet/intel/igb/Makefile b/drivers/net/ethernet/intel/igb/Makefile
index 463c0d26b9d4..6c1b702fd992 100644
--- a/drivers/net/ethernet/intel/igb/Makefile
+++ b/drivers/net/ethernet/intel/igb/Makefile
@@ -8,4 +8,4 @@ obj-$(CONFIG_IGB) += igb.o
 
 igb-y := igb_main.o igb_ethtool.o e1000_82575.o \
 	 e1000_mac.o e1000_nvm.o e1000_phy.o e1000_mbx.o \
-	 e1000_i210.o igb_ptp.o igb_hwmon.o
+	 e1000_i210.o igb_ptp.o igb_hwmon.o igb_xsk.o
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index dbba193241b9..8db5b44b4576 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -20,6 +20,7 @@
 #include <linux/mdio.h>
 
 #include <net/xdp.h>
+#include <net/xdp_sock_drv.h>
 
 struct igb_adapter;
 
@@ -320,6 +321,7 @@ struct igb_ring {
 	union {				/* array of buffer info structs */
 		struct igb_tx_buffer *tx_buffer_info;
 		struct igb_rx_buffer *rx_buffer_info;
+		struct xdp_buff **rx_buffer_info_zc;
 	};
 	void *desc;			/* descriptor ring memory */
 	unsigned long flags;		/* ring specific flags */
@@ -357,6 +359,7 @@ struct igb_ring {
 		};
 	};
 	struct xdp_rxq_info xdp_rxq;
+	struct xsk_buff_pool *xsk_pool;
 } ____cacheline_internodealigned_in_smp;
 
 struct igb_q_vector {
@@ -384,7 +387,8 @@ enum e1000_ring_flags_t {
 	IGB_RING_FLAG_RX_SCTP_CSUM,
 	IGB_RING_FLAG_RX_LB_VLAN_BSWAP,
 	IGB_RING_FLAG_TX_CTX_IDX,
-	IGB_RING_FLAG_TX_DETECT_HANG
+	IGB_RING_FLAG_TX_DETECT_HANG,
+	IGB_RING_FLAG_TX_DISABLED
 };
 
 #define ring_uses_large_buffer(ring) \
@@ -820,4 +824,11 @@ int igb_add_mac_steering_filter(struct igb_adapter *adapter,
 int igb_del_mac_steering_filter(struct igb_adapter *adapter,
 				const u8 *addr, u8 queue, u8 flags);
 
+struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
+				   struct igb_ring *ring);
+int igb_xsk_pool_setup(struct igb_adapter *adapter,
+		       struct xsk_buff_pool *pool,
+		       u16 qid);
+int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
+
 #endif /* _IGB_H_ */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index db1598876424..9234c768a600 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2904,9 +2904,14 @@ static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf)
 
 static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 {
+	struct igb_adapter *adapter = netdev_priv(dev);
+
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
 		return igb_xdp_setup(dev, xdp);
+	case XDP_SETUP_XSK_POOL:
+		return igb_xsk_pool_setup(adapter, xdp->xsk.pool,
+					  xdp->xsk.queue_id);
 	default:
 		return -EINVAL;
 	}
@@ -3035,6 +3040,7 @@ static const struct net_device_ops igb_netdev_ops = {
 	.ndo_setup_tc		= igb_setup_tc,
 	.ndo_bpf		= igb_xdp,
 	.ndo_xdp_xmit		= igb_xdp_xmit,
+	.ndo_xsk_wakeup		= igb_xsk_wakeup,
 };
 
 /**
@@ -4357,6 +4363,8 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
 	u64 tdba = ring->dma;
 	int reg_idx = ring->reg_idx;
 
+	WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring));
+
 	wr32(E1000_TDLEN(reg_idx),
 	     ring->count * sizeof(union e1000_adv_tx_desc));
 	wr32(E1000_TDBAL(reg_idx),
@@ -4752,6 +4760,7 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
 	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
 	WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 					   MEM_TYPE_PAGE_SHARED, NULL));
+	WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring));
 
 	/* disable the queue */
 	wr32(E1000_RXDCTL(reg_idx), 0);
diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c b/drivers/net/ethernet/intel/igb/igb_xsk.c
new file mode 100644
index 000000000000..7b632be3e7e3
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/igb_xsk.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock_drv.h>
+#include <net/xdp.h>
+
+#include "e1000_hw.h"
+#include "igb.h"
+
+static int igb_realloc_rx_buffer_info(struct igb_ring *ring, bool pool_present)
+{
+	int size = pool_present ?
+		sizeof(*ring->rx_buffer_info_zc) * ring->count :
+		sizeof(*ring->rx_buffer_info) * ring->count;
+	void *buff_info = vmalloc(size);
+
+	if (!buff_info)
+		return -ENOMEM;
+
+	if (pool_present) {
+		vfree(ring->rx_buffer_info);
+		ring->rx_buffer_info = NULL;
+		ring->rx_buffer_info_zc = buff_info;
+	} else {
+		vfree(ring->rx_buffer_info_zc);
+		ring->rx_buffer_info_zc = NULL;
+		ring->rx_buffer_info = buff_info;
+	}
+
+	return 0;
+}
+
+static void igb_txrx_ring_disable(struct igb_adapter *adapter, u16 qid)
+{
+	struct igb_ring *tx_ring = adapter->tx_ring[qid];
+	struct igb_ring *rx_ring = adapter->rx_ring[qid];
+	struct e1000_hw *hw = &adapter->hw;
+
+	set_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);
+
+	wr32(E1000_TXDCTL(tx_ring->reg_idx), 0);
+	wr32(E1000_RXDCTL(rx_ring->reg_idx), 0);
+
+	synchronize_net();
+
+	/* Rx/Tx share the same napi context. */
+	napi_disable(&rx_ring->q_vector->napi);
+
+	igb_clean_tx_ring(tx_ring);
+	igb_clean_rx_ring(rx_ring);
+
+	memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats));
+	memset(&tx_ring->tx_stats, 0, sizeof(tx_ring->tx_stats));
+}
+
+static void igb_txrx_ring_enable(struct igb_adapter *adapter, u16 qid)
+{
+	struct igb_ring *tx_ring = adapter->tx_ring[qid];
+	struct igb_ring *rx_ring = adapter->rx_ring[qid];
+
+	igb_configure_tx_ring(adapter, tx_ring);
+	igb_configure_rx_ring(adapter, rx_ring);
+
+	synchronize_net();
+
+	clear_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);
+
+	/* call igb_desc_unused which always leaves
+	 * at least 1 descriptor unused to make sure
+	 * next_to_use != next_to_clean
+	 */
+	igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));
+
+	/* Rx/Tx share the same napi context. */
+	napi_enable(&rx_ring->q_vector->napi);
+}
+
+struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
+				   struct igb_ring *ring)
+{
+	int qid = ring->queue_index;
+	struct xsk_buff_pool *pool;
+
+	pool = xsk_get_pool_from_qid(adapter->netdev, qid);
+
+	if (!igb_xdp_is_enabled(adapter))
+		return NULL;
+
+	return (pool && pool->dev) ? pool : NULL;
+}
+
+static int igb_xsk_pool_enable(struct igb_adapter *adapter,
+			       struct xsk_buff_pool *pool,
+			       u16 qid)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct igb_ring *rx_ring;
+	bool if_running;
+	int err;
+
+	if (qid >= adapter->num_rx_queues)
+		return -EINVAL;
+
+	if (qid >= netdev->real_num_rx_queues ||
+	    qid >= netdev->real_num_tx_queues)
+		return -EINVAL;
+
+	err = xsk_pool_dma_map(pool, &adapter->pdev->dev, IGB_RX_DMA_ATTR);
+	if (err)
+		return err;
+
+	rx_ring = adapter->rx_ring[qid];
+	if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
+	if (if_running)
+		igb_txrx_ring_disable(adapter, qid);
+
+	if (if_running) {
+		err = igb_realloc_rx_buffer_info(rx_ring, true);
+		if (!err) {
+			igb_txrx_ring_enable(adapter, qid);
+			/* Kick start the NAPI context so that receiving will start */
+			err = igb_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
+		}
+
+		if (err) {
+			xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int igb_xsk_pool_disable(struct igb_adapter *adapter, u16 qid)
+{
+	struct xsk_buff_pool *pool;
+	struct igb_ring *rx_ring;
+	bool if_running;
+	int err;
+
+	pool = xsk_get_pool_from_qid(adapter->netdev, qid);
+	if (!pool)
+		return -EINVAL;
+
+	rx_ring = adapter->rx_ring[qid];
+	if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
+	if (if_running)
+		igb_txrx_ring_disable(adapter, qid);
+
+	xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
+
+	if (if_running) {
+		err = igb_realloc_rx_buffer_info(rx_ring, false);
+		if (err)
+			return err;
+
+		igb_txrx_ring_enable(adapter, qid);
+	}
+
+	return 0;
+}
+
+int igb_xsk_pool_setup(struct igb_adapter *adapter,
+		       struct xsk_buff_pool *pool,
+		       u16 qid)
+{
+	return pool ? igb_xsk_pool_enable(adapter, pool, qid) :
+		igb_xsk_pool_disable(adapter, qid);
+}
+
+int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct igb_ring *ring;
+	u32 eics = 0;
+
+	if (test_bit(__IGB_DOWN, &adapter->state))
+		return -ENETDOWN;
+
+	if (!igb_xdp_is_enabled(adapter))
+		return -EINVAL;
+
+	if (qid >= adapter->num_tx_queues)
+		return -EINVAL;
+
+	ring = adapter->tx_ring[qid];
+
+	if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags))
+		return -ENETDOWN;
+
+	if (!READ_ONCE(ring->xsk_pool))
+		return -EINVAL;
+
+	if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
+		/* Cause software interrupt */
+		if (adapter->flags & IGB_FLAG_HAS_MSIX) {
+			eics |= ring->q_vector->eims_value;
+			wr32(E1000_EICS, eics);
+		} else {
+			wr32(E1000_ICS, E1000_ICS_RXDMT0);
+		}
+	}
+
+	return 0;
+}
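As a usage note for the helpers introduced by this patch (not part of the patch
itself): igb_xsk_pool_setup() is reached when user space binds an AF_XDP socket in
zero-copy mode to one of the device's queues, which makes the core call the
driver's ndo_bpf() with XDP_SETUP_XSK_POOL. A minimal libxdp-based sketch,
assuming an igb port named eth0 and queue 0 (both assumptions), could look roughly
like this:

/* Build roughly with: cc xsk_bind.c -lxdp -lbpf
 * All names here (eth0, queue 0, frame count) are assumptions for the sketch.
 */
#include <stdlib.h>
#include <xdp/xsk.h>		/* libxdp AF_XDP helpers */

#define NUM_FRAMES	4096

int main(void)
{
	struct xsk_socket_config cfg = {
		.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
		.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
		.bind_flags = XDP_ZEROCOPY,	/* fail instead of silently copying */
	};
	struct xsk_ring_prod fill, tx;
	struct xsk_ring_cons comp, rx;
	struct xsk_umem *umem;
	struct xsk_socket *xsk;
	void *bufs;

	/* UMEM backing memory shared between user space and the driver */
	if (posix_memalign(&bufs, 4096, NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE))
		return 1;
	if (xsk_umem__create(&umem, bufs, NUM_FRAMES * XSK_UMEM__DEFAULT_FRAME_SIZE,
			     &fill, &comp, NULL))
		return 1;

	/* Binding to eth0/queue 0 triggers ndo_bpf(XDP_SETUP_XSK_POOL), i.e.
	 * igb_xsk_pool_setup() -> igb_xsk_pool_enable() from this patch.
	 */
	if (xsk_socket__create(&xsk, "eth0", 0, umem, &rx, &tx, &cfg))
		return 1;

	/* ... populate the fill ring and process rx/tx here ... */

	xsk_socket__delete(xsk);	/* unbind path ends up in igb_xsk_pool_disable() */
	xsk_umem__delete(umem);
	free(bufs);
	return 0;
}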