diff mbox series

[iwl-next,v6,4/6] igb: Introduce XSK data structures and helpers

Message ID 20240711-b4-igb_zero_copy-v6-4-4bfb68773b18@linutronix.de (mailing list archive)
State Awaiting Upstream
Delegated to: Netdev Maintainers
Headers show
Series igb: Add support for AF_XDP zero-copy | expand

Checks

Context Check Description
netdev/series_format warning Target tree name not specified in the subject
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 16 this patch: 16
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 13 of 13 maintainers
netdev/build_clang success Errors and warnings before: 17 this patch: 17
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 22 this patch: 22
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 38 this patch: 38
netdev/source_inline success Was 0 now: 0

Commit Message

Kurt Kanzenbach Aug. 16, 2024, 9:24 a.m. UTC
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

Add the following ring flag:
- IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)

Add a xdp_buff array for use with XSK receive batch API, and a pointer
to xsk_pool in igb_adapter.

Add enable/disable functions for TX and RX rings.
Add enable/disable functions for XSK pool.
Add xsk wakeup function.

None of the above functionality will be active until
NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev->xdp_features.

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
[Kurt: Add READ/WRITE_ONCE(), synchronize_net(),
       remove IGB_RING_FLAG_AF_XDP_ZC]
Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
---
 drivers/net/ethernet/intel/igb/Makefile   |   2 +-
 drivers/net/ethernet/intel/igb/igb.h      |  13 +-
 drivers/net/ethernet/intel/igb/igb_main.c |   9 ++
 drivers/net/ethernet/intel/igb/igb_xsk.c  | 207 ++++++++++++++++++++++++++++++
 4 files changed, 229 insertions(+), 2 deletions(-)
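
None of this becomes reachable until a later change advertises
NETDEV_XDP_ACT_XSK_ZEROCOPY in netdev->xdp_features; once it is advertised,
the XDP_SETUP_XSK_POOL path is driven entirely from userspace by binding an
AF_XDP socket to a queue. A minimal, untested sketch of that userspace side,
using the raw AF_XDP uAPI (error handling, UMEM allocation and the ring
mmap()s are left out, and values such as the 2048-byte chunk size are only
illustrative):

	#include <stdint.h>
	#include <sys/socket.h>
	#include <linux/if_xdp.h>

	static int bind_xsk_zc(int ifindex, __u32 queue_id,
			       void *umem_area, __u64 umem_size)
	{
		struct xdp_umem_reg mr = {
			.addr = (__u64)(uintptr_t)umem_area,
			.len = umem_size,
			.chunk_size = 2048,
		};
		int ring_size = 2048;
		struct sockaddr_xdp sxdp = {
			.sxdp_family = AF_XDP,
			.sxdp_ifindex = ifindex,
			.sxdp_queue_id = queue_id,
			/* request zero-copy explicitly; the bind fails if the
			 * driver does not support it for this queue
			 */
			.sxdp_flags = XDP_ZEROCOPY,
		};
		int fd = socket(AF_XDP, SOCK_RAW, 0);

		/* UMEM and all four rings must be set up before bind() */
		setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
		setsockopt(fd, SOL_XDP, XDP_RX_RING, &ring_size, sizeof(ring_size));
		setsockopt(fd, SOL_XDP, XDP_TX_RING, &ring_size, sizeof(ring_size));
		setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, &ring_size, sizeof(ring_size));
		setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &ring_size, sizeof(ring_size));

		/* bind() is what reaches the driver: the XSK core invokes
		 * .ndo_bpf with XDP_SETUP_XSK_POOL, i.e. igb_xsk_pool_setup()
		 */
		return bind(fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
	}

To actually receive traffic, an XDP program that redirects to the socket via
an XSKMAP still needs to be attached, typically through libxdp.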

Comments

Fijalkowski, Maciej Aug. 19, 2024, 1:19 p.m. UTC | #1
On Fri, Aug 16, 2024 at 11:24:03AM +0200, Kurt Kanzenbach wrote:
> From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> 
> Add the following ring flag:
> - IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)
> 
> Add a xdp_buff array for use with XSK receive batch API, and a pointer
> to xsk_pool in igb_adapter.
> 
> Add enable/disable functions for TX and RX rings.
> Add enable/disable functions for XSK pool.
> Add xsk wakeup function.
> 
> None of the above functionality will be active until
> NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev->xdp_features.
> 
> Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

Sriram's mail bounces unfortunately, is it possible to grab his current
address?

You could also update the copyright date in igb_xsk.c. Besides:

Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>

> [Kurt: Add READ/WRITE_ONCE(), synchronize_net(),
>        remove IGB_RING_FLAG_AF_XDP_ZC]
> Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
> ---
>  drivers/net/ethernet/intel/igb/Makefile   |   2 +-
>  drivers/net/ethernet/intel/igb/igb.h      |  13 +-
>  drivers/net/ethernet/intel/igb/igb_main.c |   9 ++
>  drivers/net/ethernet/intel/igb/igb_xsk.c  | 207 ++++++++++++++++++++++++++++++
>  4 files changed, 229 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/igb/Makefile b/drivers/net/ethernet/intel/igb/Makefile
> index 463c0d26b9d4..6c1b702fd992 100644
> --- a/drivers/net/ethernet/intel/igb/Makefile
> +++ b/drivers/net/ethernet/intel/igb/Makefile
> @@ -8,4 +8,4 @@ obj-$(CONFIG_IGB) += igb.o
>  
>  igb-y := igb_main.o igb_ethtool.o e1000_82575.o \
>  	 e1000_mac.o e1000_nvm.o e1000_phy.o e1000_mbx.o \
> -	 e1000_i210.o igb_ptp.o igb_hwmon.o
> +	 e1000_i210.o igb_ptp.o igb_hwmon.o igb_xsk.o
> diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
> index dbba193241b9..8db5b44b4576 100644
> --- a/drivers/net/ethernet/intel/igb/igb.h
> +++ b/drivers/net/ethernet/intel/igb/igb.h
> @@ -20,6 +20,7 @@
>  #include <linux/mdio.h>
>  
>  #include <net/xdp.h>
> +#include <net/xdp_sock_drv.h>
>  
>  struct igb_adapter;
>  
> @@ -320,6 +321,7 @@ struct igb_ring {
>  	union {				/* array of buffer info structs */
>  		struct igb_tx_buffer *tx_buffer_info;
>  		struct igb_rx_buffer *rx_buffer_info;
> +		struct xdp_buff **rx_buffer_info_zc;
>  	};
>  	void *desc;			/* descriptor ring memory */
>  	unsigned long flags;		/* ring specific flags */
> @@ -357,6 +359,7 @@ struct igb_ring {
>  		};
>  	};
>  	struct xdp_rxq_info xdp_rxq;
> +	struct xsk_buff_pool *xsk_pool;
>  } ____cacheline_internodealigned_in_smp;
>  
>  struct igb_q_vector {
> @@ -384,7 +387,8 @@ enum e1000_ring_flags_t {
>  	IGB_RING_FLAG_RX_SCTP_CSUM,
>  	IGB_RING_FLAG_RX_LB_VLAN_BSWAP,
>  	IGB_RING_FLAG_TX_CTX_IDX,
> -	IGB_RING_FLAG_TX_DETECT_HANG
> +	IGB_RING_FLAG_TX_DETECT_HANG,
> +	IGB_RING_FLAG_TX_DISABLED
>  };
>  
>  #define ring_uses_large_buffer(ring) \
> @@ -820,4 +824,11 @@ int igb_add_mac_steering_filter(struct igb_adapter *adapter,
>  int igb_del_mac_steering_filter(struct igb_adapter *adapter,
>  				const u8 *addr, u8 queue, u8 flags);
>  
> +struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
> +				   struct igb_ring *ring);
> +int igb_xsk_pool_setup(struct igb_adapter *adapter,
> +		       struct xsk_buff_pool *pool,
> +		       u16 qid);
> +int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
> +
>  #endif /* _IGB_H_ */
> diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
> index db1598876424..9234c768a600 100644
> --- a/drivers/net/ethernet/intel/igb/igb_main.c
> +++ b/drivers/net/ethernet/intel/igb/igb_main.c
> @@ -2904,9 +2904,14 @@ static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf)
>  
>  static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp)
>  {
> +	struct igb_adapter *adapter = netdev_priv(dev);
> +
>  	switch (xdp->command) {
>  	case XDP_SETUP_PROG:
>  		return igb_xdp_setup(dev, xdp);
> +	case XDP_SETUP_XSK_POOL:
> +		return igb_xsk_pool_setup(adapter, xdp->xsk.pool,
> +					  xdp->xsk.queue_id);
>  	default:
>  		return -EINVAL;
>  	}
> @@ -3035,6 +3040,7 @@ static const struct net_device_ops igb_netdev_ops = {
>  	.ndo_setup_tc		= igb_setup_tc,
>  	.ndo_bpf		= igb_xdp,
>  	.ndo_xdp_xmit		= igb_xdp_xmit,
> +	.ndo_xsk_wakeup         = igb_xsk_wakeup,
>  };
>  
>  /**
> @@ -4357,6 +4363,8 @@ void igb_configure_tx_ring(struct igb_adapter *adapter,
>  	u64 tdba = ring->dma;
>  	int reg_idx = ring->reg_idx;
>  
> +	WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring));
> +
>  	wr32(E1000_TDLEN(reg_idx),
>  	     ring->count * sizeof(union e1000_adv_tx_desc));
>  	wr32(E1000_TDBAL(reg_idx),
> @@ -4752,6 +4760,7 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
>  	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
>  	WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
>  					   MEM_TYPE_PAGE_SHARED, NULL));
> +	WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring));
>  
>  	/* disable the queue */
>  	wr32(E1000_RXDCTL(reg_idx), 0);
> diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c b/drivers/net/ethernet/intel/igb/igb_xsk.c
> new file mode 100644
> index 000000000000..7b632be3e7e3
> --- /dev/null
> +++ b/drivers/net/ethernet/intel/igb/igb_xsk.c
> @@ -0,0 +1,207 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright(c) 2018 Intel Corporation. */
> +
> +#include <linux/bpf_trace.h>
> +#include <net/xdp_sock_drv.h>
> +#include <net/xdp.h>
> +
> +#include "e1000_hw.h"
> +#include "igb.h"
> +
> +static int igb_realloc_rx_buffer_info(struct igb_ring *ring, bool pool_present)
> +{
> +	int size = pool_present ?
> +		sizeof(*ring->rx_buffer_info_zc) * ring->count :
> +		sizeof(*ring->rx_buffer_info) * ring->count;
> +	void *buff_info = vmalloc(size);
> +
> +	if (!buff_info)
> +		return -ENOMEM;
> +
> +	if (pool_present) {
> +		vfree(ring->rx_buffer_info);
> +		ring->rx_buffer_info = NULL;
> +		ring->rx_buffer_info_zc = buff_info;
> +	} else {
> +		vfree(ring->rx_buffer_info_zc);
> +		ring->rx_buffer_info_zc = NULL;
> +		ring->rx_buffer_info = buff_info;
> +	}
> +
> +	return 0;
> +}
> +
> +static void igb_txrx_ring_disable(struct igb_adapter *adapter, u16 qid)
> +{
> +	struct igb_ring *tx_ring = adapter->tx_ring[qid];
> +	struct igb_ring *rx_ring = adapter->rx_ring[qid];
> +	struct e1000_hw *hw = &adapter->hw;
> +
> +	set_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);
> +
> +	wr32(E1000_TXDCTL(tx_ring->reg_idx), 0);
> +	wr32(E1000_RXDCTL(rx_ring->reg_idx), 0);
> +
> +	synchronize_net();
> +
> +	/* Rx/Tx share the same napi context. */
> +	napi_disable(&rx_ring->q_vector->napi);
> +
> +	igb_clean_tx_ring(tx_ring);
> +	igb_clean_rx_ring(rx_ring);
> +
> +	memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats));
> +	memset(&tx_ring->tx_stats, 0, sizeof(tx_ring->tx_stats));
> +}
> +
> +static void igb_txrx_ring_enable(struct igb_adapter *adapter, u16 qid)
> +{
> +	struct igb_ring *tx_ring = adapter->tx_ring[qid];
> +	struct igb_ring *rx_ring = adapter->rx_ring[qid];
> +
> +	igb_configure_tx_ring(adapter, tx_ring);
> +	igb_configure_rx_ring(adapter, rx_ring);
> +
> +	synchronize_net();
> +
> +	clear_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);
> +
> +	/* call igb_desc_unused which always leaves
> +	 * at least 1 descriptor unused to make sure
> +	 * next_to_use != next_to_clean
> +	 */
> +	igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));
> +
> +	/* Rx/Tx share the same napi context. */
> +	napi_enable(&rx_ring->q_vector->napi);
> +}
> +
> +struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
> +				   struct igb_ring *ring)
> +{
> +	int qid = ring->queue_index;
> +	struct xsk_buff_pool *pool;
> +
> +	pool = xsk_get_pool_from_qid(adapter->netdev, qid);
> +
> +	if (!igb_xdp_is_enabled(adapter))
> +		return NULL;
> +
> +	return (pool && pool->dev) ? pool : NULL;
> +}
> +
> +static int igb_xsk_pool_enable(struct igb_adapter *adapter,
> +			       struct xsk_buff_pool *pool,
> +			       u16 qid)
> +{
> +	struct net_device *netdev = adapter->netdev;
> +	struct igb_ring *rx_ring;
> +	bool if_running;
> +	int err;
> +
> +	if (qid >= adapter->num_rx_queues)
> +		return -EINVAL;
> +
> +	if (qid >= netdev->real_num_rx_queues ||
> +	    qid >= netdev->real_num_tx_queues)
> +		return -EINVAL;
> +
> +	err = xsk_pool_dma_map(pool, &adapter->pdev->dev, IGB_RX_DMA_ATTR);
> +	if (err)
> +		return err;
> +
> +	rx_ring = adapter->rx_ring[qid];
> +	if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
> +	if (if_running)
> +		igb_txrx_ring_disable(adapter, qid);
> +
> +	if (if_running) {
> +		err = igb_realloc_rx_buffer_info(rx_ring, true);
> +		if (!err) {
> +			igb_txrx_ring_enable(adapter, qid);
> +			/* Kick start the NAPI context so that receiving will start */
> +			err = igb_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
> +		}
> +
> +		if (err) {
> +			xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
> +			return err;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +static int igb_xsk_pool_disable(struct igb_adapter *adapter, u16 qid)
> +{
> +	struct xsk_buff_pool *pool;
> +	struct igb_ring *rx_ring;
> +	bool if_running;
> +	int err;
> +
> +	pool = xsk_get_pool_from_qid(adapter->netdev, qid);
> +	if (!pool)
> +		return -EINVAL;
> +
> +	rx_ring = adapter->rx_ring[qid];
> +	if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
> +	if (if_running)
> +		igb_txrx_ring_disable(adapter, qid);
> +
> +	xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
> +
> +	if (if_running) {
> +		err = igb_realloc_rx_buffer_info(rx_ring, false);
> +		if (err)
> +			return err;
> +
> +		igb_txrx_ring_enable(adapter, qid);
> +	}
> +
> +	return 0;
> +}
> +
> +int igb_xsk_pool_setup(struct igb_adapter *adapter,
> +		       struct xsk_buff_pool *pool,
> +		       u16 qid)
> +{
> +	return pool ? igb_xsk_pool_enable(adapter, pool, qid) :
> +		igb_xsk_pool_disable(adapter, qid);
> +}
> +
> +int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
> +{
> +	struct igb_adapter *adapter = netdev_priv(dev);
> +	struct e1000_hw *hw = &adapter->hw;
> +	struct igb_ring *ring;
> +	u32 eics = 0;
> +
> +	if (test_bit(__IGB_DOWN, &adapter->state))
> +		return -ENETDOWN;
> +
> +	if (!igb_xdp_is_enabled(adapter))
> +		return -EINVAL;
> +
> +	if (qid >= adapter->num_tx_queues)
> +		return -EINVAL;
> +
> +	ring = adapter->tx_ring[qid];
> +
> +	if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags))
> +		return -ENETDOWN;
> +
> +	if (!READ_ONCE(ring->xsk_pool))
> +		return -EINVAL;
> +
> +	if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
> +		/* Cause software interrupt */
> +		if (adapter->flags & IGB_FLAG_HAS_MSIX) {
> +			eics |= ring->q_vector->eims_value;
> +			wr32(E1000_EICS, eics);
> +		} else {
> +			wr32(E1000_ICS, E1000_ICS_RXDMT0);
> +		}
> +	}
> +
> +	return 0;
> +}
> 
> -- 
> 2.39.2
>
Kurt Kanzenbach Aug. 19, 2024, 1:41 p.m. UTC | #2
On Mon Aug 19 2024, Maciej Fijalkowski wrote:
> On Fri, Aug 16, 2024 at 11:24:03AM +0200, Kurt Kanzenbach wrote:
>> From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
>> 
>> Add the following ring flag:
>> - IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)
>> 
>> Add a xdp_buff array for use with XSK receive batch API, and a pointer
>> to xsk_pool in igb_adapter.
>> 
>> Add enable/disable functions for TX and RX rings.
>> Add enable/disable functions for XSK pool.
>> Add xsk wakeup function.
>> 
>> None of the above functionality will be active until
>> NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev->xdp_features.
>> 
>> Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
>
> Sriram's mail bounces unfortunately, is it possible to grab his current
> address?

His current email address is in the Cc list. However, i wasn't sure if
it's okay to update the From and SoB of these patches?

>
> You could also update the copyright date in igb_xsk.c.

Ditto for the copyright. It probably has to be something like
Copyright(c) 2023 Ericsson?

Thanks,
Kurt
Fijalkowski, Maciej Aug. 19, 2024, 2:10 p.m. UTC | #3
On Mon, Aug 19, 2024 at 03:41:20PM +0200, Kurt Kanzenbach wrote:
> On Mon Aug 19 2024, Maciej Fijalkowski wrote:
> > On Fri, Aug 16, 2024 at 11:24:03AM +0200, Kurt Kanzenbach wrote:
> >> From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> >> 
> >> Add the following ring flag:
> >> - IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)
> >> 
> >> Add a xdp_buff array for use with XSK receive batch API, and a pointer
> >> to xsk_pool in igb_adapter.
> >> 
> >> Add enable/disable functions for TX and RX rings.
> >> Add enable/disable functions for XSK pool.
> >> Add xsk wakeup function.
> >> 
> >> None of the above functionality will be active until
> >> NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev->xdp_features.
> >> 
> >> Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> >
> > Sriram's mail bounces unfortunately, is it possible to grab his current
> > address?
> 
> His current email address is in the Cc list. However, i wasn't sure if
> it's okay to update the From and SoB of these patches?

Okay. Then I believe Sriram should provide a mailmap entry to map his old
mail to a new one.
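
For reference, such an entry in the top-level .mailmap file takes the form
below (addresses are placeholders), with the current address first and the
old, bouncing one second:

	New Name <current@example.com> <old@example.com>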

> 
> >
> > You could also update the copyright date in igb_xsk.c.
> 
> Ditto for the copyright. It probably has to be something like
> Copyright(c) 2023 Ericsson?

It says 2018 Intel. I don't think Sriram was working under E/// employment
as he said he was forbidden to work on this further and that's why you
picked it up, right?

My intent was not to stir up the copyright pot, though. It can be left as-is
or have something of a Linutronix/Sriram Yagnaraman mix :P

> 
> Thanks,
> Kurt
Kurt Kanzenbach Aug. 19, 2024, 2:27 p.m. UTC | #4
On Mon Aug 19 2024, Maciej Fijalkowski wrote:
> On Mon, Aug 19, 2024 at 03:41:20PM +0200, Kurt Kanzenbach wrote:
>> On Mon Aug 19 2024, Maciej Fijalkowski wrote:
>> > On Fri, Aug 16, 2024 at 11:24:03AM +0200, Kurt Kanzenbach wrote:
>> >> From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
>> >> 
>> >> Add the following ring flag:
>> >> - IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)
>> >> 
>> >> Add a xdp_buff array for use with XSK receive batch API, and a pointer
>> >> to xsk_pool in igb_adapter.
>> >> 
>> >> Add enable/disable functions for TX and RX rings.
>> >> Add enable/disable functions for XSK pool.
>> >> Add xsk wakeup function.
>> >> 
>> >> None of the above functionality will be active until
>> >> NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev->xdp_features.
>> >> 
>> >> Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
>> >
>> > Sriram's mail bounces unfortunately, is it possible to grab his current
>> > address?
>> 
>> His current email address is in the Cc list. However, i wasn't sure if
>> it's okay to update the From and SoB of these patches?
>
> Okay. Then I believe Sriram should provide a mailmap entry to map his old
> mail to a new one.
>
>> 
>> >
>> > You could also update the copyright date in igb_xsk.c.
>> 
>> Ditto for the copyright. It probably has to be something like
>> Copyright(c) 2023 Ericsson?
>
> It says 2018 Intel. I don't think Sriram was working under E/// employment
> as he said he was forbidden to work on this further and that's why you
> picked it up, right?
>
> My intent was not to stir up the copyright pot, though. It can be left as-is
> or have something of a Linutronix/Sriram Yagnaraman mix :P

Let's see if Sriram has something to say about the copyright. If not
i'll just leave this as-is.

Thanks,
Kurt
Sriram Yagnaraman Aug. 19, 2024, 7:34 p.m. UTC | #5
> -----Original Message-----
> From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> Sent: Monday, 19 August 2024 16:11
> To: Kurt Kanzenbach <kurt@linutronix.de>
> Cc: Tony Nguyen <anthony.l.nguyen@intel.com>; Przemek Kitszel
> <przemyslaw.kitszel@intel.com>; David S. Miller <davem@davemloft.net>;
> Eric Dumazet <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>;
> Paolo Abeni <pabeni@redhat.com>; Alexei Starovoitov <ast@kernel.org>;
> Daniel Borkmann <daniel@iogearbox.net>; Jesper Dangaard Brouer
> <hawk@kernel.org>; John Fastabend <john.fastabend@gmail.com>; Richard
> Cochran <richardcochran@gmail.com>; Sriram Yagnaraman
> <sriram.yagnaraman@ericsson.com>; Benjamin Steinke
> <benjamin.steinke@woks-audio.com>; Sebastian Andrzej Siewior
> <bigeasy@linutronix.de>; intel-wired-lan@lists.osuosl.org;
> netdev@vger.kernel.org; bpf@vger.kernel.org; Sriram Yagnaraman
> <sriram.yagnaraman@est.tech>
> Subject: Re: [PATCH iwl-next v6 4/6] igb: Introduce XSK data structures and
> helpers
>
> On Mon, Aug 19, 2024 at 03:41:20PM +0200, Kurt Kanzenbach wrote:
> > On Mon Aug 19 2024, Maciej Fijalkowski wrote:
> > > On Fri, Aug 16, 2024 at 11:24:03AM +0200, Kurt Kanzenbach wrote:
> > >> From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> > >>
> > >> Add the following ring flag:
> > >> - IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)
> > >>
> > >> Add a xdp_buff array for use with XSK receive batch API, and a
> > >> pointer to xsk_pool in igb_adapter.
> > >>
> > >> Add enable/disable functions for TX and RX rings.
> > >> Add enable/disable functions for XSK pool.
> > >> Add xsk wakeup function.
> > >>
> > >> None of the above functionality will be active until
> > >> NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev-
> >xdp_features.
> > >>
> > >> Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> > >
> > > Sriram's mail bounces unfortunately, is it possible to grab his
> > > current address?
> >
> > His current email address is in the Cc list. However, i wasn't sure if
> > it's okay to update the From and SoB of these patches?
>
> Okay. Then I believe Sriram should provide a mailmap entry to map his old
> mail to a new one.

Please feel free to remove my "est.tech" address from From: and Signed-off-by:
I am just glad that my work has not gone to waste. Thank you for that.
I will check with my company's *lawyers* to see if I can provide a mailmap to my current address :(

>
> >
> > >
> > > You could also update the copyright date in igb_xsk.c.
> >
> > Ditto for the copyright. It probably has to be something like
> > Copyright(c) 2023 Ericsson?
>
> It says 2018 Intel. I don't think Sriram was working under E/// employment as
> he said he was forbidden to work on this further and that's why you picked it
> up, right?
>
> My intent was not to stir up the copyright pot, though. It can be left as-is or have
> something of a Linutronix/Sriram Yagnaraman mix :P
>
> >
> > Thanks,
> > Kurt
>
Kurt Kanzenbach Aug. 20, 2024, 12:24 p.m. UTC | #6
Hi Sriram,

On Mon Aug 19 2024, Sriram Yagnaraman wrote:
>> -----Original Message-----
>> From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
>> Sent: Monday, 19 August 2024 16:11
>> To: Kurt Kanzenbach <kurt@linutronix.de>
>> Cc: Tony Nguyen <anthony.l.nguyen@intel.com>; Przemek Kitszel
>> <przemyslaw.kitszel@intel.com>; David S. Miller <davem@davemloft.net>;
>> Eric Dumazet <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>;
>> Paolo Abeni <pabeni@redhat.com>; Alexei Starovoitov <ast@kernel.org>;
>> Daniel Borkmann <daniel@iogearbox.net>; Jesper Dangaard Brouer
>> <hawk@kernel.org>; John Fastabend <john.fastabend@gmail.com>; Richard
>> Cochran <richardcochran@gmail.com>; Sriram Yagnaraman
>> <sriram.yagnaraman@ericsson.com>; Benjamin Steinke
>> <benjamin.steinke@woks-audio.com>; Sebastian Andrzej Siewior
>> <bigeasy@linutronix.de>; intel-wired-lan@lists.osuosl.org;
>> netdev@vger.kernel.org; bpf@vger.kernel.org; Sriram Yagnaraman
>> <sriram.yagnaraman@est.tech>
>> Subject: Re: [PATCH iwl-next v6 4/6] igb: Introduce XSK data structures and
>> helpers
>>
>> On Mon, Aug 19, 2024 at 03:41:20PM +0200, Kurt Kanzenbach wrote:
>> > On Mon Aug 19 2024, Maciej Fijalkowski wrote:
>> > > On Fri, Aug 16, 2024 at 11:24:03AM +0200, Kurt Kanzenbach wrote:
>> > >> From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
>> > >>
>> > >> Add the following ring flag:
>> > >> - IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)
>> > >>
>> > >> Add a xdp_buff array for use with XSK receive batch API, and a
>> > >> pointer to xsk_pool in igb_adapter.
>> > >>
>> > >> Add enable/disable functions for TX and RX rings.
>> > >> Add enable/disable functions for XSK pool.
>> > >> Add xsk wakeup function.
>> > >>
>> > >> None of the above functionality will be active until
>> > >> NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev-
>> >xdp_features.
>> > >>
>> > >> Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
>> > >
>> > > Sriram's mail bounces unfortunately, is it possible to grab his
>> > > current address?
>> >
>> > His current email address is in the Cc list. However, i wasn't sure if
>> > it's okay to update the From and SoB of these patches?
>>
>> Okay. Then I believe Sriram should provide a mailmap entry to map his old
>> mail to a new one.
>
> Please feel free to remove my "est.tech" address from From: and
> Signed-off-by:

Ok, I'll replace your est.tech email address with your ericsson one in
all patches. Or do you have a personal address (like gmail) that you'd
prefer?

What about the copyright in igb_xsk.c? Does it belong to you, or Intel
or to your previous employer?

> I am just glad that my work has not gone to waste. Thank you for that.

You're welcome.

> I will check with my company's *lawyers* to see if I can provide a
> mailmap to my current address :(

Good luck with that :-).

Thanks,
Kurt
Fijalkowski, Maciej Aug. 20, 2024, 12:33 p.m. UTC | #7
> Hi Sriram,
> 
> On Mon Aug 19 2024, Sriram Yagnaraman wrote:
> >> -----Original Message-----
> >> From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
> >> Sent: Monday, 19 August 2024 16:11
> >> To: Kurt Kanzenbach <kurt@linutronix.de>
> >> Cc: Tony Nguyen <anthony.l.nguyen@intel.com>; Przemek Kitszel
> >> <przemyslaw.kitszel@intel.com>; David S. Miller <davem@davemloft.net>;
> >> Eric Dumazet <edumazet@google.com>; Jakub Kicinski <kuba@kernel.org>;
> >> Paolo Abeni <pabeni@redhat.com>; Alexei Starovoitov <ast@kernel.org>;
> >> Daniel Borkmann <daniel@iogearbox.net>; Jesper Dangaard Brouer
> >> <hawk@kernel.org>; John Fastabend <john.fastabend@gmail.com>; Richard
> >> Cochran <richardcochran@gmail.com>; Sriram Yagnaraman
> >> <sriram.yagnaraman@ericsson.com>; Benjamin Steinke
> >> <benjamin.steinke@woks-audio.com>; Sebastian Andrzej Siewior
> >> <bigeasy@linutronix.de>; intel-wired-lan@lists.osuosl.org;
> >> netdev@vger.kernel.org; bpf@vger.kernel.org; Sriram Yagnaraman
> >> <sriram.yagnaraman@est.tech>
> >> Subject: Re: [PATCH iwl-next v6 4/6] igb: Introduce XSK data structures and
> >> helpers
> >>
> >> On Mon, Aug 19, 2024 at 03:41:20PM +0200, Kurt Kanzenbach wrote:
> >> > On Mon Aug 19 2024, Maciej Fijalkowski wrote:
> >> > > On Fri, Aug 16, 2024 at 11:24:03AM +0200, Kurt Kanzenbach wrote:
> >> > >> From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> >> > >>
> >> > >> Add the following ring flag:
> >> > >> - IGB_RING_FLAG_TX_DISABLED (when xsk pool is being setup)
> >> > >>
> >> > >> Add a xdp_buff array for use with XSK receive batch API, and a
> >> > >> pointer to xsk_pool in igb_adapter.
> >> > >>
> >> > >> Add enable/disable functions for TX and RX rings.
> >> > >> Add enable/disable functions for XSK pool.
> >> > >> Add xsk wakeup function.
> >> > >>
> >> > >> None of the above functionality will be active until
> >> > >> NETDEV_XDP_ACT_XSK_ZEROCOPY is advertised in netdev-
> >> >xdp_features.
> >> > >>
> >> > >> Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
> >> > >
> >> > > Sriram's mail bounces unfortunately, is it possible to grab his
> >> > > current address?
> >> >
> >> > His current email address is in the Cc list. However, i wasn't sure if
> >> > it's okay to update the From and SoB of these patches?
> >>
> >> Okay. Then I believe Sriram should provide a mailmap entry to map his old
> >> mail to a new one.
> >
> > Please feel free to remove my "est.tech" address from From: and
> > Signed-off-by:
> 
> Ok, I'll replace your est.tech email address with your ericsson one in
> all patches. Or do you have a personal address (like gmail) that you'd
> prefer?

Personal mail is probably the best choice.

> 
> What about the copyright in igb_xsk.c? Does it belong to you, or Intel
> or to your previous employer?
> 
> > I am just glad that my work has not gone to waste. Thank you for that.
> 
> You're welcome.
> 
> > I will check with my company's *lawyers* to see if I can provide a
> > mailmap to my current address :(
> 
> Good luck with that :-).
> 
> Thanks,
> Kurt
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/igb/Makefile b/drivers/net/ethernet/intel/igb/Makefile
index 463c0d26b9d4..6c1b702fd992 100644
--- a/drivers/net/ethernet/intel/igb/Makefile
+++ b/drivers/net/ethernet/intel/igb/Makefile
@@ -8,4 +8,4 @@  obj-$(CONFIG_IGB) += igb.o
 
 igb-y := igb_main.o igb_ethtool.o e1000_82575.o \
 	 e1000_mac.o e1000_nvm.o e1000_phy.o e1000_mbx.o \
-	 e1000_i210.o igb_ptp.o igb_hwmon.o
+	 e1000_i210.o igb_ptp.o igb_hwmon.o igb_xsk.o
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index dbba193241b9..8db5b44b4576 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -20,6 +20,7 @@ 
 #include <linux/mdio.h>
 
 #include <net/xdp.h>
+#include <net/xdp_sock_drv.h>
 
 struct igb_adapter;
 
@@ -320,6 +321,7 @@  struct igb_ring {
 	union {				/* array of buffer info structs */
 		struct igb_tx_buffer *tx_buffer_info;
 		struct igb_rx_buffer *rx_buffer_info;
+		struct xdp_buff **rx_buffer_info_zc;
 	};
 	void *desc;			/* descriptor ring memory */
 	unsigned long flags;		/* ring specific flags */
@@ -357,6 +359,7 @@  struct igb_ring {
 		};
 	};
 	struct xdp_rxq_info xdp_rxq;
+	struct xsk_buff_pool *xsk_pool;
 } ____cacheline_internodealigned_in_smp;
 
 struct igb_q_vector {
@@ -384,7 +387,8 @@  enum e1000_ring_flags_t {
 	IGB_RING_FLAG_RX_SCTP_CSUM,
 	IGB_RING_FLAG_RX_LB_VLAN_BSWAP,
 	IGB_RING_FLAG_TX_CTX_IDX,
-	IGB_RING_FLAG_TX_DETECT_HANG
+	IGB_RING_FLAG_TX_DETECT_HANG,
+	IGB_RING_FLAG_TX_DISABLED
 };
 
 #define ring_uses_large_buffer(ring) \
@@ -820,4 +824,11 @@  int igb_add_mac_steering_filter(struct igb_adapter *adapter,
 int igb_del_mac_steering_filter(struct igb_adapter *adapter,
 				const u8 *addr, u8 queue, u8 flags);
 
+struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
+				   struct igb_ring *ring);
+int igb_xsk_pool_setup(struct igb_adapter *adapter,
+		       struct xsk_buff_pool *pool,
+		       u16 qid);
+int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags);
+
 #endif /* _IGB_H_ */
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index db1598876424..9234c768a600 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2904,9 +2904,14 @@  static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf)
 
 static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 {
+	struct igb_adapter *adapter = netdev_priv(dev);
+
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
 		return igb_xdp_setup(dev, xdp);
+	case XDP_SETUP_XSK_POOL:
+		return igb_xsk_pool_setup(adapter, xdp->xsk.pool,
+					  xdp->xsk.queue_id);
 	default:
 		return -EINVAL;
 	}
@@ -3035,6 +3040,7 @@  static const struct net_device_ops igb_netdev_ops = {
 	.ndo_setup_tc		= igb_setup_tc,
 	.ndo_bpf		= igb_xdp,
 	.ndo_xdp_xmit		= igb_xdp_xmit,
+	.ndo_xsk_wakeup         = igb_xsk_wakeup,
 };
 
 /**
@@ -4357,6 +4363,8 @@  void igb_configure_tx_ring(struct igb_adapter *adapter,
 	u64 tdba = ring->dma;
 	int reg_idx = ring->reg_idx;
 
+	WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring));
+
 	wr32(E1000_TDLEN(reg_idx),
 	     ring->count * sizeof(union e1000_adv_tx_desc));
 	wr32(E1000_TDBAL(reg_idx),
@@ -4752,6 +4760,7 @@  void igb_configure_rx_ring(struct igb_adapter *adapter,
 	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
 	WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 					   MEM_TYPE_PAGE_SHARED, NULL));
+	WRITE_ONCE(ring->xsk_pool, igb_xsk_pool(adapter, ring));
 
 	/* disable the queue */
 	wr32(E1000_RXDCTL(reg_idx), 0);
diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c b/drivers/net/ethernet/intel/igb/igb_xsk.c
new file mode 100644
index 000000000000..7b632be3e7e3
--- /dev/null
+++ b/drivers/net/ethernet/intel/igb/igb_xsk.c
@@ -0,0 +1,207 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2018 Intel Corporation. */
+
+#include <linux/bpf_trace.h>
+#include <net/xdp_sock_drv.h>
+#include <net/xdp.h>
+
+#include "e1000_hw.h"
+#include "igb.h"
+
+static int igb_realloc_rx_buffer_info(struct igb_ring *ring, bool pool_present)
+{
+	int size = pool_present ?
+		sizeof(*ring->rx_buffer_info_zc) * ring->count :
+		sizeof(*ring->rx_buffer_info) * ring->count;
+	void *buff_info = vmalloc(size);
+
+	if (!buff_info)
+		return -ENOMEM;
+
+	if (pool_present) {
+		vfree(ring->rx_buffer_info);
+		ring->rx_buffer_info = NULL;
+		ring->rx_buffer_info_zc = buff_info;
+	} else {
+		vfree(ring->rx_buffer_info_zc);
+		ring->rx_buffer_info_zc = NULL;
+		ring->rx_buffer_info = buff_info;
+	}
+
+	return 0;
+}
+
+static void igb_txrx_ring_disable(struct igb_adapter *adapter, u16 qid)
+{
+	struct igb_ring *tx_ring = adapter->tx_ring[qid];
+	struct igb_ring *rx_ring = adapter->rx_ring[qid];
+	struct e1000_hw *hw = &adapter->hw;
+
+	set_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);
+
+	wr32(E1000_TXDCTL(tx_ring->reg_idx), 0);
+	wr32(E1000_RXDCTL(rx_ring->reg_idx), 0);
+
+	synchronize_net();
+
+	/* Rx/Tx share the same napi context. */
+	napi_disable(&rx_ring->q_vector->napi);
+
+	igb_clean_tx_ring(tx_ring);
+	igb_clean_rx_ring(rx_ring);
+
+	memset(&rx_ring->rx_stats, 0, sizeof(rx_ring->rx_stats));
+	memset(&tx_ring->tx_stats, 0, sizeof(tx_ring->tx_stats));
+}
+
+static void igb_txrx_ring_enable(struct igb_adapter *adapter, u16 qid)
+{
+	struct igb_ring *tx_ring = adapter->tx_ring[qid];
+	struct igb_ring *rx_ring = adapter->rx_ring[qid];
+
+	igb_configure_tx_ring(adapter, tx_ring);
+	igb_configure_rx_ring(adapter, rx_ring);
+
+	synchronize_net();
+
+	clear_bit(IGB_RING_FLAG_TX_DISABLED, &tx_ring->flags);
+
+	/* call igb_desc_unused which always leaves
+	 * at least 1 descriptor unused to make sure
+	 * next_to_use != next_to_clean
+	 */
+	igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring));
+
+	/* Rx/Tx share the same napi context. */
+	napi_enable(&rx_ring->q_vector->napi);
+}
+
+struct xsk_buff_pool *igb_xsk_pool(struct igb_adapter *adapter,
+				   struct igb_ring *ring)
+{
+	int qid = ring->queue_index;
+	struct xsk_buff_pool *pool;
+
+	pool = xsk_get_pool_from_qid(adapter->netdev, qid);
+
+	if (!igb_xdp_is_enabled(adapter))
+		return NULL;
+
+	return (pool && pool->dev) ? pool : NULL;
+}
+
+static int igb_xsk_pool_enable(struct igb_adapter *adapter,
+			       struct xsk_buff_pool *pool,
+			       u16 qid)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct igb_ring *rx_ring;
+	bool if_running;
+	int err;
+
+	if (qid >= adapter->num_rx_queues)
+		return -EINVAL;
+
+	if (qid >= netdev->real_num_rx_queues ||
+	    qid >= netdev->real_num_tx_queues)
+		return -EINVAL;
+
+	err = xsk_pool_dma_map(pool, &adapter->pdev->dev, IGB_RX_DMA_ATTR);
+	if (err)
+		return err;
+
+	rx_ring = adapter->rx_ring[qid];
+	if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
+	if (if_running)
+		igb_txrx_ring_disable(adapter, qid);
+
+	if (if_running) {
+		err = igb_realloc_rx_buffer_info(rx_ring, true);
+		if (!err) {
+			igb_txrx_ring_enable(adapter, qid);
+			/* Kick start the NAPI context so that receiving will start */
+			err = igb_xsk_wakeup(adapter->netdev, qid, XDP_WAKEUP_RX);
+		}
+
+		if (err) {
+			xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int igb_xsk_pool_disable(struct igb_adapter *adapter, u16 qid)
+{
+	struct xsk_buff_pool *pool;
+	struct igb_ring *rx_ring;
+	bool if_running;
+	int err;
+
+	pool = xsk_get_pool_from_qid(adapter->netdev, qid);
+	if (!pool)
+		return -EINVAL;
+
+	rx_ring = adapter->rx_ring[qid];
+	if_running = netif_running(adapter->netdev) && igb_xdp_is_enabled(adapter);
+	if (if_running)
+		igb_txrx_ring_disable(adapter, qid);
+
+	xsk_pool_dma_unmap(pool, IGB_RX_DMA_ATTR);
+
+	if (if_running) {
+		err = igb_realloc_rx_buffer_info(rx_ring, false);
+		if (err)
+			return err;
+
+		igb_txrx_ring_enable(adapter, qid);
+	}
+
+	return 0;
+}
+
+int igb_xsk_pool_setup(struct igb_adapter *adapter,
+		       struct xsk_buff_pool *pool,
+		       u16 qid)
+{
+	return pool ? igb_xsk_pool_enable(adapter, pool, qid) :
+		igb_xsk_pool_disable(adapter, qid);
+}
+
+int igb_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
+{
+	struct igb_adapter *adapter = netdev_priv(dev);
+	struct e1000_hw *hw = &adapter->hw;
+	struct igb_ring *ring;
+	u32 eics = 0;
+
+	if (test_bit(__IGB_DOWN, &adapter->state))
+		return -ENETDOWN;
+
+	if (!igb_xdp_is_enabled(adapter))
+		return -EINVAL;
+
+	if (qid >= adapter->num_tx_queues)
+		return -EINVAL;
+
+	ring = adapter->tx_ring[qid];
+
+	if (test_bit(IGB_RING_FLAG_TX_DISABLED, &ring->flags))
+		return -ENETDOWN;
+
+	if (!READ_ONCE(ring->xsk_pool))
+		return -EINVAL;
+
+	if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
+		/* Cause software interrupt */
+		if (adapter->flags & IGB_FLAG_HAS_MSIX) {
+			eics |= ring->q_vector->eims_value;
+			wr32(E1000_EICS, eics);
+		} else {
+			wr32(E1000_ICS, E1000_ICS_RXDMT0);
+		}
+	}
+
+	return 0;
+}
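
On the userspace side, a sendto() on the AF_XDP socket is what lands in
igb_xsk_wakeup() above; when the socket was bound with XDP_USE_NEED_WAKEUP,
that kick is only needed while the kernel has flagged the ring as needing one.
A small sketch, assuming libxdp's xsk helpers (older setups ship the same
helpers in libbpf's <bpf/xsk.h>):

	#include <sys/socket.h>
	#include <xdp/xsk.h>

	/* After producing TX descriptors: poke the driver only when the
	 * kernel has set the need-wakeup flag on the ring.
	 */
	static void kick_tx(struct xsk_socket *xsk, struct xsk_ring_prod *tx)
	{
		if (xsk_ring_prod__needs_wakeup(tx))
			sendto(xsk_socket__fd(xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
	}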