diff mbox series

[V4,net-next,9/9] net: ena: introduce ndo_xdp_xmit() function for XDP_REDIRECT

Message ID 1607083875-32134-10-git-send-email-akiyano@amazon.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series XDP Redirect implementation for ENA driver | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: line length of 84 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/header_inline success Link
netdev/stable success Stable not CCed

Commit Message

Kiyanovski, Arthur Dec. 4, 2020, 12:11 p.m. UTC
From: Arthur Kiyanovski <akiyano@amazon.com>

This patch implements the ndo_xdp_xmit() net_device function which is
called when a packet is redirected to this driver using an
XDP_REDIRECT directive.

The function receives an array of xdp frames that it needs to xmit.
The TX queues that are used to xmit these frames are the XDP
queues used by the XDP_TX flow. Therefore a lock is added to synchronize
both flows (XDP_TX and XDP_REDIRECT).

Signed-off-by: Shay Agroskin <shayagr@amazon.com>
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 83 +++++++++++++++++---
 drivers/net/ethernet/amazon/ena/ena_netdev.h |  1 +
 2 files changed, 72 insertions(+), 12 deletions(-)

Comments

Maciej Fijalkowski Dec. 6, 2020, 8:22 p.m. UTC | #1
On Fri, Dec 04, 2020 at 02:11:15PM +0200, akiyano@amazon.com wrote:
> From: Arthur Kiyanovski <akiyano@amazon.com>
> 
> This patch implements the ndo_xdp_xmit() net_device function which is
> called when a packet is redirected to this driver using an
> XDP_REDIRECT directive.
> 
> The function receives an array of xdp frames that it needs to xmit.
> The TX queues that are used to xmit these frames are the XDP
> queues used by the XDP_TX flow. Therefore a lock is added to synchronize
> both flows (XDP_TX and XDP_REDIRECT).
> 
> Signed-off-by: Shay Agroskin <shayagr@amazon.com>
> Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
> ---
>  drivers/net/ethernet/amazon/ena/ena_netdev.c | 83 +++++++++++++++++---
>  drivers/net/ethernet/amazon/ena/ena_netdev.h |  1 +
>  2 files changed, 72 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> index 0d077a626604..6c5d8b8c4d13 100644
> --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> @@ -281,20 +281,18 @@ static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
>  	return -EINVAL;
>  }
>  
> -static int ena_xdp_xmit_frame(struct net_device *dev,
> +static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
> +			      struct net_device *dev,
>  			      struct xdp_frame *xdpf,
> -			      int qid)
> +			      int flags)
>  {
> -	struct ena_adapter *adapter = netdev_priv(dev);
>  	struct ena_com_tx_ctx ena_tx_ctx = {};
>  	struct ena_tx_buffer *tx_info;
> -	struct ena_ring *xdp_ring;
>  	u16 next_to_use, req_id;
> -	int rc;
>  	void *push_hdr;
>  	u32 push_len;
> +	int rc;
>  
> -	xdp_ring = &adapter->tx_ring[qid];
>  	next_to_use = xdp_ring->next_to_use;
>  	req_id = xdp_ring->free_ids[next_to_use];
>  	tx_info = &xdp_ring->tx_buffer_info[req_id];
> @@ -321,25 +319,76 @@ static int ena_xdp_xmit_frame(struct net_device *dev,
>  	/* trigger the dma engine. ena_com_write_sq_doorbell()
>  	 * has a mb
>  	 */
> -	ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq);
> -	ena_increase_stat(&xdp_ring->tx_stats.doorbells, 1, &xdp_ring->syncp);
> +	if (flags & XDP_XMIT_FLUSH) {
> +		ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq);
> +		ena_increase_stat(&xdp_ring->tx_stats.doorbells, 1,
> +				  &xdp_ring->syncp);
> +	}
>  
> -	return NETDEV_TX_OK;
> +	return rc;
>  
>  error_unmap_dma:
>  	ena_unmap_tx_buff(xdp_ring, tx_info);
>  	tx_info->xdpf = NULL;
>  error_drop_packet:
>  	xdp_return_frame(xdpf);
> -	return NETDEV_TX_OK;
> +	return rc;
> +}
> +
> +static int ena_xdp_xmit(struct net_device *dev, int n,
> +			struct xdp_frame **frames, u32 flags)
> +{
> +	struct ena_adapter *adapter = netdev_priv(dev);
> +	int qid, i, err, drops = 0;
> +	struct ena_ring *xdp_ring;
> +
> +	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
> +		return -EINVAL;
> +
> +	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
> +		return -ENETDOWN;
> +
> +	/* We assume that all rings have the same XDP program */
> +	if (!READ_ONCE(adapter->rx_ring->xdp_bpf_prog))
> +		return -ENXIO;
> +
> +	qid = smp_processor_id() % adapter->xdp_num_queues;
> +	qid += adapter->xdp_first_ring;
> +	xdp_ring = &adapter->tx_ring[qid];
> +
> +	/* Other CPU ids might try to send through this queue */
> +	spin_lock(&xdp_ring->xdp_tx_lock);

I have a feeling that we are not consistent with this locking approach as
some drivers do that and some don't.

> +
> +	for (i = 0; i < n; i++) {
> +		err = ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 0);
> +		/* The descriptor is freed by ena_xdp_xmit_frame in case
> +		 * of an error.
> +		 */
> +		if (err)
> +			drops++;
> +	}
> +
> +	/* Ring doorbell to make device aware of the packets */
> +	if (flags & XDP_XMIT_FLUSH) {
> +		ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq);
> +		ena_increase_stat(&xdp_ring->tx_stats.doorbells, 1,
> +				  &xdp_ring->syncp);

Have you thought of ringing the doorbell once per a batch of xmitted
frames?

> +	}
> +
> +	spin_unlock(&xdp_ring->xdp_tx_lock);
> +
> +	/* Return number of packets sent */
> +	return n - drops;
>  }
>  
>  static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
>  {
>  	struct bpf_prog *xdp_prog;
> +	struct ena_ring *xdp_ring;
>  	u32 verdict = XDP_PASS;
>  	struct xdp_frame *xdpf;
>  	u64 *xdp_stat;
> +	int qid;
>  
>  	rcu_read_lock();
>  	xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
> @@ -352,8 +401,16 @@ static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
>  	switch (verdict) {
>  	case XDP_TX:
>  		xdpf = xdp_convert_buff_to_frame(xdp);
> -		ena_xdp_xmit_frame(rx_ring->netdev, xdpf,
> -				   rx_ring->qid + rx_ring->adapter->num_io_queues);
> +		/* Find xmit queue */
> +		qid = rx_ring->qid + rx_ring->adapter->num_io_queues;
> +		xdp_ring = &rx_ring->adapter->tx_ring[qid];
> +
> +		/* The XDP queues are shared between XDP_TX and XDP_REDIRECT */
> +		spin_lock(&xdp_ring->xdp_tx_lock);
> +
> +		ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, xdpf, XDP_XMIT_FLUSH);

Once again you don't check retval over here.

> +
> +		spin_unlock(&xdp_ring->xdp_tx_lock);
>  		xdp_stat = &rx_ring->rx_stats.xdp_tx;
>  		break;
>  	case XDP_REDIRECT:
> @@ -644,6 +701,7 @@ static void ena_init_io_rings(struct ena_adapter *adapter,
>  		txr->smoothed_interval =
>  			ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
>  		txr->disable_meta_caching = adapter->disable_meta_caching;
> +		spin_lock_init(&txr->xdp_tx_lock);
>  
>  		/* Don't init RX queues for xdp queues */
>  		if (!ENA_IS_XDP_INDEX(adapter, i)) {
> @@ -3236,6 +3294,7 @@ static const struct net_device_ops ena_netdev_ops = {
>  	.ndo_set_mac_address	= NULL,
>  	.ndo_validate_addr	= eth_validate_addr,
>  	.ndo_bpf		= ena_xdp,
> +	.ndo_xdp_xmit		= ena_xdp_xmit,
>  };
>  
>  static int ena_device_validate_params(struct ena_adapter *adapter,
> diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
> index fed79c50a870..74af15d62ee1 100644
> --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
> +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
> @@ -258,6 +258,7 @@ struct ena_ring {
>  	struct ena_com_io_sq *ena_com_io_sq;
>  	struct bpf_prog *xdp_bpf_prog;
>  	struct xdp_rxq_info xdp_rxq;
> +	spinlock_t xdp_tx_lock;	/* synchronize XDP TX/Redirect traffic */
>  
>  	u16 next_to_use;
>  	u16 next_to_clean;
> -- 
> 2.23.3
>
Shay Agroskin Dec. 7, 2020, 7:28 p.m. UTC | #2
Maciej Fijalkowski <maciej.fijalkowski@intel.com> writes:

> On Fri, Dec 04, 2020 at 02:11:15PM +0200, akiyano@amazon.com 
> wrote:
>> From: Arthur Kiyanovski <akiyano@amazon.com>
>> 
>> This patch implements the ndo_xdp_xmit() net_device function 
>> which is
>> called when a packet is redirected to this driver using an
>> XDP_REDIRECT directive.
>> 
>> The function receives an array of xdp frames that it needs to 
>> xmit.
>> The TX queues that are used to xmit these frames are the XDP
>> queues used by the XDP_TX flow. Therefore a lock is added to 
>> synchronize
>> both flows (XDP_TX and XDP_REDIRECT).
>> 
>> Signed-off-by: Shay Agroskin <shayagr@amazon.com>
>> Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
>> ...
>> +	xdp_ring = &adapter->tx_ring[qid];
>> +
>> +	/* Other CPU ids might try to send through this queue */
>> +	spin_lock(&xdp_ring->xdp_tx_lock);
>
> I have a feeling that we are not consistent with this locking 
> approach as
> some drivers do that and some don't.
>

Not sure what you mean here, ENA driver uses a lock for XDP xmit 
function because XDP_TX and XDP_REDIRECT flows share the same egress queues. This is a design choice that was 
taken. Some drivers (e.g. mlx5) seem to have separate queues for 
regular TX, XDP_TX, XDP_REDIRECT and RX flows, and so are able to 
avoid locking.

>> +
>> +	for (i = 0; i < n; i++) {
>> +		err = ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 
>> 0);
>> +		/* The descriptor is freed by ena_xdp_xmit_frame 
>> in case
>> +		 * of an error.
>> +		 */
>> +		if (err)
>> +			drops++;
>> +	}
>> +
>> +	/* Ring doorbell to make device aware of the packets */
>> +	if (flags & XDP_XMIT_FLUSH) {
>> + 
>> ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq);
>> +		ena_increase_stat(&xdp_ring->tx_stats.doorbells, 
>> 1,
>> +				  &xdp_ring->syncp);
>
> Have you thought of ringing the doorbell once per a batch of 
> xmitted
> frames?
>

For XDP_REDIRECT the packets are indeed batched before sending a 
doorbell. XDP_TX flow would be added the same improvement in 
future patchset.
Thanks for this idea (:

>> +	}
>> +
>> +	spin_unlock(&xdp_ring->xdp_tx_lock);
>> +
>> +	/* Return number of packets sent */
>> +	return n - drops;
>>  }
>> ...
>>  
>> -				   rx_ring->qid + 
>> rx_ring->adapter->num_io_queues);
>> +		/* Find xmit queue */
>> +		qid = rx_ring->qid + 
>> rx_ring->adapter->num_io_queues;
>> +		xdp_ring = &rx_ring->adapter->tx_ring[qid];
>> +
>> +		/* The XDP queues are shared between XDP_TX and 
>> XDP_REDIRECT */
>> +		spin_lock(&xdp_ring->xdp_tx_lock);
>> +
>> +		ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, 
>> xdpf, XDP_XMIT_FLUSH);
>
> Once again you don't check retval over here.
>

ena_xdp_xmit_frame() function handles failure internally (reducing 
the ref-count of the RX page, increasing error stat etc.) 
therefore there is no need for special handling of its failure.
For the XDP Redirect flow, the ena_xdp_xmit() function returns to 
the kernel the number of packets that were successfully sent, so 
the kernel needs to monitor the return value of this function.
This is not the case here though.

>> +
>> +		spin_unlock(&xdp_ring->xdp_tx_lock);
>>  		xdp_stat = &rx_ring->rx_stats.xdp_tx;
>>  		break;
>>  	case XDP_REDIRECT:
>> @@ -644,6 +701,7 @@ static void ena_init_io_rings(struct 
>> ena_adapter *adapter,
>>  		txr->smoothed_interval =
>>  			ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
>>  		txr->disable_meta_caching = 
>>  adapter->disable_meta_caching;
>> +		spin_lock_init(&txr->xdp_tx_lock);
>>  
>>  		/* Don't init RX queues for xdp queues */
>>  		if (!ENA_IS_XDP_INDEX(adapter, i)) {
>> @@ -3236,6 +3294,7 @@ static const struct net_device_ops 
>> ena_netdev_ops = {
>>  	.ndo_set_mac_address	= NULL,
>>  	.ndo_validate_addr	= eth_validate_addr,
>>  	.ndo_bpf		= ena_xdp,
>> +	.ndo_xdp_xmit		= ena_xdp_xmit,
>>  };
>> ...
>> +	spinlock_t xdp_tx_lock;	/* synchronize XDP TX/Redirect 
>> traffic */
>>  
>>  	u16 next_to_use;
>>  	u16 next_to_clean;
>> -- 
>> 2.23.3
>>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 0d077a626604..6c5d8b8c4d13 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -281,20 +281,18 @@  static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring,
 	return -EINVAL;
 }
 
-static int ena_xdp_xmit_frame(struct net_device *dev,
+static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring,
+			      struct net_device *dev,
 			      struct xdp_frame *xdpf,
-			      int qid)
+			      int flags)
 {
-	struct ena_adapter *adapter = netdev_priv(dev);
 	struct ena_com_tx_ctx ena_tx_ctx = {};
 	struct ena_tx_buffer *tx_info;
-	struct ena_ring *xdp_ring;
 	u16 next_to_use, req_id;
-	int rc;
 	void *push_hdr;
 	u32 push_len;
+	int rc;
 
-	xdp_ring = &adapter->tx_ring[qid];
 	next_to_use = xdp_ring->next_to_use;
 	req_id = xdp_ring->free_ids[next_to_use];
 	tx_info = &xdp_ring->tx_buffer_info[req_id];
@@ -321,25 +319,76 @@  static int ena_xdp_xmit_frame(struct net_device *dev,
 	/* trigger the dma engine. ena_com_write_sq_doorbell()
 	 * has a mb
 	 */
-	ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq);
-	ena_increase_stat(&xdp_ring->tx_stats.doorbells, 1, &xdp_ring->syncp);
+	if (flags & XDP_XMIT_FLUSH) {
+		ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq);
+		ena_increase_stat(&xdp_ring->tx_stats.doorbells, 1,
+				  &xdp_ring->syncp);
+	}
 
-	return NETDEV_TX_OK;
+	return rc;
 
 error_unmap_dma:
 	ena_unmap_tx_buff(xdp_ring, tx_info);
 	tx_info->xdpf = NULL;
 error_drop_packet:
 	xdp_return_frame(xdpf);
-	return NETDEV_TX_OK;
+	return rc;
+}
+
+static int ena_xdp_xmit(struct net_device *dev, int n,
+			struct xdp_frame **frames, u32 flags)
+{
+	struct ena_adapter *adapter = netdev_priv(dev);
+	int qid, i, err, drops = 0;
+	struct ena_ring *xdp_ring;
+
+	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+		return -EINVAL;
+
+	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
+		return -ENETDOWN;
+
+	/* We assume that all rings have the same XDP program */
+	if (!READ_ONCE(adapter->rx_ring->xdp_bpf_prog))
+		return -ENXIO;
+
+	qid = smp_processor_id() % adapter->xdp_num_queues;
+	qid += adapter->xdp_first_ring;
+	xdp_ring = &adapter->tx_ring[qid];
+
+	/* Other CPU ids might try to send through this queue */
+	spin_lock(&xdp_ring->xdp_tx_lock);
+
+	for (i = 0; i < n; i++) {
+		err = ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 0);
+		/* The descriptor is freed by ena_xdp_xmit_frame in case
+		 * of an error.
+		 */
+		if (err)
+			drops++;
+	}
+
+	/* Ring doorbell to make device aware of the packets */
+	if (flags & XDP_XMIT_FLUSH) {
+		ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq);
+		ena_increase_stat(&xdp_ring->tx_stats.doorbells, 1,
+				  &xdp_ring->syncp);
+	}
+
+	spin_unlock(&xdp_ring->xdp_tx_lock);
+
+	/* Return number of packets sent */
+	return n - drops;
 }
 
 static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
 {
 	struct bpf_prog *xdp_prog;
+	struct ena_ring *xdp_ring;
 	u32 verdict = XDP_PASS;
 	struct xdp_frame *xdpf;
 	u64 *xdp_stat;
+	int qid;
 
 	rcu_read_lock();
 	xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
@@ -352,8 +401,16 @@  static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp)
 	switch (verdict) {
 	case XDP_TX:
 		xdpf = xdp_convert_buff_to_frame(xdp);
-		ena_xdp_xmit_frame(rx_ring->netdev, xdpf,
-				   rx_ring->qid + rx_ring->adapter->num_io_queues);
+		/* Find xmit queue */
+		qid = rx_ring->qid + rx_ring->adapter->num_io_queues;
+		xdp_ring = &rx_ring->adapter->tx_ring[qid];
+
+		/* The XDP queues are shared between XDP_TX and XDP_REDIRECT */
+		spin_lock(&xdp_ring->xdp_tx_lock);
+
+		ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, xdpf, XDP_XMIT_FLUSH);
+
+		spin_unlock(&xdp_ring->xdp_tx_lock);
 		xdp_stat = &rx_ring->rx_stats.xdp_tx;
 		break;
 	case XDP_REDIRECT:
@@ -644,6 +701,7 @@  static void ena_init_io_rings(struct ena_adapter *adapter,
 		txr->smoothed_interval =
 			ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
 		txr->disable_meta_caching = adapter->disable_meta_caching;
+		spin_lock_init(&txr->xdp_tx_lock);
 
 		/* Don't init RX queues for xdp queues */
 		if (!ENA_IS_XDP_INDEX(adapter, i)) {
@@ -3236,6 +3294,7 @@  static const struct net_device_ops ena_netdev_ops = {
 	.ndo_set_mac_address	= NULL,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_bpf		= ena_xdp,
+	.ndo_xdp_xmit		= ena_xdp_xmit,
 };
 
 static int ena_device_validate_params(struct ena_adapter *adapter,
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index fed79c50a870..74af15d62ee1 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -258,6 +258,7 @@  struct ena_ring {
 	struct ena_com_io_sq *ena_com_io_sq;
 	struct bpf_prog *xdp_bpf_prog;
 	struct xdp_rxq_info xdp_rxq;
+	spinlock_t xdp_tx_lock;	/* synchronize XDP TX/Redirect traffic */
 
 	u16 next_to_use;
 	u16 next_to_clean;