diff mbox series

[next-queue,2/3] i40e: i40e_clean_tx_irq returns work done

Message ID 1664958703-4224-3-git-send-email-jdamato@fastly.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series i40e: Add an i40e_napi_poll tracepoint | expand

Checks

Context Check Description
netdev/tree_selection success Guessing tree name failed - patch did not apply

Commit Message

Joe Damato Oct. 5, 2022, 8:31 a.m. UTC
Adjust i40e_clean_tx_irq to return the actual number of packets cleaned
and adjust the logic in i40e_napi_poll to check this value.

Signed-off-by: Joe Damato <jdamato@fastly.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 24 +++++++++++++-----------
 drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 12 ++++++------
 drivers/net/ethernet/intel/i40e/i40e_xsk.h  |  2 +-
 3 files changed, 20 insertions(+), 18 deletions(-)

Comments

Paul Menzel Oct. 5, 2022, 8:45 a.m. UTC | #1
Dear Joe,


Thank you for the patch.

Am 05.10.22 um 10:31 schrieb Joe Damato:
> Adjust i40e_clean_tx_irq to return the actual number of packets cleaned
> and adjust the logic in i40e_napi_poll to check this value.

Nit for the summary/title:

i40e: Return number of cleaned packets in i40e_clean_tx_irq

> Signed-off-by: Joe Damato <jdamato@fastly.com>
> ---
>   drivers/net/ethernet/intel/i40e/i40e_txrx.c | 24 +++++++++++++-----------
>   drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 12 ++++++------
>   drivers/net/ethernet/intel/i40e/i40e_xsk.h  |  2 +-
>   3 files changed, 20 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> index b97c95f..ed88309 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> @@ -924,10 +924,10 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
>    * @tx_ring: Tx ring to clean
>    * @napi_budget: Used to determine if we are in netpoll
>    *
> - * Returns true if there's any budget left (e.g. the clean is finished)
> + * Returns the number of packets cleaned
>    **/
> -static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> -			      struct i40e_ring *tx_ring, int napi_budget)
> +static int i40e_clean_tx_irq(struct i40e_vsi *vsi,
> +			     struct i40e_ring *tx_ring, int napi_budget)
>   {
>   	int i = tx_ring->next_to_clean;
>   	struct i40e_tx_buffer *tx_buf;
> @@ -1026,7 +1026,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
>   	i40e_arm_wb(tx_ring, vsi, budget);
>   
>   	if (ring_is_xdp(tx_ring))
> -		return !!budget;
> +		return total_packets;
>   
>   	/* notify netdev of completed buffers */
>   	netdev_tx_completed_queue(txring_txq(tx_ring),
> @@ -1048,7 +1048,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
>   		}
>   	}
>   
> -	return !!budget;
> +	return total_packets;
>   }
>   
>   /**
> @@ -2689,10 +2689,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
>   			       container_of(napi, struct i40e_q_vector, napi);
>   	struct i40e_vsi *vsi = q_vector->vsi;
>   	struct i40e_ring *ring;
> +	bool tx_clean_complete = true;
>   	bool clean_complete = true;
>   	bool arm_wb = false;
>   	int budget_per_ring;
>   	int work_done = 0;
> +	int tx_wd = 0;

Is it necessary to initialize the variable?


Kind regards,

Paul

>   
>   	if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
>   		napi_complete(napi);
> @@ -2703,12 +2705,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
>   	 * budget and be more aggressive about cleaning up the Tx descriptors.
>   	 */
>   	i40e_for_each_ring(ring, q_vector->tx) {
> -		bool wd = ring->xsk_pool ?
> -			  i40e_clean_xdp_tx_irq(vsi, ring) :
> -			  i40e_clean_tx_irq(vsi, ring, budget);
> +		tx_wd = ring->xsk_pool ?
> +			i40e_clean_xdp_tx_irq(vsi, ring) :
> +			i40e_clean_tx_irq(vsi, ring, budget);
>   
> -		if (!wd) {
> -			clean_complete = false;
> +		if (tx_wd >= budget) {
> +			tx_clean_complete = false;
>   			continue;
>   		}
>   		arm_wb |= ring->arm_wb;
> @@ -2742,7 +2744,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
>   	}
>   
>   	/* If work not completed, return budget and polling will return */
> -	if (!clean_complete) {
> +	if (!clean_complete || !tx_clean_complete) {
>   		int cpu_id = smp_processor_id();
>   
>   		/* It is possible that the interrupt affinity has changed but,
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> index 790aaeff..925682c 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> @@ -531,9 +531,9 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
>    * @xdp_ring: XDP Tx ring
>    * @budget: NAPI budget
>    *
> - * Returns true if the work is finished.
> + * Returns number of packets cleaned
>    **/
> -static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> +static int i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
>   {
>   	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
>   	u32 nb_pkts, nb_processed = 0;
> @@ -541,7 +541,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
>   
>   	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
>   	if (!nb_pkts)
> -		return true;
> +		return 0;
>   
>   	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
>   		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
> @@ -558,7 +558,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
>   
>   	i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);
>   
> -	return nb_pkts < budget;
> +	return nb_pkts;
>   }
>   
>   /**
> @@ -582,9 +582,9 @@ static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring,
>    * @vsi: Current VSI
>    * @tx_ring: XDP Tx ring
>    *
> - * Returns true if cleanup/tranmission is done.
> + * Returns number of packets cleaned
>    **/
> -bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
> +int i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
>   {
>   	struct xsk_buff_pool *bp = tx_ring->xsk_pool;
>   	u32 i, completed_frames, xsk_frames = 0;
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> index 821df24..4e810c2 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> @@ -30,7 +30,7 @@ int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
>   bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);
>   int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
>   
> -bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
> +int i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
>   int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
>   int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc);
>   void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
Fijalkowski, Maciej Oct. 5, 2022, 10:46 a.m. UTC | #2
On Wed, Oct 05, 2022 at 01:31:42AM -0700, Joe Damato wrote:
> Adjust i40e_clean_tx_irq to return the actual number of packets cleaned
> and adjust the logic in i40e_napi_poll to check this value.
> 
> Signed-off-by: Joe Damato <jdamato@fastly.com>
> ---
>  drivers/net/ethernet/intel/i40e/i40e_txrx.c | 24 +++++++++++++-----------
>  drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 12 ++++++------
>  drivers/net/ethernet/intel/i40e/i40e_xsk.h  |  2 +-
>  3 files changed, 20 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> index b97c95f..ed88309 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> @@ -924,10 +924,10 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
>   * @tx_ring: Tx ring to clean
>   * @napi_budget: Used to determine if we are in netpoll
>   *
> - * Returns true if there's any budget left (e.g. the clean is finished)
> + * Returns the number of packets cleaned
>   **/
> -static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> -			      struct i40e_ring *tx_ring, int napi_budget)
> +static int i40e_clean_tx_irq(struct i40e_vsi *vsi,
> +			     struct i40e_ring *tx_ring, int napi_budget)
>  {
>  	int i = tx_ring->next_to_clean;
>  	struct i40e_tx_buffer *tx_buf;
> @@ -1026,7 +1026,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
>  	i40e_arm_wb(tx_ring, vsi, budget);
>  
>  	if (ring_is_xdp(tx_ring))
> -		return !!budget;
> +		return total_packets;
>  
>  	/* notify netdev of completed buffers */
>  	netdev_tx_completed_queue(txring_txq(tx_ring),
> @@ -1048,7 +1048,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
>  		}
>  	}
>  
> -	return !!budget;
> +	return total_packets;
>  }
>  
>  /**
> @@ -2689,10 +2689,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
>  			       container_of(napi, struct i40e_q_vector, napi);
>  	struct i40e_vsi *vsi = q_vector->vsi;
>  	struct i40e_ring *ring;
> +	bool tx_clean_complete = true;
>  	bool clean_complete = true;
>  	bool arm_wb = false;
>  	int budget_per_ring;
>  	int work_done = 0;
> +	int tx_wd = 0;
>  
>  	if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
>  		napi_complete(napi);
> @@ -2703,12 +2705,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
>  	 * budget and be more aggressive about cleaning up the Tx descriptors.
>  	 */
>  	i40e_for_each_ring(ring, q_vector->tx) {
> -		bool wd = ring->xsk_pool ?
> -			  i40e_clean_xdp_tx_irq(vsi, ring) :
> -			  i40e_clean_tx_irq(vsi, ring, budget);
> +		tx_wd = ring->xsk_pool ?
> +			i40e_clean_xdp_tx_irq(vsi, ring) :
> +			i40e_clean_tx_irq(vsi, ring, budget);
>  
> -		if (!wd) {
> -			clean_complete = false;
> +		if (tx_wd >= budget) {
> +			tx_clean_complete = false;

This will break for AF_XDP Tx ZC. AF_XDP Tx ZC in intel drivers ignores
budget given by NAPI. If you look at i40e_xmit_zc():

func def:
static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)

callsite:
	return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring));

we give free ring space as a budget and with your change we would be
returning the amount of processed tx descriptors which you will be
comparing against NAPI budget (64, unless you have busy poll enabled with
a different batch size). Say you start with empty ring and your HW rings
are sized to 1k but there was only 512 AF_XDP descriptors ready for Tx.
You produced all of them successfully to ring and you return 512 up to
i40e_napi_poll.

>  			continue;
>  		}
>  		arm_wb |= ring->arm_wb;
> @@ -2742,7 +2744,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
>  	}
>  
>  	/* If work not completed, return budget and polling will return */
> -	if (!clean_complete) {
> +	if (!clean_complete || !tx_clean_complete) {
>  		int cpu_id = smp_processor_id();
>  
>  		/* It is possible that the interrupt affinity has changed but,
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> index 790aaeff..925682c 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> @@ -531,9 +531,9 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
>   * @xdp_ring: XDP Tx ring
>   * @budget: NAPI budget
>   *
> - * Returns true if the work is finished.
> + * Returns number of packets cleaned
>   **/
> -static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> +static int i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
>  {
>  	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
>  	u32 nb_pkts, nb_processed = 0;
> @@ -541,7 +541,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
>  
>  	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
>  	if (!nb_pkts)
> -		return true;
> +		return 0;
>  
>  	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
>  		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
> @@ -558,7 +558,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
>  
>  	i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);
>  
> -	return nb_pkts < budget;
> +	return nb_pkts;
>  }
>  
>  /**
> @@ -582,9 +582,9 @@ static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring,
>   * @vsi: Current VSI
>   * @tx_ring: XDP Tx ring
>   *
> - * Returns true if cleanup/tranmission is done.
> + * Returns number of packets cleaned
>   **/
> -bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
> +int i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
>  {
>  	struct xsk_buff_pool *bp = tx_ring->xsk_pool;
>  	u32 i, completed_frames, xsk_frames = 0;
> diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> index 821df24..4e810c2 100644
> --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> @@ -30,7 +30,7 @@ int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
>  bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);
>  int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
>  
> -bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
> +int i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
>  int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
>  int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc);
>  void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
> -- 
> 2.7.4
>
Joe Damato Oct. 5, 2022, 5:50 p.m. UTC | #3
On Wed, Oct 05, 2022 at 12:46:31PM +0200, Maciej Fijalkowski wrote:
> On Wed, Oct 05, 2022 at 01:31:42AM -0700, Joe Damato wrote:
> > Adjust i40e_clean_tx_irq to return the actual number of packets cleaned
> > and adjust the logic in i40e_napi_poll to check this value.
> > 
> > Signed-off-by: Joe Damato <jdamato@fastly.com>
> > ---
> >  drivers/net/ethernet/intel/i40e/i40e_txrx.c | 24 +++++++++++++-----------
> >  drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 12 ++++++------
> >  drivers/net/ethernet/intel/i40e/i40e_xsk.h  |  2 +-
> >  3 files changed, 20 insertions(+), 18 deletions(-)
> > 
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > index b97c95f..ed88309 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> > @@ -924,10 +924,10 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
> >   * @tx_ring: Tx ring to clean
> >   * @napi_budget: Used to determine if we are in netpoll
> >   *
> > - * Returns true if there's any budget left (e.g. the clean is finished)
> > + * Returns the number of packets cleaned
> >   **/
> > -static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> > -			      struct i40e_ring *tx_ring, int napi_budget)
> > +static int i40e_clean_tx_irq(struct i40e_vsi *vsi,
> > +			     struct i40e_ring *tx_ring, int napi_budget)
> >  {
> >  	int i = tx_ring->next_to_clean;
> >  	struct i40e_tx_buffer *tx_buf;
> > @@ -1026,7 +1026,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> >  	i40e_arm_wb(tx_ring, vsi, budget);
> >  
> >  	if (ring_is_xdp(tx_ring))
> > -		return !!budget;
> > +		return total_packets;
> >  
> >  	/* notify netdev of completed buffers */
> >  	netdev_tx_completed_queue(txring_txq(tx_ring),
> > @@ -1048,7 +1048,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> >  		}
> >  	}
> >  
> > -	return !!budget;
> > +	return total_packets;
> >  }
> >  
> >  /**
> > @@ -2689,10 +2689,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
> >  			       container_of(napi, struct i40e_q_vector, napi);
> >  	struct i40e_vsi *vsi = q_vector->vsi;
> >  	struct i40e_ring *ring;
> > +	bool tx_clean_complete = true;
> >  	bool clean_complete = true;
> >  	bool arm_wb = false;
> >  	int budget_per_ring;
> >  	int work_done = 0;
> > +	int tx_wd = 0;
> >  
> >  	if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
> >  		napi_complete(napi);
> > @@ -2703,12 +2705,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
> >  	 * budget and be more aggressive about cleaning up the Tx descriptors.
> >  	 */
> >  	i40e_for_each_ring(ring, q_vector->tx) {
> > -		bool wd = ring->xsk_pool ?
> > -			  i40e_clean_xdp_tx_irq(vsi, ring) :
> > -			  i40e_clean_tx_irq(vsi, ring, budget);
> > +		tx_wd = ring->xsk_pool ?
> > +			i40e_clean_xdp_tx_irq(vsi, ring) :
> > +			i40e_clean_tx_irq(vsi, ring, budget);
> >  
> > -		if (!wd) {
> > -			clean_complete = false;
> > +		if (tx_wd >= budget) {
> > +			tx_clean_complete = false;
> 
> This will break for AF_XDP Tx ZC. AF_XDP Tx ZC in intel drivers ignores
> budget given by NAPI. If you look at i40e_xmit_zc():
> 
> func def:
> static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> 
> callsite:
> 	return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring));
> 
> we give free ring space as a budget and with your change we would be
> returning the amount of processed tx descriptors which you will be
> comparing against NAPI budget (64, unless you have busy poll enabled with
> a different batch size). Say you start with empty ring and your HW rings
> are sized to 1k but there was only 512 AF_XDP descriptors ready for Tx.
> You produced all of them successfully to ring and you return 512 up to
> i40e_napi_poll.

Good point, my bad.

I've reworked this for the v2 and have given i40e_clean_tx_irq,
and i40e_clean_xdp_tx_irq an out parameter which will record the number
TXes cleaned.

I tweaked i40e_xmit_zc to return the number of packets (nb_pkts) and moved
the boolean to check if that's under the "budget"
(I40E_DESC_UNUSED(tx_ring)) into i40e_clean_xdp_tx_irq.

I think that might solve the issues you've described.


> >  			continue;
> >  		}
> >  		arm_wb |= ring->arm_wb;
> > @@ -2742,7 +2744,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
> >  	}
> >  
> >  	/* If work not completed, return budget and polling will return */
> > -	if (!clean_complete) {
> > +	if (!clean_complete || !tx_clean_complete) {
> >  		int cpu_id = smp_processor_id();
> >  
> >  		/* It is possible that the interrupt affinity has changed but,
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > index 790aaeff..925682c 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
> > @@ -531,9 +531,9 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
> >   * @xdp_ring: XDP Tx ring
> >   * @budget: NAPI budget
> >   *
> > - * Returns true if the work is finished.
> > + * Returns number of packets cleaned
> >   **/
> > -static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> > +static int i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> >  {
> >  	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
> >  	u32 nb_pkts, nb_processed = 0;
> > @@ -541,7 +541,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> >  
> >  	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
> >  	if (!nb_pkts)
> > -		return true;
> > +		return 0;
> >  
> >  	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
> >  		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
> > @@ -558,7 +558,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> >  
> >  	i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);
> >  
> > -	return nb_pkts < budget;
> > +	return nb_pkts;
> >  }
> >  
> >  /**
> > @@ -582,9 +582,9 @@ static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring,
> >   * @vsi: Current VSI
> >   * @tx_ring: XDP Tx ring
> >   *
> > - * Returns true if cleanup/tranmission is done.
> > + * Returns number of packets cleaned
> >   **/
> > -bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
> > +int i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
> >  {
> >  	struct xsk_buff_pool *bp = tx_ring->xsk_pool;
> >  	u32 i, completed_frames, xsk_frames = 0;
> > diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> > index 821df24..4e810c2 100644
> > --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> > +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
> > @@ -30,7 +30,7 @@ int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
> >  bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);
> >  int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
> >  
> > -bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
> > +int i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
> >  int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
> >  int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc);
> >  void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
> > -- 
> > 2.7.4
> >
Jesse Brandeburg Oct. 5, 2022, 6:33 p.m. UTC | #4
On 10/5/2022 10:50 AM, Joe Damato wrote:
> On Wed, Oct 05, 2022 at 12:46:31PM +0200, Maciej Fijalkowski wrote:
>> On Wed, Oct 05, 2022 at 01:31:42AM -0700, Joe Damato wrote:
>>> Adjust i40e_clean_tx_irq to return the actual number of packets cleaned
>>> and adjust the logic in i40e_napi_poll to check this value.

it's fine to return the number cleaned but let's keep that data and 
changes to itself instead of changing the flow of the routine.


>>>
>>> Signed-off-by: Joe Damato <jdamato@fastly.com>
>>> ---
>>>   drivers/net/ethernet/intel/i40e/i40e_txrx.c | 24 +++++++++++++-----------
>>>   drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 12 ++++++------
>>>   drivers/net/ethernet/intel/i40e/i40e_xsk.h  |  2 +-
>>>   3 files changed, 20 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
>>> index b97c95f..ed88309 100644
>>> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
>>> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
>>> @@ -924,10 +924,10 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
>>>    * @tx_ring: Tx ring to clean
>>>    * @napi_budget: Used to determine if we are in netpoll
>>>    *
>>> - * Returns true if there's any budget left (e.g. the clean is finished)
>>> + * Returns the number of packets cleaned
>>>    **/
>>> -static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
>>> -			      struct i40e_ring *tx_ring, int napi_budget)
>>> +static int i40e_clean_tx_irq(struct i40e_vsi *vsi,
>>> +			     struct i40e_ring *tx_ring, int napi_budget)
>>>   {
>>>   	int i = tx_ring->next_to_clean;
>>>   	struct i40e_tx_buffer *tx_buf;
>>> @@ -1026,7 +1026,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
>>>   	i40e_arm_wb(tx_ring, vsi, budget);
>>>   
>>>   	if (ring_is_xdp(tx_ring))
>>> -		return !!budget;
>>> +		return total_packets;
>>>   
>>>   	/* notify netdev of completed buffers */
>>>   	netdev_tx_completed_queue(txring_txq(tx_ring),
>>> @@ -1048,7 +1048,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
>>>   		}
>>>   	}
>>>   
>>> -	return !!budget;
>>> +	return total_packets;
>>>   }
>>>   
>>>   /**
>>> @@ -2689,10 +2689,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
>>>   			       container_of(napi, struct i40e_q_vector, napi);
>>>   	struct i40e_vsi *vsi = q_vector->vsi;
>>>   	struct i40e_ring *ring;
>>> +	bool tx_clean_complete = true;
>>>   	bool clean_complete = true;
>>>   	bool arm_wb = false;
>>>   	int budget_per_ring;
>>>   	int work_done = 0;
>>> +	int tx_wd = 0;
>>>   
>>>   	if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
>>>   		napi_complete(napi);
>>> @@ -2703,12 +2705,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
>>>   	 * budget and be more aggressive about cleaning up the Tx descriptors.
>>>   	 */
>>>   	i40e_for_each_ring(ring, q_vector->tx) {
>>> -		bool wd = ring->xsk_pool ?
>>> -			  i40e_clean_xdp_tx_irq(vsi, ring) :
>>> -			  i40e_clean_tx_irq(vsi, ring, budget);
>>> +		tx_wd = ring->xsk_pool ?
>>> +			i40e_clean_xdp_tx_irq(vsi, ring) :
>>> +			i40e_clean_tx_irq(vsi, ring, budget);
>>>   
>>> -		if (!wd) {
>>> -			clean_complete = false;
>>> +		if (tx_wd >= budget) {
>>> +			tx_clean_complete = false;
>>
>> This will break for AF_XDP Tx ZC. AF_XDP Tx ZC in intel drivers ignores
>> budget given by NAPI. If you look at i40e_xmit_zc():
>>
>> func def:
>> static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
>>
>> callsite:
>> 	return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring));
>>
>> we give free ring space as a budget and with your change we would be
>> returning the amount of processed tx descriptors which you will be
>> comparing against NAPI budget (64, unless you have busy poll enabled with
>> a different batch size). Say you start with empty ring and your HW rings
>> are sized to 1k but there was only 512 AF_XDP descriptors ready for Tx.
>> You produced all of them successfully to ring and you return 512 up to
>> i40e_napi_poll.
> 
> Good point, my bad.
> 
> I've reworked this for the v2 and have given i40e_clean_tx_irq,
> and i40e_clean_xdp_tx_irq an out parameter which will record the number
> TXes cleaned.
> 
> I tweaked i40e_xmit_zc to return the number of packets (nb_pkts) and moved
> the boolean to check if that's under the "budget"
> (I40E_DESC_UNUSED(tx_ring)) into i40e_clean_xdp_tx_irq.
> 
> I think that might solve the issues you've described.

Please don't change the flow of this function, transmit clean ups are so 
cheap that we don't bother counting them or limiting them beyond a 
maximum (so they don't clean forever)

Basically transmits should not be counted when exiting NAPI, besides 
that we did "at least one". The only thing that matters to the budget is 
that we "finished" transmit cleanup or not, which would make sure we 
rescheduled napi if we weren't finished cleaning (for instance on a 8160 
entry tx ring) transmits.

I'd much rather you kept this series to a simple return count of tx 
cleaned in "out" as you've said you'd do in v2, and then use that data 
*only* in the context of the new trace event.

That way you're not changing the flow and introducing tough to debug 
issues in the hot path.

Also, mixing new features and introducing refactors makes it very hard 
to unwind if something goes wrong in the future.

Thanks,
Jesse
Joe Damato Oct. 5, 2022, 6:47 p.m. UTC | #5
On Wed, Oct 05, 2022 at 11:33:23AM -0700, Jesse Brandeburg wrote:
> On 10/5/2022 10:50 AM, Joe Damato wrote:
> >On Wed, Oct 05, 2022 at 12:46:31PM +0200, Maciej Fijalkowski wrote:
> >>On Wed, Oct 05, 2022 at 01:31:42AM -0700, Joe Damato wrote:
> >>>Adjust i40e_clean_tx_irq to return the actual number of packets cleaned
> >>>and adjust the logic in i40e_napi_poll to check this value.
> 
> it's fine to return the number cleaned but let's keep that data and changes
> to itself instead of changing the flow of the routine.
> 
> 
> >>>
> >>>Signed-off-by: Joe Damato <jdamato@fastly.com>
> >>>---
> >>>  drivers/net/ethernet/intel/i40e/i40e_txrx.c | 24 +++++++++++++-----------
> >>>  drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 12 ++++++------
> >>>  drivers/net/ethernet/intel/i40e/i40e_xsk.h  |  2 +-
> >>>  3 files changed, 20 insertions(+), 18 deletions(-)
> >>>
> >>>diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> >>>index b97c95f..ed88309 100644
> >>>--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> >>>+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
> >>>@@ -924,10 +924,10 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi)
> >>>   * @tx_ring: Tx ring to clean
> >>>   * @napi_budget: Used to determine if we are in netpoll
> >>>   *
> >>>- * Returns true if there's any budget left (e.g. the clean is finished)
> >>>+ * Returns the number of packets cleaned
> >>>   **/
> >>>-static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> >>>-			      struct i40e_ring *tx_ring, int napi_budget)
> >>>+static int i40e_clean_tx_irq(struct i40e_vsi *vsi,
> >>>+			     struct i40e_ring *tx_ring, int napi_budget)
> >>>  {
> >>>  	int i = tx_ring->next_to_clean;
> >>>  	struct i40e_tx_buffer *tx_buf;
> >>>@@ -1026,7 +1026,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> >>>  	i40e_arm_wb(tx_ring, vsi, budget);
> >>>  	if (ring_is_xdp(tx_ring))
> >>>-		return !!budget;
> >>>+		return total_packets;
> >>>  	/* notify netdev of completed buffers */
> >>>  	netdev_tx_completed_queue(txring_txq(tx_ring),
> >>>@@ -1048,7 +1048,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
> >>>  		}
> >>>  	}
> >>>-	return !!budget;
> >>>+	return total_packets;
> >>>  }
> >>>  /**
> >>>@@ -2689,10 +2689,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
> >>>  			       container_of(napi, struct i40e_q_vector, napi);
> >>>  	struct i40e_vsi *vsi = q_vector->vsi;
> >>>  	struct i40e_ring *ring;
> >>>+	bool tx_clean_complete = true;
> >>>  	bool clean_complete = true;
> >>>  	bool arm_wb = false;
> >>>  	int budget_per_ring;
> >>>  	int work_done = 0;
> >>>+	int tx_wd = 0;
> >>>  	if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
> >>>  		napi_complete(napi);
> >>>@@ -2703,12 +2705,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
> >>>  	 * budget and be more aggressive about cleaning up the Tx descriptors.
> >>>  	 */
> >>>  	i40e_for_each_ring(ring, q_vector->tx) {
> >>>-		bool wd = ring->xsk_pool ?
> >>>-			  i40e_clean_xdp_tx_irq(vsi, ring) :
> >>>-			  i40e_clean_tx_irq(vsi, ring, budget);
> >>>+		tx_wd = ring->xsk_pool ?
> >>>+			i40e_clean_xdp_tx_irq(vsi, ring) :
> >>>+			i40e_clean_tx_irq(vsi, ring, budget);
> >>>-		if (!wd) {
> >>>-			clean_complete = false;
> >>>+		if (tx_wd >= budget) {
> >>>+			tx_clean_complete = false;
> >>
> >>This will break for AF_XDP Tx ZC. AF_XDP Tx ZC in intel drivers ignores
> >>budget given by NAPI. If you look at i40e_xmit_zc():
> >>
> >>func def:
> >>static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
> >>
> >>callsite:
> >>	return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring));
> >>
> >>we give free ring space as a budget and with your change we would be
> >>returning the amount of processed tx descriptors which you will be
> >>comparing against NAPI budget (64, unless you have busy poll enabled with
> >>a different batch size). Say you start with empty ring and your HW rings
> >>are sized to 1k but there was only 512 AF_XDP descriptors ready for Tx.
> >>You produced all of them successfully to ring and you return 512 up to
> >>i40e_napi_poll.
> >
> >Good point, my bad.
> >
> >I've reworked this for the v2 and have given i40e_clean_tx_irq,
> >and i40e_clean_xdp_tx_irq an out parameter which will record the number
> >TXes cleaned.
> >
> >I tweaked i40e_xmit_zc to return the number of packets (nb_pkts) and moved
> >the boolean to check if that's under the "budget"
> >(I40E_DESC_UNUSED(tx_ring)) into i40e_clean_xdp_tx_irq.
> >
> >I think that might solve the issues you've described.
> 
> Please don't change the flow of this function, transmit clean ups are so
> cheap that we don't bother counting them or limiting them beyond a maximum
> (so they don't clean forever)
> 
> Basically transmits should not be counted when exiting NAPI, besides that we
> did "at least one". The only thing that matters to the budget is that we
> "finished" transmit cleanup or not, which would make sure we rescheduled
> napi if we weren't finished cleaning (for instance on a 8160 entry tx ring)
> transmits.
> 
> I'd much rather you kept this series to a simple return count of tx cleaned
> in "out" as you've said you'd do in v2, and then use that data *only* in the
> context of the new trace event.
> 
> That way you're not changing the flow and introducing tough to debug issues
> in the hot path.

In the v2 I've been hacking on I've added out params to i40e_clean_tx_irq and
i40e_clean_xdp_tx_irq, but I avoided adding an out param in i40e_xmit_zc,
since lifting the boolean out seemed pretty straightforward.

I'll drop that though in favor of an out param in i40e_xmit_zc, as well, to
avoid changing the flow of the code.

Thanks for taking a look.
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index b97c95f..ed88309 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -924,10 +924,10 @@  void i40e_detect_recover_hung(struct i40e_vsi *vsi)
  * @tx_ring: Tx ring to clean
  * @napi_budget: Used to determine if we are in netpoll
  *
- * Returns true if there's any budget left (e.g. the clean is finished)
+ * Returns the number of packets cleaned
  **/
-static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
-			      struct i40e_ring *tx_ring, int napi_budget)
+static int i40e_clean_tx_irq(struct i40e_vsi *vsi,
+			     struct i40e_ring *tx_ring, int napi_budget)
 {
 	int i = tx_ring->next_to_clean;
 	struct i40e_tx_buffer *tx_buf;
@@ -1026,7 +1026,7 @@  static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 	i40e_arm_wb(tx_ring, vsi, budget);
 
 	if (ring_is_xdp(tx_ring))
-		return !!budget;
+		return total_packets;
 
 	/* notify netdev of completed buffers */
 	netdev_tx_completed_queue(txring_txq(tx_ring),
@@ -1048,7 +1048,7 @@  static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 		}
 	}
 
-	return !!budget;
+	return total_packets;
 }
 
 /**
@@ -2689,10 +2689,12 @@  int i40e_napi_poll(struct napi_struct *napi, int budget)
 			       container_of(napi, struct i40e_q_vector, napi);
 	struct i40e_vsi *vsi = q_vector->vsi;
 	struct i40e_ring *ring;
+	bool tx_clean_complete = true;
 	bool clean_complete = true;
 	bool arm_wb = false;
 	int budget_per_ring;
 	int work_done = 0;
+	int tx_wd = 0;
 
 	if (test_bit(__I40E_VSI_DOWN, vsi->state)) {
 		napi_complete(napi);
@@ -2703,12 +2705,12 @@  int i40e_napi_poll(struct napi_struct *napi, int budget)
 	 * budget and be more aggressive about cleaning up the Tx descriptors.
 	 */
 	i40e_for_each_ring(ring, q_vector->tx) {
-		bool wd = ring->xsk_pool ?
-			  i40e_clean_xdp_tx_irq(vsi, ring) :
-			  i40e_clean_tx_irq(vsi, ring, budget);
+		tx_wd = ring->xsk_pool ?
+			i40e_clean_xdp_tx_irq(vsi, ring) :
+			i40e_clean_tx_irq(vsi, ring, budget);
 
-		if (!wd) {
-			clean_complete = false;
+		if (tx_wd >= budget) {
+			tx_clean_complete = false;
 			continue;
 		}
 		arm_wb |= ring->arm_wb;
@@ -2742,7 +2744,7 @@  int i40e_napi_poll(struct napi_struct *napi, int budget)
 	}
 
 	/* If work not completed, return budget and polling will return */
-	if (!clean_complete) {
+	if (!clean_complete || !tx_clean_complete) {
 		int cpu_id = smp_processor_id();
 
 		/* It is possible that the interrupt affinity has changed but,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 790aaeff..925682c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -531,9 +531,9 @@  static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
  * @xdp_ring: XDP Tx ring
  * @budget: NAPI budget
  *
- * Returns true if the work is finished.
+ * Returns number of packets cleaned
  **/
-static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
+static int i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
 {
 	struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs;
 	u32 nb_pkts, nb_processed = 0;
@@ -541,7 +541,7 @@  static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
 
 	nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget);
 	if (!nb_pkts)
-		return true;
+		return 0;
 
 	if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
 		nb_processed = xdp_ring->count - xdp_ring->next_to_use;
@@ -558,7 +558,7 @@  static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
 
 	i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);
 
-	return nb_pkts < budget;
+	return nb_pkts;
 }
 
 /**
@@ -582,9 +582,9 @@  static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring,
  * @vsi: Current VSI
  * @tx_ring: XDP Tx ring
  *
- * Returns true if cleanup/tranmission is done.
+ * Returns number of packets cleaned
  **/
-bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
+int i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring)
 {
 	struct xsk_buff_pool *bp = tx_ring->xsk_pool;
 	u32 i, completed_frames, xsk_frames = 0;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
index 821df24..4e810c2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -30,7 +30,7 @@  int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool,
 bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);
 int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
 
-bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
+int i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring);
 int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
 int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc);
 void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);