Message ID | 1665004913-25656-3-git-send-email-jdamato@fastly.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | i40e: Add an i40e_napi_poll tracepoint | expand |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Guessing tree name failed - patch did not apply |
On 10/5/2022 4:21 PM, Joe Damato wrote: > Update i40e_clean_tx_irq to take an out parameter (tx_cleaned) which stores > the number TXs cleaned. > > Likewise, update i40e_clean_xdp_tx_irq and i40e_xmit_zc to do the same. > > Care has been taken to avoid changing the control flow of any functions > involved. > > Signed-off-by: Joe Damato <jdamato@fastly.com> > --- > drivers/net/ethernet/intel/i40e/i40e_txrx.c | 16 +++++++++++----- > drivers/net/ethernet/intel/i40e/i40e_xsk.c | 15 +++++++++++---- > drivers/net/ethernet/intel/i40e/i40e_xsk.h | 3 ++- > 3 files changed, 24 insertions(+), 10 deletions(-) > > diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c > index b97c95f..a2cc98e 100644 > --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c > +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c > @@ -923,11 +923,13 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi) > * @vsi: the VSI we care about > * @tx_ring: Tx ring to clean > * @napi_budget: Used to determine if we are in netpoll > + * @tx_cleaned: Out parameter set to the number of TXes cleaned > * > * Returns true if there's any budget left (e.g. 
the clean is finished) > **/ > static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, > - struct i40e_ring *tx_ring, int napi_budget) > + struct i40e_ring *tx_ring, int napi_budget, > + unsigned int *tx_cleaned) > { > int i = tx_ring->next_to_clean; > struct i40e_tx_buffer *tx_buf; > @@ -1026,7 +1028,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, > i40e_arm_wb(tx_ring, vsi, budget); > > if (ring_is_xdp(tx_ring)) > - return !!budget; > + goto out; > > /* notify netdev of completed buffers */ > netdev_tx_completed_queue(txring_txq(tx_ring), > @@ -1048,6 +1050,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, > } > } > > +out: > + *tx_cleaned = total_packets; > return !!budget; > } > > @@ -2689,10 +2693,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) > container_of(napi, struct i40e_q_vector, napi); > struct i40e_vsi *vsi = q_vector->vsi; > struct i40e_ring *ring; > + bool tx_clean_complete = true; > bool clean_complete = true; > bool arm_wb = false; > int budget_per_ring; > int work_done = 0; > + unsigned int tx_cleaned = 0; > > if (test_bit(__I40E_VSI_DOWN, vsi->state)) { > napi_complete(napi); > @@ -2704,11 +2710,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) > */ > i40e_for_each_ring(ring, q_vector->tx) { > bool wd = ring->xsk_pool ? 
> - i40e_clean_xdp_tx_irq(vsi, ring) : > - i40e_clean_tx_irq(vsi, ring, budget); > + i40e_clean_xdp_tx_irq(vsi, ring, &tx_cleaned) : > + i40e_clean_tx_irq(vsi, ring, budget, &tx_cleaned); > > if (!wd) { > - clean_complete = false; > + clean_complete = tx_clean_complete = false; > continue; > } > arm_wb |= ring->arm_wb; > diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c > index 790aaeff..f98ce7e4 100644 > --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c > +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c > @@ -530,18 +530,22 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring) > * i40e_xmit_zc - Performs zero-copy Tx AF_XDP > * @xdp_ring: XDP Tx ring > * @budget: NAPI budget > + * @tx_cleaned: Out parameter of the TX packets processed > * > * Returns true if the work is finished. > **/ > -static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) > +static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget, > + unsigned int *tx_cleaned) > { > struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; > u32 nb_pkts, nb_processed = 0; > unsigned int total_bytes = 0; > > nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); > - if (!nb_pkts) > + if (!nb_pkts) { > + *tx_cleaned = 0; > return true; > + } > > if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { > nb_processed = xdp_ring->count - xdp_ring->next_to_use; > @@ -558,6 +562,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) > > i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes); > > + *tx_cleaned = nb_pkts; With XDP, I don't think we should count these as tx_cleaned packets. These are transmitted packets. The tx_cleaned would be the xsk_frames counter in i40e_clean_xdp_tx_irq May be we need 2 counters for xdp. 
> return nb_pkts < budget; > } > > @@ -581,10 +586,12 @@ static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring, > * i40e_clean_xdp_tx_irq - Completes AF_XDP entries, and cleans XDP entries > * @vsi: Current VSI > * @tx_ring: XDP Tx ring > + * @tx_cleaned: out parameter of number of TXes cleaned > * > * Returns true if cleanup/tranmission is done. > **/ > -bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring) > +bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring, > + unsigned int *tx_cleaned) > { > struct xsk_buff_pool *bp = tx_ring->xsk_pool; > u32 i, completed_frames, xsk_frames = 0; > @@ -634,7 +641,7 @@ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring) > if (xsk_uses_need_wakeup(tx_ring->xsk_pool)) > xsk_set_tx_need_wakeup(tx_ring->xsk_pool); > > - return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring)); > + return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring), tx_cleaned); > } > > /** > diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h > index 821df24..396ed11 100644 > --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h > +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h > @@ -30,7 +30,8 @@ int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool, > bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count); > int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget); > > -bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring); > +bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring, > + unsigned int *tx_cleaned); > int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); > int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc); > void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
On Wed, Oct 05, 2022 at 07:16:56PM -0500, Samudrala, Sridhar wrote: > On 10/5/2022 4:21 PM, Joe Damato wrote: > >Update i40e_clean_tx_irq to take an out parameter (tx_cleaned) which stores > >the number TXs cleaned. > > > >Likewise, update i40e_clean_xdp_tx_irq and i40e_xmit_zc to do the same. > > > >Care has been taken to avoid changing the control flow of any functions > >involved. > > > >Signed-off-by: Joe Damato <jdamato@fastly.com> > >--- > > drivers/net/ethernet/intel/i40e/i40e_txrx.c | 16 +++++++++++----- > > drivers/net/ethernet/intel/i40e/i40e_xsk.c | 15 +++++++++++---- > > drivers/net/ethernet/intel/i40e/i40e_xsk.h | 3 ++- > > 3 files changed, 24 insertions(+), 10 deletions(-) > > > >diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c > >index b97c95f..a2cc98e 100644 > >--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c > >+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c > >@@ -923,11 +923,13 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi) > > * @vsi: the VSI we care about > > * @tx_ring: Tx ring to clean > > * @napi_budget: Used to determine if we are in netpoll > >+ * @tx_cleaned: Out parameter set to the number of TXes cleaned > > * > > * Returns true if there's any budget left (e.g. 
the clean is finished) > > **/ > > static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, > >- struct i40e_ring *tx_ring, int napi_budget) > >+ struct i40e_ring *tx_ring, int napi_budget, > >+ unsigned int *tx_cleaned) > > { > > int i = tx_ring->next_to_clean; > > struct i40e_tx_buffer *tx_buf; > >@@ -1026,7 +1028,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, > > i40e_arm_wb(tx_ring, vsi, budget); > > if (ring_is_xdp(tx_ring)) > >- return !!budget; > >+ goto out; > > /* notify netdev of completed buffers */ > > netdev_tx_completed_queue(txring_txq(tx_ring), > >@@ -1048,6 +1050,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, > > } > > } > >+out: > >+ *tx_cleaned = total_packets; > > return !!budget; > > } > >@@ -2689,10 +2693,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) > > container_of(napi, struct i40e_q_vector, napi); > > struct i40e_vsi *vsi = q_vector->vsi; > > struct i40e_ring *ring; > >+ bool tx_clean_complete = true; > > bool clean_complete = true; > > bool arm_wb = false; > > int budget_per_ring; > > int work_done = 0; > >+ unsigned int tx_cleaned = 0; > > if (test_bit(__I40E_VSI_DOWN, vsi->state)) { > > napi_complete(napi); > >@@ -2704,11 +2710,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) > > */ > > i40e_for_each_ring(ring, q_vector->tx) { > > bool wd = ring->xsk_pool ? 
> >- i40e_clean_xdp_tx_irq(vsi, ring) : > >- i40e_clean_tx_irq(vsi, ring, budget); > >+ i40e_clean_xdp_tx_irq(vsi, ring, &tx_cleaned) : > >+ i40e_clean_tx_irq(vsi, ring, budget, &tx_cleaned); > > if (!wd) { > >- clean_complete = false; > >+ clean_complete = tx_clean_complete = false; > > continue; > > } > > arm_wb |= ring->arm_wb; > >diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c > >index 790aaeff..f98ce7e4 100644 > >--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c > >+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c > >@@ -530,18 +530,22 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring) > > * i40e_xmit_zc - Performs zero-copy Tx AF_XDP > > * @xdp_ring: XDP Tx ring > > * @budget: NAPI budget > >+ * @tx_cleaned: Out parameter of the TX packets processed > > * > > * Returns true if the work is finished. > > **/ > >-static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) > >+static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget, > >+ unsigned int *tx_cleaned) > > { > > struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; > > u32 nb_pkts, nb_processed = 0; > > unsigned int total_bytes = 0; > > nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); > >- if (!nb_pkts) > >+ if (!nb_pkts) { > >+ *tx_cleaned = 0; > > return true; > >+ } > > if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { > > nb_processed = xdp_ring->count - xdp_ring->next_to_use; > >@@ -558,6 +562,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) > > i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes); > >+ *tx_cleaned = nb_pkts; > > With XDP, I don't think we should count these as tx_cleaned packets. These are transmitted > packets. The tx_cleaned would be the xsk_frames counter in i40e_clean_xdp_tx_irq > May be we need 2 counters for xdp. I think there's two issues you are describing, which are separate in my mind. 1.) The name "tx_cleaned", and 2.) 
Whether nb_pkts is the right thing to write as the out param. For #1: I'm OK to change the name if that's the blocker here; please suggest a suitable alternative that you'll accept. For #2: nb_pkts is, IMO, the right value to bubble up to the tracepoint because nb_pkts affects clean_complete in i40e_napi_poll which in turn determines whether or not polling mode is entered. The purpose of the tracepoint is to determine when/why/how you are entering polling mode, so if nb_pkts plays a role in that calculation, it's the right number to output. > > return nb_pkts < budget; > > } > >@@ -581,10 +586,12 @@ static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring, > > * i40e_clean_xdp_tx_irq - Completes AF_XDP entries, and cleans XDP entries > > * @vsi: Current VSI > > * @tx_ring: XDP Tx ring > >+ * @tx_cleaned: out parameter of number of TXes cleaned > > * > > * Returns true if cleanup/tranmission is done. > > **/ > >-bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring) > >+bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring, > >+ unsigned int *tx_cleaned) > > { > > struct xsk_buff_pool *bp = tx_ring->xsk_pool; > > u32 i, completed_frames, xsk_frames = 0; > >@@ -634,7 +641,7 @@ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring) > > if (xsk_uses_need_wakeup(tx_ring->xsk_pool)) > > xsk_set_tx_need_wakeup(tx_ring->xsk_pool); > >- return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring)); > >+ return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring), tx_cleaned); > > } > > /** > >diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h > >index 821df24..396ed11 100644 > >--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h > >+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h > >@@ -30,7 +30,8 @@ int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool, > > bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count); > > int 
i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget); > >-bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring); > >+bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring, > >+ unsigned int *tx_cleaned); > > int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); > > int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc); > > void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring); >
On Wed, Oct 05, 2022 at 05:31:04PM -0700, Joe Damato wrote: > On Wed, Oct 05, 2022 at 07:16:56PM -0500, Samudrala, Sridhar wrote: > > On 10/5/2022 4:21 PM, Joe Damato wrote: > > >Update i40e_clean_tx_irq to take an out parameter (tx_cleaned) which stores > > >the number TXs cleaned. > > > > > >Likewise, update i40e_clean_xdp_tx_irq and i40e_xmit_zc to do the same. > > > > > >Care has been taken to avoid changing the control flow of any functions > > >involved. > > > > > >Signed-off-by: Joe Damato <jdamato@fastly.com> > > >--- > > > drivers/net/ethernet/intel/i40e/i40e_txrx.c | 16 +++++++++++----- > > > drivers/net/ethernet/intel/i40e/i40e_xsk.c | 15 +++++++++++---- > > > drivers/net/ethernet/intel/i40e/i40e_xsk.h | 3 ++- > > > 3 files changed, 24 insertions(+), 10 deletions(-) > > > > > >diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c > > >index b97c95f..a2cc98e 100644 > > >--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c > > >+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c > > >@@ -923,11 +923,13 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi) > > > * @vsi: the VSI we care about > > > * @tx_ring: Tx ring to clean > > > * @napi_budget: Used to determine if we are in netpoll > > >+ * @tx_cleaned: Out parameter set to the number of TXes cleaned > > > * > > > * Returns true if there's any budget left (e.g. 
the clean is finished) > > > **/ > > > static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, > > >- struct i40e_ring *tx_ring, int napi_budget) > > >+ struct i40e_ring *tx_ring, int napi_budget, > > >+ unsigned int *tx_cleaned) > > > { > > > int i = tx_ring->next_to_clean; > > > struct i40e_tx_buffer *tx_buf; > > >@@ -1026,7 +1028,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, > > > i40e_arm_wb(tx_ring, vsi, budget); > > > if (ring_is_xdp(tx_ring)) > > >- return !!budget; > > >+ goto out; > > > /* notify netdev of completed buffers */ > > > netdev_tx_completed_queue(txring_txq(tx_ring), > > >@@ -1048,6 +1050,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, > > > } > > > } > > >+out: > > >+ *tx_cleaned = total_packets; > > > return !!budget; > > > } > > >@@ -2689,10 +2693,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) > > > container_of(napi, struct i40e_q_vector, napi); > > > struct i40e_vsi *vsi = q_vector->vsi; > > > struct i40e_ring *ring; > > >+ bool tx_clean_complete = true; > > > bool clean_complete = true; > > > bool arm_wb = false; > > > int budget_per_ring; > > > int work_done = 0; > > >+ unsigned int tx_cleaned = 0; > > > if (test_bit(__I40E_VSI_DOWN, vsi->state)) { > > > napi_complete(napi); > > >@@ -2704,11 +2710,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) > > > */ > > > i40e_for_each_ring(ring, q_vector->tx) { > > > bool wd = ring->xsk_pool ? 
> > >- i40e_clean_xdp_tx_irq(vsi, ring) : > > >- i40e_clean_tx_irq(vsi, ring, budget); > > >+ i40e_clean_xdp_tx_irq(vsi, ring, &tx_cleaned) : > > >+ i40e_clean_tx_irq(vsi, ring, budget, &tx_cleaned); > > > if (!wd) { > > >- clean_complete = false; > > >+ clean_complete = tx_clean_complete = false; > > > continue; > > > } > > > arm_wb |= ring->arm_wb; > > >diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c > > >index 790aaeff..f98ce7e4 100644 > > >--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c > > >+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c > > >@@ -530,18 +530,22 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring) > > > * i40e_xmit_zc - Performs zero-copy Tx AF_XDP > > > * @xdp_ring: XDP Tx ring > > > * @budget: NAPI budget > > >+ * @tx_cleaned: Out parameter of the TX packets processed > > > * > > > * Returns true if the work is finished. > > > **/ > > >-static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) > > >+static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget, > > >+ unsigned int *tx_cleaned) > > > { > > > struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; > > > u32 nb_pkts, nb_processed = 0; > > > unsigned int total_bytes = 0; > > > nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); > > >- if (!nb_pkts) > > >+ if (!nb_pkts) { > > >+ *tx_cleaned = 0; > > > return true; > > >+ } > > > if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { > > > nb_processed = xdp_ring->count - xdp_ring->next_to_use; > > >@@ -558,6 +562,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) > > > i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes); > > >+ *tx_cleaned = nb_pkts; > > > > With XDP, I don't think we should count these as tx_cleaned packets. These are transmitted > > packets. The tx_cleaned would be the xsk_frames counter in i40e_clean_xdp_tx_irq > > May be we need 2 counters for xdp. 
> > I think there's two issues you are describing, which are separate in my > mind. > > 1.) The name "tx_cleaned", and > 2.) Whether nb_pkts is the right thing to write as the out param. > > For #1: I'm OK to change the name if that's the blocker here; please > suggest a suitable alternative that you'll accept. > > For #2: nb_pkts is, IMO, the right value to bubble up to the tracepoint because > nb_pkts affects clean_complete in i40e_napi_poll which in turn determines > whether or not polling mode is entered. > > The purpose of the tracepoint is to determine when/why/how you are entering > polling mode, so if nb_pkts plays a role in that calculation, it's the > right number to output. I suppose the alternative is to only fire the tracepoint when *not* in XDP. Then the changes to the XDP stuff can be dropped and a separate set of tracepoints for XDP can be created in the future. That might reduce the complexity a bit, and will probably still be pretty useful for people tuning their non-XDP workloads.
On Wed, Oct 05, 2022 at 06:00:24PM -0700, Joe Damato wrote: > On Wed, Oct 05, 2022 at 05:31:04PM -0700, Joe Damato wrote: > > On Wed, Oct 05, 2022 at 07:16:56PM -0500, Samudrala, Sridhar wrote: > > > On 10/5/2022 4:21 PM, Joe Damato wrote: > > > >Update i40e_clean_tx_irq to take an out parameter (tx_cleaned) which stores > > > >the number TXs cleaned. > > > > > > > >Likewise, update i40e_clean_xdp_tx_irq and i40e_xmit_zc to do the same. > > > > > > > >Care has been taken to avoid changing the control flow of any functions > > > >involved. > > > > > > > >Signed-off-by: Joe Damato <jdamato@fastly.com> > > > >--- > > > > drivers/net/ethernet/intel/i40e/i40e_txrx.c | 16 +++++++++++----- > > > > drivers/net/ethernet/intel/i40e/i40e_xsk.c | 15 +++++++++++---- > > > > drivers/net/ethernet/intel/i40e/i40e_xsk.h | 3 ++- > > > > 3 files changed, 24 insertions(+), 10 deletions(-) > > > > > > > >diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c > > > >index b97c95f..a2cc98e 100644 > > > >--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c > > > >+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c > > > >@@ -923,11 +923,13 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi) > > > > * @vsi: the VSI we care about > > > > * @tx_ring: Tx ring to clean > > > > * @napi_budget: Used to determine if we are in netpoll > > > >+ * @tx_cleaned: Out parameter set to the number of TXes cleaned > > > > * > > > > * Returns true if there's any budget left (e.g. 
the clean is finished) > > > > **/ > > > > static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, > > > >- struct i40e_ring *tx_ring, int napi_budget) > > > >+ struct i40e_ring *tx_ring, int napi_budget, > > > >+ unsigned int *tx_cleaned) > > > > { > > > > int i = tx_ring->next_to_clean; > > > > struct i40e_tx_buffer *tx_buf; > > > >@@ -1026,7 +1028,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, > > > > i40e_arm_wb(tx_ring, vsi, budget); > > > > if (ring_is_xdp(tx_ring)) > > > >- return !!budget; > > > >+ goto out; > > > > /* notify netdev of completed buffers */ > > > > netdev_tx_completed_queue(txring_txq(tx_ring), > > > >@@ -1048,6 +1050,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, > > > > } > > > > } > > > >+out: > > > >+ *tx_cleaned = total_packets; > > > > return !!budget; > > > > } > > > >@@ -2689,10 +2693,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) > > > > container_of(napi, struct i40e_q_vector, napi); > > > > struct i40e_vsi *vsi = q_vector->vsi; > > > > struct i40e_ring *ring; > > > >+ bool tx_clean_complete = true; > > > > bool clean_complete = true; > > > > bool arm_wb = false; > > > > int budget_per_ring; > > > > int work_done = 0; > > > >+ unsigned int tx_cleaned = 0; > > > > if (test_bit(__I40E_VSI_DOWN, vsi->state)) { > > > > napi_complete(napi); > > > >@@ -2704,11 +2710,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) > > > > */ > > > > i40e_for_each_ring(ring, q_vector->tx) { > > > > bool wd = ring->xsk_pool ? 
> > > >- i40e_clean_xdp_tx_irq(vsi, ring) : > > > >- i40e_clean_tx_irq(vsi, ring, budget); > > > >+ i40e_clean_xdp_tx_irq(vsi, ring, &tx_cleaned) : > > > >+ i40e_clean_tx_irq(vsi, ring, budget, &tx_cleaned); > > > > if (!wd) { > > > >- clean_complete = false; > > > >+ clean_complete = tx_clean_complete = false; > > > > continue; > > > > } > > > > arm_wb |= ring->arm_wb; > > > >diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c > > > >index 790aaeff..f98ce7e4 100644 > > > >--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c > > > >+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c > > > >@@ -530,18 +530,22 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring) > > > > * i40e_xmit_zc - Performs zero-copy Tx AF_XDP > > > > * @xdp_ring: XDP Tx ring > > > > * @budget: NAPI budget > > > >+ * @tx_cleaned: Out parameter of the TX packets processed > > > > * > > > > * Returns true if the work is finished. > > > > **/ > > > >-static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) > > > >+static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget, > > > >+ unsigned int *tx_cleaned) > > > > { > > > > struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; > > > > u32 nb_pkts, nb_processed = 0; > > > > unsigned int total_bytes = 0; > > > > nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); > > > >- if (!nb_pkts) > > > >+ if (!nb_pkts) { > > > >+ *tx_cleaned = 0; > > > > return true; > > > >+ } > > > > if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { > > > > nb_processed = xdp_ring->count - xdp_ring->next_to_use; > > > >@@ -558,6 +562,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) > > > > i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes); > > > >+ *tx_cleaned = nb_pkts; > > > > > > With XDP, I don't think we should count these as tx_cleaned packets. These are transmitted > > > packets. 
The tx_cleaned would be the xsk_frames counter in i40e_clean_xdp_tx_irq > > > May be we need 2 counters for xdp. > > > > I think there's two issues you are describing, which are separate in my > > mind. > > > > 1.) The name "tx_cleaned", and > > 2.) Whether nb_pkts is the right thing to write as the out param. > > > > For #1: I'm OK to change the name if that's the blocker here; please > > suggest a suitable alternative that you'll accept. > > > > For #2: nb_pkts is, IMO, the right value to bubble up to the tracepoint because > > nb_pkts affects clean_complete in i40e_napi_poll which in turn determines > > whether or not polling mode is entered. > > > > The purpose of the tracepoint is to determine when/why/how you are entering > > polling mode, so if nb_pkts plays a role in that calculation, it's the > > right number to output. > > I suppose the alternative is to only fire the tracepoint when *not* in XDP. > Then the changes to the XDP stuff can be dropped and a separate set of > tracepoints for XDP can be created in the future. Let's be clear: it is a quirk of AF_XDP that the actual xmit happens within the NAPI polling routine. Sridhar is right about having xsk_frames as tx_cleaned, but you're also right that nb_pkts affects NAPI polling. But then, if you look at the Rx side, there is an analogous case with buffer allocation affecting NAPI polling. > > That might reduce the complexity a bit, and will probably still be pretty > useful for people tuning their non-XDP workloads.
On 10/6/2022 8:03 AM, Maciej Fijalkowski wrote: > On Wed, Oct 05, 2022 at 06:00:24PM -0700, Joe Damato wrote: >> On Wed, Oct 05, 2022 at 05:31:04PM -0700, Joe Damato wrote: >>> On Wed, Oct 05, 2022 at 07:16:56PM -0500, Samudrala, Sridhar wrote: >>>> On 10/5/2022 4:21 PM, Joe Damato wrote: >>>>> Update i40e_clean_tx_irq to take an out parameter (tx_cleaned) which stores >>>>> the number TXs cleaned. >>>>> >>>>> Likewise, update i40e_clean_xdp_tx_irq and i40e_xmit_zc to do the same. >>>>> >>>>> Care has been taken to avoid changing the control flow of any functions >>>>> involved. >>>>> >>>>> Signed-off-by: Joe Damato <jdamato@fastly.com> >>>>> --- >>>>> drivers/net/ethernet/intel/i40e/i40e_txrx.c | 16 +++++++++++----- >>>>> drivers/net/ethernet/intel/i40e/i40e_xsk.c | 15 +++++++++++---- >>>>> drivers/net/ethernet/intel/i40e/i40e_xsk.h | 3 ++- >>>>> 3 files changed, 24 insertions(+), 10 deletions(-) >>>>> >>>>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c >>>>> index b97c95f..a2cc98e 100644 >>>>> --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c >>>>> +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c >>>>> @@ -923,11 +923,13 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi) >>>>> * @vsi: the VSI we care about >>>>> * @tx_ring: Tx ring to clean >>>>> * @napi_budget: Used to determine if we are in netpoll >>>>> + * @tx_cleaned: Out parameter set to the number of TXes cleaned >>>>> * >>>>> * Returns true if there's any budget left (e.g. 
the clean is finished) >>>>> **/ >>>>> static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, >>>>> - struct i40e_ring *tx_ring, int napi_budget) >>>>> + struct i40e_ring *tx_ring, int napi_budget, >>>>> + unsigned int *tx_cleaned) >>>>> { >>>>> int i = tx_ring->next_to_clean; >>>>> struct i40e_tx_buffer *tx_buf; >>>>> @@ -1026,7 +1028,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, >>>>> i40e_arm_wb(tx_ring, vsi, budget); >>>>> if (ring_is_xdp(tx_ring)) >>>>> - return !!budget; >>>>> + goto out; >>>>> /* notify netdev of completed buffers */ >>>>> netdev_tx_completed_queue(txring_txq(tx_ring), >>>>> @@ -1048,6 +1050,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, >>>>> } >>>>> } >>>>> +out: >>>>> + *tx_cleaned = total_packets; >>>>> return !!budget; >>>>> } >>>>> @@ -2689,10 +2693,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) >>>>> container_of(napi, struct i40e_q_vector, napi); >>>>> struct i40e_vsi *vsi = q_vector->vsi; >>>>> struct i40e_ring *ring; >>>>> + bool tx_clean_complete = true; >>>>> bool clean_complete = true; >>>>> bool arm_wb = false; >>>>> int budget_per_ring; >>>>> int work_done = 0; >>>>> + unsigned int tx_cleaned = 0; >>>>> if (test_bit(__I40E_VSI_DOWN, vsi->state)) { >>>>> napi_complete(napi); >>>>> @@ -2704,11 +2710,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) >>>>> */ >>>>> i40e_for_each_ring(ring, q_vector->tx) { >>>>> bool wd = ring->xsk_pool ? 
>>>>> - i40e_clean_xdp_tx_irq(vsi, ring) : >>>>> - i40e_clean_tx_irq(vsi, ring, budget); >>>>> + i40e_clean_xdp_tx_irq(vsi, ring, &tx_cleaned) : >>>>> + i40e_clean_tx_irq(vsi, ring, budget, &tx_cleaned); >>>>> if (!wd) { >>>>> - clean_complete = false; >>>>> + clean_complete = tx_clean_complete = false; >>>>> continue; >>>>> } >>>>> arm_wb |= ring->arm_wb; >>>>> diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c >>>>> index 790aaeff..f98ce7e4 100644 >>>>> --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c >>>>> +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c >>>>> @@ -530,18 +530,22 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring) >>>>> * i40e_xmit_zc - Performs zero-copy Tx AF_XDP >>>>> * @xdp_ring: XDP Tx ring >>>>> * @budget: NAPI budget >>>>> + * @tx_cleaned: Out parameter of the TX packets processed >>>>> * >>>>> * Returns true if the work is finished. >>>>> **/ >>>>> -static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) >>>>> +static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget, >>>>> + unsigned int *tx_cleaned) >>>>> { >>>>> struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; >>>>> u32 nb_pkts, nb_processed = 0; >>>>> unsigned int total_bytes = 0; >>>>> nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); >>>>> - if (!nb_pkts) >>>>> + if (!nb_pkts) { >>>>> + *tx_cleaned = 0; >>>>> return true; >>>>> + } >>>>> if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { >>>>> nb_processed = xdp_ring->count - xdp_ring->next_to_use; >>>>> @@ -558,6 +562,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) >>>>> i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes); >>>>> + *tx_cleaned = nb_pkts; >>>> With XDP, I don't think we should count these as tx_cleaned packets. These are transmitted >>>> packets. The tx_cleaned would be the xsk_frames counter in i40e_clean_xdp_tx_irq >>>> May be we need 2 counters for xdp. 
>>> I think there's two issues you are describing, which are separate in my >>> mind. >>> >>> 1.) The name "tx_cleaned", and >>> 2.) Whether nb_pkts is the right thing to write as the out param. >>> >>> For #1: I'm OK to change the name if that's the blocker here; please >>> suggest a suitable alternative that you'll accept. >>> >>> For #2: nb_pkts is, IMO, the right value to bubble up to the tracepoint because >>> nb_pkts affects clean_complete in i40e_napi_poll which in turn determines >>> whether or not polling mode is entered. >>> >>> The purpose of the tracepoint is to determine when/why/how you are entering >>> polling mode, so if nb_pkts plays a role in that calculation, it's the >>> right number to output. >> I suppose the alternative is to only fire the tracepoint when *not* in XDP. >> Then the changes to the XDP stuff can be dropped and a separate set of >> tracepoints for XDP can be created in the future. > Let's be clear that it's the AF_XDP quirk that we have in here that actual > xmit happens within NAPI polling routine. > > Sridhar is right with having xsk_frames as tx_cleaned but you're also > right that nb_pkts affects napi polling. But then if you look at Rx side > there is an analogous case with buffer allocation affecting napi polling. To be correct, I would suggest 2 out parameters to i40e_clean_xdp_tx_irq() tx_cleaned and xdp_transmitted. tx_cleaned should be filled in with xsk_frames. Add xdp_transmitted as an out parameter to i40e_xmit_zc() and fill it with nb_pkts. I am not completely clear on the reasoning behind setting clean_complete based on number of packets transmitted in case of XDP. > >> That might reduce the complexity a bit, and will probably still be pretty >> useful for people tuning their non-XDP workloads. This option is fine too.
On Thu, Oct 06, 2022 at 09:57:19AM -0500, Samudrala, Sridhar wrote: > On 10/6/2022 8:03 AM, Maciej Fijalkowski wrote: > >On Wed, Oct 05, 2022 at 06:00:24PM -0700, Joe Damato wrote: > >>On Wed, Oct 05, 2022 at 05:31:04PM -0700, Joe Damato wrote: > >>>On Wed, Oct 05, 2022 at 07:16:56PM -0500, Samudrala, Sridhar wrote: > >>>>On 10/5/2022 4:21 PM, Joe Damato wrote: > >>>>>Update i40e_clean_tx_irq to take an out parameter (tx_cleaned) which stores > >>>>>the number TXs cleaned. > >>>>> > >>>>>Likewise, update i40e_clean_xdp_tx_irq and i40e_xmit_zc to do the same. > >>>>> > >>>>>Care has been taken to avoid changing the control flow of any functions > >>>>>involved. > >>>>> > >>>>>Signed-off-by: Joe Damato <jdamato@fastly.com> > >>>>>--- > >>>>> drivers/net/ethernet/intel/i40e/i40e_txrx.c | 16 +++++++++++----- > >>>>> drivers/net/ethernet/intel/i40e/i40e_xsk.c | 15 +++++++++++---- > >>>>> drivers/net/ethernet/intel/i40e/i40e_xsk.h | 3 ++- > >>>>> 3 files changed, 24 insertions(+), 10 deletions(-) > >>>>> > >>>>>diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c > >>>>>index b97c95f..a2cc98e 100644 > >>>>>--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c > >>>>>+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c > >>>>>@@ -923,11 +923,13 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi) > >>>>> * @vsi: the VSI we care about > >>>>> * @tx_ring: Tx ring to clean > >>>>> * @napi_budget: Used to determine if we are in netpoll > >>>>>+ * @tx_cleaned: Out parameter set to the number of TXes cleaned > >>>>> * > >>>>> * Returns true if there's any budget left (e.g. 
the clean is finished) > >>>>> **/ > >>>>> static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, > >>>>>- struct i40e_ring *tx_ring, int napi_budget) > >>>>>+ struct i40e_ring *tx_ring, int napi_budget, > >>>>>+ unsigned int *tx_cleaned) > >>>>> { > >>>>> int i = tx_ring->next_to_clean; > >>>>> struct i40e_tx_buffer *tx_buf; > >>>>>@@ -1026,7 +1028,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, > >>>>> i40e_arm_wb(tx_ring, vsi, budget); > >>>>> if (ring_is_xdp(tx_ring)) > >>>>>- return !!budget; > >>>>>+ goto out; > >>>>> /* notify netdev of completed buffers */ > >>>>> netdev_tx_completed_queue(txring_txq(tx_ring), > >>>>>@@ -1048,6 +1050,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, > >>>>> } > >>>>> } > >>>>>+out: > >>>>>+ *tx_cleaned = total_packets; > >>>>> return !!budget; > >>>>> } > >>>>>@@ -2689,10 +2693,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) > >>>>> container_of(napi, struct i40e_q_vector, napi); > >>>>> struct i40e_vsi *vsi = q_vector->vsi; > >>>>> struct i40e_ring *ring; > >>>>>+ bool tx_clean_complete = true; > >>>>> bool clean_complete = true; > >>>>> bool arm_wb = false; > >>>>> int budget_per_ring; > >>>>> int work_done = 0; > >>>>>+ unsigned int tx_cleaned = 0; > >>>>> if (test_bit(__I40E_VSI_DOWN, vsi->state)) { > >>>>> napi_complete(napi); > >>>>>@@ -2704,11 +2710,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) > >>>>> */ > >>>>> i40e_for_each_ring(ring, q_vector->tx) { > >>>>> bool wd = ring->xsk_pool ? 
> >>>>>- i40e_clean_xdp_tx_irq(vsi, ring) : > >>>>>- i40e_clean_tx_irq(vsi, ring, budget); > >>>>>+ i40e_clean_xdp_tx_irq(vsi, ring, &tx_cleaned) : > >>>>>+ i40e_clean_tx_irq(vsi, ring, budget, &tx_cleaned); > >>>>> if (!wd) { > >>>>>- clean_complete = false; > >>>>>+ clean_complete = tx_clean_complete = false; > >>>>> continue; > >>>>> } > >>>>> arm_wb |= ring->arm_wb; > >>>>>diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c > >>>>>index 790aaeff..f98ce7e4 100644 > >>>>>--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c > >>>>>+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c > >>>>>@@ -530,18 +530,22 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring) > >>>>> * i40e_xmit_zc - Performs zero-copy Tx AF_XDP > >>>>> * @xdp_ring: XDP Tx ring > >>>>> * @budget: NAPI budget > >>>>>+ * @tx_cleaned: Out parameter of the TX packets processed > >>>>> * > >>>>> * Returns true if the work is finished. > >>>>> **/ > >>>>>-static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) > >>>>>+static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget, > >>>>>+ unsigned int *tx_cleaned) > >>>>> { > >>>>> struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; > >>>>> u32 nb_pkts, nb_processed = 0; > >>>>> unsigned int total_bytes = 0; > >>>>> nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); > >>>>>- if (!nb_pkts) > >>>>>+ if (!nb_pkts) { > >>>>>+ *tx_cleaned = 0; > >>>>> return true; > >>>>>+ } > >>>>> if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { > >>>>> nb_processed = xdp_ring->count - xdp_ring->next_to_use; > >>>>>@@ -558,6 +562,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) > >>>>> i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes); > >>>>>+ *tx_cleaned = nb_pkts; > >>>>With XDP, I don't think we should count these as tx_cleaned packets. These are transmitted > >>>>packets. 
The tx_cleaned would be the xsk_frames counter in i40e_clean_xdp_tx_irq > >>>>May be we need 2 counters for xdp. > >>>I think there's two issues you are describing, which are separate in my > >>>mind. > >>> > >>> 1.) The name "tx_cleaned", and > >>> 2.) Whether nb_pkts is the right thing to write as the out param. > >>> > >>>For #1: I'm OK to change the name if that's the blocker here; please > >>>suggest a suitable alternative that you'll accept. > >>> > >>>For #2: nb_pkts is, IMO, the right value to bubble up to the tracepoint because > >>>nb_pkts affects clean_complete in i40e_napi_poll which in turn determines > >>>whether or not polling mode is entered. > >>> > >>>The purpose of the tracepoint is to determine when/why/how you are entering > >>>polling mode, so if nb_pkts plays a role in that calculation, it's the > >>>right number to output. > >>I suppose the alternative is to only fire the tracepoint when *not* in XDP. > >>Then the changes to the XDP stuff can be dropped and a separate set of > >>tracepoints for XDP can be created in the future. > >Let's be clear that it's the AF_XDP quirk that we have in here that actual > >xmit happens within NAPI polling routine. > > > >Sridhar is right with having xsk_frames as tx_cleaned but you're also > >right that nb_pkts affects napi polling. But then if you look at Rx side > >there is an analogous case with buffer allocation affecting napi polling. > > To be correct, I would suggest 2 out parameters to i40e_clean_xdp_tx_irq() > tx_cleaned and xdp_transmitted. tx_cleaned should be filled in > with xsk_frames. Add xdp_transmitted as an out parameter to i40e_xmit_zc() > and fill it with nb_pkts. Sorry, but I don't see the value in the second param. NAPI decides what to do based on nb_pkts. That's the only parameter that matters for the purpose of NAPI going into poll mode or not, right? If so: I don't see any reason why a second parameter is necessary. 
As I mentioned earlier: if it's just that the name of the parameter isn't right (e.g., you want it to be 'tx_processed' instead of 'tx_cleaned') then that's an easy fix; I'll just change the name. It doesn't seem helpful to have xsk_frames as an out parameter for i40e_napi_poll tracepoint; that value is not used to determine anything about i40e's NAPI. > I am not completely clear on the reasoning behind setting clean_complete > based on number of packets transmitted in case of XDP. > > > >>That might reduce the complexity a bit, and will probably still be pretty > >>useful for people tuning their non-XDP workloads. > > This option is fine too. I'll give Jesse a chance to weigh in before I proceed with spinning a v3.
On 10/6/2022 10:32 AM, Joe Damato wrote: > Sorry, but I don't see the value in the second param. NAPI decides what to > do based on nb_pkts. That's the only parameter that matters for the purpose > of NAPI going into poll mode or not, right? > > If so: I don't see any reason why a second parameter is necessary. Sridhar and I talked about this offline. We agree now that you can just proceed with the single parameter. > > As I mentioned earlier: if it's just that the name of the parameter isn't > right (e.g., you want it to be 'tx_processed' instead of 'tx_cleaned') then > that's an easy fix; I'll just change the name. I think the name change isn't necessary, since we're not going to extend this patch with full XDP events printed (see below) > > It doesn't seem helpful to have xsk_frames as an out parameter for > i40e_napi_poll tracepoint; that value is not used to determine anything > about i40e's NAPI. > >> I am not completely clear on the reasoning behind setting clean_complete >> based on number of packets transmitted in case of XDP. >>> >>>> That might reduce the complexity a bit, and will probably still be pretty >>>> useful for people tuning their non-XDP workloads. >> >> This option is fine too. > > I'll give Jesse a chance to weigh in before I proceed with spinning a v3. I'm ok with the patch you have now, that shows nb_pkts because it's the input to the polling decision. We can add the detail about XDP transmits cleaned in a later series or patch that is by someone who wants the XDP details in the napi poll context.
On Thu, Oct 06, 2022 at 03:35:36PM -0700, Jesse Brandeburg wrote: > On 10/6/2022 10:32 AM, Joe Damato wrote: > >Sorry, but I don't see the value in the second param. NAPI decides what to > >do based on nb_pkts. That's the only parameter that matters for the purpose > >of NAPI going into poll mode or not, right? > > > >If so: I don't see any reason why a second parameter is necessary. > > Sridhar and I talked about this offline. We agree now that you can just > proceed with the single parameter. OK, thanks. > > > >As I mentioned earlier: if it's just that the name of the parameter isn't > >right (e.g., you want it to be 'tx_processed' instead of 'tx_cleaned') then > >that's an easy fix; I'll just change the name. > > I think the name change isn't necessary, since we're not going to extend > this patch with full XDP events printed (see below) > > > > >It doesn't seem helpful to have xsk_frames as an out parameter for > >i40e_napi_poll tracepoint; that value is not used to determine anything > >about i40e's NAPI. > > > >>I am not completely clear on the reasoning behind setting clean_complete > >>based on number of packets transmitted in case of XDP. > >>> > >>>>That might reduce the complexity a bit, and will probably still be pretty > >>>>useful for people tuning their non-XDP workloads. > >> > >>This option is fine too. > > > >I'll give Jesse a chance to weigh in before I proceed with spinning a v3. > > I'm ok with the patch you have now, that shows nb_pkts because it's the > input to the polling decision. We can add the detail about XDP transmits > cleaned in a later series or patch that is by someone who wants the XDP > details in the napi poll context. Thanks for the detailed and thoughtful feedback, it is much appreciated. I'll leave this patch the way it is then and tweak the RX patch to include an rx_clean_complete boolean as I mentioned in my response to that patch and send out a v3. 
FWIW, I had assumed that you would suggest dropping the XDP stuff so I pre-emptively spun a branch locally that dropped it... it is a much smaller change of course, but I suspect that this tracepoint might be useful for XDP users, so I think the decision to leave it with nb_pkts makes sense. Thanks again for the review. I'll send a v3 shortly.
On Thu, Oct 06, 2022 at 03:56:57PM -0700, Joe Damato wrote: > On Thu, Oct 06, 2022 at 03:35:36PM -0700, Jesse Brandeburg wrote: > > On 10/6/2022 10:32 AM, Joe Damato wrote: > > >Sorry, but I don't see the value in the second param. NAPI decides what to > > >do based on nb_pkts. That's the only parameter that matters for the purpose > > >of NAPI going into poll mode or not, right? > > > > > >If so: I don't see any reason why a second parameter is necessary. > > > > Sridhar and I talked about this offline. We agree now that you can just > > proceed with the single parameter. > > OK, thanks. > > > > > > >As I mentioned earlier: if it's just that the name of the parameter isn't > > >right (e.g., you want it to be 'tx_processed' instead of 'tx_cleaned') then > > >that's an easy fix; I'll just change the name. > > > > I think the name change isn't necessary, since we're not going to extend > > this patch with full XDP events printed (see below) So better to keep the twisted naming? > > > > > > > >It doesn't seem helpful to have xsk_frames as an out parameter for > > >i40e_napi_poll tracepoint; that value is not used to determine anything > > >about i40e's NAPI. > > > > > >>I am not completely clear on the reasoning behind setting clean_complete > > >>based on number of packets transmitted in case of XDP. > > >>> > > >>>>That might reduce the complexity a bit, and will probably still be pretty > > >>>>useful for people tuning their non-XDP workloads. > > >> > > >>This option is fine too. > > > > > >I'll give Jesse a chance to weigh in before I proceed with spinning a v3. > > > > I'm ok with the patch you have now, that shows nb_pkts because it's the > > input to the polling decision. We can add the detail about XDP transmits > > cleaned in a later series or patch that is by someone who wants the XDP > > details in the napi poll context. Please spell out whole AF_XDP instead of referring to XDP. Future readers might get confused. 
XDP is totally fine with what Joe is doing; I'm trying to bring up the whole AF_XDP term and I feel like I'm being ignored. The number of packets produced to the HW Tx ring != the number of packets produced to the AF_XDP CQ ring. > > Thanks for the detailed and thoughtful feedback, it is much appreciated. > > I'll leave this patch the way it is then and tweak the RX patch to include > an rx_clean_complete boolean as I mentioned in my response to that patch > and send out a v3. > > FWIW, I had assumed that you would suggest dropping the XDP stuff so I > pre-emptively spun a branch locally that dropped it... it is a much smaller > change of course, but I suspect that this tracepoint might useful for XDP > users, so I think the decision to leave it with nb_pkts makes sense. > > Thanks again for the review. I'll send a v3 shortly.
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index b97c95f..a2cc98e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -923,11 +923,13 @@ void i40e_detect_recover_hung(struct i40e_vsi *vsi) * @vsi: the VSI we care about * @tx_ring: Tx ring to clean * @napi_budget: Used to determine if we are in netpoll + * @tx_cleaned: Out parameter set to the number of TXes cleaned * * Returns true if there's any budget left (e.g. the clean is finished) **/ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, - struct i40e_ring *tx_ring, int napi_budget) + struct i40e_ring *tx_ring, int napi_budget, + unsigned int *tx_cleaned) { int i = tx_ring->next_to_clean; struct i40e_tx_buffer *tx_buf; @@ -1026,7 +1028,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, i40e_arm_wb(tx_ring, vsi, budget); if (ring_is_xdp(tx_ring)) - return !!budget; + goto out; /* notify netdev of completed buffers */ netdev_tx_completed_queue(txring_txq(tx_ring), @@ -1048,6 +1050,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, } } +out: + *tx_cleaned = total_packets; return !!budget; } @@ -2689,10 +2693,12 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) container_of(napi, struct i40e_q_vector, napi); struct i40e_vsi *vsi = q_vector->vsi; struct i40e_ring *ring; + bool tx_clean_complete = true; bool clean_complete = true; bool arm_wb = false; int budget_per_ring; int work_done = 0; + unsigned int tx_cleaned = 0; if (test_bit(__I40E_VSI_DOWN, vsi->state)) { napi_complete(napi); @@ -2704,11 +2710,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget) */ i40e_for_each_ring(ring, q_vector->tx) { bool wd = ring->xsk_pool ? 
- i40e_clean_xdp_tx_irq(vsi, ring) : - i40e_clean_tx_irq(vsi, ring, budget); + i40e_clean_xdp_tx_irq(vsi, ring, &tx_cleaned) : + i40e_clean_tx_irq(vsi, ring, budget, &tx_cleaned); if (!wd) { - clean_complete = false; + clean_complete = tx_clean_complete = false; continue; } arm_wb |= ring->arm_wb; diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 790aaeff..f98ce7e4 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -530,18 +530,22 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring) * i40e_xmit_zc - Performs zero-copy Tx AF_XDP * @xdp_ring: XDP Tx ring * @budget: NAPI budget + * @tx_cleaned: Out parameter of the TX packets processed * * Returns true if the work is finished. **/ -static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) +static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget, + unsigned int *tx_cleaned) { struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; u32 nb_pkts, nb_processed = 0; unsigned int total_bytes = 0; nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); - if (!nb_pkts) + if (!nb_pkts) { + *tx_cleaned = 0; return true; + } if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { nb_processed = xdp_ring->count - xdp_ring->next_to_use; @@ -558,6 +562,7 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget) i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes); + *tx_cleaned = nb_pkts; return nb_pkts < budget; } @@ -581,10 +586,12 @@ static void i40e_clean_xdp_tx_buffer(struct i40e_ring *tx_ring, * i40e_clean_xdp_tx_irq - Completes AF_XDP entries, and cleans XDP entries * @vsi: Current VSI * @tx_ring: XDP Tx ring + * @tx_cleaned: out parameter of number of TXes cleaned * * Returns true if cleanup/tranmission is done. 
**/ -bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring) +bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring, + unsigned int *tx_cleaned) { struct xsk_buff_pool *bp = tx_ring->xsk_pool; u32 i, completed_frames, xsk_frames = 0; @@ -634,7 +641,7 @@ bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring) if (xsk_uses_need_wakeup(tx_ring->xsk_pool)) xsk_set_tx_need_wakeup(tx_ring->xsk_pool); - return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring)); + return i40e_xmit_zc(tx_ring, I40E_DESC_UNUSED(tx_ring), tx_cleaned); } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h index 821df24..396ed11 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h @@ -30,7 +30,8 @@ int i40e_xsk_pool_setup(struct i40e_vsi *vsi, struct xsk_buff_pool *pool, bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count); int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget); -bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring); +bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi, struct i40e_ring *tx_ring, + unsigned int *tx_cleaned); int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); int i40e_realloc_rx_bi_zc(struct i40e_vsi *vsi, bool zc); void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
Update i40e_clean_tx_irq to take an out parameter (tx_cleaned) which stores the number of TXs cleaned. Likewise, update i40e_clean_xdp_tx_irq and i40e_xmit_zc to do the same. Care has been taken to avoid changing the control flow of any functions involved. Signed-off-by: Joe Damato <jdamato@fastly.com> --- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 16 +++++++++++----- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 15 +++++++++++---- drivers/net/ethernet/intel/i40e/i40e_xsk.h | 3 ++- 3 files changed, 24 insertions(+), 10 deletions(-)