Message ID | 20230413032541.885238-2-yoong.siang.song@intel.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | XDP Rx HWTS metadata for stmmac driver | expand |
Context | Check | Description |
---|---|---|
netdev/series_format | success | Posting correctly formatted |
netdev/tree_selection | success | Clearly marked for net-next |
netdev/fixes_present | success | Fixes tag not required for -next series |
netdev/header_inline | success | No static functions without inline keyword in header files |
netdev/build_32bit | success | Errors and warnings before: 43 this patch: 43 |
netdev/cc_maintainers | success | CCed 16 of 16 maintainers |
netdev/build_clang | success | Errors and warnings before: 18 this patch: 18 |
netdev/verify_signedoff | success | Signed-off-by tag matches author and committer |
netdev/deprecated_api | success | None detected |
netdev/check_selftest | success | No net selftest shell script |
netdev/verify_fixes | success | No Fixes tag |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 43 this patch: 43 |
netdev/checkpatch | warning | WARNING: line length of 91 exceeds 80 columns |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/source_inline | success | Was 0 now: 0 |
On 13/04/2023 05.25, Song Yoong Siang wrote: > Introduce struct stmmac_xdp_buff as a preparation to support XDP Rx > metadata via kfuncs. > > Reviewed-by: Jacob Keller <jacob.e.keller@intel.com> > Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com> > --- > drivers/net/ethernet/stmicro/stmmac/stmmac.h | 4 ++++ > .../net/ethernet/stmicro/stmmac/stmmac_main.c | 18 +++++++++--------- > 2 files changed, 13 insertions(+), 9 deletions(-) > > diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h > index 3d15e1e92e18..ac8ccf851708 100644 > --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h > +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h > @@ -92,6 +92,10 @@ struct stmmac_rx_buffer { > dma_addr_t sec_addr; > }; > > +struct stmmac_xdp_buff { > + struct xdp_buff xdp; > +}; > + > struct stmmac_rx_queue { > u32 rx_count_frames; > u32 queue_index; > diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c > index d7fcab057032..6ffce52ca837 100644 > --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c > +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c > @@ -5188,9 +5188,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) > int status = 0, coe = priv->hw->rx_csum; > unsigned int next_entry = rx_q->cur_rx; > enum dma_data_direction dma_dir; > + struct stmmac_xdp_buff ctx = {}; This code trick {} will zero out the struct. Is this done on purpose and really needed? On x86_64 this unfortunately generates an asm instruction: rep stos A repeated store string operation, for zeroing out memory, which is slow. (Because[1] it supports being suspended by an exception or interrupt, which requires it to store/restore CPU flags). 
[1] https://www.felixcloutier.com/x86/rep:repe:repz:repne:repnz#tbl-4-22 > unsigned int desc_size; > struct sk_buff *skb = NULL; > - struct xdp_buff xdp; > int xdp_status = 0; > int buf_sz; > > @@ -5311,17 +5311,17 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) > dma_sync_single_for_cpu(priv->device, buf->addr, > buf1_len, dma_dir); > > - xdp_init_buff(&xdp, buf_sz, &rx_q->xdp_rxq); > - xdp_prepare_buff(&xdp, page_address(buf->page), > + xdp_init_buff(&ctx.xdp, buf_sz, &rx_q->xdp_rxq); > + xdp_prepare_buff(&ctx.xdp, page_address(buf->page), > buf->page_offset, buf1_len, false); > > - pre_len = xdp.data_end - xdp.data_hard_start - > + pre_len = ctx.xdp.data_end - ctx.xdp.data_hard_start - > buf->page_offset; > - skb = stmmac_xdp_run_prog(priv, &xdp); > + skb = stmmac_xdp_run_prog(priv, &ctx.xdp); > /* Due xdp_adjust_tail: DMA sync for_device > * cover max len CPU touch > */ > - sync_len = xdp.data_end - xdp.data_hard_start - > + sync_len = ctx.xdp.data_end - ctx.xdp.data_hard_start - > buf->page_offset; > sync_len = max(sync_len, pre_len); > > @@ -5331,7 +5331,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) > > if (xdp_res & STMMAC_XDP_CONSUMED) { > page_pool_put_page(rx_q->page_pool, > - virt_to_head_page(xdp.data), > + virt_to_head_page(ctx.xdp.data), > sync_len, true); > buf->page = NULL; > priv->dev->stats.rx_dropped++; > @@ -5359,7 +5359,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) > > if (!skb) { > /* XDP program may expand or reduce tail */ > - buf1_len = xdp.data_end - xdp.data; > + buf1_len = ctx.xdp.data_end - ctx.xdp.data; > > skb = napi_alloc_skb(&ch->rx_napi, buf1_len); > if (!skb) { > @@ -5369,7 +5369,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) > } > > /* XDP program may adjust header */ > - skb_copy_to_linear_data(skb, xdp.data, buf1_len); > + skb_copy_to_linear_data(skb, ctx.xdp.data, buf1_len); > skb_put(skb, buf1_len); > > /* Data payload 
copied into SKB, page ready for recycle */
On Friday, April 14, 2023 1:10 AM , Jesper Dangaard Brouer <jbrouer@redhat.com> wrote: >On 13/04/2023 05.25, Song Yoong Siang wrote: >> Introduce struct stmmac_xdp_buff as a preparation to support XDP Rx >> metadata via kfuncs. >> >> Reviewed-by: Jacob Keller <jacob.e.keller@intel.com> >> Signed-off-by: Song Yoong Siang <yoong.siang.song@intel.com> >> --- >> drivers/net/ethernet/stmicro/stmmac/stmmac.h | 4 ++++ >> .../net/ethernet/stmicro/stmmac/stmmac_main.c | 18 +++++++++--------- >> 2 files changed, 13 insertions(+), 9 deletions(-) >> >> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h >> b/drivers/net/ethernet/stmicro/stmmac/stmmac.h >> index 3d15e1e92e18..ac8ccf851708 100644 >> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h >> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h >> @@ -92,6 +92,10 @@ struct stmmac_rx_buffer { >> dma_addr_t sec_addr; >> }; >> >> +struct stmmac_xdp_buff { >> + struct xdp_buff xdp; >> +}; >> + >> struct stmmac_rx_queue { >> u32 rx_count_frames; >> u32 queue_index; >> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c >> b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c >> index d7fcab057032..6ffce52ca837 100644 >> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c >> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c >> @@ -5188,9 +5188,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int >limit, u32 queue) >> int status = 0, coe = priv->hw->rx_csum; >> unsigned int next_entry = rx_q->cur_rx; >> enum dma_data_direction dma_dir; >> + struct stmmac_xdp_buff ctx = {}; > >This code trick {} will zero out the struct. > >Is this done purpose and really needed? > >On x86_64 this unfortunately generates an asm instruction: rep stos > >A repeated store string operation, for zeroing out memory, which is slow. >(Because[1] it supports be suspended by an exception or interrupt, which requires >it to store/restore CPU flags). 
> >[1] https://www.felixcloutier.com/x86/rep:repe:repz:repne:repnz#tbl-4-22 Thanks for the useful information and link. I will submit v5 to remove {}. > > >> unsigned int desc_size; >> struct sk_buff *skb = NULL; >> - struct xdp_buff xdp; >> int xdp_status = 0; >> int buf_sz; >> >> @@ -5311,17 +5311,17 @@ static int stmmac_rx(struct stmmac_priv *priv, int >limit, u32 queue) >> dma_sync_single_for_cpu(priv->device, buf->addr, >> buf1_len, dma_dir); >> >> - xdp_init_buff(&xdp, buf_sz, &rx_q->xdp_rxq); >> - xdp_prepare_buff(&xdp, page_address(buf->page), >> + xdp_init_buff(&ctx.xdp, buf_sz, &rx_q->xdp_rxq); >> + xdp_prepare_buff(&ctx.xdp, page_address(buf->page), >> buf->page_offset, buf1_len, false); >> >> - pre_len = xdp.data_end - xdp.data_hard_start - >> + pre_len = ctx.xdp.data_end - ctx.xdp.data_hard_start - >> buf->page_offset; >> - skb = stmmac_xdp_run_prog(priv, &xdp); >> + skb = stmmac_xdp_run_prog(priv, &ctx.xdp); >> /* Due xdp_adjust_tail: DMA sync for_device >> * cover max len CPU touch >> */ >> - sync_len = xdp.data_end - xdp.data_hard_start - >> + sync_len = ctx.xdp.data_end - ctx.xdp.data_hard_start - >> buf->page_offset; >> sync_len = max(sync_len, pre_len); >> >> @@ -5331,7 +5331,7 @@ static int stmmac_rx(struct stmmac_priv *priv, >> int limit, u32 queue) >> >> if (xdp_res & STMMAC_XDP_CONSUMED) { >> page_pool_put_page(rx_q->page_pool, >> - >virt_to_head_page(xdp.data), >> + >virt_to_head_page(ctx.xdp.data), >> sync_len, true); >> buf->page = NULL; >> priv->dev->stats.rx_dropped++; >> @@ -5359,7 +5359,7 @@ static int stmmac_rx(struct stmmac_priv *priv, >> int limit, u32 queue) >> >> if (!skb) { >> /* XDP program may expand or reduce tail */ >> - buf1_len = xdp.data_end - xdp.data; >> + buf1_len = ctx.xdp.data_end - ctx.xdp.data; >> >> skb = napi_alloc_skb(&ch->rx_napi, buf1_len); >> if (!skb) { >> @@ -5369,7 +5369,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int >limit, u32 queue) >> } >> >> /* XDP program may adjust header */ >> - 
skb_copy_to_linear_data(skb, xdp.data, buf1_len); >> + skb_copy_to_linear_data(skb, ctx.xdp.data, buf1_len); >> skb_put(skb, buf1_len); >> >> /* Data payload copied into SKB, page ready for recycle >*/
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index 3d15e1e92e18..ac8ccf851708 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -92,6 +92,10 @@ struct stmmac_rx_buffer { dma_addr_t sec_addr; }; +struct stmmac_xdp_buff { + struct xdp_buff xdp; +}; + struct stmmac_rx_queue { u32 rx_count_frames; u32 queue_index; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d7fcab057032..6ffce52ca837 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -5188,9 +5188,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) int status = 0, coe = priv->hw->rx_csum; unsigned int next_entry = rx_q->cur_rx; enum dma_data_direction dma_dir; + struct stmmac_xdp_buff ctx = {}; unsigned int desc_size; struct sk_buff *skb = NULL; - struct xdp_buff xdp; int xdp_status = 0; int buf_sz; @@ -5311,17 +5311,17 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) dma_sync_single_for_cpu(priv->device, buf->addr, buf1_len, dma_dir); - xdp_init_buff(&xdp, buf_sz, &rx_q->xdp_rxq); - xdp_prepare_buff(&xdp, page_address(buf->page), + xdp_init_buff(&ctx.xdp, buf_sz, &rx_q->xdp_rxq); + xdp_prepare_buff(&ctx.xdp, page_address(buf->page), buf->page_offset, buf1_len, false); - pre_len = xdp.data_end - xdp.data_hard_start - + pre_len = ctx.xdp.data_end - ctx.xdp.data_hard_start - buf->page_offset; - skb = stmmac_xdp_run_prog(priv, &xdp); + skb = stmmac_xdp_run_prog(priv, &ctx.xdp); /* Due xdp_adjust_tail: DMA sync for_device * cover max len CPU touch */ - sync_len = xdp.data_end - xdp.data_hard_start - + sync_len = ctx.xdp.data_end - ctx.xdp.data_hard_start - buf->page_offset; sync_len = max(sync_len, pre_len); @@ -5331,7 +5331,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) if (xdp_res & 
STMMAC_XDP_CONSUMED) { page_pool_put_page(rx_q->page_pool, - virt_to_head_page(xdp.data), + virt_to_head_page(ctx.xdp.data), sync_len, true); buf->page = NULL; priv->dev->stats.rx_dropped++; @@ -5359,7 +5359,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) if (!skb) { /* XDP program may expand or reduce tail */ - buf1_len = xdp.data_end - xdp.data; + buf1_len = ctx.xdp.data_end - ctx.xdp.data; skb = napi_alloc_skb(&ch->rx_napi, buf1_len); if (!skb) { @@ -5369,7 +5369,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) } /* XDP program may adjust header */ - skb_copy_to_linear_data(skb, xdp.data, buf1_len); + skb_copy_to_linear_data(skb, ctx.xdp.data, buf1_len); skb_put(skb, buf1_len); /* Data payload copied into SKB, page ready for recycle */