Message ID | f3d2937208eae9644f36d805cd5b30e0985767a6.1607349924.git.lorenzo@kernel.org (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Headers | show |
Series | mvneta: introduce XDP multi-buffer support | expand |
Context | Check | Description |
---|---|---|
netdev/cover_letter | success | Link |
netdev/fixes_present | success | Link |
netdev/patch_count | success | Link |
netdev/tree_selection | success | Clearly marked for bpf-next |
netdev/subject_prefix | success | Link |
netdev/source_inline | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Link |
netdev/module_param | success | Was 0 now: 0 |
netdev/build_32bit | success | Errors and warnings before: 2 this patch: 2 |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/verify_fixes | success | Link |
netdev/checkpatch | warning | WARNING: line length of 81 exceeds 80 columns |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 2 this patch: 2 |
netdev/header_inline | success | Link |
netdev/stable | success | Stable not CCed |
Lorenzo Bianconi <lorenzo@kernel.org> writes:

> Introduce the capability to map non-linear xdp buffer running
> mvneta_xdp_submit_frame() for XDP_TX and XDP_REDIRECT
>
> Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> ---
>  drivers/net/ethernet/marvell/mvneta.c | 94 ++++++++++++++++-----------
>  1 file changed, 56 insertions(+), 38 deletions(-)

[...]

>                         if (napi && buf->type == MVNETA_TYPE_XDP_TX)
>                                 xdp_return_frame_rx_napi(buf->xdpf);
>                         else
> @@ -2054,45 +2054,64 @@ mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
>
>  static int
>  mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
> -                        struct xdp_frame *xdpf, bool dma_map)
> +                        struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map)
>  {
> -        struct mvneta_tx_desc *tx_desc;
> -        struct mvneta_tx_buf *buf;
> -        dma_addr_t dma_addr;
> +        struct xdp_shared_info *xdp_sinfo = xdp_get_shared_info_from_frame(xdpf);
> +        int i, num_frames = xdpf->mb ? xdp_sinfo->nr_frags + 1 : 1;
> +        struct mvneta_tx_desc *tx_desc = NULL;
> +        struct page *page;
>
> -        if (txq->count >= txq->tx_stop_threshold)
> +        if (txq->count + num_frames >= txq->size)
>                  return MVNETA_XDP_DROPPED;
>
> -        tx_desc = mvneta_txq_next_desc_get(txq);
> +        for (i = 0; i < num_frames; i++) {
> +                struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
> +                skb_frag_t *frag = i ? &xdp_sinfo->frags[i - 1] : NULL;
> +                int len = frag ? xdp_get_frag_size(frag) : xdpf->len;

nit, from branch prediction point of view, maybe it would be better to write

    int len = i ? xdp_get_frag_size(frag) : xdpf->len;

since the value of i is checked one line above

Disclaimer: I'm far from a compiler expert, and don't know whether the compiler would know to group these two assignments together into a single branch prediction decision, but it feels like using 'i' would make this decision easier for it.

Thanks,
Shay

[...]
On Sat, Dec 19, 2020 at 4:56 PM Shay Agroskin <shayagr@amazon.com> wrote:
>
>
> Lorenzo Bianconi <lorenzo@kernel.org> writes:
>
> > Introduce the capability to map non-linear xdp buffer running
> > mvneta_xdp_submit_frame() for XDP_TX and XDP_REDIRECT
> >
> > Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
> > ---
> >  drivers/net/ethernet/marvell/mvneta.c | 94 ++++++++++++++++-----------
> >  1 file changed, 56 insertions(+), 38 deletions(-)
> [...]
> >                         if (napi && buf->type == MVNETA_TYPE_XDP_TX)
> >                                 xdp_return_frame_rx_napi(buf->xdpf);
> >                         else
> > @@ -2054,45 +2054,64 @@ mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
> >
> >  static int
> >  mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq,
> > -                        struct xdp_frame *xdpf, bool dma_map)
> > +                        struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map)
> >  {
> > -        struct mvneta_tx_desc *tx_desc;
> > -        struct mvneta_tx_buf *buf;
> > -        dma_addr_t dma_addr;
> > +        struct xdp_shared_info *xdp_sinfo = xdp_get_shared_info_from_frame(xdpf);
> > +        int i, num_frames = xdpf->mb ? xdp_sinfo->nr_frags + 1 : 1;
> > +        struct mvneta_tx_desc *tx_desc = NULL;
> > +        struct page *page;
> >
> > -        if (txq->count >= txq->tx_stop_threshold)
> > +        if (txq->count + num_frames >= txq->size)
> >                  return MVNETA_XDP_DROPPED;
> >
> > -        tx_desc = mvneta_txq_next_desc_get(txq);
> > +        for (i = 0; i < num_frames; i++) {
> > +                struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index];
> > +                skb_frag_t *frag = i ? &xdp_sinfo->frags[i - 1] : NULL;
> > +                int len = frag ? xdp_get_frag_size(frag) : xdpf->len;
>
> nit, from branch prediction point of view, maybe it would be better to write
>     int len = i ? xdp_get_frag_size(frag) : xdpf->len;
>

ack, I will fix it in v6.

Regards,
Lorenzo

> since the value of i is checked one line above
> Disclaimer: I'm far from a compiler expert, and don't know whether
> the compiler would know to group these two assignments together
> into a single branch prediction decision, but it feels like using
> 'i' would make this decision easier for it.
>
> Thanks,
> Shay
>
> [...]
>
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index bac1ae7014eb..dc1f1f25fce0 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1857,8 +1857,8 @@ static void mvneta_txq_bufs_free(struct mvneta_port *pp, bytes_compl += buf->skb->len; pkts_compl++; dev_kfree_skb_any(buf->skb); - } else if (buf->type == MVNETA_TYPE_XDP_TX || - buf->type == MVNETA_TYPE_XDP_NDO) { + } else if ((buf->type == MVNETA_TYPE_XDP_TX || + buf->type == MVNETA_TYPE_XDP_NDO) && buf->xdpf) { if (napi && buf->type == MVNETA_TYPE_XDP_TX) xdp_return_frame_rx_napi(buf->xdpf); else @@ -2054,45 +2054,64 @@ mvneta_xdp_put_buff(struct mvneta_port *pp, struct mvneta_rx_queue *rxq, static int mvneta_xdp_submit_frame(struct mvneta_port *pp, struct mvneta_tx_queue *txq, - struct xdp_frame *xdpf, bool dma_map) + struct xdp_frame *xdpf, int *nxmit_byte, bool dma_map) { - struct mvneta_tx_desc *tx_desc; - struct mvneta_tx_buf *buf; - dma_addr_t dma_addr; + struct xdp_shared_info *xdp_sinfo = xdp_get_shared_info_from_frame(xdpf); + int i, num_frames = xdpf->mb ? xdp_sinfo->nr_frags + 1 : 1; + struct mvneta_tx_desc *tx_desc = NULL; + struct page *page; - if (txq->count >= txq->tx_stop_threshold) + if (txq->count + num_frames >= txq->size) return MVNETA_XDP_DROPPED; - tx_desc = mvneta_txq_next_desc_get(txq); + for (i = 0; i < num_frames; i++) { + struct mvneta_tx_buf *buf = &txq->buf[txq->txq_put_index]; + skb_frag_t *frag = i ? &xdp_sinfo->frags[i - 1] : NULL; + int len = frag ? 
xdp_get_frag_size(frag) : xdpf->len; + dma_addr_t dma_addr; - buf = &txq->buf[txq->txq_put_index]; - if (dma_map) { - /* ndo_xdp_xmit */ - dma_addr = dma_map_single(pp->dev->dev.parent, xdpf->data, - xdpf->len, DMA_TO_DEVICE); - if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) { - mvneta_txq_desc_put(txq); - return MVNETA_XDP_DROPPED; + tx_desc = mvneta_txq_next_desc_get(txq); + if (dma_map) { + /* ndo_xdp_xmit */ + void *data; + + data = frag ? xdp_get_frag_address(frag) : xdpf->data; + dma_addr = dma_map_single(pp->dev->dev.parent, data, + len, DMA_TO_DEVICE); + if (dma_mapping_error(pp->dev->dev.parent, dma_addr)) { + for (; i >= 0; i--) + mvneta_txq_desc_put(txq); + return MVNETA_XDP_DROPPED; + } + buf->type = MVNETA_TYPE_XDP_NDO; + } else { + page = frag ? xdp_get_frag_page(frag) + : virt_to_page(xdpf->data); + dma_addr = page_pool_get_dma_addr(page); + if (frag) + dma_addr += xdp_get_frag_offset(frag); + else + dma_addr += sizeof(*xdpf) + xdpf->headroom; + dma_sync_single_for_device(pp->dev->dev.parent, + dma_addr, len, + DMA_BIDIRECTIONAL); + buf->type = MVNETA_TYPE_XDP_TX; } - buf->type = MVNETA_TYPE_XDP_NDO; - } else { - struct page *page = virt_to_page(xdpf->data); + buf->xdpf = i ? NULL : xdpf; + + tx_desc->command = !i ? 
MVNETA_TXD_F_DESC : 0; + tx_desc->buf_phys_addr = dma_addr; + tx_desc->data_size = len; + *nxmit_byte += len; - dma_addr = page_pool_get_dma_addr(page) + - sizeof(*xdpf) + xdpf->headroom; - dma_sync_single_for_device(pp->dev->dev.parent, dma_addr, - xdpf->len, DMA_BIDIRECTIONAL); - buf->type = MVNETA_TYPE_XDP_TX; + mvneta_txq_inc_put(txq); } - buf->xdpf = xdpf; - tx_desc->command = MVNETA_TXD_FLZ_DESC; - tx_desc->buf_phys_addr = dma_addr; - tx_desc->data_size = xdpf->len; + /*last descriptor */ + tx_desc->command |= MVNETA_TXD_L_DESC | MVNETA_TXD_Z_PAD; - mvneta_txq_inc_put(txq); - txq->pending++; - txq->count++; + txq->pending += num_frames; + txq->count += num_frames; return MVNETA_XDP_TX; } @@ -2103,8 +2122,8 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp) struct mvneta_pcpu_stats *stats = this_cpu_ptr(pp->stats); struct mvneta_tx_queue *txq; struct netdev_queue *nq; + int cpu, nxmit_byte = 0; struct xdp_frame *xdpf; - int cpu; u32 ret; xdpf = xdp_convert_buff_to_frame(xdp); @@ -2116,10 +2135,10 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp) nq = netdev_get_tx_queue(pp->dev, txq->id); __netif_tx_lock(nq, cpu); - ret = mvneta_xdp_submit_frame(pp, txq, xdpf, false); + ret = mvneta_xdp_submit_frame(pp, txq, xdpf, &nxmit_byte, false); if (ret == MVNETA_XDP_TX) { u64_stats_update_begin(&stats->syncp); - stats->es.ps.tx_bytes += xdpf->len; + stats->es.ps.tx_bytes += nxmit_byte; stats->es.ps.tx_packets++; stats->es.ps.xdp_tx++; u64_stats_update_end(&stats->syncp); @@ -2158,10 +2177,9 @@ mvneta_xdp_xmit(struct net_device *dev, int num_frame, __netif_tx_lock(nq, cpu); for (i = 0; i < num_frame; i++) { - ret = mvneta_xdp_submit_frame(pp, txq, frames[i], true); - if (ret == MVNETA_XDP_TX) { - nxmit_byte += frames[i]->len; - } else { + ret = mvneta_xdp_submit_frame(pp, txq, frames[i], &nxmit_byte, + true); + if (ret != MVNETA_XDP_TX) { xdp_return_frame_rx_napi(frames[i]); nxmit--; }
Introduce the capability to map non-linear XDP buffers when running
mvneta_xdp_submit_frame() for XDP_TX and XDP_REDIRECT.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 drivers/net/ethernet/marvell/mvneta.c | 94 ++++++++++++++++-----------
 1 file changed, 56 insertions(+), 38 deletions(-)