@@ -23,6 +23,7 @@ void xsk_clear_rx_need_wakeup(struct xsk_buff_pool *pool);
void xsk_clear_tx_need_wakeup(struct xsk_buff_pool *pool);
bool xsk_uses_need_wakeup(struct xsk_buff_pool *pool);
int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
+int xsk_rcv_batch(struct xdp_sock *xs, struct xdp_buff **bufs, int batch_size);
void xsk_flush(struct xdp_sock *xs);
static inline u32 xsk_pool_get_headroom(struct xsk_buff_pool *pool)
@@ -125,6 +126,22 @@ static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_bu
xp_dma_sync_for_cpu(xskb);
}
+static inline void xsk_buff_dma_sync_for_cpu_batch(struct xdp_buff **bufs,
+						    struct xsk_buff_pool *pool,
+						    int batch_size)
+{
+	struct xdp_buff_xsk *xskb;
+	int i;
+
+	if (!pool->dma_need_sync)
+		return;
+
+	for (i = 0; i < batch_size; i++) {
+		xskb = container_of(bufs[i], struct xdp_buff_xsk, xdp);
+		xp_dma_sync_for_cpu(xskb);
+	}
+}
+
static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool,
dma_addr_t dma,
size_t size)
@@ -191,6 +208,11 @@ static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
return 0;
}
+static inline int xsk_rcv_batch(struct xdp_sock *xs, struct xdp_buff **bufs, int batch_size)
+{
+ return 0;
+}
+
static inline void xsk_flush(struct xdp_sock *xs)
{
}
@@ -274,6 +296,12 @@ static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_bu
{
}
+static inline void xsk_buff_dma_sync_for_cpu_batch(struct xdp_buff **bufs,
+ struct xsk_buff_pool *pool,
+ int batch_size)
+{
+}
+
static inline void xsk_buff_raw_dma_sync_for_device(struct xsk_buff_pool *pool,
dma_addr_t dma,
size_t size)
@@ -214,6 +214,28 @@ static inline void xp_release(struct xdp_buff_xsk *xskb)
xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
}
+/* Release a batch of xdp_buffs back to an xsk_buff_pool.
+ * All buffs in the batch must come from the same xsk_buff_pool. This makes
+ * it safe to push the batch onto the top of the free_heads stack, because
+ * at least as many entries will have been popped from the stack earlier in
+ * the datapath.
+ */
+static inline void xp_release_batch(struct xdp_buff **bufs, int batch_size)
+{
+	struct xdp_buff_xsk *xskb = container_of(bufs[0], struct xdp_buff_xsk, xdp);
+	struct xsk_buff_pool *pool = xskb->pool;
+	u32 tail = pool->free_heads_cnt;
+	u32 i;
+
+	if (pool->unaligned) {
+		for (i = 0; i < batch_size; i++) {
+			xskb = container_of(bufs[i], struct xdp_buff_xsk, xdp);
+			pool->free_heads[tail + i] = xskb;
+		}
+		pool->free_heads_cnt += batch_size;
+	}
+}
+
static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb)
{
u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
@@ -151,6 +151,20 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
return 0;
}
+static int __xsk_rcv_zc_batch(struct xdp_sock *xs, struct xdp_buff **bufs, int batch_size)
+{
+	int err;
+
+	err = xskq_prod_reserve_desc_batch(xs->rx, bufs, batch_size);
+	if (err) {
+		xs->rx_queue_full++;
+		return err;
+	}
+
+	xp_release_batch(bufs, batch_size);
+	return 0;
+}
+
static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
{
void *from_buf, *to_buf;
@@ -269,6 +283,21 @@ int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
}
EXPORT_SYMBOL(xsk_rcv);
+int xsk_rcv_batch(struct xdp_sock *xs, struct xdp_buff **bufs, int batch_size)
+{
+	int err;
+
+	err = xsk_rcv_check(xs, bufs[0]);
+	if (err)
+		return err;
+
+	/* Batching is only supported for buffers backed by an xsk_buff_pool */
+	if (bufs[0]->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
+		return -EINVAL;
+
+	return __xsk_rcv_zc_batch(xs, bufs, batch_size);
+}
+EXPORT_SYMBOL(xsk_rcv_batch);
+
int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
@@ -338,6 +338,11 @@ static inline bool xskq_prod_is_full(struct xsk_queue *q)
return xskq_prod_nb_free(q, 1) ? false : true;
}
+static inline bool xskq_prod_is_full_n(struct xsk_queue *q, u32 n)
+{
+ return xskq_prod_nb_free(q, n) ? false : true;
+}
+
static inline void xskq_prod_cancel(struct xsk_queue *q)
{
q->cached_prod--;
@@ -399,6 +404,32 @@ static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
return 0;
}
+static inline int xskq_prod_reserve_desc_batch(struct xsk_queue *q, struct xdp_buff **bufs,
+					       int batch_size)
+{
+	struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
+	struct xdp_buff_xsk *xskb;
+	u64 addr;
+	u32 idx;
+	u32 len;
+	u32 i;
+
+	if (xskq_prod_is_full_n(q, batch_size))
+		return -ENOSPC;
+
+	/* A, matches D */
+	for (i = 0; i < batch_size; i++) {
+		xskb = container_of(bufs[i], struct xdp_buff_xsk, xdp);
+		addr = xp_get_handle(xskb);
+		len = bufs[i]->data_end - bufs[i]->data;
+		idx = (q->cached_prod + i) & q->ring_mask;
+		ring->desc[idx].addr = addr;
+		ring->desc[idx].len = len;
+	}
+
+	q->cached_prod += batch_size;
+
+	return 0;
+}
+
static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx)
{
smp_store_release(&q->ring->producer, idx); /* B, matches C */
Introduce a batched version of xsk_rcv called xsk_rcv_batch which takes
an array of xdp_buffs and pushes the whole batch to the Rx ring. All
buffers in a batch must come from the same xsk_buff_pool, so the batched
path is only used for zero-copy (MEM_TYPE_XSK_BUFF_POOL) buffers. Also
introduce a batched version of xsk_buff_dma_sync_for_cpu.

Signed-off-by: Ciara Loftus <ciara.loftus@intel.com>
---
 include/net/xdp_sock_drv.h  | 28 ++++++++++++++++++++++++++++
 include/net/xsk_buff_pool.h | 22 ++++++++++++++++++++++
 net/xdp/xsk.c               | 29 +++++++++++++++++++++++++++++
 net/xdp/xsk_queue.h         | 31 +++++++++++++++++++++++++++++++
 4 files changed, 110 insertions(+)
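
For illustration only, below is a rough sketch of how a zero-copy driver's
receive path might consume the two new batched helpers once a caller is
wired up (for example in a later patch). The function name, the way the
xdp_sock and the xdp_buff array are obtained, and the error handling are
assumptions made for the example and are not part of this patch.

/* Hypothetical caller of the batched AF_XDP receive API; not part of
 * this patch. Assumes the driver has already run its XDP program and
 * collected batch_size buffers, all backed by the same xsk_buff_pool.
 */
#include <net/xdp_sock_drv.h>

static int rx_zc_batch_example(struct xsk_buff_pool *pool, struct xdp_sock *xs,
			       struct xdp_buff **bufs, int batch_size)
{
	int err;

	/* Make the payload of every buffer in the batch visible to the CPU. */
	xsk_buff_dma_sync_for_cpu_batch(bufs, pool, batch_size);

	/* Push all descriptors to the socket's Rx ring in a single call.
	 * On failure (e.g. -ENOSPC) the caller could fall back to the
	 * per-buffer xsk_rcv() path or drop the batch.
	 */
	err = xsk_rcv_batch(xs, bufs, batch_size);
	if (err)
		return err;

	/* Publish the new producer pointer and wake up the application. */
	xsk_flush(xs);
	return 0;
}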