@@ -39,6 +39,8 @@ enum io_uring_cmd_flags {
IO_URING_F_COMPAT = (1 << 12),
};
+struct io_zc_rx_ifq;
+
struct io_wq_work_node {
struct io_wq_work_node *next;
};
@@ -385,6 +387,8 @@ struct io_ring_ctx {
struct io_rsrc_data *file_data;
struct io_rsrc_data *buf_data;
+ struct io_zc_rx_ifq *ifq;
+
/* protected by ->uring_lock */
struct list_head rsrc_ref_list;
struct io_alloc_cache rsrc_node_cache;
@@ -575,6 +575,9 @@ enum {
IORING_REGISTER_NAPI = 27,
IORING_UNREGISTER_NAPI = 28,
+ /* register a network interface queue for zerocopy */
+ IORING_REGISTER_ZC_RX_IFQ = 29,
+
/* this goes last */
IORING_REGISTER_LAST,
@@ -782,6 +785,43 @@ enum {
SOCKET_URING_OP_SETSOCKOPT,
};
+/*
+ * One entry of the rbuf ring. Three value fields followed by explicit
+ * padding, so the entry is 16 bytes with no implicit holes.
+ * NOTE(review): presumably off/len locate a buffer inside the region
+ * numbered by 'region' - field semantics are not visible here, confirm.
+ */
+struct io_uring_rbuf_rqe {
+ __u32 off;
+ __u32 len;
+ __u16 region;
+ __u8 __pad[6];
+};
+
+/*
+ * Completion-side counterpart of struct io_uring_rbuf_rqe; the field
+ * names, types and order are identical (16 bytes, explicitly padded).
+ * NOTE(review): presumably reports where received data landed - confirm
+ * against the code that fills it in.
+ */
+struct io_uring_rbuf_cqe {
+ __u32 off;
+ __u32 len;
+ __u16 region;
+ __u8 __pad[6];
+};
+
+/*
+ * Embedded as 'rq_off' in struct io_uring_zc_rx_ifq_reg.
+ * NOTE(review): presumably byte offsets of the ring head, tail and entry
+ * array inside the mapped ring area - confirm against the mmap code.
+ */
+struct io_rbuf_rqring_offsets {
+ __u32 head;
+ __u32 tail;
+ __u32 rqes;
+ __u8 __pad[4];
+};
+
+/*
+ * Argument for IORING_REGISTER_ZC_RX_IFQ
+ *
+ * All fields are fixed-width and the layout contains no implicit holes,
+ * so the structure looks the same to every ABI that shares these types.
+ */
+struct io_uring_zc_rx_ifq_reg {
+	__u32	if_idx;
+	/* hw rx descriptor ring id */
+	__u32	if_rxq_id;
+	__u32	region_id;
+	/* number of entries in the rbuf ring */
+	__u32	rq_entries;
+	__u32	flags;
+	__u16	cpu;
+	/*
+	 * Explicit padding for what used to be a compiler-inserted 2-byte
+	 * hole in front of mmap_sz; offsets and total size are unchanged.
+	 */
+	__u8	__pad[2];
+
+	__u32	mmap_sz;
+	struct io_rbuf_rqring_offsets rq_off;
+};
+
#ifdef __cplusplus
}
#endif
@@ -8,7 +8,8 @@ obj-$(CONFIG_IO_URING) += io_uring.o xattr.o nop.o fs.o splice.o \
statx.o net.o msg_ring.o timeout.o \
sqpoll.o fdinfo.o tctx.o poll.o \
cancel.o kbuf.o rsrc.o rw.o opdef.o \
- notif.o waitid.o register.o truncate.o
+ notif.o waitid.o register.o truncate.o \
+ zc_rx.o
obj-$(CONFIG_IO_WQ) += io-wq.o
obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o
@@ -95,6 +95,7 @@
#include "waitid.h"
#include "futex.h"
#include "napi.h"
+#include "zc_rx.h"
#include "timeout.h"
#include "poll.h"
@@ -2861,6 +2862,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
return;
mutex_lock(&ctx->uring_lock);
+ io_unregister_zc_rx_ifqs(ctx);
if (ctx->buf_data)
__io_sqe_buffers_unregister(ctx);
if (ctx->file_data)
@@ -3032,6 +3034,11 @@ static __cold void io_ring_exit_work(struct work_struct *work)
io_cqring_overflow_kill(ctx);
mutex_unlock(&ctx->uring_lock);
}
+ if (ctx->ifq) {
+ mutex_lock(&ctx->uring_lock);
+ io_shutdown_zc_rx_ifqs(ctx);
+ mutex_unlock(&ctx->uring_lock);
+ }
if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
io_move_task_work_from_local(ctx);
@@ -27,6 +27,7 @@
#include "cancel.h"
#include "kbuf.h"
#include "napi.h"
+#include "zc_rx.h"
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
IORING_REGISTER_LAST + IORING_OP_LAST)
@@ -563,6 +564,12 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
break;
ret = io_unregister_napi(ctx, arg);
break;
+ case IORING_REGISTER_ZC_RX_IFQ:
+ ret = -EINVAL;
+ if (!arg || nr_args != 1)
+ break;
+ ret = io_register_zc_rx_ifq(ctx, arg);
+ break;
default:
ret = -EINVAL;
break;
new file mode 100644
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+#if defined(CONFIG_PAGE_POOL)
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/overflow.h>
+#include <linux/io_uring.h>
+
+#include <uapi/linux/io_uring.h>
+
+#include "io_uring.h"
+#include "kbuf.h"
+#include "zc_rx.h"
+
+/*
+ * Allocate the rbuf ring for @ifq as one zeroed page allocation: a
+ * struct io_uring header immediately followed by @reg->rq_entries entries
+ * of struct io_uring_rbuf_rqe. On success ifq->rq_ring points at the
+ * header and ifq->rqes at the first entry.
+ *
+ * Returns 0 on success, -EOVERFLOW if the requested size does not fit in
+ * a size_t, or -ENOMEM if the pages cannot be allocated.
+ */
+static int io_allocate_rbuf_ring(struct io_zc_rx_ifq *ifq,
+				 struct io_uring_zc_rx_ifq_reg *reg)
+{
+	/* __GFP_COMP: compound allocation, released with folio_put() later */
+	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
+	size_t off, rq_size, ring_size;
+	void *ptr;
+
+	off = sizeof(struct io_uring);
+	/*
+	 * rq_entries is copied in from userspace unchecked. Where size_t is
+	 * 32 bits the product can wrap and yield a ring far smaller than the
+	 * entry count recorded for it, so refuse instead of truncating.
+	 */
+	if (check_mul_overflow((size_t)reg->rq_entries,
+			       sizeof(struct io_uring_rbuf_rqe), &rq_size) ||
+	    check_add_overflow(off, rq_size, &ring_size))
+		return -EOVERFLOW;
+
+	ptr = (void *) __get_free_pages(gfp, get_order(ring_size));
+	if (!ptr)
+		return -ENOMEM;
+	ifq->rq_ring = (struct io_uring *)ptr;
+	ifq->rqes = (struct io_uring_rbuf_rqe *)((char *)ptr + off);
+	return 0;
+}
+
+/*
+ * Release the pages behind the rbuf ring by dropping the reference on the
+ * folio that contains it. Does nothing if no ring was ever allocated.
+ */
+static void io_free_rbuf_ring(struct io_zc_rx_ifq *ifq)
+{
+	void *ring = ifq->rq_ring;
+
+	if (!ring)
+		return;
+
+	folio_put(virt_to_folio(ring));
+}
+
+/*
+ * Allocate a zeroed ifq bound to @ctx. The rx queue id starts out as -1,
+ * i.e. "no queue chosen yet". Returns NULL if the allocation fails.
+ */
+static struct io_zc_rx_ifq *io_zc_rx_ifq_alloc(struct io_ring_ctx *ctx)
+{
+	struct io_zc_rx_ifq *new_ifq = kzalloc(sizeof(*new_ifq), GFP_KERNEL);
+
+	if (new_ifq) {
+		new_ifq->if_rxq_id = -1;
+		new_ifq->ctx = ctx;
+	}
+
+	return new_ifq;
+}
+
+/*
+ * Tear down an ifq: release its rbuf ring (if one was allocated) and then
+ * free the ifq itself. @ifq must not be NULL, it is dereferenced.
+ */
+static void io_zc_rx_ifq_free(struct io_zc_rx_ifq *ifq)
+{
+ io_free_rbuf_ring(ifq);
+ kfree(ifq);
+}
+
+/*
+ * Handle IORING_REGISTER_ZC_RX_IFQ: copy the registration argument from
+ * userspace, allocate an ifq with its rbuf ring and attach it to @ctx.
+ * At most one ifq can be registered per ring.
+ *
+ * Returns 0 on success, or:
+ *   -EINVAL  the ring was not set up with both IORING_SETUP_DEFER_TASKRUN
+ *            and IORING_SETUP_CQE32, if_rxq_id is -1, or rq_entries is 0
+ *   -EFAULT  @arg could not be copied from userspace
+ *   -EBUSY   an ifq is already registered on @ctx
+ *   -ENOMEM  the ifq could not be allocated
+ *   or whatever error io_allocate_rbuf_ring() reports.
+ */
+int io_register_zc_rx_ifq(struct io_ring_ctx *ctx,
+			  struct io_uring_zc_rx_ifq_reg __user *arg)
+{
+	struct io_uring_zc_rx_ifq_reg reg;
+	struct io_zc_rx_ifq *ifq;
+	int ret;
+
+	if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN &&
+	      ctx->flags & IORING_SETUP_CQE32))
+		return -EINVAL;
+	if (copy_from_user(&reg, arg, sizeof(reg)))
+		return -EFAULT;
+	if (ctx->ifq)
+		return -EBUSY;
+	/* -1 is the value a freshly allocated ifq carries, not a real queue */
+	if (reg.if_rxq_id == -1)
+		return -EINVAL;
+	/* a ring without entries could never carry a buffer */
+	if (!reg.rq_entries)
+		return -EINVAL;
+
+	ifq = io_zc_rx_ifq_alloc(ctx);
+	if (!ifq)
+		return -ENOMEM;
+
+	ret = io_allocate_rbuf_ring(ifq, &reg);
+	if (ret)
+		goto err;
+
+	ifq->rq_entries = reg.rq_entries;
+	ifq->if_rxq_id = reg.if_rxq_id;
+	/* publish only once the ifq is fully set up */
+	ctx->ifq = ifq;
+
+	return 0;
+err:
+	io_zc_rx_ifq_free(ifq);
+	return ret;
+}
+
+/*
+ * Detach the ifq registered on @ctx, if there is one, and free it.
+ * The caller must hold ctx->uring_lock.
+ */
+void io_unregister_zc_rx_ifqs(struct io_ring_ctx *ctx)
+{
+	struct io_zc_rx_ifq *old;
+
+	lockdep_assert_held(&ctx->uring_lock);
+
+	old = ctx->ifq;
+	if (old) {
+		/* clear the pointer before the memory goes away */
+		ctx->ifq = NULL;
+		io_zc_rx_ifq_free(old);
+	}
+}
+
+/*
+ * Shutdown hook for the registered ifq. For now it only asserts that the
+ * caller holds ctx->uring_lock; no teardown work is performed here.
+ */
+void io_shutdown_zc_rx_ifqs(struct io_ring_ctx *ctx)
+{
+ lockdep_assert_held(&ctx->uring_lock);
+}
+
+#endif
new file mode 100644
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef IOU_ZC_RX_H
+#define IOU_ZC_RX_H
+
+/*
+ * Kernel-side state for one registered zero-copy rx interface queue.
+ * A ring context points at it through ctx->ifq.
+ */
+struct io_zc_rx_ifq {
+ /* ring context this ifq was allocated for */
+ struct io_ring_ctx *ctx;
+ /* NOTE(review): never assigned by the registration code visible here */
+ struct net_device *dev;
+ /* start of the rbuf ring allocation (the ring header) */
+ struct io_uring *rq_ring;
+ /* entry array, located right after the header in the same allocation */
+ struct io_uring_rbuf_rqe *rqes;
+ /* number of entries, as requested at registration time */
+ u32 rq_entries;
+
+ /* hw rx descriptor ring id */
+ u32 if_rxq_id;
+};
+
+/*
+ * The implementations are only built with CONFIG_PAGE_POOL. Without it,
+ * callers get the inline stubs below: registration fails with
+ * -EOPNOTSUPP and the unregister/shutdown hooks do nothing.
+ */
+#if defined(CONFIG_PAGE_POOL)
+int io_register_zc_rx_ifq(struct io_ring_ctx *ctx,
+ struct io_uring_zc_rx_ifq_reg __user *arg);
+void io_unregister_zc_rx_ifqs(struct io_ring_ctx *ctx);
+void io_shutdown_zc_rx_ifqs(struct io_ring_ctx *ctx);
+#else
+static inline int io_register_zc_rx_ifq(struct io_ring_ctx *ctx,
+ struct io_uring_zc_rx_ifq_reg __user *arg)
+{
+ return -EOPNOTSUPP;
+}
+static inline void io_unregister_zc_rx_ifqs(struct io_ring_ctx *ctx)
+{
+}
+static inline void io_shutdown_zc_rx_ifqs(struct io_ring_ctx *ctx)
+{
+}
+#endif
+
+#endif