@@ -12,6 +12,11 @@ struct io_zc_rx_buf {
};
#if defined(CONFIG_IO_URING)
+
+#if defined(CONFIG_PAGE_POOL)
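+/* page pool memory provider backed by io_uring zero copy rx buffers */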
+extern const struct memory_provider_ops io_uring_pp_zc_ops;
+#endif
+
int io_uring_cmd_sock(struct io_uring_cmd *cmd, unsigned int issue_flags);
#else
@@ -227,6 +227,7 @@ netmem_ref page_pool_alloc_frag_netmem(struct page_pool *pool,
struct page_pool *page_pool_create(const struct page_pool_params *params);
struct page_pool *page_pool_create_percpu(const struct page_pool_params *params,
int cpuid);
+void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem);
struct xdp_mem_info;
@@ -8,6 +8,7 @@
#include <linux/nospec.h>
#include <net/tcp.h>
#include <net/af_unix.h>
+#include <trace/events/page_pool.h>
#include <uapi/linux/io_uring.h>
@@ -357,4 +358,205 @@ int io_register_zc_rx_sock(struct io_ring_ctx *ctx,
return 0;
}
+static inline struct io_zc_rx_buf *io_niov_to_buf(struct net_iov *niov)
+{
+ return container_of(niov, struct io_zc_rx_buf, niov);
+}
+
+static inline unsigned int io_buf_pgid(struct io_zc_rx_pool *pool,
+ struct io_zc_rx_buf *buf)
+{
+ return buf - pool->bufs;
+}
+
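+/* take a userspace (IO_ZC_RX_UREF) reference on a buffer */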
+static __maybe_unused void io_zc_rx_get_buf_uref(struct io_zc_rx_buf *buf)
+{
+ atomic_long_add(IO_ZC_RX_UREF, &buf->niov.pp_ref_count);
+}
+
+static bool io_zc_rx_buf_put(struct io_zc_rx_buf *buf, int nr)
+{
+ return atomic_long_sub_and_test(nr, &buf->niov.pp_ref_count);
+}
+
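+/*
+ * Drop a userspace reference. Returns true if it was the last reference
+ * held on the buffer, false if no userspace reference was present or
+ * other references remain.
+ */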
+static bool io_zc_rx_put_buf_uref(struct io_zc_rx_buf *buf)
+{
+ if (atomic_long_read(&buf->niov.pp_ref_count) < IO_ZC_RX_UREF)
+ return false;
+
+ return io_zc_rx_buf_put(buf, IO_ZC_RX_UREF);
+}
+
+static inline netmem_ref io_zc_buf_to_netmem(struct io_zc_rx_buf *buf)
+{
+ return net_iov_to_netmem(&buf->niov);
+}
+
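+/* mark the netmem as owned by this page pool and stash it in the alloc cache */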
+static inline void io_zc_add_pp_cache(struct page_pool *pp,
+ struct io_zc_rx_buf *buf)
+{
+ netmem_ref netmem = io_zc_buf_to_netmem(buf);
+
+ page_pool_set_pp_info(pp, netmem);
+ pp->alloc.cache[pp->alloc.count++] = netmem;
+}
+
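+/* number of refill ring entries posted by userspace but not yet consumed */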
+static inline u32 io_zc_rx_rqring_entries(struct io_zc_rx_ifq *ifq)
+{
+ u32 entries;
+
+ entries = smp_load_acquire(&ifq->rq_ring->tail) - ifq->cached_rq_head;
+ return min(entries, ifq->rq_entries);
+}
+
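+/*
+ * Fast path refill: consume entries userspace posted to the refill ring,
+ * drop their userspace references and feed buffers that reach zero
+ * references back into the page pool's alloc cache.
+ */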
+static void io_zc_rx_ring_refill(struct page_pool *pp,
+ struct io_zc_rx_ifq *ifq)
+{
+ unsigned int entries = io_zc_rx_rqring_entries(ifq);
+ unsigned int mask = ifq->rq_entries - 1;
+ struct io_zc_rx_pool *pool = ifq->pool;
+
+ if (unlikely(!entries))
+ return;
+
+ while (entries--) {
+ unsigned int rq_idx = ifq->cached_rq_head++ & mask;
+ struct io_uring_rbuf_rqe *rqe = &ifq->rqes[rq_idx];
+ u32 pgid = rqe->off / PAGE_SIZE;
+ struct io_zc_rx_buf *buf;
+
+ /* rqes are written by userspace, don't trust the index blindly */
+ if (unlikely(pgid >= pool->nr_bufs))
+ continue;
+ buf = &pool->bufs[pgid];
+
+ if (!io_zc_rx_put_buf_uref(buf))
+ continue;
+ io_zc_add_pp_cache(pp, buf);
+ if (pp->alloc.count >= PP_ALLOC_CACHE_REFILL)
+ break;
+ }
+ smp_store_release(&ifq->rq_ring->head, ifq->cached_rq_head);
+}
+
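+/* slow path refill from the internal buffer freelist */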
+static void io_zc_rx_refill_slow(struct page_pool *pp, struct io_zc_rx_ifq *ifq)
+{
+ struct io_zc_rx_pool *pool = ifq->pool;
+
+ spin_lock_bh(&pool->freelist_lock);
+ while (pool->free_count && pp->alloc.count < PP_ALLOC_CACHE_REFILL) {
+ struct io_zc_rx_buf *buf;
+ u32 pgid;
+
+ pgid = pool->freelist[--pool->free_count];
+ buf = &pool->bufs[pgid];
+
+ io_zc_add_pp_cache(pp, buf);
+ pp->pages_state_hold_cnt++;
+ trace_page_pool_state_hold(pp, io_zc_buf_to_netmem(buf),
+ pp->pages_state_hold_cnt);
+ }
+ spin_unlock_bh(&pool->freelist_lock);
+}
+
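+/* return a buffer to the freelist for later reuse */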
+static void io_zc_rx_recycle_buf(struct io_zc_rx_pool *pool,
+ struct io_zc_rx_buf *buf)
+{
+ spin_lock_bh(&pool->freelist_lock);
+ pool->freelist[pool->free_count++] = io_buf_pgid(pool, buf);
+ spin_unlock_bh(&pool->freelist_lock);
+}
+
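+/*
+ * Memory provider ->alloc_pages hook: refill the alloc cache from the
+ * userspace refill ring, falling back to the freelist, and hand out a
+ * single netmem, or 0 if no buffers are available.
+ */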
+static netmem_ref io_pp_zc_alloc_pages(struct page_pool *pp, gfp_t gfp)
+{
+ struct io_zc_rx_ifq *ifq = pp->mp_priv;
+
+ /* the page pool should only call us when its cache is empty */
+ if (unlikely(pp->alloc.count))
+ goto out_return;
+
+ io_zc_rx_ring_refill(pp, ifq);
+ if (likely(pp->alloc.count))
+ goto out_return;
+
+ io_zc_rx_refill_slow(pp, ifq);
+ if (!pp->alloc.count)
+ return 0;
+out_return:
+ return pp->alloc.cache[--pp->alloc.count];
+}
+
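+/*
+ * Memory provider ->release_page hook: drop one reference and recycle the
+ * buffer to the freelist once the last reference is gone.
+ */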
+static bool io_pp_zc_release_page(struct page_pool *pp, netmem_ref netmem)
+{
+ struct io_zc_rx_ifq *ifq = pp->mp_priv;
+ struct io_zc_rx_buf *buf;
+ struct net_iov *niov;
+
+ if (WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
+ return false;
+
+ niov = netmem_to_net_iov(netmem);
+ buf = io_niov_to_buf(niov);
+
+ if (io_zc_rx_buf_put(buf, 1))
+ io_zc_rx_recycle_buf(ifq->pool, buf);
+ return false;
+}
+
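+/*
+ * Called when the page pool is being destroyed: reclaim buffers still
+ * referenced by userspace, recycling them and accounting them as released.
+ */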
+static void io_pp_zc_scrub(struct page_pool *pp)
+{
+ struct io_zc_rx_ifq *ifq = pp->mp_priv;
+ struct io_zc_rx_pool *pool = ifq->pool;
+ int i;
+
+ for (i = 0; i < pool->nr_bufs; i++) {
+ struct io_zc_rx_buf *buf = &pool->bufs[i];
+ int count;
+
+ if (!io_zc_rx_put_buf_uref(buf))
+ continue;
+ io_zc_rx_recycle_buf(pool, buf);
+
+ count = atomic_inc_return_relaxed(&pp->pages_state_release_cnt);
+ trace_page_pool_state_release(pp, io_zc_buf_to_netmem(buf), count);
+ }
+}
+
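+/*
+ * Only order-0, napi-driven page pools that don't do their own DMA mapping
+ * or syncing are supported. Pin the io_uring context while the page pool
+ * is attached.
+ */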
+static int io_pp_zc_init(struct page_pool *pp)
+{
+ struct io_zc_rx_ifq *ifq = pp->mp_priv;
+
+ if (!ifq)
+ return -EINVAL;
+ if (pp->p.order != 0)
+ return -EINVAL;
+ if (!pp->p.napi)
+ return -EINVAL;
+ if (pp->p.flags & PP_FLAG_DMA_MAP)
+ return -EOPNOTSUPP;
+ if (pp->p.flags & PP_FLAG_DMA_SYNC_DEV)
+ return -EOPNOTSUPP;
+
+ percpu_ref_get(&ifq->ctx->refs);
+ ifq->pp = pp;
+ return 0;
+}
+
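+/* detach from the page pool; all buffers should be back on the freelist */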
+static void io_pp_zc_destroy(struct page_pool *pp)
+{
+ struct io_zc_rx_ifq *ifq = pp->mp_priv;
+ struct io_zc_rx_pool *pool = ifq->pool;
+
+ ifq->pp = NULL;
+
+ if (WARN_ON_ONCE(pool->free_count != pool->nr_bufs))
+ return;
+ percpu_ref_put(&ifq->ctx->refs);
+}
+
+const struct memory_provider_ops io_uring_pp_zc_ops = {
+ .alloc_pages = io_pp_zc_alloc_pages,
+ .release_page = io_pp_zc_release_page,
+ .init = io_pp_zc_init,
+ .destroy = io_pp_zc_destroy,
+ .scrub = io_pp_zc_scrub,
+};
+EXPORT_SYMBOL(io_uring_pp_zc_ops);
+
#endif
@@ -10,6 +10,9 @@
#define IO_ZC_IFQ_IDX_OFFSET 16
#define IO_ZC_IFQ_IDX_MASK ((1U << IO_ZC_IFQ_IDX_OFFSET) - 1)
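+/*
+ * A buffer's references live in niov.pp_ref_count: userspace references are
+ * counted in units of IO_ZC_RX_UREF in the upper bits, while the low
+ * IO_ZC_RX_KREF_MASK bits are left for kernel references.
+ */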
+#define IO_ZC_RX_UREF 0x10000
+#define IO_ZC_RX_KREF_MASK (IO_ZC_RX_UREF - 1)
+
struct io_zc_rx_pool {
struct io_zc_rx_ifq *ifq;
struct io_zc_rx_buf *bufs;
@@ -26,10 +29,12 @@ struct io_zc_rx_ifq {
struct io_ring_ctx *ctx;
struct net_device *dev;
struct io_zc_rx_pool *pool;
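+ /* page pool this ifq is attached to as a memory provider */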
+ struct page_pool *pp;
struct io_uring *rq_ring;
struct io_uring_rbuf_rqe *rqes;
u32 rq_entries;
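+ /* cached refill ring head, published to rq_ring->head after refilling */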
+ u32 cached_rq_head;
/* hw rx descriptor ring id */
u32 if_rxq_id;
@@ -460,7 +460,7 @@ static bool page_pool_dma_map(struct page_pool *pool, netmem_ref netmem)
return false;
}
-static void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
+void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem)
{
netmem_set_pp(netmem, pool);
netmem_or_pp_magic(netmem, PP_SIGNATURE);