@@ -5,6 +5,8 @@
#include <linux/nospec.h>
#include <linux/netdevice.h>
#include <linux/io_uring.h>
+#include <linux/skbuff_ref.h>
+#include <net/busy_poll.h>
#include <net/page_pool/helpers.h>
#include <net/page_pool/memory_provider.h>
#include <trace/events/page_pool.h>
@@ -28,6 +30,11 @@ struct io_zcrx_args {
struct socket *sock;
};
+struct io_zc_refill_data { /* in/out argument block passed to io_napi_refill() */
+ struct io_zcrx_ifq *ifq; /* in: queue whose page pool (ifq->pp) is allocated from */
+ struct net_iov *niov; /* out: stored by io_napi_refill() on success only */
+};
+
static const struct memory_provider_ops io_uring_pp_zc_ops;
static inline struct io_zcrx_area *io_zcrx_iov_to_area(const struct net_iov *niov)
@@ -37,6 +44,13 @@ static inline struct io_zcrx_area *io_zcrx_iov_to_area(const struct net_iov *nio
return container_of(owner, struct io_zcrx_area, nia);
}
+/* Return the page used as the memory behind @niov. */
+static inline struct page *io_zcrx_iov_page(const struct net_iov *niov)
+{
+	/* the owning area's pages[] array is indexed by the niov's index */
+	return io_zcrx_iov_to_area(niov)->pages[net_iov_idx(niov)];
+}
+
static int io_open_zc_rxq(struct io_zcrx_ifq *ifq, unsigned ifq_idx)
{
struct netdev_rx_queue *rxq;
@@ -59,6 +73,13 @@ static int io_open_zc_rxq(struct io_zcrx_ifq *ifq, unsigned ifq_idx)
ret = netdev_rx_queue_restart(ifq->dev, ifq->if_rxq);
if (ret)
goto fail;
+
+ if (WARN_ON_ONCE(!ifq->pp)) {
+ ret = -EFAULT;
+ goto fail;
+ }
+ /* grab napi_id while still under rtnl */
+ ifq->napi_id = ifq->pp->p.napi->napi_id;
return 0;
fail:
rxq->mp_params.mp_ops = NULL;
@@ -526,6 +547,7 @@ static void io_pp_zc_destroy(struct page_pool *pp)
page_pool_mp_release_area(pp, &ifq->area->nia);
ifq->pp = NULL;
+ ifq->napi_id = 0;
if (WARN_ON_ONCE(area->free_count != area->nia.num_niovs))
return;
@@ -540,6 +562,34 @@ static const struct memory_provider_ops io_uring_pp_zc_ops = {
.scrub = io_pp_zc_scrub,
};
+/*
+ * Callback handed to napi_execute(): allocate one netmem from ifq->pp and,
+ * if it is a net_iov, pass it back through rd->niov.
+ */
+static void io_napi_refill(void *data)
+{
+	struct io_zc_refill_data *rd = data;
+	struct io_zcrx_ifq *ifq = rd->ifq;
+	netmem_ref netmem;
+
+	if (WARN_ON_ONCE(!ifq->pp))
+		return;
+
+	netmem = page_pool_alloc_netmem(ifq->pp, GFP_ATOMIC | __GFP_NOWARN);
+	if (netmem && !WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
+		rd->niov = netmem_to_net_iov(netmem); /* untouched on any failure */
+}
+
+/* Run io_napi_refill() through napi_execute() on ifq->napi_id. */
+static struct net_iov *io_zc_get_buf_task_safe(struct io_zcrx_ifq *ifq)
+{
+	struct io_zc_refill_data refill = { .ifq = ifq, .niov = NULL };
+
+	napi_execute(ifq->napi_id, io_napi_refill, &refill);
+	/* .niov is still NULL unless the callback stored a buffer */
+	return refill.niov;
+}
+
static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
struct io_zcrx_ifq *ifq, int off, int len)
{
@@ -563,6 +613,45 @@ static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov,
return true;
}
+/*
+ * Copy @len bytes starting at @data + @offset into buffers obtained from
+ * io_zc_get_buf_task_safe(), at most PAGE_SIZE bytes per buffer, and queue
+ * each filled buffer with io_zcrx_queue_cqe() at buffer offset 0.
+ *
+ * Returns the number of bytes copied when at least one buffer was queued.
+ * Otherwise returns 0 for @len == 0, -ENOMEM if no buffer could be obtained,
+ * or -ENOSPC if io_zcrx_queue_cqe() failed.
+ */
+static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
+				  void *data, unsigned int offset, size_t len)
+{
+	size_t copied = 0;
+	int ret = 0;
+
+	/*
+	 * Loop on the bytes still to copy. @offset is a position in the source
+	 * and grows while @len shrinks, so the two must not be compared.
+	 */
+	while (len) {
+		size_t copy_size = min_t(size_t, PAGE_SIZE, len);
+		struct net_iov *niov;
+		u8 *vaddr;
+
+		niov = io_zc_get_buf_task_safe(ifq);
+		if (!niov) {
+			ret = -ENOMEM;
+			break;
+		}
+
+		vaddr = kmap_local_page(io_zcrx_iov_page(niov));
+		memcpy(vaddr, data + offset, copy_size);
+		kunmap_local(vaddr);
+
+		if (!io_zcrx_queue_cqe(req, niov, ifq, 0, copy_size)) {
+			napi_pp_put_page(net_iov_to_netmem(niov));
+			/* break, not return: bytes already queued must be reported */
+			ret = -ENOSPC;
+			break;
+		}
+
+		io_zcrx_get_buf_uref(niov);
+		napi_pp_put_page(net_iov_to_netmem(niov));
+
+		offset += copy_size;
+		len -= copy_size;
+		copied += copy_size;
+	}
+
+	return copied ? (ssize_t)copied : ret;
+}
+
static int io_zcrx_recv_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
const skb_frag_t *frag, int off, int len)
{
@@ -570,8 +659,24 @@ static int io_zcrx_recv_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq,
off += skb_frag_off(frag);
- if (unlikely(!skb_frag_is_net_iov(frag)))
- return -EOPNOTSUPP;
+ if (unlikely(!skb_frag_is_net_iov(frag))) {
+ struct page *page = skb_frag_page(frag);
+ u32 p_off, p_len, t, copied = 0;
+ u8 *vaddr;
+ int ret = 0;
+
+ skb_frag_foreach_page(frag, off, len,
+ page, p_off, p_len, t) {
+ vaddr = kmap_local_page(page);
+ ret = io_zcrx_copy_chunk(req, ifq, vaddr, p_off, p_len);
+ kunmap_local(vaddr);
+
+ if (ret < 0)
+ return copied ? copied : ret;
+ copied += ret;
+ }
+ return copied;
+ }
niov = netmem_to_net_iov(frag->netmem);
if (niov->pp->mp_ops != &io_uring_pp_zc_ops ||
@@ -592,15 +697,29 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
struct io_zcrx_ifq *ifq = args->ifq;
struct io_kiocb *req = args->req;
struct sk_buff *frag_iter;
- unsigned start, start_off;
+ unsigned start, start_off = offset;
int i, copy, end, off;
int ret = 0;
- start = skb_headlen(skb);
- start_off = offset;
+ if (unlikely(offset < skb_headlen(skb))) {
+ ssize_t copied;
+ size_t to_copy;
- if (offset < start)
- return -EOPNOTSUPP;
+ to_copy = min_t(size_t, skb_headlen(skb) - offset, len);
+ copied = io_zcrx_copy_chunk(req, ifq, skb->data, offset, to_copy);
+ if (copied < 0) {
+ ret = copied;
+ goto out;
+ }
+ offset += copied;
+ len -= copied;
+ if (!len)
+ goto out;
+ if (offset != skb_headlen(skb))
+ goto out;
+ }
+
+ start = skb_headlen(skb);
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
const skb_frag_t *frag;
@@ -39,6 +39,7 @@ struct io_zcrx_ifq {
u32 if_rxq;
netdevice_tracker netdev_tracker;
+ unsigned napi_id;
};
#if defined(CONFIG_IO_URING_ZCRX)