| Message ID | 20250215000947.789731-8-dw@davidwei.uk |
| --- | --- |
| State | New |
| Series | io_uring zero copy rx |
On 15-02-2025 at 01:09, David Wei wrote:
> Set the page pool memory provider for the rx queue configured for zero
> copy to io_uring. Then the rx queue is reset using
> netdev_rx_queue_restart() and netdev core + page pool will take care of
> filling the rx queue from the io_uring zero copy memory provider.
>
> For now, there is only one ifq so its destruction happens implicitly
> during io_uring cleanup.
>
> Reviewed-by: Jens Axboe <axboe@kernel.dk>
> Signed-off-by: David Wei <dw@davidwei.uk>
> ---
>  io_uring/zcrx.c | 49 +++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 41 insertions(+), 8 deletions(-)
>
> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
> index 8833879d94ba..7d24fc98b306 100644
> --- a/io_uring/zcrx.c
> +++ b/io_uring/zcrx.c
> [...]
> @@ -444,6 +475,8 @@ void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
>
>  	if (ctx->ifq)
>  		io_zcrx_scrub(ctx->ifq);
> +
> +	io_close_queue(ctx->ifq);

If ctx->ifq is NULL (which seems to be not unlikely, given the if
statement above) then you'll get a NULL pointer dereference in
io_close_queue().

> }
>
>  static inline u32 io_zcrx_rqring_entries(struct io_zcrx_ifq *ifq)
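[Editor's note: one defensive resolution, sketched here purely as an illustration (the replies below settle it with caller-side reasoning instead), would be an early guard in the callee so a NULL ifq is a no-op rather than a crash:]

```c
/* Hypothetical sketch, not the patch as posted: an early guard makes
 * io_close_queue() tolerate a NULL ifq, removing the dereference
 * hazard pointed out above.
 */
static void io_close_queue(struct io_zcrx_ifq *ifq)
{
	if (!ifq || ifq->if_rxq == -1)
		return;
	/* ... teardown as in the patch hunk above ... */
}
```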
On 2025-02-18 11:40, Kees Bakker wrote:
> On 15-02-2025 at 01:09, David Wei wrote:
>> Set the page pool memory provider for the rx queue configured for zero
>> copy to io_uring. Then the rx queue is reset using
>> netdev_rx_queue_restart() and netdev core + page pool will take care of
>> filling the rx queue from the io_uring zero copy memory provider.
>>
>> For now, there is only one ifq so its destruction happens implicitly
>> during io_uring cleanup.
>>
>> Reviewed-by: Jens Axboe <axboe@kernel.dk>
>> Signed-off-by: David Wei <dw@davidwei.uk>
>> ---
>>  io_uring/zcrx.c | 49 +++++++++++++++++++++++++++++++++++++++++--------
>>  1 file changed, 41 insertions(+), 8 deletions(-)
>>
>> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
>> index 8833879d94ba..7d24fc98b306 100644
>> --- a/io_uring/zcrx.c
>> +++ b/io_uring/zcrx.c
>> [...]
>> @@ -444,6 +475,8 @@ void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
>>
>>  	if (ctx->ifq)
>>  		io_zcrx_scrub(ctx->ifq);
>> +
>> +	io_close_queue(ctx->ifq);
> If ctx->ifq is NULL (which seems to be not unlikely, given the if
> statement above) then you'll get a NULL pointer dereference in
> io_close_queue().

The only caller of io_shutdown_zcrx_ifqs() is io_ring_exit_work(), which
checks ctx->ifq first. That does mean the ctx->ifq check is redundant in
this function, though.

>> }
>>
>> static inline u32 io_zcrx_rqring_entries(struct io_zcrx_ifq *ifq)
>
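[Editor's note: for context, the call site being described has roughly the following shape; this is a sketch of io_ring_exit_work() from memory, not verbatim upstream code:]

```c
/* Approximate shape of the only call site, io_ring_exit_work() in
 * io_uring/io_uring.c (sketch, not verbatim): ctx->ifq is tested
 * before the shutdown helper ever runs, under the uring_lock.
 */
if (ctx->ifq) {
	mutex_lock(&ctx->uring_lock);
	io_shutdown_zcrx_ifqs(ctx);
	mutex_unlock(&ctx->uring_lock);
}
```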
On 2/18/25 22:06, David Wei wrote:
> On 2025-02-18 11:40, Kees Bakker wrote:
>> On 15-02-2025 at 01:09, David Wei wrote:
>>> Set the page pool memory provider for the rx queue configured for zero
>>> copy to io_uring. Then the rx queue is reset using
>>> netdev_rx_queue_restart() and netdev core + page pool will take care of
>>> filling the rx queue from the io_uring zero copy memory provider.
>>>
>>> For now, there is only one ifq so its destruction happens implicitly
>>> during io_uring cleanup.
>>>
>>> Reviewed-by: Jens Axboe <axboe@kernel.dk>
>>> Signed-off-by: David Wei <dw@davidwei.uk>
>>> ---
>>>  io_uring/zcrx.c | 49 +++++++++++++++++++++++++++++++++++++++++--------
>>>  1 file changed, 41 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
>>> index 8833879d94ba..7d24fc98b306 100644
>>> --- a/io_uring/zcrx.c
>>> +++ b/io_uring/zcrx.c
>>> [...]
>>> @@ -444,6 +475,8 @@ void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
>>>
>>>  	if (ctx->ifq)
>>>  		io_zcrx_scrub(ctx->ifq);
>>> +
>>> +	io_close_queue(ctx->ifq);
>> If ctx->ifq is NULL (which seems to be not unlikely, given the if
>> statement above) then you'll get a NULL pointer dereference in
>> io_close_queue().
>
> The only caller of io_shutdown_zcrx_ifqs() is io_ring_exit_work(), which
> checks ctx->ifq first. That does mean the ctx->ifq check is redundant in
> this function, though.

And despite the check being outside of the lock, it's even well
synchronised, but in any case we should just make it saner. Thanks for
letting us know.
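[Editor's note: one way to read "make it saner" (an assumption about the follow-up fix, not a quote of it) is to keep a single guard inside the function so it no longer leans on the caller's check:]

```c
/* Sketch of a saner io_shutdown_zcrx_ifqs(), assuming both teardown
 * steps move under one NULL test; the lockdep assertion reflects the
 * uring_lock held by the caller.
 */
void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
{
	lockdep_assert_held(&ctx->uring_lock);

	if (ctx->ifq) {
		io_zcrx_scrub(ctx->ifq);
		io_close_queue(ctx->ifq);
	}
}
```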
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 8833879d94ba..7d24fc98b306 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -11,11 +11,12 @@
 #include <net/page_pool/helpers.h>
 #include <net/page_pool/memory_provider.h>
 #include <net/netlink.h>
-
-#include <trace/events/page_pool.h>
+#include <net/netdev_rx_queue.h>
 #include <net/tcp.h>
 #include <net/rps.h>
 
+#include <trace/events/page_pool.h>
+
 #include <uapi/linux/io_uring.h>
 
 #include "io_uring.h"
@@ -275,8 +276,34 @@ static void io_zcrx_drop_netdev(struct io_zcrx_ifq *ifq)
 	spin_unlock(&ifq->lock);
 }
 
+static void io_close_queue(struct io_zcrx_ifq *ifq)
+{
+	struct net_device *netdev;
+	netdevice_tracker netdev_tracker;
+	struct pp_memory_provider_params p = {
+		.mp_ops = &io_uring_pp_zc_ops,
+		.mp_priv = ifq,
+	};
+
+	if (ifq->if_rxq == -1)
+		return;
+
+	spin_lock(&ifq->lock);
+	netdev = ifq->netdev;
+	netdev_tracker = ifq->netdev_tracker;
+	ifq->netdev = NULL;
+	spin_unlock(&ifq->lock);
+
+	if (netdev) {
+		net_mp_close_rxq(netdev, ifq->if_rxq, &p);
+		netdev_put(netdev, &netdev_tracker);
+	}
+	ifq->if_rxq = -1;
+}
+
 static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
 {
+	io_close_queue(ifq);
 	io_zcrx_drop_netdev(ifq);
 
 	if (ifq->area)
@@ -291,6 +318,7 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
 int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 			 struct io_uring_zcrx_ifq_reg __user *arg)
 {
+	struct pp_memory_provider_params mp_param = {};
 	struct io_uring_zcrx_area_reg area;
 	struct io_uring_zcrx_ifq_reg reg;
 	struct io_uring_region_desc rd;
@@ -341,7 +369,6 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 		goto err;
 
 	ifq->rq_entries = reg.rq_entries;
-	ifq->if_rxq = reg.if_rxq;
 
 	ret = -ENODEV;
 	ifq->netdev = netdev_get_by_index(current->nsproxy->net_ns, reg.if_idx,
@@ -358,16 +385,20 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	if (ret)
 		goto err;
 
+	mp_param.mp_ops = &io_uring_pp_zc_ops;
+	mp_param.mp_priv = ifq;
+	ret = net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param);
+	if (ret)
+		goto err;
+	ifq->if_rxq = reg.if_rxq;
+
 	reg.offsets.rqes = sizeof(struct io_uring);
 	reg.offsets.head = offsetof(struct io_uring, head);
 	reg.offsets.tail = offsetof(struct io_uring, tail);
 
 	if (copy_to_user(arg, &reg, sizeof(reg)) ||
-	    copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd))) {
-		ret = -EFAULT;
-		goto err;
-	}
-	if (copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) {
+	    copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd)) ||
+	    copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) {
 		ret = -EFAULT;
 		goto err;
 	}
@@ -444,6 +475,8 @@ void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
 
 	if (ctx->ifq)
 		io_zcrx_scrub(ctx->ifq);
+
+	io_close_queue(ctx->ifq);
 }
 
 static inline u32 io_zcrx_rqring_entries(struct io_zcrx_ifq *ifq)
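[Editor's note: worth calling out in the hunks above: ifq->if_rxq is now assigned only after net_mp_open_rxq() succeeds, so the registration error path (io_zcrx_ifq_free() -> io_close_queue()) only closes a queue that was actually opened. A condensed view of that invariant, with comments added here; the code mirrors the diff:]

```c
/* if_rxq stays -1 until the memory provider is installed on the rx
 * queue, so io_close_queue()'s "if_rxq == -1" early return doubles as
 * "the queue was never opened".
 */
ret = net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param);
if (ret)
	goto err;		/* if_rxq still -1: io_close_queue() no-ops */
ifq->if_rxq = reg.if_rxq;	/* from here on, the err path closes the queue */
```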