diff mbox series

[v14,07/11] io_uring/zcrx: set pp memory provider for an rx queue

Message ID 20250215000947.789731-8-dw@davidwei.uk (mailing list archive)
State Handled Elsewhere
Headers show
Series io_uring zero copy rx | expand

Checks

Context Check Description
netdev/tree_selection success Guessing tree name failed - patch did not apply, async

Commit Message

David Wei Feb. 15, 2025, 12:09 a.m. UTC
Set the page pool memory provider for the rx queue configured for zero
copy to io_uring. Then the rx queue is reset using
netdev_rx_queue_restart() and netdev core + page pool will take care of
filling the rx queue from the io_uring zero copy memory provider.

For now, there is only one ifq so its destruction happens implicitly
during io_uring cleanup.

Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: David Wei <dw@davidwei.uk>
---
 io_uring/zcrx.c | 49 +++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 41 insertions(+), 8 deletions(-)

Comments

Kees Bakker Feb. 18, 2025, 7:40 p.m. UTC | #1
Op 15-02-2025 om 01:09 schreef David Wei:
> Set the page pool memory provider for the rx queue configured for zero
> copy to io_uring. Then the rx queue is reset using
> netdev_rx_queue_restart() and netdev core + page pool will take care of
> filling the rx queue from the io_uring zero copy memory provider.
>
> For now, there is only one ifq so its destruction happens implicitly
> during io_uring cleanup.
>
> Reviewed-by: Jens Axboe <axboe@kernel.dk>
> Signed-off-by: David Wei <dw@davidwei.uk>
> ---
>   io_uring/zcrx.c | 49 +++++++++++++++++++++++++++++++++++++++++--------
>   1 file changed, 41 insertions(+), 8 deletions(-)
>
> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
> index 8833879d94ba..7d24fc98b306 100644
> --- a/io_uring/zcrx.c
> +++ b/io_uring/zcrx.c
> [...]
> @@ -444,6 +475,8 @@ void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
>   
>   	if (ctx->ifq)
>   		io_zcrx_scrub(ctx->ifq);
> +
> +	io_close_queue(ctx->ifq);
If ctx->ifq is NULL (which seems to be not unlikely given the if 
statement above)
then you'll get a NULL pointer dereference in io_close_queue().
>   }
>   
>   static inline u32 io_zcrx_rqring_entries(struct io_zcrx_ifq *ifq)
David Wei Feb. 18, 2025, 10:06 p.m. UTC | #2
On 2025-02-18 11:40, Kees Bakker wrote:
> Op 15-02-2025 om 01:09 schreef David Wei:
>> Set the page pool memory provider for the rx queue configured for zero
>> copy to io_uring. Then the rx queue is reset using
>> netdev_rx_queue_restart() and netdev core + page pool will take care of
>> filling the rx queue from the io_uring zero copy memory provider.
>>
>> For now, there is only one ifq so its destruction happens implicitly
>> during io_uring cleanup.
>>
>> Reviewed-by: Jens Axboe <axboe@kernel.dk>
>> Signed-off-by: David Wei <dw@davidwei.uk>
>> ---
>>   io_uring/zcrx.c | 49 +++++++++++++++++++++++++++++++++++++++++--------
>>   1 file changed, 41 insertions(+), 8 deletions(-)
>>
>> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
>> index 8833879d94ba..7d24fc98b306 100644
>> --- a/io_uring/zcrx.c
>> +++ b/io_uring/zcrx.c
>> [...]
>> @@ -444,6 +475,8 @@ void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
>>         if (ctx->ifq)
>>           io_zcrx_scrub(ctx->ifq);
>> +
>> +    io_close_queue(ctx->ifq);
> If ctx->ifq is NULL (which seems to be not unlikely given the if statement above)
> then you'll get a NULL pointer dereference in io_close_queue().

The only caller of io_shutdown_zcrx_ifqs() is io_ring_exit_work() which
checks for ctx->ifq first. That does mean the ctx->ifq check is
redundant in this function though.

>>   }
>>     static inline u32 io_zcrx_rqring_entries(struct io_zcrx_ifq *ifq)
>
Pavel Begunkov Feb. 19, 2025, 9:54 a.m. UTC | #3
On 2/18/25 22:06, David Wei wrote:
> On 2025-02-18 11:40, Kees Bakker wrote:
>> Op 15-02-2025 om 01:09 schreef David Wei:
>>> Set the page pool memory provider for the rx queue configured for zero
>>> copy to io_uring. Then the rx queue is reset using
>>> netdev_rx_queue_restart() and netdev core + page pool will take care of
>>> filling the rx queue from the io_uring zero copy memory provider.
>>>
>>> For now, there is only one ifq so its destruction happens implicitly
>>> during io_uring cleanup.
>>>
>>> Reviewed-by: Jens Axboe <axboe@kernel.dk>
>>> Signed-off-by: David Wei <dw@davidwei.uk>
>>> ---
>>>    io_uring/zcrx.c | 49 +++++++++++++++++++++++++++++++++++++++++--------
>>>    1 file changed, 41 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
>>> index 8833879d94ba..7d24fc98b306 100644
>>> --- a/io_uring/zcrx.c
>>> +++ b/io_uring/zcrx.c
>>> [...]
>>> @@ -444,6 +475,8 @@ void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
>>>          if (ctx->ifq)
>>>            io_zcrx_scrub(ctx->ifq);
>>> +
>>> +    io_close_queue(ctx->ifq);
>> If ctx->ifq is NULL (which seems to be not unlikely given the if statement above)
>> then you'll get a NULL pointer dereference in io_close_queue().
> 
> The only caller of io_shutdown_zcrx_ifqs() is io_ring_exit_work() which
> checks for ctx->ifq first. That does mean the ctx->ifq check is
> redundant in this function though.

And despite the check being outside of the lock, it's even well
synchronised, but in any case we should just make it saner. Thanks
for letting know
diff mbox series

Patch

diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 8833879d94ba..7d24fc98b306 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -11,11 +11,12 @@ 
 #include <net/page_pool/helpers.h>
 #include <net/page_pool/memory_provider.h>
 #include <net/netlink.h>
-
-#include <trace/events/page_pool.h>
+#include <net/netdev_rx_queue.h>
 #include <net/tcp.h>
 #include <net/rps.h>
 
+#include <trace/events/page_pool.h>
+
 #include <uapi/linux/io_uring.h>
 
 #include "io_uring.h"
@@ -275,8 +276,34 @@  static void io_zcrx_drop_netdev(struct io_zcrx_ifq *ifq)
 	spin_unlock(&ifq->lock);
 }
 
+static void io_close_queue(struct io_zcrx_ifq *ifq)
+{
+	struct net_device *netdev;
+	netdevice_tracker netdev_tracker;
+	struct pp_memory_provider_params p = {
+		.mp_ops = &io_uring_pp_zc_ops,
+		.mp_priv = ifq,
+	};
+
+	if (ifq->if_rxq == -1)
+		return;
+
+	spin_lock(&ifq->lock);
+	netdev = ifq->netdev;
+	netdev_tracker = ifq->netdev_tracker;
+	ifq->netdev = NULL;
+	spin_unlock(&ifq->lock);
+
+	if (netdev) {
+		net_mp_close_rxq(netdev, ifq->if_rxq, &p);
+		netdev_put(netdev, &netdev_tracker);
+	}
+	ifq->if_rxq = -1;
+}
+
 static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
 {
+	io_close_queue(ifq);
 	io_zcrx_drop_netdev(ifq);
 
 	if (ifq->area)
@@ -291,6 +318,7 @@  static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq)
 int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 			  struct io_uring_zcrx_ifq_reg __user *arg)
 {
+	struct pp_memory_provider_params mp_param = {};
 	struct io_uring_zcrx_area_reg area;
 	struct io_uring_zcrx_ifq_reg reg;
 	struct io_uring_region_desc rd;
@@ -341,7 +369,6 @@  int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 		goto err;
 
 	ifq->rq_entries = reg.rq_entries;
-	ifq->if_rxq = reg.if_rxq;
 
 	ret = -ENODEV;
 	ifq->netdev = netdev_get_by_index(current->nsproxy->net_ns, reg.if_idx,
@@ -358,16 +385,20 @@  int io_register_zcrx_ifq(struct io_ring_ctx *ctx,
 	if (ret)
 		goto err;
 
+	mp_param.mp_ops = &io_uring_pp_zc_ops;
+	mp_param.mp_priv = ifq;
+	ret = net_mp_open_rxq(ifq->netdev, reg.if_rxq, &mp_param);
+	if (ret)
+		goto err;
+	ifq->if_rxq = reg.if_rxq;
+
 	reg.offsets.rqes = sizeof(struct io_uring);
 	reg.offsets.head = offsetof(struct io_uring, head);
 	reg.offsets.tail = offsetof(struct io_uring, tail);
 
 	if (copy_to_user(arg, &reg, sizeof(reg)) ||
-	    copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd))) {
-		ret = -EFAULT;
-		goto err;
-	}
-	if (copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) {
+	    copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd)) ||
+	    copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) {
 		ret = -EFAULT;
 		goto err;
 	}
@@ -444,6 +475,8 @@  void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx)
 
 	if (ctx->ifq)
 		io_zcrx_scrub(ctx->ifq);
+
+	io_close_queue(ctx->ifq);
 }
 
 static inline u32 io_zcrx_rqring_entries(struct io_zcrx_ifq *ifq)