[04/11] io_uring: setup ZC for an RX queue when registering an ifq

Message ID 20230826011954.1801099-5-dw@davidwei.uk (mailing list archive)
State New
Series Zero copy network RX using io_uring

Commit Message

David Wei Aug. 26, 2023, 1:19 a.m. UTC
From: David Wei <davidhwei@meta.com>

This patch sets up ZC for an RX queue in a net device when an ifq is
registered with io_uring. The RX queue is specified in the registration
struct. The XDP command added in the previous patch is used to enable or
disable ZC RX.

For now, since there is only one ifq, its destruction is implicit during
io_uring cleanup.

Signed-off-by: David Wei <davidhwei@meta.com>
Co-developed-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 io_uring/io_uring.c |  1 +
 io_uring/zc_rx.c    | 62 +++++++++++++++++++++++++++++++++++++++++++--
 io_uring/zc_rx.h    |  1 +
 3 files changed, 62 insertions(+), 2 deletions(-)
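
For orientation, here is a rough userspace sketch of the registration described
above. The struct and field names (io_uring_zc_rx_ifq_reg, if_idx, if_rxq_id,
rq_entries) mirror the reg.* accesses in io_register_zc_rx_ifq() below, but the
register opcode name and the exact uapi layout are assumptions; they are defined
in other patches of the series that are not shown on this page.

/* Hedged sketch only: IORING_REGISTER_ZC_RX_IFQ is an assumed opcode name;
 * the uapi struct layout comes from other patches in this series. */
#include <net/if.h>
#include <sys/syscall.h>
#include <unistd.h>

static int register_zc_rx(int ring_fd, const char *ifname, unsigned int rxq)
{
	struct io_uring_zc_rx_ifq_reg reg = {
		.if_idx     = if_nametoindex(ifname), /* target net device */
		.if_rxq_id  = rxq,                    /* HW RX queue to put into ZC mode */
		.rq_entries = 256,                    /* refill ring size */
	};

	/* Inline registration via the io_uring_register() syscall; the kernel
	 * side then issues XDP_SETUP_ZC_RX to the driver for this queue. */
	return syscall(__NR_io_uring_register, ring_fd,
		       IORING_REGISTER_ZC_RX_IFQ, &reg, 1);
}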

Comments

David Ahern Aug. 26, 2023, 2:26 a.m. UTC | #1
On 8/25/23 6:19 PM, David Wei wrote:
> diff --git a/io_uring/zc_rx.c b/io_uring/zc_rx.c
> index 6c57c9b06e05..8cc66731af5b 100644
> --- a/io_uring/zc_rx.c
> +++ b/io_uring/zc_rx.c
> @@ -10,6 +11,35 @@
>  #include "kbuf.h"
>  #include "zc_rx.h"
>  
> +typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
> +
> +static int __io_queue_mgmt(struct net_device *dev, struct io_zc_rx_ifq *ifq,
> +			   u16 queue_id)
> +{
> +	struct netdev_bpf cmd;
> +	bpf_op_t ndo_bpf;
> +
> +	ndo_bpf = dev->netdev_ops->ndo_bpf;

This is not bpf related, so it seems wrong to be overloading this ndo.


> +	if (!ndo_bpf)
> +		return -EINVAL;
> +
> +	cmd.command = XDP_SETUP_ZC_RX;
> +	cmd.zc_rx.ifq = ifq;
> +	cmd.zc_rx.queue_id = queue_id;
> +
> +	return ndo_bpf(dev, &cmd);
> +}
> +
> +static int io_open_zc_rxq(struct io_zc_rx_ifq *ifq)
> +{
> +	return __io_queue_mgmt(ifq->dev, ifq, ifq->if_rxq_id);
> +}
> +
> +static int io_close_zc_rxq(struct io_zc_rx_ifq *ifq)
> +{
> +	return __io_queue_mgmt(ifq->dev, NULL, ifq->if_rxq_id);
> +}
> +
>  static struct io_zc_rx_ifq *io_zc_rx_ifq_alloc(struct io_ring_ctx *ctx)
>  {
>  	struct io_zc_rx_ifq *ifq;
> @@ -19,12 +49,17 @@ static struct io_zc_rx_ifq *io_zc_rx_ifq_alloc(struct io_ring_ctx *ctx)
>  		return NULL;
>  
>  	ifq->ctx = ctx;
> +	ifq->if_rxq_id = -1;
>  
>  	return ifq;
>  }
>  
>  static void io_zc_rx_ifq_free(struct io_zc_rx_ifq *ifq)
>  {
> +	if (ifq->if_rxq_id != -1)
> +		io_close_zc_rxq(ifq);
> +	if (ifq->dev)
> +		dev_put(ifq->dev);
>  	io_free_rbuf_ring(ifq);
>  	kfree(ifq);
>  }
> @@ -41,17 +76,22 @@ int io_register_zc_rx_ifq(struct io_ring_ctx *ctx,
>  		return -EFAULT;
>  	if (ctx->ifq)
>  		return -EBUSY;
> +	if (reg.if_rxq_id == -1)
> +		return -EINVAL;
>  
>  	ifq = io_zc_rx_ifq_alloc(ctx);
>  	if (!ifq)
>  		return -ENOMEM;
>  
> -	/* TODO: initialise network interface */
> -
>  	ret = io_allocate_rbuf_ring(ifq, &reg);
>  	if (ret)
>  		goto err;
>  
> +	ret = -ENODEV;
> +	ifq->dev = dev_get_by_index(&init_net, reg.if_idx);

What's the plan for other namespaces? Ideally the device bind comes from
a socket and that gives you the namespace.

> +	if (!ifq->dev)
> +		goto err;
> +
>  	/* TODO: map zc region and initialise zc pool */
>  
>  	ifq->rq_entries = reg.rq_entries;
David Wei Aug. 26, 2023, 10 p.m. UTC | #2
On 25/08/2023 19:26, David Ahern wrote:
> On 8/25/23 6:19 PM, David Wei wrote:
>> diff --git a/io_uring/zc_rx.c b/io_uring/zc_rx.c
>> index 6c57c9b06e05..8cc66731af5b 100644
>> --- a/io_uring/zc_rx.c
>> +++ b/io_uring/zc_rx.c
>> @@ -10,6 +11,35 @@
>>  #include "kbuf.h"
>>  #include "zc_rx.h"
>>  
>> +typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
>> +
>> +static int __io_queue_mgmt(struct net_device *dev, struct io_zc_rx_ifq *ifq,
>> +			   u16 queue_id)
>> +{
>> +	struct netdev_bpf cmd;
>> +	bpf_op_t ndo_bpf;
>> +
>> +	ndo_bpf = dev->netdev_ops->ndo_bpf;
> 
> This is not bpf related, so it seems wrong to be overloading this ndo.

Agreed. I believe the original author (Jonathan) was inspired by
XDP_SETUP_XSK_POOL. I would also prefer a better way of setting up an RX queue
for ZC. I think Mina's proposal uses netlink for this.

Do you have any other suggestions? We want to keep resource registration
inline, so we need to be able to call it from within io_uring.
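
As a strawman, a dedicated queue-management callback could look something like
the sketch below. Neither the callback name nor its placement in net_device_ops
exists in this series; both are assumptions, shown only to contrast with routing
the request through ndo_bpf.

/* Hypothetical alternative, not part of this series: a dedicated
 * net_device_ops callback for ZC RX queue setup/teardown, e.g.
 *
 *   int (*ndo_zc_rx_setup)(struct net_device *dev,
 *                          struct io_zc_rx_ifq *ifq, u16 queue_id);
 *
 * io_uring would then call it directly instead of wrapping the request
 * in struct netdev_bpf. */
static int io_open_zc_rxq(struct io_zc_rx_ifq *ifq)
{
	const struct net_device_ops *ops = ifq->dev->netdev_ops;

	if (!ops->ndo_zc_rx_setup)	/* assumed new callback */
		return -EOPNOTSUPP;

	return ops->ndo_zc_rx_setup(ifq->dev, ifq, ifq->if_rxq_id);
}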

> 
> 
>> +	if (!ndo_bpf)
>> +		return -EINVAL;
>> +
>> +	cmd.command = XDP_SETUP_ZC_RX;
>> +	cmd.zc_rx.ifq = ifq;
>> +	cmd.zc_rx.queue_id = queue_id;
>> +
>> +	return ndo_bpf(dev, &cmd);
>> +}
>> +
>> +static int io_open_zc_rxq(struct io_zc_rx_ifq *ifq)
>> +{
>> +	return __io_queue_mgmt(ifq->dev, ifq, ifq->if_rxq_id);
>> +}
>> +
>> +static int io_close_zc_rxq(struct io_zc_rx_ifq *ifq)
>> +{
>> +	return __io_queue_mgmt(ifq->dev, NULL, ifq->if_rxq_id);
>> +}
>> +
>>  static struct io_zc_rx_ifq *io_zc_rx_ifq_alloc(struct io_ring_ctx *ctx)
>>  {
>>  	struct io_zc_rx_ifq *ifq;
>> @@ -19,12 +49,17 @@ static struct io_zc_rx_ifq *io_zc_rx_ifq_alloc(struct io_ring_ctx *ctx)
>>  		return NULL;
>>  
>>  	ifq->ctx = ctx;
>> +	ifq->if_rxq_id = -1;
>>  
>>  	return ifq;
>>  }
>>  
>>  static void io_zc_rx_ifq_free(struct io_zc_rx_ifq *ifq)
>>  {
>> +	if (ifq->if_rxq_id != -1)
>> +		io_close_zc_rxq(ifq);
>> +	if (ifq->dev)
>> +		dev_put(ifq->dev);
>>  	io_free_rbuf_ring(ifq);
>>  	kfree(ifq);
>>  }
>> @@ -41,17 +76,22 @@ int io_register_zc_rx_ifq(struct io_ring_ctx *ctx,
>>  		return -EFAULT;
>>  	if (ctx->ifq)
>>  		return -EBUSY;
>> +	if (reg.if_rxq_id == -1)
>> +		return -EINVAL;
>>  
>>  	ifq = io_zc_rx_ifq_alloc(ctx);
>>  	if (!ifq)
>>  		return -ENOMEM;
>>  
>> -	/* TODO: initialise network interface */
>> -
>>  	ret = io_allocate_rbuf_ring(ifq, &reg);
>>  	if (ret)
>>  		goto err;
>>  
>> +	ret = -ENODEV;
>> +	ifq->dev = dev_get_by_index(&init_net, reg.if_idx);
> 
> What's the plan for other namespaces? Ideally the device bind comes from
> a socket and that gives you the namespace.

Sorry, I did not consider namespaces yet. I'll look into how namespaces work
and then get back to you.
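
For reference, a minimal namespace-aware sketch, assuming the lookup should use
the registering task's netns (or sock_net() of a bound socket, per the
suggestion above) instead of init_net; this is illustrative, not what the patch
currently does:

/* Sketch only: resolve the device in the caller's network namespace rather
 * than hard-coding init_net; sock_net(sk) would be the source if the binding
 * came from a socket. */
struct net *net = current->nsproxy->net_ns;

ifq->dev = dev_get_by_index(net, reg.if_idx);
if (!ifq->dev)
	goto err;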

> 
>> +	if (!ifq->dev)
>> +		goto err;
>> +
>>  	/* TODO: map zc region and initialise zc pool */
>>  
>>  	ifq->rq_entries = reg.rq_entries;
> 
>

Patch

diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 0b6c5508b1ca..f2ec0a454307 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3098,6 +3098,7 @@ static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 	percpu_ref_kill(&ctx->refs);
 	xa_for_each(&ctx->personalities, index, creds)
 		io_unregister_personality(ctx, index);
+	io_unregister_zc_rx_ifq(ctx);
 	if (ctx->rings)
 		io_poll_remove_all(ctx, NULL, true);
 	mutex_unlock(&ctx->uring_lock);
diff --git a/io_uring/zc_rx.c b/io_uring/zc_rx.c
index 6c57c9b06e05..8cc66731af5b 100644
--- a/io_uring/zc_rx.c
+++ b/io_uring/zc_rx.c
@@ -3,6 +3,7 @@ 
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/io_uring.h>
+#include <linux/netdevice.h>
 
 #include <uapi/linux/io_uring.h>
 
@@ -10,6 +11,35 @@ 
 #include "kbuf.h"
 #include "zc_rx.h"
 
+typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf);
+
+static int __io_queue_mgmt(struct net_device *dev, struct io_zc_rx_ifq *ifq,
+			   u16 queue_id)
+{
+	struct netdev_bpf cmd;
+	bpf_op_t ndo_bpf;
+
+	ndo_bpf = dev->netdev_ops->ndo_bpf;
+	if (!ndo_bpf)
+		return -EINVAL;
+
+	cmd.command = XDP_SETUP_ZC_RX;
+	cmd.zc_rx.ifq = ifq;
+	cmd.zc_rx.queue_id = queue_id;
+
+	return ndo_bpf(dev, &cmd);
+}
+
+static int io_open_zc_rxq(struct io_zc_rx_ifq *ifq)
+{
+	return __io_queue_mgmt(ifq->dev, ifq, ifq->if_rxq_id);
+}
+
+static int io_close_zc_rxq(struct io_zc_rx_ifq *ifq)
+{
+	return __io_queue_mgmt(ifq->dev, NULL, ifq->if_rxq_id);
+}
+
 static struct io_zc_rx_ifq *io_zc_rx_ifq_alloc(struct io_ring_ctx *ctx)
 {
 	struct io_zc_rx_ifq *ifq;
@@ -19,12 +49,17 @@ static struct io_zc_rx_ifq *io_zc_rx_ifq_alloc(struct io_ring_ctx *ctx)
 		return NULL;
 
 	ifq->ctx = ctx;
+	ifq->if_rxq_id = -1;
 
 	return ifq;
 }
 
 static void io_zc_rx_ifq_free(struct io_zc_rx_ifq *ifq)
 {
+	if (ifq->if_rxq_id != -1)
+		io_close_zc_rxq(ifq);
+	if (ifq->dev)
+		dev_put(ifq->dev);
 	io_free_rbuf_ring(ifq);
 	kfree(ifq);
 }
@@ -41,17 +76,22 @@ int io_register_zc_rx_ifq(struct io_ring_ctx *ctx,
 		return -EFAULT;
 	if (ctx->ifq)
 		return -EBUSY;
+	if (reg.if_rxq_id == -1)
+		return -EINVAL;
 
 	ifq = io_zc_rx_ifq_alloc(ctx);
 	if (!ifq)
 		return -ENOMEM;
 
-	/* TODO: initialise network interface */
-
 	ret = io_allocate_rbuf_ring(ifq, &reg);
 	if (ret)
 		goto err;
 
+	ret = -ENODEV;
+	ifq->dev = dev_get_by_index(&init_net, reg.if_idx);
+	if (!ifq->dev)
+		goto err;
+
 	/* TODO: map zc region and initialise zc pool */
 
 	ifq->rq_entries = reg.rq_entries;
@@ -59,6 +99,10 @@ int io_register_zc_rx_ifq(struct io_ring_ctx *ctx,
 	ifq->if_rxq_id = reg.if_rxq_id;
 	ctx->ifq = ifq;
 
+	ret = io_open_zc_rxq(ifq);
+	if (ret)
+		goto err;
+
 	ring_sz = sizeof(struct io_rbuf_ring);
 	rqes_sz = sizeof(struct io_uring_rbuf_rqe) * ifq->rq_entries;
 	cqes_sz = sizeof(struct io_uring_rbuf_cqe) * ifq->cq_entries;
@@ -80,3 +124,17 @@ int io_register_zc_rx_ifq(struct io_ring_ctx *ctx,
 	io_zc_rx_ifq_free(ifq);
 	return ret;
 }
+
+int io_unregister_zc_rx_ifq(struct io_ring_ctx *ctx)
+{
+	struct io_zc_rx_ifq *ifq;
+
+	ifq = ctx->ifq;
+	if (!ifq)
+		return -EINVAL;
+
+	ctx->ifq = NULL;
+	io_zc_rx_ifq_free(ifq);
+
+	return 0;
+}
diff --git a/io_uring/zc_rx.h b/io_uring/zc_rx.h
index 4363734f3d98..340ececa9f9c 100644
--- a/io_uring/zc_rx.h
+++ b/io_uring/zc_rx.h
@@ -17,5 +17,6 @@ struct io_zc_rx_ifq {
 
 int io_register_zc_rx_ifq(struct io_ring_ctx *ctx,
 			  struct io_uring_zc_rx_ifq_reg __user *arg);
+int io_unregister_zc_rx_ifq(struct io_ring_ctx *ctx);
 
 #endif