diff mbox series

[v6,2/3] io_uring: add api to set / get napi configuration.

Message ID 20230201222254.744422-3-shr@devkernel.io (mailing list archive)
State Superseded
Headers show
Series io_uring: add napi busy polling support | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Stefan Roesch Feb. 1, 2023, 10:22 p.m. UTC
This adds an api to register the busy poll timeout from liburing. To be
able to use this functionality, the corresponding liburing patch is needed.

Signed-off-by: Stefan Roesch <shr@devkernel.io>
Acked-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/io_uring.h | 11 +++++++
 io_uring/io_uring.c           | 55 +++++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+)

Comments

Ammar Faizi Feb. 1, 2023, 11:15 p.m. UTC | #1
On Wed, Feb 01, 2023 at 02:22:53PM -0800, Stefan Roesch wrote:
> +static int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
> +{
> +#ifdef CONFIG_NET_RX_BUSY_POLL
> +	const struct io_uring_napi curr = {
> +		.busy_poll_to = ctx->napi_busy_poll_to,
> +	};
> +
> +	if (arg) {
> +		if (copy_to_user(arg, &curr, sizeof(curr)))
> +			return -EFAULT;
> +	}
> +
> +	WRITE_ONCE(ctx->napi_busy_poll_to, 0);
> +	return 0;
> +#else
> +	return -EINVAL;
> +#endif
> +}

Just to follow the common pattern when a feature is not enabled the
return value is -EOPNOTSUPP instead of -EINVAL. What do you think?

> +	case IORING_UNREGISTER_NAPI:
> +		ret = -EINVAL;
> +		if (!arg)
> +			break;
> +		ret = io_unregister_napi(ctx, arg);
> +		break;

This needs to be corrected. If the @arg var is NULL, you return -EINVAL.
So io_unregister_napi() will always have @arg != NULL. This @arg check
should go, allow the user to pass a NULL pointer to it.

Our previous agreement on this API is to allow the user to pass a NULL
pointer in case the user doesn't care about the old value.

Also, having a liburing test case that verifies this behavior would be
excellent.
Stefan Roesch Feb. 2, 2023, 6:47 p.m. UTC | #2
Ammar Faizi <ammarfaizi2@gnuweeb.org> writes:

> On Wed, Feb 01, 2023 at 02:22:53PM -0800, Stefan Roesch wrote:
>> +static int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
>> +{
>> +#ifdef CONFIG_NET_RX_BUSY_POLL
>> +	const struct io_uring_napi curr = {
>> +		.busy_poll_to = ctx->napi_busy_poll_to,
>> +	};
>> +
>> +	if (arg) {
>> +		if (copy_to_user(arg, &curr, sizeof(curr)))
>> +			return -EFAULT;
>> +	}
>> +
>> +	WRITE_ONCE(ctx->napi_busy_poll_to, 0);
>> +	return 0;
>> +#else
>> +	return -EINVAL;
>> +#endif
>> +}
>
> Just to follow the common pattern when a feature is not enabled the
> return value is -EOPNOTSUPP instead of -EINVAL. What do you think?
>

Sounds good, I'll return -EOPNOTSUPP when CONFIG_NET_RX_BUSY_POLL is not
enabled. I'll make the change for the register and the unregister
function.

>> +	case IORING_UNREGISTER_NAPI:
>> +		ret = -EINVAL;
>> +		if (!arg)
>> +			break;
>> +		ret = io_unregister_napi(ctx, arg);
>> +		break;
>
> This needs to be corrected. If the @arg var is NULL, you return -EINVAL.
> So io_unregister_napi() will always have @arg != NULL. This @arg check
> should go, allow the user to pass a NULL pointer to it.
>
> Our previous agreement on this API is to allow the user to pass a NULL
> pointer in case the user doesn't care about the old value.
>
> Also, having a liburing test case that verifies this behavior would be
> excellent.
>

I'll remove the check for the arg parameter. In addition I will output
the busy poll timeout and the prefer busy poll setting in the client
example program if one of the settings has been enabled.
diff mbox series

Patch

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 2780bce62faf..fce4533c81c3 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -516,6 +516,10 @@  enum {
 	/* register a range of fixed file slots for automatic slot allocation */
 	IORING_REGISTER_FILE_ALLOC_RANGE	= 25,
 
+	/* set/clear busy poll settings */
+	IORING_REGISTER_NAPI			= 26,
+	IORING_UNREGISTER_NAPI			= 27,
+
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
@@ -638,6 +642,13 @@  struct io_uring_buf_reg {
 	__u64	resv[3];
 };
 
+/* argument for IORING_(UN)REGISTER_NAPI */
+struct io_uring_napi {
+	__u32	busy_poll_to;
+	__u32	pad;
+	__u64	resv;
+};
+
 /*
  * io_uring_restriction->opcode values
  */
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 96062036db41..f2509f0afc7b 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -4246,6 +4246,49 @@  static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx,
 	return ret;
 }
 
+static int io_register_napi(struct io_ring_ctx *ctx, void __user *arg)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	const struct io_uring_napi curr = {
+		.busy_poll_to = ctx->napi_busy_poll_to,
+	};
+	struct io_uring_napi napi;
+
+	if (copy_from_user(&napi, arg, sizeof(napi)))
+		return -EFAULT;
+	if (napi.pad || napi.resv)
+		return -EINVAL;
+
+	WRITE_ONCE(ctx->napi_busy_poll_to, napi.busy_poll_to);
+
+	if (copy_to_user(arg, &curr, sizeof(curr)))
+		return -EFAULT;
+
+	return 0;
+#else
+	return -EINVAL;
+#endif
+}
+
+static int io_unregister_napi(struct io_ring_ctx *ctx, void __user *arg)
+{
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	const struct io_uring_napi curr = {
+		.busy_poll_to = ctx->napi_busy_poll_to,
+	};
+
+	if (arg) {
+		if (copy_to_user(arg, &curr, sizeof(curr)))
+			return -EFAULT;
+	}
+
+	WRITE_ONCE(ctx->napi_busy_poll_to, 0);
+	return 0;
+#else
+	return -EINVAL;
+#endif
+}
+
 static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			       void __user *arg, unsigned nr_args)
 	__releases(ctx->uring_lock)
@@ -4404,6 +4447,18 @@  static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
 			break;
 		ret = io_register_file_alloc_range(ctx, arg);
 		break;
+	case IORING_REGISTER_NAPI:
+		ret = -EINVAL;
+		if (!arg)
+			break;
+		ret = io_register_napi(ctx, arg);
+		break;
+	case IORING_UNREGISTER_NAPI:
+		ret = -EINVAL;
+		if (!arg)
+			break;
+		ret = io_unregister_napi(ctx, arg);
+		break;
 	default:
 		ret = -EINVAL;
 		break;