
[1/1] io_uring/net: introduce IORING_SEND_ZC_REPORT_USAGE flag

Message ID 8945b01756d902f5d5b0667f20b957ad3f742e5e.1666895626.git.metze@samba.org (mailing list archive)
State New
Series [1/1] io_uring/net: introduce IORING_SEND_ZC_REPORT_USAGE flag

Commit Message

Stefan Metzmacher Oct. 27, 2022, 6:34 p.m. UTC
It might be useful for applications to detect whether a zero copy
transfer with SEND[MSG]_ZC was actually possible.
The application can then fall back to plain SEND[MSG] in order
to avoid the overhead of two cqes per request.
Or it can generate a log message indicating to an administrator
that no zero copy was possible, which could explain degraded
performance.

Link: https://lore.kernel.org/io-uring/fb6a7599-8a9b-15e5-9b64-6cd9d01c6ff4@gmail.com/T/#m2b0d9df94ce43b0e69e6c089bdff0ce6babbdfaa
Cc: Pavel Begunkov <asml.silence@gmail.com>
Signed-off-by: Stefan Metzmacher <metze@samba.org>
---
 include/uapi/linux/io_uring.h | 18 ++++++++++++++++++
 io_uring/net.c                |  6 +++++-
 io_uring/notif.c              | 12 ++++++++++++
 io_uring/notif.h              |  3 +++
 4 files changed, 38 insertions(+), 1 deletion(-)
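
For illustration only (not part of the patch or this thread): a minimal
userspace sketch of how an application could request the report and read it
back. It assumes liburing's io_uring_prep_send_zc() helper, an already
initialized ring and a connected socket "sockfd"; the helper name
send_zc_was_copied() is made up for the example.

#include <liburing.h>
#include <stdio.h>

/*
 * Returns 1 if the kernel reported that data was copied, 0 if zero copy
 * worked, -1 on error.  Sketch only, with minimal error handling.
 */
static int send_zc_was_copied(struct io_uring *ring, int sockfd,
			      const void *buf, size_t len)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	unsigned more;
	int copied = 0;

	if (!sqe)
		return -1;

	/* the zc_flags argument ends up in sqe->ioprio */
	io_uring_prep_send_zc(sqe, sockfd, buf, len, 0,
			      IORING_SEND_ZC_REPORT_USAGE);
	io_uring_submit(ring);

	/* First CQE: the send result; IORING_CQE_F_MORE means a notification follows */
	if (io_uring_wait_cqe(ring, &cqe))
		return -1;
	more = cqe->flags & IORING_CQE_F_MORE;
	if (cqe->res < 0)
		fprintf(stderr, "send_zc failed: %d\n", cqe->res);
	io_uring_cqe_seen(ring, cqe);

	if (!more)
		return -1;

	/* Second CQE: the IORING_CQE_F_NOTIF completion carrying the usage report */
	if (io_uring_wait_cqe(ring, &cqe))
		return -1;
	if (cqe->flags & IORING_CQE_F_NOTIF)
		copied = !!(cqe->res & IORING_NOTIF_USAGE_ZC_COPIED);
	io_uring_cqe_seen(ring, cqe);

	return copied;
}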

Comments

Pavel Begunkov Oct. 28, 2022, 1:12 p.m. UTC | #1
On 10/27/22 19:34, Stefan Metzmacher wrote:
> It might be useful for applications to detect if a zero copy
> transfer with SEND[MSG]_ZC was actually possible or not.
> The application can fallback to plain SEND[MSG] in order
> to avoid the overhead of two cqes per request.
> Or it can generate a log message that could indicate
> to an administrator that no zero copy was possible
> and could explain degraded performance.

From a quick look it seems good, I'll test and double-check
when I'm back on Tuesday


> Link: https://lore.kernel.org/io-uring/fb6a7599-8a9b-15e5-9b64-6cd9d01c6ff4@gmail.com/T/#m2b0d9df94ce43b0e69e6c089bdff0ce6babbdfaa
> Cc: Pavel Begunkov <asml.silence@gmail.com>
> Signed-off-by: Stefan Metzmacher <metze@samba.org>
> ---
>   include/uapi/linux/io_uring.h | 18 ++++++++++++++++++
>   io_uring/net.c                |  6 +++++-
>   io_uring/notif.c              | 12 ++++++++++++
>   io_uring/notif.h              |  3 +++
>   4 files changed, 38 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index ab7458033ee3..423f98781a20 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -296,10 +296,28 @@ enum io_uring_op {
>    *
>    * IORING_RECVSEND_FIXED_BUF	Use registered buffers, the index is stored in
>    *				the buf_index field.
> + *
> + * IORING_SEND_ZC_REPORT_USAGE
> + *				If set, SEND[MSG]_ZC should report
> + *				the zerocopy usage in cqe.res
> + *				for the IORING_CQE_F_NOTIF cqe.
> + *				0 is reported if zerocopy was actually possible.
> + *				IORING_NOTIF_USAGE_ZC_COPIED if data was copied
> + *				(at least partially).
>    */
>   #define IORING_RECVSEND_POLL_FIRST	(1U << 0)
>   #define IORING_RECV_MULTISHOT		(1U << 1)
>   #define IORING_RECVSEND_FIXED_BUF	(1U << 2)
> +#define IORING_SEND_ZC_REPORT_USAGE	(1U << 3)
> +
> +/*
> + * cqe.res for IORING_CQE_F_NOTIF if
> + * IORING_SEND_ZC_REPORT_USAGE was requested
> + *
> + * It should be treated as a flag, all other
> + * bits of cqe.res should be treated as reserved!
> + */
> +#define IORING_NOTIF_USAGE_ZC_COPIED    (1U << 31)
>   
>   /*
>    * accept flags stored in sqe->ioprio
> diff --git a/io_uring/net.c b/io_uring/net.c
> index 15dea91625e2..0a8cdc5ae7af 100644
> --- a/io_uring/net.c
> +++ b/io_uring/net.c
> @@ -939,7 +939,8 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>   
>   	zc->flags = READ_ONCE(sqe->ioprio);
>   	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
> -			  IORING_RECVSEND_FIXED_BUF))
> +			  IORING_RECVSEND_FIXED_BUF |
> +			  IORING_SEND_ZC_REPORT_USAGE))
>   		return -EINVAL;
>   	notif = zc->notif = io_alloc_notif(ctx);
>   	if (!notif)
> @@ -957,6 +958,9 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>   		req->imu = READ_ONCE(ctx->user_bufs[idx]);
>   		io_req_set_rsrc_node(notif, ctx, 0);
>   	}
> +	if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
> +		io_notif_to_data(notif)->zc_report = true;
> +	}
>   
>   	if (req->opcode == IORING_OP_SEND_ZC) {
>   		if (READ_ONCE(sqe->__pad3[0]))
> diff --git a/io_uring/notif.c b/io_uring/notif.c
> index e37c6569d82e..4bfef10161fa 100644
> --- a/io_uring/notif.c
> +++ b/io_uring/notif.c
> @@ -18,6 +18,10 @@ static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
>   		__io_unaccount_mem(ctx->user, nd->account_pages);
>   		nd->account_pages = 0;
>   	}
> +
> +	if (nd->zc_report && (nd->zc_copied || !nd->zc_used))
> +		notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED;
> +
>   	io_req_task_complete(notif, locked);
>   }
>   
> @@ -28,6 +32,13 @@ static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
>   	struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg);
>   	struct io_kiocb *notif = cmd_to_io_kiocb(nd);
>   
> +	if (nd->zc_report) {
> +		if (success && !nd->zc_used && skb)
> +			WRITE_ONCE(nd->zc_used, true);
> +		else if (!success && !nd->zc_copied)
> +			WRITE_ONCE(nd->zc_copied, true);
> +	}
> +
>   	if (refcount_dec_and_test(&uarg->refcnt)) {
>   		notif->io_task_work.func = __io_notif_complete_tw;
>   		io_req_task_work_add(notif);
> @@ -55,6 +66,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
>   	nd->account_pages = 0;
>   	nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
>   	nd->uarg.callback = io_uring_tx_zerocopy_callback;
> +	nd->zc_report = nd->zc_used = nd->zc_copied = false;
>   	refcount_set(&nd->uarg.refcnt, 1);
>   	return notif;
>   }
> diff --git a/io_uring/notif.h b/io_uring/notif.h
> index 5b4d710c8ca5..4ae696273c78 100644
> --- a/io_uring/notif.h
> +++ b/io_uring/notif.h
> @@ -13,6 +13,9 @@ struct io_notif_data {
>   	struct file		*file;
>   	struct ubuf_info	uarg;
>   	unsigned long		account_pages;
> +	bool			zc_report;
> +	bool			zc_used;
> +	bool			zc_copied;
>   };
>   
>   void io_notif_flush(struct io_kiocb *notif);
Pavel Begunkov Nov. 2, 2022, 12:33 p.m. UTC | #2
On 10/27/22 19:34, Stefan Metzmacher wrote:
> It might be useful for applications to detect if a zero copy
> transfer with SEND[MSG]_ZC was actually possible or not.
> The application can fallback to plain SEND[MSG] in order
> to avoid the overhead of two cqes per request.
> Or it can generate a log message that could indicate
> to an administrator that no zero copy was possible
> and could explain degraded performance.

Looks good,

Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>



> Link: https://lore.kernel.org/io-uring/fb6a7599-8a9b-15e5-9b64-6cd9d01c6ff4@gmail.com/T/#m2b0d9df94ce43b0e69e6c089bdff0ce6babbdfaa
> Cc: Pavel Begunkov <asml.silence@gmail.com>
> Signed-off-by: Stefan Metzmacher <metze@samba.org>
> ---
>   include/uapi/linux/io_uring.h | 18 ++++++++++++++++++
>   io_uring/net.c                |  6 +++++-
>   io_uring/notif.c              | 12 ++++++++++++
>   io_uring/notif.h              |  3 +++
>   4 files changed, 38 insertions(+), 1 deletion(-)
> 
> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
> index ab7458033ee3..423f98781a20 100644
> --- a/include/uapi/linux/io_uring.h
> +++ b/include/uapi/linux/io_uring.h
> @@ -296,10 +296,28 @@ enum io_uring_op {
>    *
>    * IORING_RECVSEND_FIXED_BUF	Use registered buffers, the index is stored in
>    *				the buf_index field.
> + *
> + * IORING_SEND_ZC_REPORT_USAGE
> + *				If set, SEND[MSG]_ZC should report
> + *				the zerocopy usage in cqe.res
> + *				for the IORING_CQE_F_NOTIF cqe.
> + *				0 is reported if zerocopy was actually possible.
> + *				IORING_NOTIF_USAGE_ZC_COPIED if data was copied
> + *				(at least partially).
>    */
>   #define IORING_RECVSEND_POLL_FIRST	(1U << 0)
>   #define IORING_RECV_MULTISHOT		(1U << 1)
>   #define IORING_RECVSEND_FIXED_BUF	(1U << 2)
> +#define IORING_SEND_ZC_REPORT_USAGE	(1U << 3)
> +
> +/*
> + * cqe.res for IORING_CQE_F_NOTIF if
> + * IORING_SEND_ZC_REPORT_USAGE was requested
> + *
> + * It should be treated as a flag, all other
> + * bits of cqe.res should be treated as reserved!
> + */
> +#define IORING_NOTIF_USAGE_ZC_COPIED    (1U << 31)
>   
>   /*
>    * accept flags stored in sqe->ioprio
> diff --git a/io_uring/net.c b/io_uring/net.c
> index 15dea91625e2..0a8cdc5ae7af 100644
> --- a/io_uring/net.c
> +++ b/io_uring/net.c
> @@ -939,7 +939,8 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>   
>   	zc->flags = READ_ONCE(sqe->ioprio);
>   	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
> -			  IORING_RECVSEND_FIXED_BUF))
> +			  IORING_RECVSEND_FIXED_BUF |
> +			  IORING_SEND_ZC_REPORT_USAGE))
>   		return -EINVAL;
>   	notif = zc->notif = io_alloc_notif(ctx);
>   	if (!notif)
> @@ -957,6 +958,9 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
>   		req->imu = READ_ONCE(ctx->user_bufs[idx]);
>   		io_req_set_rsrc_node(notif, ctx, 0);
>   	}
> +	if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
> +		io_notif_to_data(notif)->zc_report = true;
> +	}
>   
>   	if (req->opcode == IORING_OP_SEND_ZC) {
>   		if (READ_ONCE(sqe->__pad3[0]))
> diff --git a/io_uring/notif.c b/io_uring/notif.c
> index e37c6569d82e..4bfef10161fa 100644
> --- a/io_uring/notif.c
> +++ b/io_uring/notif.c
> @@ -18,6 +18,10 @@ static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
>   		__io_unaccount_mem(ctx->user, nd->account_pages);
>   		nd->account_pages = 0;
>   	}
> +
> +	if (nd->zc_report && (nd->zc_copied || !nd->zc_used))
> +		notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED;
> +
>   	io_req_task_complete(notif, locked);
>   }
>   
> @@ -28,6 +32,13 @@ static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
>   	struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg);
>   	struct io_kiocb *notif = cmd_to_io_kiocb(nd);
>   
> +	if (nd->zc_report) {
> +		if (success && !nd->zc_used && skb)
> +			WRITE_ONCE(nd->zc_used, true);
> +		else if (!success && !nd->zc_copied)
> +			WRITE_ONCE(nd->zc_copied, true);
> +	}
> +
>   	if (refcount_dec_and_test(&uarg->refcnt)) {
>   		notif->io_task_work.func = __io_notif_complete_tw;
>   		io_req_task_work_add(notif);
> @@ -55,6 +66,7 @@ struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
>   	nd->account_pages = 0;
>   	nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
>   	nd->uarg.callback = io_uring_tx_zerocopy_callback;
> +	nd->zc_report = nd->zc_used = nd->zc_copied = false;
>   	refcount_set(&nd->uarg.refcnt, 1);
>   	return notif;
>   }
> diff --git a/io_uring/notif.h b/io_uring/notif.h
> index 5b4d710c8ca5..4ae696273c78 100644
> --- a/io_uring/notif.h
> +++ b/io_uring/notif.h
> @@ -13,6 +13,9 @@ struct io_notif_data {
>   	struct file		*file;
>   	struct ubuf_info	uarg;
>   	unsigned long		account_pages;
> +	bool			zc_report;
> +	bool			zc_used;
> +	bool			zc_copied;
>   };
>   
>   void io_notif_flush(struct io_kiocb *notif);
Jens Axboe Nov. 2, 2022, 2:02 p.m. UTC | #3
On Thu, 27 Oct 2022 20:34:45 +0200, Stefan Metzmacher wrote:
> It might be useful for applications to detect if a zero copy
> transfer with SEND[MSG]_ZC was actually possible or not.
> The application can fallback to plain SEND[MSG] in order
> to avoid the overhead of two cqes per request.
> Or it can generate a log message that could indicate
> to an administrator that no zero copy was possible
> and could explain degraded performance.
> 
> [...]

Applied, thanks!

[1/1] io_uring/net: introduce IORING_SEND_ZC_REPORT_USAGE flag
      commit: 4847a0eae62976ac27f192cd59b9de72b390eff3

Best regards,

Patch

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ab7458033ee3..423f98781a20 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -296,10 +296,28 @@  enum io_uring_op {
  *
  * IORING_RECVSEND_FIXED_BUF	Use registered buffers, the index is stored in
  *				the buf_index field.
+ *
+ * IORING_SEND_ZC_REPORT_USAGE
+ *				If set, SEND[MSG]_ZC should report
+ *				the zerocopy usage in cqe.res
+ *				for the IORING_CQE_F_NOTIF cqe.
+ *				0 is reported if zerocopy was actually possible.
+ *				IORING_NOTIF_USAGE_ZC_COPIED if data was copied
+ *				(at least partially).
  */
 #define IORING_RECVSEND_POLL_FIRST	(1U << 0)
 #define IORING_RECV_MULTISHOT		(1U << 1)
 #define IORING_RECVSEND_FIXED_BUF	(1U << 2)
+#define IORING_SEND_ZC_REPORT_USAGE	(1U << 3)
+
+/*
+ * cqe.res for IORING_CQE_F_NOTIF if
+ * IORING_SEND_ZC_REPORT_USAGE was requested
+ *
+ * It should be treated as a flag, all other
+ * bits of cqe.res should be treated as reserved!
+ */
+#define IORING_NOTIF_USAGE_ZC_COPIED    (1U << 31)
 
 /*
  * accept flags stored in sqe->ioprio
diff --git a/io_uring/net.c b/io_uring/net.c
index 15dea91625e2..0a8cdc5ae7af 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -939,7 +939,8 @@  int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	zc->flags = READ_ONCE(sqe->ioprio);
 	if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST |
-			  IORING_RECVSEND_FIXED_BUF))
+			  IORING_RECVSEND_FIXED_BUF |
+			  IORING_SEND_ZC_REPORT_USAGE))
 		return -EINVAL;
 	notif = zc->notif = io_alloc_notif(ctx);
 	if (!notif)
@@ -957,6 +958,9 @@  int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		req->imu = READ_ONCE(ctx->user_bufs[idx]);
 		io_req_set_rsrc_node(notif, ctx, 0);
 	}
+	if (zc->flags & IORING_SEND_ZC_REPORT_USAGE) {
+		io_notif_to_data(notif)->zc_report = true;
+	}
 
 	if (req->opcode == IORING_OP_SEND_ZC) {
 		if (READ_ONCE(sqe->__pad3[0]))
diff --git a/io_uring/notif.c b/io_uring/notif.c
index e37c6569d82e..4bfef10161fa 100644
--- a/io_uring/notif.c
+++ b/io_uring/notif.c
@@ -18,6 +18,10 @@  static void __io_notif_complete_tw(struct io_kiocb *notif, bool *locked)
 		__io_unaccount_mem(ctx->user, nd->account_pages);
 		nd->account_pages = 0;
 	}
+
+	if (nd->zc_report && (nd->zc_copied || !nd->zc_used))
+		notif->cqe.res |= IORING_NOTIF_USAGE_ZC_COPIED;
+
 	io_req_task_complete(notif, locked);
 }
 
@@ -28,6 +32,13 @@  static void io_uring_tx_zerocopy_callback(struct sk_buff *skb,
 	struct io_notif_data *nd = container_of(uarg, struct io_notif_data, uarg);
 	struct io_kiocb *notif = cmd_to_io_kiocb(nd);
 
+	if (nd->zc_report) {
+		if (success && !nd->zc_used && skb)
+			WRITE_ONCE(nd->zc_used, true);
+		else if (!success && !nd->zc_copied)
+			WRITE_ONCE(nd->zc_copied, true);
+	}
+
 	if (refcount_dec_and_test(&uarg->refcnt)) {
 		notif->io_task_work.func = __io_notif_complete_tw;
 		io_req_task_work_add(notif);
@@ -55,6 +66,7 @@  struct io_kiocb *io_alloc_notif(struct io_ring_ctx *ctx)
 	nd->account_pages = 0;
 	nd->uarg.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN;
 	nd->uarg.callback = io_uring_tx_zerocopy_callback;
+	nd->zc_report = nd->zc_used = nd->zc_copied = false;
 	refcount_set(&nd->uarg.refcnt, 1);
 	return notif;
 }
diff --git a/io_uring/notif.h b/io_uring/notif.h
index 5b4d710c8ca5..4ae696273c78 100644
--- a/io_uring/notif.h
+++ b/io_uring/notif.h
@@ -13,6 +13,9 @@  struct io_notif_data {
 	struct file		*file;
 	struct ubuf_info	uarg;
 	unsigned long		account_pages;
+	bool			zc_report;
+	bool			zc_used;
+	bool			zc_copied;
 };
 
 void io_notif_flush(struct io_kiocb *notif);
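
As the notif.c hunk above shows, the notification CQE carries
IORING_NOTIF_USAGE_ZC_COPIED in cqe.res when data was copied at least
partially, or when zero copy was never used for the transfer at all
(nd->zc_copied || !nd->zc_used); all other bits of cqe.res are reserved.
Below is a hedged sketch of the fallback policy hinted at in the commit
message; the struct, the per-connection flag and the handler name are made
up for the example.

#include <liburing.h>
#include <stdbool.h>

struct conn {
	int	sockfd;
	bool	use_plain_send;	/* hypothetical per-connection state */
};

/* Call for every reaped CQE that has IORING_CQE_F_NOTIF set. */
static void handle_send_zc_notif(struct conn *c, const struct io_uring_cqe *cqe)
{
	/*
	 * Only IORING_NOTIF_USAGE_ZC_COPIED is defined in cqe->res of the
	 * notification; treat every other bit as reserved.
	 */
	if (cqe->res & IORING_NOTIF_USAGE_ZC_COPIED) {
		/*
		 * Zero copy did not happen (or only partially); stop paying
		 * for two CQEs per request on this connection and use plain
		 * SEND[MSG] instead.
		 */
		c->use_plain_send = true;
	}
}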