diff mbox series

[v6,1/6] QIOChannel: Add io_writev_zero_copy & io_flush_zero_copy callbacks

Message ID 20211209093923.1293701-2-leobras@redhat.com (mailing list archive)
State New, archived
Headers show
Series MSG_ZEROCOPY + multifd | expand

Commit Message

Leonardo Bras Dec. 9, 2021, 9:39 a.m. UTC
Adds io_writev_zero_copy and io_flush_zero_copy as optional callbacks to QIOChannelClass,
allowing the implementation of zero copy writes by subclasses.

How to use them:
- Write data using qio_channel_writev_zero_copy(),
- Wait write completion with qio_channel_flush_zero_copy().

Notes:
As some zero copy implementations work asynchronously, it's
recommended to keep the write buffer untouched until
qio_channel_flush_zero_copy() returns, to avoid the risk of sending
updated buffer contents instead of the contents at the time of the write.

As the new callbacks are optional, if a subclass does not implement them, then:
- io_writev_zero_copy will return -1,
- io_flush_zero_copy will return 0 without changing anything.

Also, some functions like qio_channel_writev_full_all() were adapted to
receive a flags parameter. That allows code to be shared between zero copy
and non-zero copy writev, and also makes it easier to implement new flags.

Signed-off-by: Leonardo Bras <leobras@redhat.com>
---
 include/io/channel.h | 98 +++++++++++++++++++++++++++++++++++++++-----
 io/channel.c         | 66 +++++++++++++++++++++++------
 2 files changed, 142 insertions(+), 22 deletions(-)

Comments

Daniel P. Berrangé Dec. 10, 2021, 12:15 p.m. UTC | #1
On Thu, Dec 09, 2021 at 06:39:19AM -0300, Leonardo Bras wrote:
> Adds io_writev_zero_copy and io_flush_zero_copy as optional callbacks to QIOChannelClass,
> allowing the implementation of zero copy writes by subclasses.
> 
> How to use them:
> - Write data using qio_channel_writev_zero_copy(),
> - Wait write completion with qio_channel_flush_zero_copy().
> 
> Notes:
> As some zero copy implementations work asynchronously, it's
> recommended to keep the write buffer untouched until
> qio_channel_flush_zero_copy() returns, to avoid the risk of sending
> updated buffer contents instead of the contents at the time of the write.
> 
> As the new callbacks are optional, if a subclass does not implement them, then:
> - io_writev_zero_copy will return -1,
> - io_flush_zero_copy will return 0 without changing anything.
> 
> Also, some functions like qio_channel_writev_full_all() were adapted to
> receive a flags parameter. That allows code to be shared between zero copy
> and non-zero copy writev, and also makes it easier to implement new flags.
> 
> Signed-off-by: Leonardo Bras <leobras@redhat.com>
> ---
>  include/io/channel.h | 98 +++++++++++++++++++++++++++++++++++++++-----
>  io/channel.c         | 66 +++++++++++++++++++++++------
>  2 files changed, 142 insertions(+), 22 deletions(-)
> 
> diff --git a/include/io/channel.h b/include/io/channel.h
> index 88988979f8..83fa970a19 100644
> --- a/include/io/channel.h
> +++ b/include/io/channel.h
> @@ -32,12 +32,15 @@ OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
>  
>  #define QIO_CHANNEL_ERR_BLOCK -2
>  
> +#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
> +
>  typedef enum QIOChannelFeature QIOChannelFeature;
>  
>  enum QIOChannelFeature {
>      QIO_CHANNEL_FEATURE_FD_PASS,
>      QIO_CHANNEL_FEATURE_SHUTDOWN,
>      QIO_CHANNEL_FEATURE_LISTEN,
> +    QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
>  };
>  
>  
> @@ -136,6 +139,12 @@ struct QIOChannelClass {
>                                    IOHandler *io_read,
>                                    IOHandler *io_write,
>                                    void *opaque);
> +    ssize_t (*io_writev_zero_copy)(QIOChannel *ioc,
> +                                   const struct iovec *iov,
> +                                   size_t niov,
> +                                   Error **errp);
> +    int (*io_flush_zero_copy)(QIOChannel *ioc,
> +                              Error **errp);
>  };

I've still got the same feedback as previous iterations. It
does not make sense to have both separate callbacks / APIs
and also add flags to existing methods. It just solves the
same problem twice, which is redundant.

I had suggested separate callbacks originally because I
thought we would need to have different signature with
ability to get completions. We've done completions with
a separate API call though.

So, in retrospect, the separate zero_copy methods aren't
so compelling as an idea, and we could just use flags
only.


Regards,
Daniel
diff mbox series

Patch

diff --git a/include/io/channel.h b/include/io/channel.h
index 88988979f8..83fa970a19 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -32,12 +32,15 @@  OBJECT_DECLARE_TYPE(QIOChannel, QIOChannelClass,
 
 #define QIO_CHANNEL_ERR_BLOCK -2
 
+#define QIO_CHANNEL_WRITE_FLAG_ZERO_COPY 0x1
+
 typedef enum QIOChannelFeature QIOChannelFeature;
 
 enum QIOChannelFeature {
     QIO_CHANNEL_FEATURE_FD_PASS,
     QIO_CHANNEL_FEATURE_SHUTDOWN,
     QIO_CHANNEL_FEATURE_LISTEN,
+    QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY,
 };
 
 
@@ -136,6 +139,12 @@  struct QIOChannelClass {
                                   IOHandler *io_read,
                                   IOHandler *io_write,
                                   void *opaque);
+    ssize_t (*io_writev_zero_copy)(QIOChannel *ioc,
+                                   const struct iovec *iov,
+                                   size_t niov,
+                                   Error **errp);
+    int (*io_flush_zero_copy)(QIOChannel *ioc,
+                              Error **errp);
 };
 
 /* General I/O handling functions */
@@ -321,10 +330,11 @@  int qio_channel_readv_all(QIOChannel *ioc,
 
 
 /**
- * qio_channel_writev_all:
+ * qio_channel_writev_all_flags:
  * @ioc: the channel object
  * @iov: the array of memory regions to write data from
  * @niov: the length of the @iov array
+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
  * @errp: pointer to a NULL-initialized error object
  *
  * Write data to the IO channel, reading it from the
@@ -337,12 +347,23 @@  int qio_channel_readv_all(QIOChannel *ioc,
  * to be written, yielding from the current coroutine
  * if required.
  *
+ * If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags,
+ * instead of waiting for all requested data to be written,
+ * this function will wait until it's all queued for writing.
+ * In this case, if the buffer gets changed between queueing and
+ * sending, the updated buffer will be sent. If this is not a
+ * desired behavior, it's suggested to call qio_channel_flush_zero_copy()
+ * before reusing the buffer.
+ *
  * Returns: 0 if all bytes were written, or -1 on error
  */
-int qio_channel_writev_all(QIOChannel *ioc,
-                           const struct iovec *iov,
-                           size_t niov,
-                           Error **erp);
+int qio_channel_writev_all_flags(QIOChannel *ioc,
+                                 const struct iovec *iov,
+                                 size_t niov,
+                                 int flags,
+                                 Error **errp);
+#define qio_channel_writev_all(ioc, iov, niov, errp) \
+    qio_channel_writev_all_flags(ioc, iov, niov, 0, errp)
 
 /**
  * qio_channel_readv:
@@ -831,12 +852,13 @@  int qio_channel_readv_full_all(QIOChannel *ioc,
                                Error **errp);
 
 /**
- * qio_channel_writev_full_all:
+ * qio_channel_writev_full_all_flags:
  * @ioc: the channel object
  * @iov: the array of memory regions to write data from
  * @niov: the length of the @iov array
  * @fds: an array of file handles to send
  * @nfds: number of file handles in @fds
+ * @flags: write flags (QIO_CHANNEL_WRITE_FLAG_*)
  * @errp: pointer to a NULL-initialized error object
  *
  *
@@ -846,13 +868,69 @@  int qio_channel_readv_full_all(QIOChannel *ioc,
  * to be written, yielding from the current coroutine
  * if required.
  *
+ * If QIO_CHANNEL_WRITE_FLAG_ZERO_COPY is passed in flags,
+ * instead of waiting for all requested data to be written,
+ * this function will wait until it's all queued for writing.
+ * In this case, if the buffer gets changed between queueing and
+ * sending, the updated buffer will be sent. If this is not a
+ * desired behavior, it's suggested to call qio_channel_flush_zero_copy()
+ * before reusing the buffer.
+ *
  * Returns: 0 if all bytes were written, or -1 on error
  */
 
-int qio_channel_writev_full_all(QIOChannel *ioc,
-                                const struct iovec *iov,
-                                size_t niov,
-                                int *fds, size_t nfds,
+int qio_channel_writev_full_all_flags(QIOChannel *ioc,
+                                      const struct iovec *iov,
+                                      size_t niov,
+                                      int *fds, size_t nfds,
+                                      int flags, Error **errp);
+#define qio_channel_writev_full_all(ioc, iov, niov, fds, nfds, errp) \
+    qio_channel_writev_full_all_flags(ioc, iov, niov, fds, nfds, 0, errp)
+
+/**
+ * qio_channel_writev_zero_copy:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to write data from
+ * @niov: the length of the @iov array
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Behaves like qio_channel_writev, but may write
+ * data asynchronously while avoiding unnecessary data copy.
+ * This function may return before any data is actually written,
+ * but will queue every buffer for writing.
+ *
+ * Some implementations require the buffer region to be locked,
+ * so if there is not enough locked memory available to the process
+ * this function will fail.
+ *
+ * If at some point it's necessary to wait for all data to be
+ * written, use qio_channel_flush_zero_copy().
+ *
+ * If zero copy is not available, returns -1 and sets errp.
+ */
+
+ssize_t qio_channel_writev_zero_copy(QIOChannel *ioc,
+                                     const struct iovec *iov,
+                                     size_t niov,
+                                     Error **errp);
+
+/**
+ * qio_channel_flush_zero_copy:
+ * @ioc: the channel object
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Will block until every packet queued with
+ * qio_channel_writev_zero_copy() is sent, or return
+ * in case of any error.
+ *
+ * If not implemented, acts as a no-op, and returns 0.
+ *
+ * Returns -1 if any error is found,
+ *          1 if every send failed to use zero copy,
+ *          0 otherwise.
+ */
+
+int qio_channel_flush_zero_copy(QIOChannel *ioc,
                                 Error **errp);
 
 #endif /* QIO_CHANNEL_H */
diff --git a/io/channel.c b/io/channel.c
index e8b019dc36..d04a6772c8 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -212,19 +212,21 @@  int qio_channel_readv_full_all(QIOChannel *ioc,
     return ret;
 }
 
-int qio_channel_writev_all(QIOChannel *ioc,
-                           const struct iovec *iov,
-                           size_t niov,
-                           Error **errp)
+int qio_channel_writev_all_flags(QIOChannel *ioc,
+                                 const struct iovec *iov,
+                                 size_t niov,
+                                 int flags,
+                                 Error **errp)
 {
-    return qio_channel_writev_full_all(ioc, iov, niov, NULL, 0, errp);
+    return qio_channel_writev_full_all_flags(ioc, iov, niov, NULL, 0, flags,
+                                             errp);
 }
 
-int qio_channel_writev_full_all(QIOChannel *ioc,
-                                const struct iovec *iov,
-                                size_t niov,
-                                int *fds, size_t nfds,
-                                Error **errp)
+int qio_channel_writev_full_all_flags(QIOChannel *ioc,
+                                      const struct iovec *iov,
+                                      size_t niov,
+                                      int *fds, size_t nfds,
+                                      int flags, Error **errp)
 {
     int ret = -1;
     struct iovec *local_iov = g_new(struct iovec, niov);
@@ -237,8 +239,16 @@  int qio_channel_writev_full_all(QIOChannel *ioc,
 
     while (nlocal_iov > 0) {
         ssize_t len;
-        len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds,
-                                      errp);
+
+        if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
+            assert(fds == NULL && nfds == 0);
+            len = qio_channel_writev_zero_copy(ioc, local_iov, nlocal_iov,
+                                               errp);
+        } else {
+            len = qio_channel_writev_full(ioc, local_iov, nlocal_iov, fds, nfds,
+                                          errp);
+        }
+
         if (len == QIO_CHANNEL_ERR_BLOCK) {
             if (qemu_in_coroutine()) {
                 qio_channel_yield(ioc, G_IO_OUT);
@@ -474,6 +484,38 @@  off_t qio_channel_io_seek(QIOChannel *ioc,
 }
 
 
+ssize_t qio_channel_writev_zero_copy(QIOChannel *ioc,
+                                     const struct iovec *iov,
+                                     size_t niov,
+                                     Error **errp)
+{
+    QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
+
+    if (!klass->io_writev_zero_copy ||
+        !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
+        error_setg_errno(errp, EINVAL,
+                         "Channel does not support zero copy writev");
+        return -1;
+    }
+
+    return klass->io_writev_zero_copy(ioc, iov, niov, errp);
+}
+
+
+int qio_channel_flush_zero_copy(QIOChannel *ioc,
+                                Error **errp)
+{
+    QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
+
+    if (!klass->io_flush_zero_copy ||
+        !qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY)) {
+        return 0;
+    }
+
+    return klass->io_flush_zero_copy(ioc, errp);
+}
+
+
 static void qio_channel_restart_read(void *opaque)
 {
     QIOChannel *ioc = opaque;