
migration: check magic value for deciding the mapping of channels

Message ID 20221101143029.14246-1-manish.mishra@nutanix.com (mailing list archive)
State New, archived
Series: migration: check magic value for deciding the mapping of channels

Commit Message

manish.mishra Nov. 1, 2022, 2:30 p.m. UTC
Current logic assumes that channel connections on the destination side are
always established in the same order as on the source, and that the first
one will always be the default channel, followed by the multifd or
post-copy preemption channel. This may not always be true: even if a
channel has its connection established on the source side, it can still be
pending on the destination side while a newer connection is accepted
first, causing an out-of-order mapping of channels on the destination
side. Currently, all channels except the post-copy preempt channel send a
magic number; this patch uses that magic number to decide the type of each
channel. The logic applies only to precopy (multifd) live migration
because, as mentioned, the post-copy preempt channel does not send any
magic number. The patch uses MSG_PEEK to check the magic number of a
channel so that the current data/control stream handling remains
unaffected.

Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
---
 include/io/channel.h     | 25 +++++++++++++++++++++++++
 io/channel-socket.c      | 27 +++++++++++++++++++++++++++
 io/channel.c             | 39 +++++++++++++++++++++++++++++++++++++++
 migration/migration.c    | 33 +++++++++++++++++++++------------
 migration/multifd.c      | 12 ++++--------
 migration/multifd.h      |  2 +-
 migration/postcopy-ram.c |  5 +----
 migration/postcopy-ram.h |  2 +-
 8 files changed, 119 insertions(+), 26 deletions(-)
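
As a side note on the mechanism described in the commit message, the
following is a minimal, standalone sketch of peek-based channel
classification. It is not the patch itself: classify_channel() and its
return convention are purely illustrative, and only the "QEVM" value of
QEMU_VM_FILE_MAGIC is taken from the savevm stream format.

#include <stdint.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>      /* htonl */

#define QEMU_VM_FILE_MAGIC 0x5145564d   /* "QEVM" */

/*
 * Returns 1 if the connection looks like the default (QEMUFile) channel,
 * 0 if it looks like a multifd channel, -1 on error, EOF, or short peek.
 * MSG_PEEK leaves the bytes in the socket buffer, so whoever reads the
 * stream afterwards still sees the magic, which is what keeps the
 * existing data/control stream handling unaffected.
 */
static int classify_channel(int fd)
{
    uint32_t magic = 0;
    ssize_t n;

    do {
        n = recv(fd, &magic, sizeof(magic), MSG_PEEK);
    } while (n < 0 && errno == EINTR);

    if (n != (ssize_t)sizeof(magic)) {
        /* error, EOF, or fewer than 4 bytes buffered yet; a real
         * implementation would wait and retry the peek */
        return -1;
    }
    return magic == htonl(QEMU_VM_FILE_MAGIC) ? 1 : 0;
}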

Comments

Daniel P. Berrangé Nov. 1, 2022, 2:51 p.m. UTC | #1
On Tue, Nov 01, 2022 at 02:30:29PM +0000, manish.mishra wrote:
> Current logic assumes that channel connections on the destination side are
> always established in the same order as the source and the first one will
> always be the default channel followed by the multifid or post-copy
> preemption channel. This may not be always true, as even if a channel has a
> connection established on the source side it can be in the pending state on
> the destination side and a newer connection can be established first.
> Basically causing out of order mapping of channels on the destination side.
> Currently, all channels except post-copy preempt send a magic number, this
> patch uses that magic number to decide the type of channel. This logic is
> applicable only for precopy(multifd) live migration, as mentioned, the
> post-copy preempt channel does not send any magic number. Also, this patch
> uses MSG_PEEK to check the magic number of channels so that current
> data/control stream management remains un-effected.
> 
> Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
> ---
>  include/io/channel.h     | 25 +++++++++++++++++++++++++
>  io/channel-socket.c      | 27 +++++++++++++++++++++++++++
>  io/channel.c             | 39 +++++++++++++++++++++++++++++++++++++++
>  migration/migration.c    | 33 +++++++++++++++++++++------------
>  migration/multifd.c      | 12 ++++--------
>  migration/multifd.h      |  2 +-
>  migration/postcopy-ram.c |  5 +----
>  migration/postcopy-ram.h |  2 +-
>  8 files changed, 119 insertions(+), 26 deletions(-)
> 
> diff --git a/include/io/channel.h b/include/io/channel.h
> index c680ee7480..74177aeeea 100644
> --- a/include/io/channel.h
> +++ b/include/io/channel.h
> @@ -115,6 +115,10 @@ struct QIOChannelClass {
>                          int **fds,
>                          size_t *nfds,
>                          Error **errp);
> +    ssize_t (*io_read_peek)(QIOChannel *ioc,
> +                            void *buf,
> +                            size_t nbytes,
> +                            Error **errp);
>      int (*io_close)(QIOChannel *ioc,
>                      Error **errp);
>      GSource * (*io_create_watch)(QIOChannel *ioc,
> @@ -475,6 +479,27 @@ int qio_channel_write_all(QIOChannel *ioc,
>                            size_t buflen,
>                            Error **errp);
>  
> +/**
> + * qio_channel_read_peek_all:
> + * @ioc: the channel object
> + * @buf: the memory region to read in data
> + * @nbytes: the number of bytes to read
> + * @errp: pointer to a NULL-initialized error object
> + *
> + * Read given @nbytes data from peek of channel into
> + * memory region @buf.
> + *
> + * The function will be blocked until read size is
> + * equal to requested size.
> + *
> + * Returns: 1 if all bytes were read, 0 if end-of-file
> + *          occurs without data, or -1 on error
> + */
> +int qio_channel_read_peek_all(QIOChannel *ioc,
> +                              void* buf,
> +                              size_t nbytes,
> +                              Error **errp);
> +
>  /**
>   * qio_channel_set_blocking:
>   * @ioc: the channel object
> diff --git a/io/channel-socket.c b/io/channel-socket.c
> index b76dca9cc1..b99f5dfda6 100644
> --- a/io/channel-socket.c
> +++ b/io/channel-socket.c
> @@ -705,6 +705,32 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
>  }
>  #endif /* WIN32 */
>  
> +static ssize_t qio_channel_socket_read_peek(QIOChannel *ioc,
> +                                            void *buf,
> +                                            size_t nbytes,
> +                                            Error **errp)
> +{
> +    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
> +    ssize_t bytes = 0;
> +
> +retry:
> +    bytes = recv(sioc->fd, buf, nbytes, MSG_PEEK);
> +
> +    if (bytes < 0) {
> +        if (errno == EINTR) {
> +            goto retry;
> +        }
> +        if (errno == EAGAIN) {
> +            return QIO_CHANNEL_ERR_BLOCK;
> +        }
> +
> +        error_setg_errno(errp, errno,
> +                         "Unable to read from peek of socket");
> +        return -1;
> +    }
> +
> +    return bytes;
> +}
>  
>  #ifdef QEMU_MSG_ZEROCOPY
>  static int qio_channel_socket_flush(QIOChannel *ioc,
> @@ -902,6 +928,7 @@ static void qio_channel_socket_class_init(ObjectClass *klass,
>  
>      ioc_klass->io_writev = qio_channel_socket_writev;
>      ioc_klass->io_readv = qio_channel_socket_readv;
> +    ioc_klass->io_read_peek = qio_channel_socket_read_peek;
>      ioc_klass->io_set_blocking = qio_channel_socket_set_blocking;
>      ioc_klass->io_close = qio_channel_socket_close;
>      ioc_klass->io_shutdown = qio_channel_socket_shutdown;
> diff --git a/io/channel.c b/io/channel.c
> index 0640941ac5..a2d9b96f3f 100644
> --- a/io/channel.c
> +++ b/io/channel.c
> @@ -346,6 +346,45 @@ int qio_channel_write_all(QIOChannel *ioc,
>      return qio_channel_writev_all(ioc, &iov, 1, errp);
>  }
>  
> +int qio_channel_read_peek_all(QIOChannel *ioc,
> +                              void* buf,
> +                              size_t nbytes,
> +                              Error **errp)
> +{
> +   QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
> +   ssize_t bytes = 0;
> +
> +   if (!klass->io_read_peek) {
> +       error_setg(errp, "Channel does not support read peek");
> +       return -1;
> +   }

There's no io_read_peek for  QIOChannelTLS, so we'll hit this
error...


> diff --git a/migration/migration.c b/migration/migration.c
> index 739bb683f3..f4b6f278a9 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -733,31 +733,40 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
>  {
>      MigrationIncomingState *mis = migration_incoming_get_current();
>      Error *local_err = NULL;
> -    bool start_migration;
>      QEMUFile *f;
> +    bool default_channel = true;
> +    uint32_t channel_magic = 0;
> +    int ret = 0;
>  
> -    if (!mis->from_src_file) {
> -        /* The first connection (multifd may have multiple) */
> +    if (migrate_use_multifd() && !migration_in_postcopy()) {
> +        ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic,
> +                                        sizeof(channel_magic), &local_err);
> +
> +        if (ret != 1) {
> +            error_propagate(errp, local_err);
> +            return;
> +        }

....and thus this will fail for TLS channels AFAICT.


> +
> +        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
> +    } else {
> +        default_channel = !mis->from_src_file;
> +    }

With regards,
Daniel
manish.mishra Nov. 1, 2022, 3:40 p.m. UTC | #2
On 01/11/22 8:21 pm, Daniel P. Berrangé wrote:
> On Tue, Nov 01, 2022 at 02:30:29PM +0000, manish.mishra wrote:
>> Current logic assumes that channel connections on the destination side are
>> always established in the same order as the source and the first one will
>> always be the default channel followed by the multifid or post-copy
>> preemption channel. This may not be always true, as even if a channel has a
>> connection established on the source side it can be in the pending state on
>> the destination side and a newer connection can be established first.
>> Basically causing out of order mapping of channels on the destination side.
>> Currently, all channels except post-copy preempt send a magic number, this
>> patch uses that magic number to decide the type of channel. This logic is
>> applicable only for precopy(multifd) live migration, as mentioned, the
>> post-copy preempt channel does not send any magic number. Also, this patch
>> uses MSG_PEEK to check the magic number of channels so that current
>> data/control stream management remains un-effected.
>>
>> Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
>> ---
>>   include/io/channel.h     | 25 +++++++++++++++++++++++++
>>   io/channel-socket.c      | 27 +++++++++++++++++++++++++++
>>   io/channel.c             | 39 +++++++++++++++++++++++++++++++++++++++
>>   migration/migration.c    | 33 +++++++++++++++++++++------------
>>   migration/multifd.c      | 12 ++++--------
>>   migration/multifd.h      |  2 +-
>>   migration/postcopy-ram.c |  5 +----
>>   migration/postcopy-ram.h |  2 +-
>>   8 files changed, 119 insertions(+), 26 deletions(-)
>>
>> diff --git a/include/io/channel.h b/include/io/channel.h
>> index c680ee7480..74177aeeea 100644
>> --- a/include/io/channel.h
>> +++ b/include/io/channel.h
>> @@ -115,6 +115,10 @@ struct QIOChannelClass {
>>                           int **fds,
>>                           size_t *nfds,
>>                           Error **errp);
>> +    ssize_t (*io_read_peek)(QIOChannel *ioc,
>> +                            void *buf,
>> +                            size_t nbytes,
>> +                            Error **errp);
>>       int (*io_close)(QIOChannel *ioc,
>>                       Error **errp);
>>       GSource * (*io_create_watch)(QIOChannel *ioc,
>> @@ -475,6 +479,27 @@ int qio_channel_write_all(QIOChannel *ioc,
>>                             size_t buflen,
>>                             Error **errp);
>>   
>> +/**
>> + * qio_channel_read_peek_all:
>> + * @ioc: the channel object
>> + * @buf: the memory region to read in data
>> + * @nbytes: the number of bytes to read
>> + * @errp: pointer to a NULL-initialized error object
>> + *
>> + * Read given @nbytes data from peek of channel into
>> + * memory region @buf.
>> + *
>> + * The function will be blocked until read size is
>> + * equal to requested size.
>> + *
>> + * Returns: 1 if all bytes were read, 0 if end-of-file
>> + *          occurs without data, or -1 on error
>> + */
>> +int qio_channel_read_peek_all(QIOChannel *ioc,
>> +                              void* buf,
>> +                              size_t nbytes,
>> +                              Error **errp);
>> +
>>   /**
>>    * qio_channel_set_blocking:
>>    * @ioc: the channel object
>> diff --git a/io/channel-socket.c b/io/channel-socket.c
>> index b76dca9cc1..b99f5dfda6 100644
>> --- a/io/channel-socket.c
>> +++ b/io/channel-socket.c
>> @@ -705,6 +705,32 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
>>   }
>>   #endif /* WIN32 */
>>   
>> +static ssize_t qio_channel_socket_read_peek(QIOChannel *ioc,
>> +                                            void *buf,
>> +                                            size_t nbytes,
>> +                                            Error **errp)
>> +{
>> +    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
>> +    ssize_t bytes = 0;
>> +
>> +retry:
>> +    bytes = recv(sioc->fd, buf, nbytes, MSG_PEEK);
>> +
>> +    if (bytes < 0) {
>> +        if (errno == EINTR) {
>> +            goto retry;
>> +        }
>> +        if (errno == EAGAIN) {
>> +            return QIO_CHANNEL_ERR_BLOCK;
>> +        }
>> +
>> +        error_setg_errno(errp, errno,
>> +                         "Unable to read from peek of socket");
>> +        return -1;
>> +    }
>> +
>> +    return bytes;
>> +}
>>   
>>   #ifdef QEMU_MSG_ZEROCOPY
>>   static int qio_channel_socket_flush(QIOChannel *ioc,
>> @@ -902,6 +928,7 @@ static void qio_channel_socket_class_init(ObjectClass *klass,
>>   
>>       ioc_klass->io_writev = qio_channel_socket_writev;
>>       ioc_klass->io_readv = qio_channel_socket_readv;
>> +    ioc_klass->io_read_peek = qio_channel_socket_read_peek;
>>       ioc_klass->io_set_blocking = qio_channel_socket_set_blocking;
>>       ioc_klass->io_close = qio_channel_socket_close;
>>       ioc_klass->io_shutdown = qio_channel_socket_shutdown;
>> diff --git a/io/channel.c b/io/channel.c
>> index 0640941ac5..a2d9b96f3f 100644
>> --- a/io/channel.c
>> +++ b/io/channel.c
>> @@ -346,6 +346,45 @@ int qio_channel_write_all(QIOChannel *ioc,
>>       return qio_channel_writev_all(ioc, &iov, 1, errp);
>>   }
>>   
>> +int qio_channel_read_peek_all(QIOChannel *ioc,
>> +                              void* buf,
>> +                              size_t nbytes,
>> +                              Error **errp)
>> +{
>> +   QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
>> +   ssize_t bytes = 0;
>> +
>> +   if (!klass->io_read_peek) {
>> +       error_setg(errp, "Channel does not support read peek");
>> +       return -1;
>> +   }
> There's no io_read_peek for  QIOChannelTLS, so we'll hit this
> error...
>
>
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 739bb683f3..f4b6f278a9 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -733,31 +733,40 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
>>   {
>>       MigrationIncomingState *mis = migration_incoming_get_current();
>>       Error *local_err = NULL;
>> -    bool start_migration;
>>       QEMUFile *f;
>> +    bool default_channel = true;
>> +    uint32_t channel_magic = 0;
>> +    int ret = 0;
>>   
>> -    if (!mis->from_src_file) {
>> -        /* The first connection (multifd may have multiple) */
>> +    if (migrate_use_multifd() && !migration_in_postcopy()) {
>> +        ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic,
>> +                                        sizeof(channel_magic), &local_err);
>> +
>> +        if (ret != 1) {
>> +            error_propagate(errp, local_err);
>> +            return;
>> +        }
> ....and thus this will fail for TLS channels AFAICT.

Yes, thanks for the quick review, Daniel. You pointed this out earlier too, sorry I missed it; I will add another !migrate_use_tls() check in V2.

thanks

Manish Mishra

>
>> +
>> +        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
>> +    } else {
>> +        default_channel = !mis->from_src_file;
>> +    }
> With regards,
> Daniel
Daniel P. Berrangé Nov. 1, 2022, 3:45 p.m. UTC | #3
On Tue, Nov 01, 2022 at 09:10:14PM +0530, manish.mishra wrote:
> 
> On 01/11/22 8:21 pm, Daniel P. Berrangé wrote:
> > On Tue, Nov 01, 2022 at 02:30:29PM +0000, manish.mishra wrote:
> > > Current logic assumes that channel connections on the destination side are
> > > always established in the same order as the source and the first one will
> > > always be the default channel followed by the multifid or post-copy
> > > preemption channel. This may not be always true, as even if a channel has a
> > > connection established on the source side it can be in the pending state on
> > > the destination side and a newer connection can be established first.
> > > Basically causing out of order mapping of channels on the destination side.
> > > Currently, all channels except post-copy preempt send a magic number, this
> > > patch uses that magic number to decide the type of channel. This logic is
> > > applicable only for precopy(multifd) live migration, as mentioned, the
> > > post-copy preempt channel does not send any magic number. Also, this patch
> > > uses MSG_PEEK to check the magic number of channels so that current
> > > data/control stream management remains un-effected.
> > > 
> > > Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
> > > ---
> > >   include/io/channel.h     | 25 +++++++++++++++++++++++++
> > >   io/channel-socket.c      | 27 +++++++++++++++++++++++++++
> > >   io/channel.c             | 39 +++++++++++++++++++++++++++++++++++++++
> > >   migration/migration.c    | 33 +++++++++++++++++++++------------
> > >   migration/multifd.c      | 12 ++++--------
> > >   migration/multifd.h      |  2 +-
> > >   migration/postcopy-ram.c |  5 +----
> > >   migration/postcopy-ram.h |  2 +-
> > >   8 files changed, 119 insertions(+), 26 deletions(-)
> > > 
> > > diff --git a/include/io/channel.h b/include/io/channel.h
> > > index c680ee7480..74177aeeea 100644
> > > --- a/include/io/channel.h
> > > +++ b/include/io/channel.h
> > > @@ -115,6 +115,10 @@ struct QIOChannelClass {
> > >                           int **fds,
> > >                           size_t *nfds,
> > >                           Error **errp);
> > > +    ssize_t (*io_read_peek)(QIOChannel *ioc,
> > > +                            void *buf,
> > > +                            size_t nbytes,
> > > +                            Error **errp);
> > >       int (*io_close)(QIOChannel *ioc,
> > >                       Error **errp);
> > >       GSource * (*io_create_watch)(QIOChannel *ioc,
> > > @@ -475,6 +479,27 @@ int qio_channel_write_all(QIOChannel *ioc,
> > >                             size_t buflen,
> > >                             Error **errp);
> > > +/**
> > > + * qio_channel_read_peek_all:
> > > + * @ioc: the channel object
> > > + * @buf: the memory region to read in data
> > > + * @nbytes: the number of bytes to read
> > > + * @errp: pointer to a NULL-initialized error object
> > > + *
> > > + * Read given @nbytes data from peek of channel into
> > > + * memory region @buf.
> > > + *
> > > + * The function will be blocked until read size is
> > > + * equal to requested size.
> > > + *
> > > + * Returns: 1 if all bytes were read, 0 if end-of-file
> > > + *          occurs without data, or -1 on error
> > > + */
> > > +int qio_channel_read_peek_all(QIOChannel *ioc,
> > > +                              void* buf,
> > > +                              size_t nbytes,
> > > +                              Error **errp);
> > > +
> > >   /**
> > >    * qio_channel_set_blocking:
> > >    * @ioc: the channel object
> > > diff --git a/io/channel-socket.c b/io/channel-socket.c
> > > index b76dca9cc1..b99f5dfda6 100644
> > > --- a/io/channel-socket.c
> > > +++ b/io/channel-socket.c
> > > @@ -705,6 +705,32 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
> > >   }
> > >   #endif /* WIN32 */
> > > +static ssize_t qio_channel_socket_read_peek(QIOChannel *ioc,
> > > +                                            void *buf,
> > > +                                            size_t nbytes,
> > > +                                            Error **errp)
> > > +{
> > > +    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
> > > +    ssize_t bytes = 0;
> > > +
> > > +retry:
> > > +    bytes = recv(sioc->fd, buf, nbytes, MSG_PEEK);
> > > +
> > > +    if (bytes < 0) {
> > > +        if (errno == EINTR) {
> > > +            goto retry;
> > > +        }
> > > +        if (errno == EAGAIN) {
> > > +            return QIO_CHANNEL_ERR_BLOCK;
> > > +        }
> > > +
> > > +        error_setg_errno(errp, errno,
> > > +                         "Unable to read from peek of socket");
> > > +        return -1;
> > > +    }
> > > +
> > > +    return bytes;
> > > +}
> > >   #ifdef QEMU_MSG_ZEROCOPY
> > >   static int qio_channel_socket_flush(QIOChannel *ioc,
> > > @@ -902,6 +928,7 @@ static void qio_channel_socket_class_init(ObjectClass *klass,
> > >       ioc_klass->io_writev = qio_channel_socket_writev;
> > >       ioc_klass->io_readv = qio_channel_socket_readv;
> > > +    ioc_klass->io_read_peek = qio_channel_socket_read_peek;
> > >       ioc_klass->io_set_blocking = qio_channel_socket_set_blocking;
> > >       ioc_klass->io_close = qio_channel_socket_close;
> > >       ioc_klass->io_shutdown = qio_channel_socket_shutdown;
> > > diff --git a/io/channel.c b/io/channel.c
> > > index 0640941ac5..a2d9b96f3f 100644
> > > --- a/io/channel.c
> > > +++ b/io/channel.c
> > > @@ -346,6 +346,45 @@ int qio_channel_write_all(QIOChannel *ioc,
> > >       return qio_channel_writev_all(ioc, &iov, 1, errp);
> > >   }
> > > +int qio_channel_read_peek_all(QIOChannel *ioc,
> > > +                              void* buf,
> > > +                              size_t nbytes,
> > > +                              Error **errp)
> > > +{
> > > +   QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
> > > +   ssize_t bytes = 0;
> > > +
> > > +   if (!klass->io_read_peek) {
> > > +       error_setg(errp, "Channel does not support read peek");
> > > +       return -1;
> > > +   }
> > There's no io_read_peek for  QIOChannelTLS, so we'll hit this
> > error...
> > 
> > 
> > > diff --git a/migration/migration.c b/migration/migration.c
> > > index 739bb683f3..f4b6f278a9 100644
> > > --- a/migration/migration.c
> > > +++ b/migration/migration.c
> > > @@ -733,31 +733,40 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
> > >   {
> > >       MigrationIncomingState *mis = migration_incoming_get_current();
> > >       Error *local_err = NULL;
> > > -    bool start_migration;
> > >       QEMUFile *f;
> > > +    bool default_channel = true;
> > > +    uint32_t channel_magic = 0;
> > > +    int ret = 0;
> > > -    if (!mis->from_src_file) {
> > > -        /* The first connection (multifd may have multiple) */
> > > +    if (migrate_use_multifd() && !migration_in_postcopy()) {
> > > +        ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic,
> > > +                                        sizeof(channel_magic), &local_err);
> > > +
> > > +        if (ret != 1) {
> > > +            error_propagate(errp, local_err);
> > > +            return;
> > > +        }
> > ....and thus this will fail for TLS channels AFAICT.
> 
> Yes, thanks for quick review Daniel. You pointed this earlier too, sorry missed it, will put another check !migrate_use_tls() in V2.

But we need this problem fixed with TLS too, so just excluding it
isn't right. IMHO we need to modify the migration code so we can
read the magic earlier, instead of peeking.


With regards,
Daniel
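
For illustration only, and not something proposed in this thread: one
possible shape of "read the magic earlier, instead of peeking" would be to
consume the four bytes through the QIOChannel layer itself, e.g. with the
existing qio_channel_read_all() helper, which also works through a
QIOChannelTLS wrapper. This sketch of migration_ioc_process_incoming()
assumes the later consumers of the channel could be adjusted so the
already-read magic is not expected again:

    uint32_t channel_magic = 0;

    if (migrate_use_multifd() && !migration_in_postcopy()) {
        /*
         * Consume the magic instead of peeking: unlike recv(MSG_PEEK),
         * this goes through the QIOChannel layer, so TLS channels work
         * too.  The code that later attaches a QEMUFile or a multifd
         * recv thread to this ioc would have to be told that the magic
         * has already been read.
         */
        if (qio_channel_read_all(ioc, (char *)&channel_magic,
                                 sizeof(channel_magic), &local_err)) {
            error_propagate(errp, local_err);
            return;
        }
        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
    } else {
        default_channel = !mis->from_src_file;
    }

The rest of the thread discusses how blocking at this point interacts with
the source side only flushing the default channel after multifd setup.
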
manish.mishra Nov. 1, 2022, 3:49 p.m. UTC | #4
On 01/11/22 9:15 pm, Daniel P. Berrangé wrote:
> On Tue, Nov 01, 2022 at 09:10:14PM +0530, manish.mishra wrote:
>> On 01/11/22 8:21 pm, Daniel P. Berrangé wrote:
>>> On Tue, Nov 01, 2022 at 02:30:29PM +0000, manish.mishra wrote:
>>>> Current logic assumes that channel connections on the destination side are
>>>> always established in the same order as the source and the first one will
>>>> always be the default channel followed by the multifid or post-copy
>>>> preemption channel. This may not be always true, as even if a channel has a
>>>> connection established on the source side it can be in the pending state on
>>>> the destination side and a newer connection can be established first.
>>>> Basically causing out of order mapping of channels on the destination side.
>>>> Currently, all channels except post-copy preempt send a magic number, this
>>>> patch uses that magic number to decide the type of channel. This logic is
>>>> applicable only for precopy(multifd) live migration, as mentioned, the
>>>> post-copy preempt channel does not send any magic number. Also, this patch
>>>> uses MSG_PEEK to check the magic number of channels so that current
>>>> data/control stream management remains un-effected.
>>>>
>>>> Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
>>>> ---
>>>>    include/io/channel.h     | 25 +++++++++++++++++++++++++
>>>>    io/channel-socket.c      | 27 +++++++++++++++++++++++++++
>>>>    io/channel.c             | 39 +++++++++++++++++++++++++++++++++++++++
>>>>    migration/migration.c    | 33 +++++++++++++++++++++------------
>>>>    migration/multifd.c      | 12 ++++--------
>>>>    migration/multifd.h      |  2 +-
>>>>    migration/postcopy-ram.c |  5 +----
>>>>    migration/postcopy-ram.h |  2 +-
>>>>    8 files changed, 119 insertions(+), 26 deletions(-)
>>>>
>>>> diff --git a/include/io/channel.h b/include/io/channel.h
>>>> index c680ee7480..74177aeeea 100644
>>>> --- a/include/io/channel.h
>>>> +++ b/include/io/channel.h
>>>> @@ -115,6 +115,10 @@ struct QIOChannelClass {
>>>>                            int **fds,
>>>>                            size_t *nfds,
>>>>                            Error **errp);
>>>> +    ssize_t (*io_read_peek)(QIOChannel *ioc,
>>>> +                            void *buf,
>>>> +                            size_t nbytes,
>>>> +                            Error **errp);
>>>>        int (*io_close)(QIOChannel *ioc,
>>>>                        Error **errp);
>>>>        GSource * (*io_create_watch)(QIOChannel *ioc,
>>>> @@ -475,6 +479,27 @@ int qio_channel_write_all(QIOChannel *ioc,
>>>>                              size_t buflen,
>>>>                              Error **errp);
>>>> +/**
>>>> + * qio_channel_read_peek_all:
>>>> + * @ioc: the channel object
>>>> + * @buf: the memory region to read in data
>>>> + * @nbytes: the number of bytes to read
>>>> + * @errp: pointer to a NULL-initialized error object
>>>> + *
>>>> + * Read given @nbytes data from peek of channel into
>>>> + * memory region @buf.
>>>> + *
>>>> + * The function will be blocked until read size is
>>>> + * equal to requested size.
>>>> + *
>>>> + * Returns: 1 if all bytes were read, 0 if end-of-file
>>>> + *          occurs without data, or -1 on error
>>>> + */
>>>> +int qio_channel_read_peek_all(QIOChannel *ioc,
>>>> +                              void* buf,
>>>> +                              size_t nbytes,
>>>> +                              Error **errp);
>>>> +
>>>>    /**
>>>>     * qio_channel_set_blocking:
>>>>     * @ioc: the channel object
>>>> diff --git a/io/channel-socket.c b/io/channel-socket.c
>>>> index b76dca9cc1..b99f5dfda6 100644
>>>> --- a/io/channel-socket.c
>>>> +++ b/io/channel-socket.c
>>>> @@ -705,6 +705,32 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
>>>>    }
>>>>    #endif /* WIN32 */
>>>> +static ssize_t qio_channel_socket_read_peek(QIOChannel *ioc,
>>>> +                                            void *buf,
>>>> +                                            size_t nbytes,
>>>> +                                            Error **errp)
>>>> +{
>>>> +    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
>>>> +    ssize_t bytes = 0;
>>>> +
>>>> +retry:
>>>> +    bytes = recv(sioc->fd, buf, nbytes, MSG_PEEK);
>>>> +
>>>> +    if (bytes < 0) {
>>>> +        if (errno == EINTR) {
>>>> +            goto retry;
>>>> +        }
>>>> +        if (errno == EAGAIN) {
>>>> +            return QIO_CHANNEL_ERR_BLOCK;
>>>> +        }
>>>> +
>>>> +        error_setg_errno(errp, errno,
>>>> +                         "Unable to read from peek of socket");
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    return bytes;
>>>> +}
>>>>    #ifdef QEMU_MSG_ZEROCOPY
>>>>    static int qio_channel_socket_flush(QIOChannel *ioc,
>>>> @@ -902,6 +928,7 @@ static void qio_channel_socket_class_init(ObjectClass *klass,
>>>>        ioc_klass->io_writev = qio_channel_socket_writev;
>>>>        ioc_klass->io_readv = qio_channel_socket_readv;
>>>> +    ioc_klass->io_read_peek = qio_channel_socket_read_peek;
>>>>        ioc_klass->io_set_blocking = qio_channel_socket_set_blocking;
>>>>        ioc_klass->io_close = qio_channel_socket_close;
>>>>        ioc_klass->io_shutdown = qio_channel_socket_shutdown;
>>>> diff --git a/io/channel.c b/io/channel.c
>>>> index 0640941ac5..a2d9b96f3f 100644
>>>> --- a/io/channel.c
>>>> +++ b/io/channel.c
>>>> @@ -346,6 +346,45 @@ int qio_channel_write_all(QIOChannel *ioc,
>>>>        return qio_channel_writev_all(ioc, &iov, 1, errp);
>>>>    }
>>>> +int qio_channel_read_peek_all(QIOChannel *ioc,
>>>> +                              void* buf,
>>>> +                              size_t nbytes,
>>>> +                              Error **errp)
>>>> +{
>>>> +   QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
>>>> +   ssize_t bytes = 0;
>>>> +
>>>> +   if (!klass->io_read_peek) {
>>>> +       error_setg(errp, "Channel does not support read peek");
>>>> +       return -1;
>>>> +   }
>>> There's no io_read_peek for  QIOChannelTLS, so we'll hit this
>>> error...
>>>
>>>
>>>> diff --git a/migration/migration.c b/migration/migration.c
>>>> index 739bb683f3..f4b6f278a9 100644
>>>> --- a/migration/migration.c
>>>> +++ b/migration/migration.c
>>>> @@ -733,31 +733,40 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
>>>>    {
>>>>        MigrationIncomingState *mis = migration_incoming_get_current();
>>>>        Error *local_err = NULL;
>>>> -    bool start_migration;
>>>>        QEMUFile *f;
>>>> +    bool default_channel = true;
>>>> +    uint32_t channel_magic = 0;
>>>> +    int ret = 0;
>>>> -    if (!mis->from_src_file) {
>>>> -        /* The first connection (multifd may have multiple) */
>>>> +    if (migrate_use_multifd() && !migration_in_postcopy()) {
>>>> +        ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic,
>>>> +                                        sizeof(channel_magic), &local_err);
>>>> +
>>>> +        if (ret != 1) {
>>>> +            error_propagate(errp, local_err);
>>>> +            return;
>>>> +        }
>>> ....and thus this will fail for TLS channels AFAICT.
>> Yes, thanks for quick review Daniel. You pointed this earlier too, sorry missed it, will put another check !migrate_use_tls() in V2.
> But we need this problem fixed with TLS too, so just excluding it
> isn't right. IMHO we need to modify the migration code so we can
> read the magic earlier, instead of peeking.
>
>
> With regards,
> Daniel
Oh ok, got it. Sure, I will make that change instead of using message peek.
manish.mishra Nov. 3, 2022, 9:20 a.m. UTC | #5
On 01/11/22 9:15 pm, Daniel P. Berrangé wrote:
> On Tue, Nov 01, 2022 at 09:10:14PM +0530, manish.mishra wrote:
>> On 01/11/22 8:21 pm, Daniel P. Berrangé wrote:
>>> On Tue, Nov 01, 2022 at 02:30:29PM +0000, manish.mishra wrote:
>>>> Current logic assumes that channel connections on the destination side are
>>>> always established in the same order as the source and the first one will
>>>> always be the default channel followed by the multifid or post-copy
>>>> preemption channel. This may not be always true, as even if a channel has a
>>>> connection established on the source side it can be in the pending state on
>>>> the destination side and a newer connection can be established first.
>>>> Basically causing out of order mapping of channels on the destination side.
>>>> Currently, all channels except post-copy preempt send a magic number, this
>>>> patch uses that magic number to decide the type of channel. This logic is
>>>> applicable only for precopy(multifd) live migration, as mentioned, the
>>>> post-copy preempt channel does not send any magic number. Also, this patch
>>>> uses MSG_PEEK to check the magic number of channels so that current
>>>> data/control stream management remains un-effected.
>>>>
>>>> Signed-off-by: manish.mishra <manish.mishra@nutanix.com>
>>>> ---
>>>>    include/io/channel.h     | 25 +++++++++++++++++++++++++
>>>>    io/channel-socket.c      | 27 +++++++++++++++++++++++++++
>>>>    io/channel.c             | 39 +++++++++++++++++++++++++++++++++++++++
>>>>    migration/migration.c    | 33 +++++++++++++++++++++------------
>>>>    migration/multifd.c      | 12 ++++--------
>>>>    migration/multifd.h      |  2 +-
>>>>    migration/postcopy-ram.c |  5 +----
>>>>    migration/postcopy-ram.h |  2 +-
>>>>    8 files changed, 119 insertions(+), 26 deletions(-)
>>>>
>>>> diff --git a/include/io/channel.h b/include/io/channel.h
>>>> index c680ee7480..74177aeeea 100644
>>>> --- a/include/io/channel.h
>>>> +++ b/include/io/channel.h
>>>> @@ -115,6 +115,10 @@ struct QIOChannelClass {
>>>>                            int **fds,
>>>>                            size_t *nfds,
>>>>                            Error **errp);
>>>> +    ssize_t (*io_read_peek)(QIOChannel *ioc,
>>>> +                            void *buf,
>>>> +                            size_t nbytes,
>>>> +                            Error **errp);
>>>>        int (*io_close)(QIOChannel *ioc,
>>>>                        Error **errp);
>>>>        GSource * (*io_create_watch)(QIOChannel *ioc,
>>>> @@ -475,6 +479,27 @@ int qio_channel_write_all(QIOChannel *ioc,
>>>>                              size_t buflen,
>>>>                              Error **errp);
>>>> +/**
>>>> + * qio_channel_read_peek_all:
>>>> + * @ioc: the channel object
>>>> + * @buf: the memory region to read in data
>>>> + * @nbytes: the number of bytes to read
>>>> + * @errp: pointer to a NULL-initialized error object
>>>> + *
>>>> + * Read given @nbytes data from peek of channel into
>>>> + * memory region @buf.
>>>> + *
>>>> + * The function will be blocked until read size is
>>>> + * equal to requested size.
>>>> + *
>>>> + * Returns: 1 if all bytes were read, 0 if end-of-file
>>>> + *          occurs without data, or -1 on error
>>>> + */
>>>> +int qio_channel_read_peek_all(QIOChannel *ioc,
>>>> +                              void* buf,
>>>> +                              size_t nbytes,
>>>> +                              Error **errp);
>>>> +
>>>>    /**
>>>>     * qio_channel_set_blocking:
>>>>     * @ioc: the channel object
>>>> diff --git a/io/channel-socket.c b/io/channel-socket.c
>>>> index b76dca9cc1..b99f5dfda6 100644
>>>> --- a/io/channel-socket.c
>>>> +++ b/io/channel-socket.c
>>>> @@ -705,6 +705,32 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
>>>>    }
>>>>    #endif /* WIN32 */
>>>> +static ssize_t qio_channel_socket_read_peek(QIOChannel *ioc,
>>>> +                                            void *buf,
>>>> +                                            size_t nbytes,
>>>> +                                            Error **errp)
>>>> +{
>>>> +    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
>>>> +    ssize_t bytes = 0;
>>>> +
>>>> +retry:
>>>> +    bytes = recv(sioc->fd, buf, nbytes, MSG_PEEK);
>>>> +
>>>> +    if (bytes < 0) {
>>>> +        if (errno == EINTR) {
>>>> +            goto retry;
>>>> +        }
>>>> +        if (errno == EAGAIN) {
>>>> +            return QIO_CHANNEL_ERR_BLOCK;
>>>> +        }
>>>> +
>>>> +        error_setg_errno(errp, errno,
>>>> +                         "Unable to read from peek of socket");
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    return bytes;
>>>> +}
>>>>    #ifdef QEMU_MSG_ZEROCOPY
>>>>    static int qio_channel_socket_flush(QIOChannel *ioc,
>>>> @@ -902,6 +928,7 @@ static void qio_channel_socket_class_init(ObjectClass *klass,
>>>>        ioc_klass->io_writev = qio_channel_socket_writev;
>>>>        ioc_klass->io_readv = qio_channel_socket_readv;
>>>> +    ioc_klass->io_read_peek = qio_channel_socket_read_peek;
>>>>        ioc_klass->io_set_blocking = qio_channel_socket_set_blocking;
>>>>        ioc_klass->io_close = qio_channel_socket_close;
>>>>        ioc_klass->io_shutdown = qio_channel_socket_shutdown;
>>>> diff --git a/io/channel.c b/io/channel.c
>>>> index 0640941ac5..a2d9b96f3f 100644
>>>> --- a/io/channel.c
>>>> +++ b/io/channel.c
>>>> @@ -346,6 +346,45 @@ int qio_channel_write_all(QIOChannel *ioc,
>>>>        return qio_channel_writev_all(ioc, &iov, 1, errp);
>>>>    }
>>>> +int qio_channel_read_peek_all(QIOChannel *ioc,
>>>> +                              void* buf,
>>>> +                              size_t nbytes,
>>>> +                              Error **errp)
>>>> +{
>>>> +   QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
>>>> +   ssize_t bytes = 0;
>>>> +
>>>> +   if (!klass->io_read_peek) {
>>>> +       error_setg(errp, "Channel does not support read peek");
>>>> +       return -1;
>>>> +   }
>>> There's no io_read_peek for  QIOChannelTLS, so we'll hit this
>>> error...
>>>
>>>
>>>> diff --git a/migration/migration.c b/migration/migration.c
>>>> index 739bb683f3..f4b6f278a9 100644
>>>> --- a/migration/migration.c
>>>> +++ b/migration/migration.c
>>>> @@ -733,31 +733,40 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
>>>>    {
>>>>        MigrationIncomingState *mis = migration_incoming_get_current();
>>>>        Error *local_err = NULL;
>>>> -    bool start_migration;
>>>>        QEMUFile *f;
>>>> +    bool default_channel = true;
>>>> +    uint32_t channel_magic = 0;
>>>> +    int ret = 0;
>>>> -    if (!mis->from_src_file) {
>>>> -        /* The first connection (multifd may have multiple) */
>>>> +    if (migrate_use_multifd() && !migration_in_postcopy()) {
>>>> +        ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic,
>>>> +                                        sizeof(channel_magic), &local_err);
>>>> +
>>>> +        if (ret != 1) {
>>>> +            error_propagate(errp, local_err);
>>>> +            return;
>>>> +        }
>>> ....and thus this will fail for TLS channels AFAICT.
>> Yes, thanks for quick review Daniel. You pointed this earlier too, sorry missed it, will put another check !migrate_use_tls() in V2.
> But we need this problem fixed with TLS too, so just excluding it
> isn't right. IMHO we need to modify the migration code so we can
> read the magic earlier, instead of peeking.
>
>
> With regards,
> Daniel

Hi Daniel, I was trying TLS migrations. What I see is that TLS session
creation does a handshake, so if we read ahead in ioc_process_incoming for
the default channel we block, because the client sends the magic only
after the multifd channels are set up, which also requires a TLS
handshake. So basically on the destination side we cannot receive the
default channel magic until the multifd TLS handshake is done, which
creates a deadlock. I cannot think of any way to rearrange it. Also, given
that TLS does this handshake, is this bug even possible in the TLS case?
If not, would just a !migrate_use_tls() check be fine? I do not have much
understanding of TLS; if you think this bug is possible even with TLS
handshakes, I will look for another way.

Thanks

Manish Mishra
Daniel P. Berrangé Nov. 3, 2022, 9:29 a.m. UTC | #6
On Thu, Nov 03, 2022 at 02:50:25PM +0530, manish.mishra wrote:
> 
> On 01/11/22 9:15 pm, Daniel P. Berrangé wrote:
> > On Tue, Nov 01, 2022 at 09:10:14PM +0530, manish.mishra wrote:
> > > On 01/11/22 8:21 pm, Daniel P. Berrangé wrote:
> > > > On Tue, Nov 01, 2022 at 02:30:29PM +0000, manish.mishra wrote:
> > > > > diff --git a/migration/migration.c b/migration/migration.c
> > > > > index 739bb683f3..f4b6f278a9 100644
> > > > > --- a/migration/migration.c
> > > > > +++ b/migration/migration.c
> > > > > @@ -733,31 +733,40 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
> > > > >    {
> > > > >        MigrationIncomingState *mis = migration_incoming_get_current();
> > > > >        Error *local_err = NULL;
> > > > > -    bool start_migration;
> > > > >        QEMUFile *f;
> > > > > +    bool default_channel = true;
> > > > > +    uint32_t channel_magic = 0;
> > > > > +    int ret = 0;
> > > > > -    if (!mis->from_src_file) {
> > > > > -        /* The first connection (multifd may have multiple) */
> > > > > +    if (migrate_use_multifd() && !migration_in_postcopy()) {
> > > > > +        ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic,
> > > > > +                                        sizeof(channel_magic), &local_err);
> > > > > +
> > > > > +        if (ret != 1) {
> > > > > +            error_propagate(errp, local_err);
> > > > > +            return;
> > > > > +        }
> > > > ....and thus this will fail for TLS channels AFAICT.
> > > Yes, thanks for quick review Daniel. You pointed this earlier too, sorry missed it, will put another check !migrate_use_tls() in V2.
> > But we need this problem fixed with TLS too, so just excluding it
> > isn't right. IMHO we need to modify the migration code so we can
> > read the magic earlier, instead of peeking.
> > 
> > 
> > With regards,
> > Daniel
> 
> Hi Daniel, I was trying tls migrations. What i see is that tls session
> creation does handshake. So if we read ahead in ioc_process_incoming
> for default channel. Because client sends magic only after multiFD
> channels are setup, which too requires tls handshake.

By the time we get to migrate_ioc_process_incoming, the TLS handshake
has already been performed.

migration_channel_process_incoming
    -> migration_ioc_process_incoming

vs

migration_channel_process_incoming
    -> migration_tls_channel_process_incoming
        -> migration_tls_incoming_handshake
	     -> migration_channel_process_incoming
	         ->  migration_ioc_process_incoming


With regards,
Daniel
manish.mishra Nov. 3, 2022, 4:34 p.m. UTC | #7
On 03/11/22 2:59 pm, Daniel P. Berrangé wrote:
> On Thu, Nov 03, 2022 at 02:50:25PM +0530, manish.mishra wrote:
>> On 01/11/22 9:15 pm, Daniel P. Berrangé wrote:
>>> On Tue, Nov 01, 2022 at 09:10:14PM +0530, manish.mishra wrote:
>>>> On 01/11/22 8:21 pm, Daniel P. Berrangé wrote:
>>>>> On Tue, Nov 01, 2022 at 02:30:29PM +0000, manish.mishra wrote:
>>>>>> diff --git a/migration/migration.c b/migration/migration.c
>>>>>> index 739bb683f3..f4b6f278a9 100644
>>>>>> --- a/migration/migration.c
>>>>>> +++ b/migration/migration.c
>>>>>> @@ -733,31 +733,40 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
>>>>>>     {
>>>>>>         MigrationIncomingState *mis = migration_incoming_get_current();
>>>>>>         Error *local_err = NULL;
>>>>>> -    bool start_migration;
>>>>>>         QEMUFile *f;
>>>>>> +    bool default_channel = true;
>>>>>> +    uint32_t channel_magic = 0;
>>>>>> +    int ret = 0;
>>>>>> -    if (!mis->from_src_file) {
>>>>>> -        /* The first connection (multifd may have multiple) */
>>>>>> +    if (migrate_use_multifd() && !migration_in_postcopy()) {
>>>>>> +        ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic,
>>>>>> +                                        sizeof(channel_magic), &local_err);
>>>>>> +
>>>>>> +        if (ret != 1) {
>>>>>> +            error_propagate(errp, local_err);
>>>>>> +            return;
>>>>>> +        }
>>>>> ....and thus this will fail for TLS channels AFAICT.
>>>> Yes, thanks for quick review Daniel. You pointed this earlier too, sorry missed it, will put another check !migrate_use_tls() in V2.
>>> But we need this problem fixed with TLS too, so just excluding it
>>> isn't right. IMHO we need to modify the migration code so we can
>>> read the magic earlier, instead of peeking.
>>>
>>>
>>> With regards,
>>> Daniel
>> Hi Daniel, I was trying tls migrations. What i see is that tls session
>> creation does handshake. So if we read ahead in ioc_process_incoming
>> for default channel. Because client sends magic only after multiFD
>> channels are setup, which too requires tls handshake.
> By the time we get to migrate_ioc_process_incoming, the TLS handshake
> has already been performed.
>
> migration_channel_process_incoming
>      -> migration_ioc_process_incoming
>
> vs
>
> migration_channel_process_incoming
>      -> migration_tls_channel_process_incoming
>          -> migration_tls_incoming_handshake
> 	     -> migration_channel_process_incoming
> 	         ->  migration_ioc_process_incoming
>

Yes, sorry, I thought we block on the source side until the handshake is
done, but that is not true. I then checked why that deadlock is happening.
This is where the deadlock occurs:

static int ram_save_setup(QEMUFile *f, void *opaque)
{
    /* ... */
     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
     ram_control_after_iterate(f, RAM_CONTROL_SETUP);

     ret =  multifd_send_sync_main(f);
     if (ret < 0) {
         return ret;
     }

     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
     qemu_fflush(f);

     return 0;
}

Now if we block in migration_ioc_process_incoming to read the magic value
from the default channel, that value is only actually sent by the source
when the qemu_fflush above runs. Before that qemu_fflush, the source waits
in multifd_send_sync_main, which requires the TLS handshake to be done for
the multifd channels, because it blocks on sem_sync, which is posted by
multifd_send_thread, which only runs after the handshake. But on the
destination side we are blocked in migration_ioc_process_incoming()
waiting to read something from the default channel, so the handshake for
the multifd channels cannot happen. To me this looks unresolvable however
we try to manipulate the stream, unless we make some changes on the source
side.

> With regards,
> Daniel
Daniel P. Berrangé Nov. 3, 2022, 5:27 p.m. UTC | #8
On Thu, Nov 03, 2022 at 10:04:54PM +0530, manish.mishra wrote:
> 
> On 03/11/22 2:59 pm, Daniel P. Berrangé wrote:
> > On Thu, Nov 03, 2022 at 02:50:25PM +0530, manish.mishra wrote:
> > > On 01/11/22 9:15 pm, Daniel P. Berrangé wrote:
> > > > On Tue, Nov 01, 2022 at 09:10:14PM +0530, manish.mishra wrote:
> > > > > On 01/11/22 8:21 pm, Daniel P. Berrangé wrote:
> > > > > > On Tue, Nov 01, 2022 at 02:30:29PM +0000, manish.mishra wrote:
> > > > > > > diff --git a/migration/migration.c b/migration/migration.c
> > > > > > > index 739bb683f3..f4b6f278a9 100644
> > > > > > > --- a/migration/migration.c
> > > > > > > +++ b/migration/migration.c
> > > > > > > @@ -733,31 +733,40 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
> > > > > > >     {
> > > > > > >         MigrationIncomingState *mis = migration_incoming_get_current();
> > > > > > >         Error *local_err = NULL;
> > > > > > > -    bool start_migration;
> > > > > > >         QEMUFile *f;
> > > > > > > +    bool default_channel = true;
> > > > > > > +    uint32_t channel_magic = 0;
> > > > > > > +    int ret = 0;
> > > > > > > -    if (!mis->from_src_file) {
> > > > > > > -        /* The first connection (multifd may have multiple) */
> > > > > > > +    if (migrate_use_multifd() && !migration_in_postcopy()) {
> > > > > > > +        ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic,
> > > > > > > +                                        sizeof(channel_magic), &local_err);
> > > > > > > +
> > > > > > > +        if (ret != 1) {
> > > > > > > +            error_propagate(errp, local_err);
> > > > > > > +            return;
> > > > > > > +        }
> > > > > > ....and thus this will fail for TLS channels AFAICT.
> > > > > Yes, thanks for quick review Daniel. You pointed this earlier too, sorry missed it, will put another check !migrate_use_tls() in V2.
> > > > But we need this problem fixed with TLS too, so just excluding it
> > > > isn't right. IMHO we need to modify the migration code so we can
> > > > read the magic earlier, instead of peeking.
> > > > 
> > > > 
> > > > With regards,
> > > > Daniel
> > > Hi Daniel, I was trying tls migrations. What i see is that tls session
> > > creation does handshake. So if we read ahead in ioc_process_incoming
> > > for default channel. Because client sends magic only after multiFD
> > > channels are setup, which too requires tls handshake.
> > By the time we get to migrate_ioc_process_incoming, the TLS handshake
> > has already been performed.
> > 
> > migration_channel_process_incoming
> >      -> migration_ioc_process_incoming
> > 
> > vs
> > 
> > migration_channel_process_incoming
> >      -> migration_tls_channel_process_incoming
> >          -> migration_tls_incoming_handshake
> > 	     -> migration_channel_process_incoming
> > 	         ->  migration_ioc_process_incoming
> > 
> 
> Yes sorry i thought we block on source side till handshake is done but that is not true. I checked then why that deadlock is happening. So this where the dealock is happening.
> 
> static int ram_save_setup(QEMUFile *f, void *opaque) {
> +
> +
>     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
>     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
> 
>     ret =  multifd_send_sync_main(f);
>     if (ret < 0) {
>         return ret;
>     }
> 
>     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>     qemu_fflush(f);
> 
>     return 0;
> }
> 
> Now if we block in migration_ioc_process_incoming for reading magic
> value from channel, which is actually sent by client when this
> qemu_fflush is done. Before this qemu_fflush we wait for
> multifd_send_sync_main which actually requires that tls handshake is
> done for multiFD channels as it blocks on sem_sync which posted by
> multifd_send_thread which is called after handshake||. But then on
> destination side we are blocked in migration_ioc_process_incoming()
> waiting to read something from default channel hence handshake for
> multiFD channels can not happen. This to me looks unresolvable
> whatever way we try to manipulate stream until we do some changes
> on source side.

The TLS handshake is already complete when migration_ioc_process_incoming
is blocking on read.

Regardless of which channel we're talking about, thue TLS handshake is
always performed & finished before we try to either send or recv any
data.

With regards,
Daniel
manish.mishra Nov. 3, 2022, 5:36 p.m. UTC | #9
On 03/11/22 10:57 pm, Daniel P. Berrangé wrote:
> On Thu, Nov 03, 2022 at 10:04:54PM +0530, manish.mishra wrote:
>> On 03/11/22 2:59 pm, Daniel P. Berrangé wrote:
>>> On Thu, Nov 03, 2022 at 02:50:25PM +0530, manish.mishra wrote:
>>>> On 01/11/22 9:15 pm, Daniel P. Berrangé wrote:
>>>>> On Tue, Nov 01, 2022 at 09:10:14PM +0530, manish.mishra wrote:
>>>>>> On 01/11/22 8:21 pm, Daniel P. Berrangé wrote:
>>>>>>> On Tue, Nov 01, 2022 at 02:30:29PM +0000, manish.mishra wrote:
>>>>>>>> diff --git a/migration/migration.c b/migration/migration.c
>>>>>>>> index 739bb683f3..f4b6f278a9 100644
>>>>>>>> --- a/migration/migration.c
>>>>>>>> +++ b/migration/migration.c
>>>>>>>> @@ -733,31 +733,40 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
>>>>>>>>      {
>>>>>>>>          MigrationIncomingState *mis = migration_incoming_get_current();
>>>>>>>>          Error *local_err = NULL;
>>>>>>>> -    bool start_migration;
>>>>>>>>          QEMUFile *f;
>>>>>>>> +    bool default_channel = true;
>>>>>>>> +    uint32_t channel_magic = 0;
>>>>>>>> +    int ret = 0;
>>>>>>>> -    if (!mis->from_src_file) {
>>>>>>>> -        /* The first connection (multifd may have multiple) */
>>>>>>>> +    if (migrate_use_multifd() && !migration_in_postcopy()) {
>>>>>>>> +        ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic,
>>>>>>>> +                                        sizeof(channel_magic), &local_err);
>>>>>>>> +
>>>>>>>> +        if (ret != 1) {
>>>>>>>> +            error_propagate(errp, local_err);
>>>>>>>> +            return;
>>>>>>>> +        }
>>>>>>> ....and thus this will fail for TLS channels AFAICT.
>>>>>> Yes, thanks for quick review Daniel. You pointed this earlier too, sorry missed it, will put another check !migrate_use_tls() in V2.
>>>>> But we need this problem fixed with TLS too, so just excluding it
>>>>> isn't right. IMHO we need to modify the migration code so we can
>>>>> read the magic earlier, instead of peeking.
>>>>>
>>>>>
>>>>> With regards,
>>>>> Daniel
>>>> Hi Daniel, I was trying tls migrations. What i see is that tls session
>>>> creation does handshake. So if we read ahead in ioc_process_incoming
>>>> for default channel. Because client sends magic only after multiFD
>>>> channels are setup, which too requires tls handshake.
>>> By the time we get to migrate_ioc_process_incoming, the TLS handshake
>>> has already been performed.
>>>
>>> migration_channel_process_incoming
>>>       -> migration_ioc_process_incoming
>>>
>>> vs
>>>
>>> migration_channel_process_incoming
>>>       -> migration_tls_channel_process_incoming
>>>           -> migration_tls_incoming_handshake
>>> 	     -> migration_channel_process_incoming
>>> 	         ->  migration_ioc_process_incoming
>>>
>> Yes sorry i thought we block on source side till handshake is done but that is not true. I checked then why that deadlock is happening. So this where the dealock is happening.
>>
>> static int ram_save_setup(QEMUFile *f, void *opaque) {
>> +
>> +
>>      ram_control_before_iterate(f, RAM_CONTROL_SETUP);
>>      ram_control_after_iterate(f, RAM_CONTROL_SETUP);
>>
>>      ret =  multifd_send_sync_main(f);
>>      if (ret < 0) {
>>          return ret;
>>      }
>>
>>      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>>      qemu_fflush(f);
>>
>>      return 0;
>> }
>>
>> Now if we block in migration_ioc_process_incoming for reading magic
>> value from channel, which is actually sent by client when this
>> qemu_fflush is done. Before this qemu_fflush we wait for
>> multifd_send_sync_main which actually requires that tls handshake is
>> done for multiFD channels as it blocks on sem_sync which posted by
>> multifd_send_thread which is called after handshake||. But then on
>> destination side we are blocked in migration_ioc_process_incoming()
>> waiting to read something from default channel hence handshake for
>> multiFD channels can not happen. This to me looks unresolvable
>> whatever way we try to manipulate stream until we do some changes
>> on source side.
> The TLS handshake is already complete when migration_ioc_process_incoming
> is blocking on read.
>
> Regardless of which channel we're talking about, thue TLS handshake is
> always performed & finished before we try to either send or recv any
> data.
>
> With regards,
> Daniel

Yes Daniel, agreed on that. In this case the TLS handshake is done for the
default channel, so we entered migration_ioc_process_incoming for the
default channel. But if we try to read some data there, we do not get any,
because the data on the default channel has not yet been flushed by the
source. On the source side the data is flushed in the function I pointed
out above, which waits for the multifd channels to be set up (including
their handshakes) before flushing. I mean, data is sent only when the
buffer is full or explicitly flushed; until then it just sits in the
buffer. But the multifd handshake cannot complete until we return from
migration_ioc_process_incoming for the default channel, which waits
indefinitely because nothing has been sent on the channel yet.

Thanks

Manish Mishra
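
The buffering point above can be reproduced outside QEMU. A tiny
standalone sketch (not QEMU code; the 64-byte array merely stands in for
the QEMUFile buffer) showing that the peer sees nothing until the buffered
bytes are flushed to the socket, which is why a peer that insists on
reading them first just blocks:

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    int sv[2];
    char buffered[64];              /* stands in for the QEMUFile buffer */
    size_t buffered_len = 0;
    char got[4] = "";

    if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) {
        return 1;
    }

    /* "qemu_put_*": bytes land only in the userspace buffer... */
    memcpy(buffered, "QEVM", 4);
    buffered_len = 4;

    /* ...so a non-blocking peek on the other end sees nothing yet, and a
     * blocking read would hang - the shape of the deadlock above. */
    if (recv(sv[1], got, sizeof(got), MSG_PEEK | MSG_DONTWAIT) < 0) {
        printf("before flush: peer has nothing to read\n");
    }

    /* "qemu_fflush": only now does the peer see the data. */
    if (write(sv[0], buffered, buffered_len) != (ssize_t)buffered_len ||
        read(sv[1], got, sizeof(got)) != (ssize_t)sizeof(got)) {
        return 1;
    }
    printf("after flush: peer received %.4s\n", got);

    close(sv[0]);
    close(sv[1]);
    return 0;
}
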
Daniel P. Berrangé Nov. 3, 2022, 5:57 p.m. UTC | #10
On Thu, Nov 03, 2022 at 11:06:23PM +0530, manish.mishra wrote:
> 
> On 03/11/22 10:57 pm, Daniel P. Berrangé wrote:
> > On Thu, Nov 03, 2022 at 10:04:54PM +0530, manish.mishra wrote:
> > > On 03/11/22 2:59 pm, Daniel P. Berrangé wrote:
> > > > On Thu, Nov 03, 2022 at 02:50:25PM +0530, manish.mishra wrote:
> > > > > On 01/11/22 9:15 pm, Daniel P. Berrangé wrote:
> > > > > > On Tue, Nov 01, 2022 at 09:10:14PM +0530, manish.mishra wrote:
> > > > > > > On 01/11/22 8:21 pm, Daniel P. Berrangé wrote:
> > > > > > > > On Tue, Nov 01, 2022 at 02:30:29PM +0000, manish.mishra wrote:
> > > > > > > > > diff --git a/migration/migration.c b/migration/migration.c
> > > > > > > > > index 739bb683f3..f4b6f278a9 100644
> > > > > > > > > --- a/migration/migration.c
> > > > > > > > > +++ b/migration/migration.c
> > > > > > > > > @@ -733,31 +733,40 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
> > > > > > > > >      {
> > > > > > > > >          MigrationIncomingState *mis = migration_incoming_get_current();
> > > > > > > > >          Error *local_err = NULL;
> > > > > > > > > -    bool start_migration;
> > > > > > > > >          QEMUFile *f;
> > > > > > > > > +    bool default_channel = true;
> > > > > > > > > +    uint32_t channel_magic = 0;
> > > > > > > > > +    int ret = 0;
> > > > > > > > > -    if (!mis->from_src_file) {
> > > > > > > > > -        /* The first connection (multifd may have multiple) */
> > > > > > > > > +    if (migrate_use_multifd() && !migration_in_postcopy()) {
> > > > > > > > > +        ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic,
> > > > > > > > > +                                        sizeof(channel_magic), &local_err);
> > > > > > > > > +
> > > > > > > > > +        if (ret != 1) {
> > > > > > > > > +            error_propagate(errp, local_err);
> > > > > > > > > +            return;
> > > > > > > > > +        }
> > > > > > > > ....and thus this will fail for TLS channels AFAICT.
> > > > > > > Yes, thanks for quick review Daniel. You pointed this earlier too, sorry missed it, will put another check !migrate_use_tls() in V2.
> > > > > > But we need this problem fixed with TLS too, so just excluding it
> > > > > > isn't right. IMHO we need to modify the migration code so we can
> > > > > > read the magic earlier, instead of peeking.
> > > > > > 
> > > > > > 
> > > > > > With regards,
> > > > > > Daniel
> > > > > Hi Daniel, I was trying tls migrations. What i see is that tls session
> > > > > creation does handshake. So if we read ahead in ioc_process_incoming
> > > > > for default channel. Because client sends magic only after multiFD
> > > > > channels are setup, which too requires tls handshake.
> > > > By the time we get to migrate_ioc_process_incoming, the TLS handshake
> > > > has already been performed.
> > > > 
> > > > migration_channel_process_incoming
> > > >       -> migration_ioc_process_incoming
> > > > 
> > > > vs
> > > > 
> > > > migration_channel_process_incoming
> > > >       -> migration_tls_channel_process_incoming
> > > >           -> migration_tls_incoming_handshake
> > > > 	     -> migration_channel_process_incoming
> > > > 	         ->  migration_ioc_process_incoming
> > > > 
> > > Yes sorry i thought we block on source side till handshake is done but that is not true. I checked then why that deadlock is happening. So this where the dealock is happening.
> > > 
> > > static int ram_save_setup(QEMUFile *f, void *opaque) {
> > > +
> > > +
> > >      ram_control_before_iterate(f, RAM_CONTROL_SETUP);
> > >      ram_control_after_iterate(f, RAM_CONTROL_SETUP);
> > > 
> > >      ret =  multifd_send_sync_main(f);
> > >      if (ret < 0) {
> > >          return ret;
> > >      }
> > > 
> > >      qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
> > >      qemu_fflush(f);
> > > 
> > >      return 0;
> > > }
> > > 
> > > Now if we block in migration_ioc_process_incoming for reading magic
> > > value from channel, which is actually sent by client when this
> > > qemu_fflush is done. Before this qemu_fflush we wait for
> > > multifd_send_sync_main which actually requires that tls handshake is
> > > done for multiFD channels as it blocks on sem_sync which posted by
> > > multifd_send_thread which is called after handshake||. But then on
> > > destination side we are blocked in migration_ioc_process_incoming()
> > > waiting to read something from default channel hence handshake for
> > > multiFD channels can not happen. This to me looks unresolvable
> > > whatever way we try to manipulate stream until we do some changes
> > > on source side.
> > The TLS handshake is already complete when migration_ioc_process_incoming
> > is blocking on read.
> > 
> > Regardless of which channel we're talking about, thue TLS handshake is
> > always performed & finished before we try to either send or recv any
> > data.
> 
> Yes Daniel, agree on that, in this case tls handshake is done for
> default channel so we went in migration_ioc_process_incoming for
> default channel. But if we try to read some data there, it does not
> get because data on default channel is not yet flushed by source. 
> From source side data in flushed in above function i pointed. Which
> blocks for multiFD channels to be setup with handshake, before
> flushing data. I mean data is sent only when buffer is full or
> explicitly flushed, till then it is just in buffer. But multiFD
> handshake can not complete until we return from
> migration_ioc_process_incoming for default channel which infintely
> waits because nothing is sent yet on channel.

On the source side, if we're in ram_save_setup then the TLS
handshake is already complete for the main channel. In fact
I think the TLS handshake should act as a serialization
point that prevents the entire bug. It should be guaranteed
that the main channel is fully received by the dest, and
transferring data, before we even get to establishing the
multifd channels.

All we need do is read the magic bytes early, regardless of
whether it's a plain or TLS channel, and it should do the right
thing AFAICT.
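
A minimal sketch of that, assuming the variable names used in the patch below (ioc, local_err, errp, default_channel) and leaving open how the consumed bytes get handed back to the QEMUFile layer:

    uint32_t channel_magic = 0;

    /* Read (consume) the magic instead of peeking it; this also works on a
     * QIOChannelTLS, where there is no raw socket fd to MSG_PEEK on. */
    if (qio_channel_read_all(ioc, (char *)&channel_magic,
                             sizeof(channel_magic), &local_err) < 0) {
        error_propagate(errp, local_err);
        return;
    }
    default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
    /* The four consumed bytes then still have to be seen by the normal
     * stream parsing, e.g. by re-injecting them into the QEMUFile. */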


With regards,
Daniel
manish.mishra Nov. 3, 2022, 6:17 p.m. UTC | #11
On 03/11/22 11:27 pm, Daniel P. Berrangé wrote:
> On Thu, Nov 03, 2022 at 11:06:23PM +0530, manish.mishra wrote:
>> On 03/11/22 10:57 pm, Daniel P. Berrangé wrote:
>>> On Thu, Nov 03, 2022 at 10:04:54PM +0530, manish.mishra wrote:
>>>> On 03/11/22 2:59 pm, Daniel P. Berrangé wrote:
>>>>> On Thu, Nov 03, 2022 at 02:50:25PM +0530, manish.mishra wrote:
>>>>>> On 01/11/22 9:15 pm, Daniel P. Berrangé wrote:
>>>>>>> On Tue, Nov 01, 2022 at 09:10:14PM +0530, manish.mishra wrote:
>>>>>>>> On 01/11/22 8:21 pm, Daniel P. Berrangé wrote:
>>>>>>>>> On Tue, Nov 01, 2022 at 02:30:29PM +0000, manish.mishra wrote:
>>>>>>>>>> diff --git a/migration/migration.c b/migration/migration.c
>>>>>>>>>> index 739bb683f3..f4b6f278a9 100644
>>>>>>>>>> --- a/migration/migration.c
>>>>>>>>>> +++ b/migration/migration.c
>>>>>>>>>> @@ -733,31 +733,40 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
>>>>>>>>>>       {
>>>>>>>>>>           MigrationIncomingState *mis = migration_incoming_get_current();
>>>>>>>>>>           Error *local_err = NULL;
>>>>>>>>>> -    bool start_migration;
>>>>>>>>>>           QEMUFile *f;
>>>>>>>>>> +    bool default_channel = true;
>>>>>>>>>> +    uint32_t channel_magic = 0;
>>>>>>>>>> +    int ret = 0;
>>>>>>>>>> -    if (!mis->from_src_file) {
>>>>>>>>>> -        /* The first connection (multifd may have multiple) */
>>>>>>>>>> +    if (migrate_use_multifd() && !migration_in_postcopy()) {
>>>>>>>>>> +        ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic,
>>>>>>>>>> +                                        sizeof(channel_magic), &local_err);
>>>>>>>>>> +
>>>>>>>>>> +        if (ret != 1) {
>>>>>>>>>> +            error_propagate(errp, local_err);
>>>>>>>>>> +            return;
>>>>>>>>>> +        }
>>>>>>>>> ....and thus this will fail for TLS channels AFAICT.
>>>>>>>> Yes, thanks for quick review Daniel. You pointed this earlier too, sorry missed it, will put another check !migrate_use_tls() in V2.
>>>>>>> But we need this problem fixed with TLS too, so just excluding it
>>>>>>> isn't right. IMHO we need to modify the migration code so we can
>>>>>>> read the magic earlier, instead of peeking.
>>>>>>>
>>>>>>>
>>>>>>> With regards,
>>>>>>> Daniel
>>>>>> Hi Daniel, I was trying tls migrations. What i see is that tls session
>>>>>> creation does handshake. So if we read ahead in ioc_process_incoming
>>>>>> for default channel. Because client sends magic only after multiFD
>>>>>> channels are setup, which too requires tls handshake.
>>>>> By the time we get to migrate_ioc_process_incoming, the TLS handshake
>>>>> has already been performed.
>>>>>
>>>>> migration_channel_process_incoming
>>>>>        -> migration_ioc_process_incoming
>>>>>
>>>>> vs
>>>>>
>>>>> migration_channel_process_incoming
>>>>>        -> migration_tls_channel_process_incoming
>>>>>            -> migration_tls_incoming_handshake
>>>>> 	     -> migration_channel_process_incoming
>>>>> 	         ->  migration_ioc_process_incoming
>>>>>
>>>> Yes sorry i thought we block on source side till handshake is done but that is not true. I checked then why that deadlock is happening. So this where the dealock is happening.
>>>>
>>>> static int ram_save_setup(QEMUFile *f, void *opaque) {
>>>> +
>>>> +
>>>>       ram_control_before_iterate(f, RAM_CONTROL_SETUP);
>>>>       ram_control_after_iterate(f, RAM_CONTROL_SETUP);
>>>>
>>>>       ret =  multifd_send_sync_main(f);
>>>>       if (ret < 0) {
>>>>           return ret;
>>>>       }
>>>>
>>>>       qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>>>>       qemu_fflush(f);
>>>>
>>>>       return 0;
>>>> }
>>>>
>>>> Now if we block in migration_ioc_process_incoming for reading magic
>>>> value from channel, which is actually sent by client when this
>>>> qemu_fflush is done. Before this qemu_fflush we wait for
>>>> multifd_send_sync_main which actually requires that tls handshake is
>>>> done for multiFD channels as it blocks on sem_sync which posted by
>>>> multifd_send_thread which is called after handshake||. But then on
>>>> destination side we are blocked in migration_ioc_process_incoming()
>>>> waiting to read something from default channel hence handshake for
>>>> multiFD channels can not happen. This to me looks unresolvable
>>>> whatever way we try to manipulate stream until we do some changes
>>>> on source side.
>>> The TLS handshake is already complete when migration_ioc_process_incoming
>>> is blocking on read.
>>>
>>> Regardless of which channel we're talking about, thue TLS handshake is
>>> always performed & finished before we try to either send or recv any
>>> data.
>> Yes Daniel, agree on that, in this case tls handshake is done for
>> default channel so we went in migration_ioc_process_incoming for
>> default channel. But if we try to read some data there, it does not
>> get because data on default channel is not yet flushed by source.
>>  From source side data in flushed in above function i pointed. Which
>> blocks for multiFD channels to be setup with handshake, before
>> flushing data. I mean data is sent only when buffer is full or
>> explicitly flushed, till then it is just in buffer. But multiFD
>> handshake can not complete until we return from
>> migration_ioc_process_incoming for default channel which infintely
>> waits because nothing is sent yet on channel.
> On the source side, if we're in ram_save_setup then the TLS
> handshake is already complete for the main channel. In fact
> I think the TLS handshake should act as a serialization
> point that prevents the entire bug. It should be guaranteed
> that the main channel is fully received by the dest, and
> transferring data, before we even get to establishing the
> multifd channels.


Yes, Daniel, the tls handshake could make things serialized, but the issue is that on the source side the handshake is done in the background by another thread, we do not actually block on it, so it is still possible that a multiFD connection is accepted first on the destination side.

>
> All we need do is read the magic bytes early, regardless of
> whether its plain or TLS channel, and it should to the right
> thing AFAICT.
>

Yes, but if we try to read early on the main channel with tls enabled it is an issue. Sorry, I may not have put the above comment clearly. I will try to put the scenario step wise.
1. The main channel is created and the tls handshake is done for the main channel.
2. The destination side tries to read the magic early on the main channel in migration_ioc_process_incoming, but it is not yet sent by the source.
3. The source has written the magic to the main channel's file buffer but it is not yet flushed; it is flushed for the first time in ram_save_setup. I mean, data is sent on the channel only if the qemu file buffer is full or explicitly flushed.
4. The source side blocks on multifd_send_sync_main in ram_save_setup before flushing the qemu file. But multifd_send_sync_main is blocked on sem_sync, which is only posted once the handshake is done for the multiFD channels.
5. The destination side is still waiting to read the magic on the main channel, so unless we return from migration_ioc_process_incoming we can not accept a new channel, so the handshake of the multiFD channels is blocked.
6. So basically the source is blocked on the multiFD channels' handshake before sending data on the main channel, but the destination is blocked waiting for data before it can acknowledge the multiFD channels and do the handshake, so it creates a deadlock situation (see the sketch after this list).

I am still not sure if I have put it clearly :)
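
A condensed sketch of the two sides in steps 1-6 above; the function names are the ones from this thread, but the bodies are illustrative only, not the real QEMU code:

/* Source: migration thread, steps 3-4. */
static int source_side_sketch(QEMUFile *f)
{
    /* The magic already sits in f's user-space buffer, not on the wire. */
    int ret = multifd_send_sync_main(f);  /* waits on the multifd handshakes */
    if (ret < 0) {
        return ret;
    }
    qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
    qemu_fflush(f);                       /* only here would the magic be sent */
    return 0;
}

/* Destination: incoming-connection handler, steps 2 and 5. */
static void dest_side_sketch(QIOChannel *ioc, Error **errp)
{
    uint32_t channel_magic = 0;

    /*
     * Blocks forever in this scenario: the source never flushes, and while
     * we sit here no further (multifd) connection is processed, so the
     * handshakes the source is waiting on never complete either.
     */
    qio_channel_read_peek_all(ioc, &channel_magic, sizeof(channel_magic), errp);
}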

Thanks

Manish Mishra

> With regards,
> Daniel
manish.mishra Nov. 3, 2022, 6:53 p.m. UTC | #12
On 03/11/22 11:47 pm, manish.mishra wrote:
>
> On 03/11/22 11:27 pm, Daniel P. Berrangé wrote:
>> On Thu, Nov 03, 2022 at 11:06:23PM +0530, manish.mishra wrote:
>>> On 03/11/22 10:57 pm, Daniel P. Berrangé wrote:
>>>> On Thu, Nov 03, 2022 at 10:04:54PM +0530, manish.mishra wrote:
>>>>> On 03/11/22 2:59 pm, Daniel P. Berrangé wrote:
>>>>>> On Thu, Nov 03, 2022 at 02:50:25PM +0530, manish.mishra wrote:
>>>>>>> On 01/11/22 9:15 pm, Daniel P. Berrangé wrote:
>>>>>>>> On Tue, Nov 01, 2022 at 09:10:14PM +0530, manish.mishra wrote:
>>>>>>>>> On 01/11/22 8:21 pm, Daniel P. Berrangé wrote:
>>>>>>>>>> On Tue, Nov 01, 2022 at 02:30:29PM +0000, manish.mishra wrote:
>>>>>>>>>>> diff --git a/migration/migration.c b/migration/migration.c
>>>>>>>>>>> index 739bb683f3..f4b6f278a9 100644
>>>>>>>>>>> --- a/migration/migration.c
>>>>>>>>>>> +++ b/migration/migration.c
>>>>>>>>>>> @@ -733,31 +733,40 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
>>>>>>>>>>>       {
>>>>>>>>>>>           MigrationIncomingState *mis = migration_incoming_get_current();
>>>>>>>>>>>           Error *local_err = NULL;
>>>>>>>>>>> -    bool start_migration;
>>>>>>>>>>>           QEMUFile *f;
>>>>>>>>>>> +    bool default_channel = true;
>>>>>>>>>>> +    uint32_t channel_magic = 0;
>>>>>>>>>>> +    int ret = 0;
>>>>>>>>>>> -    if (!mis->from_src_file) {
>>>>>>>>>>> -        /* The first connection (multifd may have multiple) */
>>>>>>>>>>> +    if (migrate_use_multifd() && !migration_in_postcopy()) {
>>>>>>>>>>> +        ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic,
>>>>>>>>>>> + sizeof(channel_magic), &local_err);
>>>>>>>>>>> +
>>>>>>>>>>> +        if (ret != 1) {
>>>>>>>>>>> +            error_propagate(errp, local_err);
>>>>>>>>>>> +            return;
>>>>>>>>>>> +        }
>>>>>>>>>> ....and thus this will fail for TLS channels AFAICT.
>>>>>>>>> Yes, thanks for quick review Daniel. You pointed this earlier too, sorry missed it, will put another check !migrate_use_tls() in V2.
>>>>>>>> But we need this problem fixed with TLS too, so just excluding it
>>>>>>>> isn't right. IMHO we need to modify the migration code so we can
>>>>>>>> read the magic earlier, instead of peeking.
>>>>>>>>
>>>>>>>>
>>>>>>>> With regards,
>>>>>>>> Daniel
>>>>>>> Hi Daniel, I was trying tls migrations. What i see is that tls session
>>>>>>> creation does handshake. So if we read ahead in ioc_process_incoming
>>>>>>> for default channel. Because client sends magic only after multiFD
>>>>>>> channels are setup, which too requires tls handshake.
>>>>>> By the time we get to migrate_ioc_process_incoming, the TLS handshake
>>>>>> has already been performed.
>>>>>>
>>>>>> migration_channel_process_incoming
>>>>>>        -> migration_ioc_process_incoming
>>>>>>
>>>>>> vs
>>>>>>
>>>>>> migration_channel_process_incoming
>>>>>>        -> migration_tls_channel_process_incoming
>>>>>>            -> migration_tls_incoming_handshake
>>>>>>          -> migration_channel_process_incoming
>>>>>>              ->  migration_ioc_process_incoming
>>>>>>
>>>>> Yes sorry i thought we block on source side till handshake is done but that is not true. I checked then why that deadlock is happening. So this where the dealock is happening.
>>>>>
>>>>> static int ram_save_setup(QEMUFile *f, void *opaque) {
>>>>> +
>>>>> +
>>>>>       ram_control_before_iterate(f, RAM_CONTROL_SETUP);
>>>>>       ram_control_after_iterate(f, RAM_CONTROL_SETUP);
>>>>>
>>>>>       ret =  multifd_send_sync_main(f);
>>>>>       if (ret < 0) {
>>>>>           return ret;
>>>>>       }
>>>>>
>>>>>       qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
>>>>>       qemu_fflush(f);
>>>>>
>>>>>       return 0;
>>>>> }
>>>>>
>>>>> Now if we block in migration_ioc_process_incoming for reading magic
>>>>> value from channel, which is actually sent by client when this
>>>>> qemu_fflush is done. Before this qemu_fflush we wait for
>>>>> multifd_send_sync_main which actually requires that tls handshake is
>>>>> done for multiFD channels as it blocks on sem_sync which posted by
>>>>> multifd_send_thread which is called after handshake||. But then on
>>>>> destination side we are blocked in migration_ioc_process_incoming()
>>>>> waiting to read something from default channel hence handshake for
>>>>> multiFD channels can not happen. This to me looks unresolvable
>>>>> whatever way we try to manipulate stream until we do some changes
>>>>> on source side.
>>>> The TLS handshake is already complete when migration_ioc_process_incoming
>>>> is blocking on read.
>>>>
>>>> Regardless of which channel we're talking about, thue TLS handshake is
>>>> always performed & finished before we try to either send or recv any
>>>> data.
>>> Yes Daniel, agree on that, in this case tls handshake is done for
>>> default channel so we went in migration_ioc_process_incoming for
>>> default channel. But if we try to read some data there, it does not
>>> get because data on default channel is not yet flushed by source.
>>>  From source side data in flushed in above function i pointed. Which
>>> blocks for multiFD channels to be setup with handshake, before
>>> flushing data. I mean data is sent only when buffer is full or
>>> explicitly flushed, till then it is just in buffer. But multiFD
>>> handshake can not complete until we return from
>>> migration_ioc_process_incoming for default channel which infintely
>>> waits because nothing is sent yet on channel.
>> On the source side, if we're in ram_save_setup then the TLS
>> handshake is already complete for the main channel. In fact
>> I think the TLS handshake should act as a serialization
>> point that prevents the entire bug. It should be guaranteed
>> that the main channel is fully received by the dest, and
>> transferring data, before we even get to establishing the
>> multifd channels.
>
>
> Yes, Daniel, tls handshake could make things serielized, but issue is that from source side handshake is done in background with another thread we do not actually block, so it is still possible that multiFD connection is accepted first on destination side.

Oh I see now, the tls handshake is done in a different thread only for the multiFD channels; for the main channel the handshake is a blocker, so agreed, this bug should not be possible with tls. So does the current patch work with one more change, that we do not do the read peek for tls cases and fall back to the older way? A normal read ahead anyway does not work with tls, for the earlier reason of the deadlock.
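
A minimal sketch of that fallback, assuming the variable names from the patch below and the existing migrate_use_tls() helper:

    if (migrate_use_multifd() && !migration_in_postcopy() && !migrate_use_tls()) {
        ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic,
                                        sizeof(channel_magic), &local_err);
        if (ret != 1) {
            error_propagate(errp, local_err);
            return;
        }
        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
    } else {
        /* TLS (or non-multifd): keep the old ordering-based assumption. */
        default_channel = !mis->from_src_file;
    }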

Thanks

Manish Mishra

>>
>> All we need do is read the magic bytes early, regardless of
>> whether its plain or TLS channel, and it should to the right
>> thing AFAICT.
>>
>
> Yes, but if we try to read early on main channel with tls enabled case it is an issue. Sorry i may not have put above comment cleary. I will try to put scenario step wise.
> 1. main channel is created and tls handshake is done for main channel.
> 2. Destionation side tries to read magic early on main channel in migration_ioc_process_incoming but it is not yet sent by source.
> 3. Source has written magic to main channel file buffer but it is not yet flushed, it is flushed first time in ram_save_setup, i mean data is sent on channel only if qemu file buffer is full or explicitly flushed.
> 4. Source side blocks on multifd_send_sync_main in ram_save_setup before flushing qemu file. But multifd_send_sync_main is blocked for sem_sync until handshake is done for multiFD channels.
> 5. Destination side is still waiting for reading magic on main channel, so unless we return from migration_ioc_process_incoming we can not accept new channel, so handshake of multiFD channel is blocked.
> 6. So basically source is blocked on multiFD channels handshake before sending data on main channel, but destination is blocked waiting for data before it can acknowledge multiFD channels and do handshake, so it kind of creates a deadlock situation.
>
> I am still not sure if i could put it clearly :)
>
> Thanks
>
> Manish Mishra
>
>> With regards,
>> Daniel
>
Peter Xu Nov. 14, 2022, 4:51 p.m. UTC | #14
Manish,

On Thu, Nov 03, 2022 at 11:47:51PM +0530, manish.mishra wrote:
> Yes, but if we try to read early on main channel with tls enabled case it is an issue. Sorry i may not have put above comment cleary. I will try to put scenario step wise.
> 1. main channel is created and tls handshake is done for main channel.
> 2. Destionation side tries to read magic early on main channel in migration_ioc_process_incoming but it is not yet sent by source.
> 3. Source has written magic to main channel file buffer but it is not yet flushed, it is flushed first time in ram_save_setup, i mean data is sent on channel only if qemu file buffer is full or explicitly flushed.
> 4. Source side blocks on multifd_send_sync_main in ram_save_setup before flushing qemu file. But multifd_send_sync_main is blocked for sem_sync until handshake is done for multiFD channels.
> 5. Destination side is still waiting for reading magic on main channel, so unless we return from migration_ioc_process_incoming we can not accept new channel, so handshake of multiFD channel is blocked.
> 6. So basically source is blocked on multiFD channels handshake before sending data on main channel, but destination is blocked waiting for data before it can acknowledge multiFD channels and do handshake, so it kind of creates a deadlock situation.

Why is this issue only happening with TLS?  It sounds like it'll happen as
long as multifd is enabled.

I'm also wondering whether we should flush in qemu_savevm_state_header(), so
that at least an upgraded src qemu will always flush the headers, given it
never hurts.
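
A rough sketch of that idea; the real function body in migration/savevm.c differs, and the flush is only the hypothetical addition being discussed here:

void qemu_savevm_state_header(QEMUFile *f)
{
    qemu_put_be32(f, QEMU_VM_FILE_MAGIC);
    qemu_put_be32(f, QEMU_VM_FILE_VERSION);

    /* ... configuration section elided ... */

    qemu_fflush(f);    /* hypothetical: push the header onto the wire now */
}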
manish.mishra Nov. 15, 2022, 7:07 a.m. UTC | #15
Thanks Peter

On 14/11/22 10:21 pm, Peter Xu wrote:
> Manish,
>
> On Thu, Nov 03, 2022 at 11:47:51PM +0530, manish.mishra wrote:
>> Yes, but if we try to read early on main channel with tls enabled case it is an issue. Sorry i may not have put above comment cleary. I will try to put scenario step wise.
>> 1. main channel is created and tls handshake is done for main channel.
>> 2. Destionation side tries to read magic early on main channel in migration_ioc_process_incoming but it is not yet sent by source.
>> 3. Source has written magic to main channel file buffer but it is not yet flushed, it is flushed first time in ram_save_setup, i mean data is sent on channel only if qemu file buffer is full or explicitly flushed.
>> 4. Source side blocks on multifd_send_sync_main in ram_save_setup before flushing qemu file. But multifd_send_sync_main is blocked for sem_sync until handshake is done for multiFD channels.
>> 5. Destination side is still waiting for reading magic on main channel, so unless we return from migration_ioc_process_incoming we can not accept new channel, so handshake of multiFD channel is blocked.
>> 6. So basically source is blocked on multiFD channels handshake before sending data on main channel, but destination is blocked waiting for data before it can acknowledge multiFD channels and do handshake, so it kind of creates a deadlock situation.
> Why is this issue only happening with TLS?  It sounds like it'll happen as
> long as multifd enabled.


Actually this was happening with tls because with tls we do a handshake, so a connection is considered established only after the tls handshake, and we flush data from the source only after all channels are established. With normal live migration, even if the connection is not yet accepted on the destination side we can continue, as we do not do any handshake. Basically, in normal live migration a connection is considered established as soon as the connect() call succeeds, even if it has not been accepted/acked by the destination, so that's why this deadlock was not happening.

>
> I'm also thinking whether we should flush in qemu_savevm_state_header() so
> at least upgraded src qemu will always flush the headers if it never hurts.

Yes, sure Peter.
>
Thanks

Manish Mishra
diff mbox series

Patch

diff --git a/include/io/channel.h b/include/io/channel.h
index c680ee7480..74177aeeea 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -115,6 +115,10 @@  struct QIOChannelClass {
                         int **fds,
                         size_t *nfds,
                         Error **errp);
+    ssize_t (*io_read_peek)(QIOChannel *ioc,
+                            void *buf,
+                            size_t nbytes,
+                            Error **errp);
     int (*io_close)(QIOChannel *ioc,
                     Error **errp);
     GSource * (*io_create_watch)(QIOChannel *ioc,
@@ -475,6 +479,27 @@  int qio_channel_write_all(QIOChannel *ioc,
                           size_t buflen,
                           Error **errp);
 
+/**
+ * qio_channel_read_peek_all:
+ * @ioc: the channel object
+ * @buf: the memory region to read in data
+ * @nbytes: the number of bytes to read
+ * @errp: pointer to a NULL-initialized error object
+ *
+ * Read given @nbytes data from peek of channel into
+ * memory region @buf.
+ *
+ * The function will be blocked until read size is
+ * equal to requested size.
+ *
+ * Returns: 1 if all bytes were read, 0 if end-of-file
+ *          occurs without data, or -1 on error
+ */
+int qio_channel_read_peek_all(QIOChannel *ioc,
+                              void* buf,
+                              size_t nbytes,
+                              Error **errp);
+
 /**
  * qio_channel_set_blocking:
  * @ioc: the channel object
diff --git a/io/channel-socket.c b/io/channel-socket.c
index b76dca9cc1..b99f5dfda6 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -705,6 +705,32 @@  static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
 }
 #endif /* WIN32 */
 
+static ssize_t qio_channel_socket_read_peek(QIOChannel *ioc,
+                                            void *buf,
+                                            size_t nbytes,
+                                            Error **errp)
+{
+    QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
+    ssize_t bytes = 0;
+
+retry:
+    bytes = recv(sioc->fd, buf, nbytes, MSG_PEEK);
+
+    if (bytes < 0) {
+        if (errno == EINTR) {
+            goto retry;
+        }
+        if (errno == EAGAIN) {
+            return QIO_CHANNEL_ERR_BLOCK;
+        }
+
+        error_setg_errno(errp, errno,
+                         "Unable to read from peek of socket");
+        return -1;
+    }
+
+    return bytes;
+}
 
 #ifdef QEMU_MSG_ZEROCOPY
 static int qio_channel_socket_flush(QIOChannel *ioc,
@@ -902,6 +928,7 @@  static void qio_channel_socket_class_init(ObjectClass *klass,
 
     ioc_klass->io_writev = qio_channel_socket_writev;
     ioc_klass->io_readv = qio_channel_socket_readv;
+    ioc_klass->io_read_peek = qio_channel_socket_read_peek;
     ioc_klass->io_set_blocking = qio_channel_socket_set_blocking;
     ioc_klass->io_close = qio_channel_socket_close;
     ioc_klass->io_shutdown = qio_channel_socket_shutdown;
diff --git a/io/channel.c b/io/channel.c
index 0640941ac5..a2d9b96f3f 100644
--- a/io/channel.c
+++ b/io/channel.c
@@ -346,6 +346,45 @@  int qio_channel_write_all(QIOChannel *ioc,
     return qio_channel_writev_all(ioc, &iov, 1, errp);
 }
 
+int qio_channel_read_peek_all(QIOChannel *ioc,
+                              void* buf,
+                              size_t nbytes,
+                              Error **errp)
+{
+   QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
+   ssize_t bytes = 0;
+
+   if (!klass->io_read_peek) {
+       error_setg(errp, "Channel does not support read peek");
+       return -1;
+   }
+
+   while (bytes < nbytes) {
+       bytes = klass->io_read_peek(ioc,
+                                   buf,
+                                   nbytes,
+                                   errp);
+
+       if (bytes == QIO_CHANNEL_ERR_BLOCK) {
+            if (qemu_in_coroutine()) {
+                qio_channel_yield(ioc, G_IO_OUT);
+            } else {
+                qio_channel_wait(ioc, G_IO_OUT);
+            }
+            continue;
+       }
+       if (bytes == 0) {
+           error_setg(errp,
+                      "Unexpected end-of-file on channel");
+           return 0;
+       }
+       if (bytes < 0) {
+           return -1;
+       }
+   }
+
+   return 1;
+}
 
 int qio_channel_set_blocking(QIOChannel *ioc,
                               bool enabled,
diff --git a/migration/migration.c b/migration/migration.c
index 739bb683f3..f4b6f278a9 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -733,31 +733,40 @@  void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
 {
     MigrationIncomingState *mis = migration_incoming_get_current();
     Error *local_err = NULL;
-    bool start_migration;
     QEMUFile *f;
+    bool default_channel = true;
+    uint32_t channel_magic = 0;
+    int ret = 0;
 
-    if (!mis->from_src_file) {
-        /* The first connection (multifd may have multiple) */
+    if (migrate_use_multifd() && !migration_in_postcopy()) {
+        ret = qio_channel_read_peek_all(ioc, (void *)&channel_magic,
+                                        sizeof(channel_magic), &local_err);
+
+        if (ret != 1) {
+            error_propagate(errp, local_err);
+            return;
+        }
+
+        default_channel = (channel_magic == cpu_to_be32(QEMU_VM_FILE_MAGIC));
+    } else {
+        default_channel = !mis->from_src_file;
+    }
+
+    if (default_channel) {
         f = qemu_file_new_input(ioc);
 
         if (!migration_incoming_setup(f, errp)) {
             return;
         }
-
-        /*
-         * Common migration only needs one channel, so we can start
-         * right now.  Some features need more than one channel, we wait.
-         */
-        start_migration = !migration_needs_multiple_sockets();
     } else {
         /* Multiple connections */
         assert(migration_needs_multiple_sockets());
         if (migrate_use_multifd()) {
-            start_migration = multifd_recv_new_channel(ioc, &local_err);
+            multifd_recv_new_channel(ioc, &local_err);
         } else {
             assert(migrate_postcopy_preempt());
             f = qemu_file_new_input(ioc);
-            start_migration = postcopy_preempt_new_channel(mis, f);
+            postcopy_preempt_new_channel(mis, f);
         }
         if (local_err) {
             error_propagate(errp, local_err);
@@ -765,7 +774,7 @@  void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
         }
     }
 
-    if (start_migration) {
+    if (migration_has_all_channels()) {
         /* If it's a recovery, we're done */
         if (postcopy_try_recover()) {
             return;
diff --git a/migration/multifd.c b/migration/multifd.c
index 586ddc9d65..be86a4d07f 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -1220,11 +1220,9 @@  bool multifd_recv_all_channels_created(void)
 
 /*
  * Try to receive all multifd channels to get ready for the migration.
- * - Return true and do not set @errp when correctly receiving all channels;
- * - Return false and do not set @errp when correctly receiving the current one;
- * - Return false and set @errp when failing to receive the current channel.
+ * Sets @errp when failing to receive the current channel.
  */
-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
 {
     MultiFDRecvParams *p;
     Error *local_err = NULL;
@@ -1237,7 +1235,7 @@  bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
                                 "failed to receive packet"
                                 " via multifd channel %d: ",
                                 qatomic_read(&multifd_recv_state->count));
-        return false;
+        return;
     }
     trace_multifd_recv_new_channel(id);
 
@@ -1247,7 +1245,7 @@  bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
                    id);
         multifd_recv_terminate_threads(local_err);
         error_propagate(errp, local_err);
-        return false;
+        return;
     }
     p->c = ioc;
     object_ref(OBJECT(ioc));
@@ -1258,6 +1256,4 @@  bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp)
     qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
                        QEMU_THREAD_JOINABLE);
     qatomic_inc(&multifd_recv_state->count);
-    return qatomic_read(&multifd_recv_state->count) ==
-           migrate_multifd_channels();
 }
diff --git a/migration/multifd.h b/migration/multifd.h
index 519f498643..913e4ba274 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -18,7 +18,7 @@  void multifd_save_cleanup(void);
 int multifd_load_setup(Error **errp);
 int multifd_load_cleanup(Error **errp);
 bool multifd_recv_all_channels_created(void);
-bool multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
+void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
 void multifd_recv_sync_main(void);
 int multifd_send_sync_main(QEMUFile *f);
 int multifd_queue_page(QEMUFile *f, RAMBlock *block, ram_addr_t offset);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index b9a37ef255..f84f783ab4 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -1539,7 +1539,7 @@  void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd)
     }
 }
 
-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
 {
     /*
      * The new loading channel has its own threads, so it needs to be
@@ -1548,9 +1548,6 @@  bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file)
     qemu_file_set_blocking(file, true);
     mis->postcopy_qemufile_dst = file;
     trace_postcopy_preempt_new_channel();
-
-    /* Start the migration immediately */
-    return true;
 }
 
 /*
diff --git a/migration/postcopy-ram.h b/migration/postcopy-ram.h
index 6147bf7d1d..25881c4127 100644
--- a/migration/postcopy-ram.h
+++ b/migration/postcopy-ram.h
@@ -190,7 +190,7 @@  enum PostcopyChannels {
     RAM_CHANNEL_MAX,
 };
 
-bool postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
+void postcopy_preempt_new_channel(MigrationIncomingState *mis, QEMUFile *file);
 int postcopy_preempt_setup(MigrationState *s, Error **errp);
 int postcopy_preempt_wait_channel(MigrationState *s);