Message ID | 20240202102857.110210-6-peterx@redhat.com
---|---
State | New, archived
Series | migration/multifd: Refactor ->send_prepare() and cleanups
On Fri, Feb 2, 2024 at 2:30 AM <peterx@redhat.com> wrote:
>
> From: Peter Xu <peterx@redhat.com>
>
> This array is redundant when p->pages exists. Now that we have extended
> the life of p->pages to the whole period where pending_job is set, it
> should be safe to always use p->pages->offset[] rather than p->normal[].
> Drop the array.
>
> Alongside, normal_num is also redundant: it is the same as
> p->pages->num.

Can we not drop p->normal and p->normal_num? They are redundant now, but I
think they will be needed for multifd zero page checking. In multifd zero
page, we find all the zero pages and sort the normal pages and zero pages
into two separate arrays: p->offset is the original array of pages,
p->normal will contain the array of normal pages, and p->zero will contain
the array of zero pages.

> This doesn't apply to the recv side, because there's no extra buffering
> on the recv side, so the p->normal[] array is still needed there.
>
> Reviewed-by: Fabiano Rosas <farosas@suse.de>
> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>  migration/multifd.h      |  4 ----
>  migration/multifd-zlib.c |  7 ++++---
>  migration/multifd-zstd.c |  7 ++++---
>  migration/multifd.c      | 33 +++++++++++++--------------------
>  4 files changed, 21 insertions(+), 30 deletions(-)
>
> [...]
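As a reference for the scheme Hao describes, here is a minimal sketch of the
normal/zero split, assuming QEMU's existing buffer_is_zero() helper and the
hypothetical p->normal/p->normal_num/p->zero/p->zero_num fields from the
proposal; this is an illustration of the idea, not code from any posted patch:

static void multifd_zero_page_split(MultiFDSendParams *p)
{
    MultiFDPages_t *pages = p->pages;

    p->normal_num = 0;
    p->zero_num = 0;   /* p->zero and p->zero_num are hypothetical fields */

    for (uint32_t i = 0; i < pages->num; i++) {
        void *page = pages->block->host + pages->offset[i];

        if (buffer_is_zero(page, p->page_size)) {
            /* Zero page: record only the offset; no payload is sent. */
            p->zero[p->zero_num++] = pages->offset[i];
        } else {
            /* Normal page: its contents will be transmitted. */
            p->normal[p->normal_num++] = pages->offset[i];
        }
    }
}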
Hao Xiang <hao.xiang@bytedance.com> writes:

> On Fri, Feb 2, 2024 at 2:30 AM <peterx@redhat.com> wrote:
>>
>> This array is redundant when p->pages exists. Now that we have extended
>> the life of p->pages to the whole period where pending_job is set, it
>> should be safe to always use p->pages->offset[] rather than p->normal[].
>> Drop the array.
>
> Can we not drop p->normal and p->normal_num? They are redundant now, but
> I think they will be needed for multifd zero page checking. In multifd
> zero page, we find all the zero pages and sort the normal pages and zero
> pages into two separate arrays: p->offset is the original array of pages,
> p->normal will contain the array of normal pages, and p->zero will
> contain the array of zero pages.

We're moving send_fill_packet() into send_prepare(), so you should be able
to do whatever data transformation you need at send_prepare() and add any
fields you need to p->pages.

If we keep p->normal, we will not be able to switch to an opaque payload
later on; there should be no mention of pages outside of the hooks. This is
long-term work, but let's avoid blocking it if possible.

> [...]
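To illustrate the direction Fabiano mentions, a sketch of the nocomp hook
once the packet fill moves into send_prepare() — assuming
multifd_send_fill_packet() becomes callable from the hooks; this shows the
shape of the refactor, not the final merged code:

static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
{
    MultiFDPages_t *pages = p->pages;

    for (int i = 0; i < pages->num; i++) {
        p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
        p->iov[p->iovs_num].iov_len = p->page_size;
        p->iovs_num++;
    }

    p->next_packet_size = pages->num * p->page_size;
    p->flags |= MULTIFD_FLAG_NOCOMP;

    /* Previously the send thread filled the packet after the hook
     * returned; with the fill owned by the hook, nothing outside the
     * hooks needs to know the payload is made of pages. */
    multifd_send_fill_packet(p);
    return 0;
}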
On Fri, Feb 9, 2024 at 4:20 AM Fabiano Rosas <farosas@suse.de> wrote:
>
> We're moving send_fill_packet() into send_prepare(), so you should be
> able to do whatever data transformation you need at send_prepare() and
> add any fields you need to p->pages.
>
> If we keep p->normal, we will not be able to switch to an opaque payload
> later on; there should be no mention of pages outside of the hooks. This
> is long-term work, but let's avoid blocking it if possible.

Got it. I will make the proper changes.

Aside from that, I would like to get opinions from you guys on the zero
page detection interface. Here are the options I am considering:

1) Do zero page detection in send_prepare(). This means no dedicated hook
for zero page detection, since otherwise we would be calling a hook from
inside a hook. But we will need a new function,
multifd_zero_page_check_send(), similar to how we use
multifd_send_fill_packet() now, and it will need to be called by every
send_prepare() implementation.

2) Do zero page detection in a new hook, zero_page_detection(), called
before send_prepare(). This seems like extra complexity, but I can go with
that route if you guys think it's a cleaner way.

I am leaning towards 1) right now.

> [...]
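A rough shape of option 1), with multifd_zero_page_check_send() as the
common helper named above; the helper's semantics and the pages->normal_num
field are assumptions for illustration (here the helper is imagined to sort
normal-page offsets to the front of pages->offset[]):

static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
{
    MultiFDPages_t *pages = p->pages;

    /* Hypothetical helper from option 1): scans pages->offset[] with a
     * zero check and leaves the normal-page offsets in
     * pages->offset[0 .. pages->normal_num). Called by every
     * send_prepare() implementation. */
    multifd_zero_page_check_send(p);

    for (uint32_t i = 0; i < pages->normal_num; i++) {
        p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
        p->iov[p->iovs_num].iov_len = p->page_size;
        p->iovs_num++;
    }

    p->next_packet_size = pages->normal_num * p->page_size;
    p->flags |= MULTIFD_FLAG_NOCOMP;
    return 0;
}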
Hao Xiang <hao.xiang@bytedance.com> writes:

> On Fri, Feb 9, 2024 at 4:20 AM Fabiano Rosas <farosas@suse.de> wrote:
>>
>> We're moving send_fill_packet() into send_prepare(), so you should be
>> able to do whatever data transformation you need at send_prepare() and
>> add any fields you need to p->pages.
>
> Got it. I will make the proper changes.
>
> Aside from that, I would like to get opinions from you guys on the zero
> page detection interface.
>
> [...]
>
> I am leaning towards 1) right now.

That's fine. Zero page detection is only needed for RAM migration; once we
start using multifd to transfer generic device state, there will be no zero
page detection. So send_prepare() seems like a good place to put it.

> [...]
diff --git a/migration/multifd.h b/migration/multifd.h
index 7c040cb85a..3920bdbcf1 100644
--- a/migration/multifd.h
+++ b/migration/multifd.h
@@ -122,10 +122,6 @@ typedef struct {
     struct iovec *iov;
     /* number of iovs used */
     uint32_t iovs_num;
-    /* Pages that are not zero */
-    ram_addr_t *normal;
-    /* num of non zero pages */
-    uint32_t normal_num;
     /* used for compression methods */
     void *data;
 } MultiFDSendParams;
diff --git a/migration/multifd-zlib.c b/migration/multifd-zlib.c
index 37ce48621e..100809abc1 100644
--- a/migration/multifd-zlib.c
+++ b/migration/multifd-zlib.c
@@ -116,17 +116,18 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp)
  */
 static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
 {
+    MultiFDPages_t *pages = p->pages;
     struct zlib_data *z = p->data;
     z_stream *zs = &z->zs;
     uint32_t out_size = 0;
     int ret;
     uint32_t i;
 
-    for (i = 0; i < p->normal_num; i++) {
+    for (i = 0; i < pages->num; i++) {
         uint32_t available = z->zbuff_len - out_size;
         int flush = Z_NO_FLUSH;
 
-        if (i == p->normal_num - 1) {
+        if (i == pages->num - 1) {
             flush = Z_SYNC_FLUSH;
         }
 
@@ -135,7 +136,7 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
          * with compression. zlib does not guarantee that this is safe,
          * therefore copy the page before calling deflate().
          */
-        memcpy(z->buf, p->pages->block->host + p->normal[i], p->page_size);
+        memcpy(z->buf, p->pages->block->host + pages->offset[i], p->page_size);
         zs->avail_in = p->page_size;
         zs->next_in = z->buf;
 
diff --git a/migration/multifd-zstd.c b/migration/multifd-zstd.c
index b471daadcd..2023edd8cc 100644
--- a/migration/multifd-zstd.c
+++ b/migration/multifd-zstd.c
@@ -113,6 +113,7 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp)
  */
 static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
 {
+    MultiFDPages_t *pages = p->pages;
     struct zstd_data *z = p->data;
     int ret;
     uint32_t i;
@@ -121,13 +122,13 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
     z->out.size = z->zbuff_len;
     z->out.pos = 0;
 
-    for (i = 0; i < p->normal_num; i++) {
+    for (i = 0; i < pages->num; i++) {
         ZSTD_EndDirective flush = ZSTD_e_continue;
 
-        if (i == p->normal_num - 1) {
+        if (i == pages->num - 1) {
             flush = ZSTD_e_flush;
         }
-        z->in.src = p->pages->block->host + p->normal[i];
+        z->in.src = p->pages->block->host + pages->offset[i];
         z->in.size = p->page_size;
         z->in.pos = 0;
 
diff --git a/migration/multifd.c b/migration/multifd.c
index 5633ac245a..8bb1fd95cf 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -90,13 +90,13 @@ static int nocomp_send_prepare(MultiFDSendParams *p, Error **errp)
 {
     MultiFDPages_t *pages = p->pages;
 
-    for (int i = 0; i < p->normal_num; i++) {
-        p->iov[p->iovs_num].iov_base = pages->block->host + p->normal[i];
+    for (int i = 0; i < pages->num; i++) {
+        p->iov[p->iovs_num].iov_base = pages->block->host + pages->offset[i];
         p->iov[p->iovs_num].iov_len = p->page_size;
         p->iovs_num++;
     }
 
-    p->next_packet_size = p->normal_num * p->page_size;
+    p->next_packet_size = pages->num * p->page_size;
     p->flags |= MULTIFD_FLAG_NOCOMP;
     return 0;
 }
@@ -269,21 +269,22 @@ static void multifd_pages_clear(MultiFDPages_t *pages)
 static void multifd_send_fill_packet(MultiFDSendParams *p)
 {
     MultiFDPacket_t *packet = p->packet;
+    MultiFDPages_t *pages = p->pages;
     int i;
 
     packet->flags = cpu_to_be32(p->flags);
     packet->pages_alloc = cpu_to_be32(p->pages->allocated);
-    packet->normal_pages = cpu_to_be32(p->normal_num);
+    packet->normal_pages = cpu_to_be32(pages->num);
     packet->next_packet_size = cpu_to_be32(p->next_packet_size);
     packet->packet_num = cpu_to_be64(p->packet_num);
 
-    if (p->pages->block) {
-        strncpy(packet->ramblock, p->pages->block->idstr, 256);
+    if (pages->block) {
+        strncpy(packet->ramblock, pages->block->idstr, 256);
     }
 
-    for (i = 0; i < p->normal_num; i++) {
+    for (i = 0; i < pages->num; i++) {
         /* there are architectures where ram_addr_t is 32 bit */
-        uint64_t temp = p->normal[i];
+        uint64_t temp = pages->offset[i];
 
         packet->offset[i] = cpu_to_be64(temp);
     }
@@ -570,8 +571,6 @@ void multifd_save_cleanup(void)
         p->packet = NULL;
         g_free(p->iov);
         p->iov = NULL;
-        g_free(p->normal);
-        p->normal = NULL;
         multifd_send_state->ops->send_cleanup(p, &local_err);
         if (local_err) {
             migrate_set_error(migrate_get_current(), local_err);
@@ -688,8 +687,8 @@ static void *multifd_send_thread(void *opaque)
 
         if (p->pending_job) {
             uint64_t packet_num = p->packet_num;
+            MultiFDPages_t *pages = p->pages;
             uint32_t flags;
-            p->normal_num = 0;
 
             if (use_zero_copy_send) {
                 p->iovs_num = 0;
@@ -697,12 +696,7 @@ static void *multifd_send_thread(void *opaque)
                 p->iovs_num = 1;
             }
 
-            for (int i = 0; i < p->pages->num; i++) {
-                p->normal[p->normal_num] = p->pages->offset[i];
-                p->normal_num++;
-            }
-
-            if (p->normal_num) {
+            if (pages->num) {
                 ret = multifd_send_state->ops->send_prepare(p, &local_err);
                 if (ret != 0) {
                     qemu_mutex_unlock(&p->mutex);
@@ -713,10 +707,10 @@ static void *multifd_send_thread(void *opaque)
             flags = p->flags;
             p->flags = 0;
             p->num_packets++;
-            p->total_normal_pages += p->normal_num;
+            p->total_normal_pages += pages->num;
             qemu_mutex_unlock(&p->mutex);
 
-            trace_multifd_send(p->id, packet_num, p->normal_num, flags,
+            trace_multifd_send(p->id, packet_num, pages->num, flags,
                                p->next_packet_size);
 
             if (use_zero_copy_send) {
@@ -924,7 +918,6 @@ int multifd_save_setup(Error **errp)
         p->name = g_strdup_printf("multifdsend_%d", i);
         /* We need one extra place for the packet header */
         p->iov = g_new0(struct iovec, page_count + 1);
-        p->normal = g_new0(ram_addr_t, page_count);
         p->page_size = qemu_target_page_size();
         p->page_count = page_count;