diff mbox

[v1] migration: change blocktime type to uint32_t

Message ID 1516898620-9486-2-git-send-email-a.perevalov@samsung.com (mailing list archive)
State New, archived
Headers show

Commit Message

Alexey Perevalov Jan. 25, 2018, 4:43 p.m. UTC
Initially int64_t was used, but on PowerPC architecture,
clang doesn't have atomic_*_8 function, so it produces
link time error.

QEMU is working with time as with 64bit value, but by
fact 32 bit is enough with CLOCK_REALTIME. In this case
blocktime will keep only 1200 hours time interval.

Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com>
---
 hmp.c                    |  4 ++--
 migration/postcopy-ram.c | 47 ++++++++++++++++++++++++++++++-----------------
 migration/trace-events   |  4 ++--
 qapi/migration.json      |  4 ++--
 4 files changed, 36 insertions(+), 23 deletions(-)

Comments

Philippe Mathieu-Daudé Jan. 25, 2018, 5:55 p.m. UTC | #1
Hi Alexey,

On Thu, Jan 25, 2018 at 1:43 PM, Alexey Perevalov
<a.perevalov@samsung.com> wrote:
> Initially int64_t was used, but on PowerPC architecture,
> clang doesn't have atomic_*_8 function, so it produces
> link time error.
>
> QEMU is working with time as with 64bit value, but by
> fact 32 bit is enough with CLOCK_REALTIME. In this case
> blocktime will keep only 1200 hours time interval.
>
> Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com>
> ---
>  hmp.c                    |  4 ++--
>  migration/postcopy-ram.c | 47 ++++++++++++++++++++++++++++++-----------------
>  migration/trace-events   |  4 ++--
>  qapi/migration.json      |  4 ++--
>  4 files changed, 36 insertions(+), 23 deletions(-)
>
> diff --git a/hmp.c b/hmp.c
> index c6bab53..3c376b3 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -265,7 +265,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
>      }
>
>      if (info->has_postcopy_blocktime) {
> -        monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
> +        monitor_printf(mon, "postcopy blocktime: %u\n",
>                         info->postcopy_blocktime);
>      }
>
> @@ -273,7 +273,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
>          Visitor *v;
>          char *str;
>          v = string_output_visitor_new(false, &str);
> -        visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
> +        visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
>          visit_complete(v, &str);
>          monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
>          g_free(str);
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index 7814da5..ce91de8 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -63,14 +63,14 @@ struct PostcopyDiscardState {
>
>  typedef struct PostcopyBlocktimeContext {
>      /* time when page fault initiated per vCPU */
> -    int64_t *page_fault_vcpu_time;
> +    uint32_t *page_fault_vcpu_time;
>      /* page address per vCPU */
>      uintptr_t *vcpu_addr;
> -    int64_t total_blocktime;
> +    uint32_t total_blocktime;
>      /* blocktime per vCPU */
> -    int64_t *vcpu_blocktime;
> +    uint32_t *vcpu_blocktime;
>      /* point in time when last page fault was initiated */
> -    int64_t last_begin;
> +    uint32_t last_begin;
>      /* number of vCPU are suspended */
>      int smp_cpus_down;
>
> @@ -99,22 +99,22 @@ static void migration_exit_cb(Notifier *n, void *data)
>  static struct PostcopyBlocktimeContext *blocktime_context_new(void)
>  {
>      PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
> -    ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
> +    ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
>      ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
> -    ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
> +    ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus);
>
>      ctx->exit_notifier.notify = migration_exit_cb;
>      qemu_add_exit_notifier(&ctx->exit_notifier);
>      return ctx;
>  }
>
> -static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
> +static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
>  {
> -    int64List *list = NULL, *entry = NULL;
> +    uint32List *list = NULL, *entry = NULL;
>      int i;
>
>      for (i = smp_cpus - 1; i >= 0; i--) {
> -        entry = g_new0(int64List, 1);
> +        entry = g_new0(uint32List, 1);
>          entry->value = ctx->vcpu_blocktime[i];
>          entry->next = list;
>          list = entry;
> @@ -145,7 +145,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info)
>      info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc);
>  }
>
> -static uint64_t get_postcopy_total_blocktime(void)
> +static uint32_t get_postcopy_total_blocktime(void)
>  {
>      MigrationIncomingState *mis = migration_incoming_get_current();
>      PostcopyBlocktimeContext *bc = mis->blocktime_ctx;
> @@ -619,6 +619,16 @@ static int get_mem_fault_cpu_index(uint32_t pid)
>      return -1;
>  }
>
> +static uint32_t get_least_significant_part(int64_t value)
> +{
> +    unsigned char *t = (unsigned char *)&value;
> +#if defined(HOST_WORDS_BIGENDIAN)
> +    return t[4] << 24 | t[5] << 16 | t[6] << 8 | t[7] << 0;
> +#else
> +    return t[0] << 0 | t[1] << 8 | t[2] << 16 | t[3] << 24;
> +#endif /* HOST_WORDS_BIGENDIAN */

Isn't it the same than:

         return ldq_he_p(&value); /* silently casted to uint32_t */

> +}
> +
>  /*
>   * This function is being called when pagefault occurs. It
>   * tracks down vCPU blocking time.
> @@ -634,6 +644,7 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
>      MigrationIncomingState *mis = migration_incoming_get_current();
>      PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
>      int64_t now_ms;
> +    uint32_t least_now;
>
>      if (!dc || ptid == 0) {
>          return;
> @@ -644,13 +655,14 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
>      }
>
>      now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> +    least_now = get_least_significant_part(now_ms);
>      if (dc->vcpu_addr[cpu] == 0) {
>          atomic_inc(&dc->smp_cpus_down);
>      }
>
> -    atomic_xchg__nocheck(&dc->last_begin, now_ms);
> -    atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);
> -    atomic_xchg__nocheck(&dc->vcpu_addr[cpu], addr);
> +    atomic_xchg(&dc->last_begin, least_now);
> +    atomic_xchg(&dc->page_fault_vcpu_time[cpu], least_now);
> +    atomic_xchg(&dc->vcpu_addr[cpu], addr);
>
>      /* check it here, not at the begining of the function,
>       * due to, check could accur early than bitmap_set in
> @@ -699,20 +711,21 @@ static void mark_postcopy_blocktime_end(uintptr_t addr)
>      int i, affected_cpu = 0;
>      int64_t now_ms;
>      bool vcpu_total_blocktime = false;
> -    int64_t read_vcpu_time;
> +    uint32_t read_vcpu_time, least_now;
>
>      if (!dc) {
>          return;
>      }
>
>      now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> +    least_now = get_least_significant_part(now_ms);
>
>      /* lookup cpu, to clear it,
>       * that algorithm looks straighforward, but it's not
>       * optimal, more optimal algorithm is keeping tree or hash
>       * where key is address value is a list of  */
>      for (i = 0; i < smp_cpus; i++) {
> -        uint64_t vcpu_blocktime = 0;
> +        uint32_t vcpu_blocktime = 0;
>
>          read_vcpu_time = atomic_fetch_add(&dc->page_fault_vcpu_time[i], 0);
>          if (atomic_fetch_add(&dc->vcpu_addr[i], 0) != addr ||
> @@ -720,7 +733,7 @@ static void mark_postcopy_blocktime_end(uintptr_t addr)
>              continue;
>          }
>          atomic_xchg__nocheck(&dc->vcpu_addr[i], 0);
> -        vcpu_blocktime = now_ms - read_vcpu_time;
> +        vcpu_blocktime = least_now - read_vcpu_time;
>          affected_cpu += 1;
>          /* we need to know is that mark_postcopy_end was due to
>           * faulted page, another possible case it's prefetched
> @@ -735,7 +748,7 @@ static void mark_postcopy_blocktime_end(uintptr_t addr)
>
>      atomic_sub(&dc->smp_cpus_down, affected_cpu);
>      if (vcpu_total_blocktime) {
> -        dc->total_blocktime += now_ms - atomic_fetch_add(&dc->last_begin, 0);
> +        dc->total_blocktime += least_now - atomic_fetch_add(&dc->last_begin, 0);
>      }
>      trace_mark_postcopy_blocktime_end(addr, dc, dc->total_blocktime,
>                                        affected_cpu);
> diff --git a/migration/trace-events b/migration/trace-events
> index 141e773..0defbc3 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -115,8 +115,8 @@ process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d"
>  process_incoming_migration_co_postcopy_end_main(void) ""
>  migration_set_incoming_channel(void *ioc, const char *ioctype) "ioc=%p ioctype=%s"
>  migration_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostname)  "ioc=%p ioctype=%s hostname=%s"
> -mark_postcopy_blocktime_begin(uint64_t addr, void *dd, int64_t time, int cpu, int received) "addr: 0x%" PRIx64 ", dd: %p, time: %" PRId64 ", cpu: %d, already_received: %d"
> -mark_postcopy_blocktime_end(uint64_t addr, void *dd, int64_t time, int affected_cpu) "addr: 0x%" PRIx64 ", dd: %p, time: %" PRId64 ", affected_cpu: %d"
> +mark_postcopy_blocktime_begin(uint64_t addr, void *dd, uint32_t time, int cpu, int received) "addr: 0x%" PRIx64 ", dd: %p, time: %u, cpu: %d, already_received: %d"
> +mark_postcopy_blocktime_end(uint64_t addr, void *dd, uint32_t time, int affected_cpu) "addr: 0x%" PRIx64 ", dd: %p, time: %u, affected_cpu: %d"
>
>  # migration/rdma.c
>  qemu_rdma_accept_incoming_migration(void) ""
> diff --git a/qapi/migration.json b/qapi/migration.json
> index 70e7b67..ee55d7c 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -175,8 +175,8 @@
>             '*setup-time': 'int',
>             '*cpu-throttle-percentage': 'int',
>             '*error-desc': 'str',
> -           '*postcopy-blocktime' : 'int64',
> -           '*postcopy-vcpu-blocktime': ['int64']} }
> +           '*postcopy-blocktime' : 'uint32',
> +           '*postcopy-vcpu-blocktime': ['uint32']} }
>
>  ##
>  # @query-migrate:
> --
> 2.7.4
>
Dr. David Alan Gilbert Jan. 25, 2018, 8:02 p.m. UTC | #2
* Alexey Perevalov (a.perevalov@samsung.com) wrote:
> Initially int64_t was used, but on PowerPC architecture,
> clang doesn't have atomic_*_8 function, so it produces
> link time error.
> 
> QEMU is working with time as with 64bit value, but by
> fact 32 bit is enough with CLOCK_REALTIME. In this case
> blocktime will keep only 1200 hours time interval.
> 
> Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com>
> ---
>  hmp.c                    |  4 ++--
>  migration/postcopy-ram.c | 47 ++++++++++++++++++++++++++++++-----------------
>  migration/trace-events   |  4 ++--
>  qapi/migration.json      |  4 ++--
>  4 files changed, 36 insertions(+), 23 deletions(-)
> 
> diff --git a/hmp.c b/hmp.c
> index c6bab53..3c376b3 100644
> --- a/hmp.c
> +++ b/hmp.c
> @@ -265,7 +265,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
>      }
>  
>      if (info->has_postcopy_blocktime) {
> -        monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
> +        monitor_printf(mon, "postcopy blocktime: %u\n",
>                         info->postcopy_blocktime);
>      }
>  
> @@ -273,7 +273,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
>          Visitor *v;
>          char *str;
>          v = string_output_visitor_new(false, &str);
> -        visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
> +        visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
>          visit_complete(v, &str);
>          monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
>          g_free(str);
> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
> index 7814da5..ce91de8 100644
> --- a/migration/postcopy-ram.c
> +++ b/migration/postcopy-ram.c
> @@ -63,14 +63,14 @@ struct PostcopyDiscardState {
>  
>  typedef struct PostcopyBlocktimeContext {
>      /* time when page fault initiated per vCPU */
> -    int64_t *page_fault_vcpu_time;
> +    uint32_t *page_fault_vcpu_time;
>      /* page address per vCPU */
>      uintptr_t *vcpu_addr;
> -    int64_t total_blocktime;
> +    uint32_t total_blocktime;
>      /* blocktime per vCPU */
> -    int64_t *vcpu_blocktime;
> +    uint32_t *vcpu_blocktime;
>      /* point in time when last page fault was initiated */
> -    int64_t last_begin;
> +    uint32_t last_begin;
>      /* number of vCPU are suspended */
>      int smp_cpus_down;
>  
> @@ -99,22 +99,22 @@ static void migration_exit_cb(Notifier *n, void *data)
>  static struct PostcopyBlocktimeContext *blocktime_context_new(void)
>  {
>      PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
> -    ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
> +    ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
>      ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
> -    ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
> +    ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus);
>  
>      ctx->exit_notifier.notify = migration_exit_cb;
>      qemu_add_exit_notifier(&ctx->exit_notifier);
>      return ctx;
>  }
>  
> -static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
> +static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
>  {
> -    int64List *list = NULL, *entry = NULL;
> +    uint32List *list = NULL, *entry = NULL;
>      int i;
>  
>      for (i = smp_cpus - 1; i >= 0; i--) {
> -        entry = g_new0(int64List, 1);
> +        entry = g_new0(uint32List, 1);
>          entry->value = ctx->vcpu_blocktime[i];
>          entry->next = list;
>          list = entry;
> @@ -145,7 +145,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info)
>      info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc);
>  }
>  
> -static uint64_t get_postcopy_total_blocktime(void)
> +static uint32_t get_postcopy_total_blocktime(void)
>  {
>      MigrationIncomingState *mis = migration_incoming_get_current();
>      PostcopyBlocktimeContext *bc = mis->blocktime_ctx;
> @@ -619,6 +619,16 @@ static int get_mem_fault_cpu_index(uint32_t pid)
>      return -1;
>  }
>  
> +static uint32_t get_least_significant_part(int64_t value)
> +{
> +    unsigned char *t = (unsigned char *)&value;
> +#if defined(HOST_WORDS_BIGENDIAN)
> +    return t[4] << 24 | t[5] << 16 | t[6] << 8 | t[7] << 0;
> +#else
> +    return t[0] << 0 | t[1] << 8 | t[2] << 16 | t[3] << 24;
> +#endif /* HOST_WORDS_BIGENDIAN */
> +}

This doesn't feel right.
Firstly, we're doing a check for the magic value of read_vcpu_time==0 in
mark_postcopy_blocktime_end - so we have to be careful not to hit it.
Just masking the bottom 32bits of time means we've got a (rare) chance
of hitting that; but we've got a much less rare change of hitting
the case where one of the measurements happens after the roll-over
of the bottom 32bits.
If you stored a time at the start of the postcopy and just
subtracted that from 'now' you're probably OK though.

Anding a 64bit value with 0xffffffff and casting is probably much
easier than shifting etc.

Dave


>  /*
>   * This function is being called when pagefault occurs. It
>   * tracks down vCPU blocking time.
> @@ -634,6 +644,7 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
>      MigrationIncomingState *mis = migration_incoming_get_current();
>      PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
>      int64_t now_ms;
> +    uint32_t least_now;
>  
>      if (!dc || ptid == 0) {
>          return;
> @@ -644,13 +655,14 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
>      }
>  
>      now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> +    least_now = get_least_significant_part(now_ms);
>      if (dc->vcpu_addr[cpu] == 0) {
>          atomic_inc(&dc->smp_cpus_down);
>      }
>  
> -    atomic_xchg__nocheck(&dc->last_begin, now_ms);
> -    atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);
> -    atomic_xchg__nocheck(&dc->vcpu_addr[cpu], addr);
> +    atomic_xchg(&dc->last_begin, least_now);
> +    atomic_xchg(&dc->page_fault_vcpu_time[cpu], least_now);
> +    atomic_xchg(&dc->vcpu_addr[cpu], addr);
>  
>      /* check it here, not at the begining of the function,
>       * due to, check could accur early than bitmap_set in
> @@ -699,20 +711,21 @@ static void mark_postcopy_blocktime_end(uintptr_t addr)
>      int i, affected_cpu = 0;
>      int64_t now_ms;
>      bool vcpu_total_blocktime = false;
> -    int64_t read_vcpu_time;
> +    uint32_t read_vcpu_time, least_now;
>  
>      if (!dc) {
>          return;
>      }
>  
>      now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
> +    least_now = get_least_significant_part(now_ms);
>  
>      /* lookup cpu, to clear it,
>       * that algorithm looks straighforward, but it's not
>       * optimal, more optimal algorithm is keeping tree or hash
>       * where key is address value is a list of  */
>      for (i = 0; i < smp_cpus; i++) {
> -        uint64_t vcpu_blocktime = 0;
> +        uint32_t vcpu_blocktime = 0;
>  
>          read_vcpu_time = atomic_fetch_add(&dc->page_fault_vcpu_time[i], 0);
>          if (atomic_fetch_add(&dc->vcpu_addr[i], 0) != addr ||
> @@ -720,7 +733,7 @@ static void mark_postcopy_blocktime_end(uintptr_t addr)
>              continue;
>          }
>          atomic_xchg__nocheck(&dc->vcpu_addr[i], 0);
> -        vcpu_blocktime = now_ms - read_vcpu_time;
> +        vcpu_blocktime = least_now - read_vcpu_time;
>          affected_cpu += 1;
>          /* we need to know is that mark_postcopy_end was due to
>           * faulted page, another possible case it's prefetched
> @@ -735,7 +748,7 @@ static void mark_postcopy_blocktime_end(uintptr_t addr)
>  
>      atomic_sub(&dc->smp_cpus_down, affected_cpu);
>      if (vcpu_total_blocktime) {
> -        dc->total_blocktime += now_ms - atomic_fetch_add(&dc->last_begin, 0);
> +        dc->total_blocktime += least_now - atomic_fetch_add(&dc->last_begin, 0);
>      }
>      trace_mark_postcopy_blocktime_end(addr, dc, dc->total_blocktime,
>                                        affected_cpu);
> diff --git a/migration/trace-events b/migration/trace-events
> index 141e773..0defbc3 100644
> --- a/migration/trace-events
> +++ b/migration/trace-events
> @@ -115,8 +115,8 @@ process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d"
>  process_incoming_migration_co_postcopy_end_main(void) ""
>  migration_set_incoming_channel(void *ioc, const char *ioctype) "ioc=%p ioctype=%s"
>  migration_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostname)  "ioc=%p ioctype=%s hostname=%s"
> -mark_postcopy_blocktime_begin(uint64_t addr, void *dd, int64_t time, int cpu, int received) "addr: 0x%" PRIx64 ", dd: %p, time: %" PRId64 ", cpu: %d, already_received: %d"
> -mark_postcopy_blocktime_end(uint64_t addr, void *dd, int64_t time, int affected_cpu) "addr: 0x%" PRIx64 ", dd: %p, time: %" PRId64 ", affected_cpu: %d"
> +mark_postcopy_blocktime_begin(uint64_t addr, void *dd, uint32_t time, int cpu, int received) "addr: 0x%" PRIx64 ", dd: %p, time: %u, cpu: %d, already_received: %d"
> +mark_postcopy_blocktime_end(uint64_t addr, void *dd, uint32_t time, int affected_cpu) "addr: 0x%" PRIx64 ", dd: %p, time: %u, affected_cpu: %d"
>  
>  # migration/rdma.c
>  qemu_rdma_accept_incoming_migration(void) ""
> diff --git a/qapi/migration.json b/qapi/migration.json
> index 70e7b67..ee55d7c 100644
> --- a/qapi/migration.json
> +++ b/qapi/migration.json
> @@ -175,8 +175,8 @@
>             '*setup-time': 'int',
>             '*cpu-throttle-percentage': 'int',
>             '*error-desc': 'str',
> -           '*postcopy-blocktime' : 'int64',
> -           '*postcopy-vcpu-blocktime': ['int64']} }
> +           '*postcopy-blocktime' : 'uint32',
> +           '*postcopy-vcpu-blocktime': ['uint32']} }
>  
>  ##
>  # @query-migrate:
> -- 
> 2.7.4
> 
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Eric Blake Jan. 25, 2018, 8:37 p.m. UTC | #3
On 01/25/2018 10:43 AM, Alexey Perevalov wrote:
> Initially int64_t was used, but on PowerPC architecture,
> clang doesn't have atomic_*_8 function, so it produces
> link time error.
> 
> QEMU is working with time as with 64bit value, but by
> fact 32 bit is enough with CLOCK_REALTIME. In this case
> blocktime will keep only 1200 hours time interval.
> 
> Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com>
> ---

> +++ b/qapi/migration.json
> @@ -175,8 +175,8 @@
>             '*setup-time': 'int',
>             '*cpu-throttle-percentage': 'int',
>             '*error-desc': 'str',
> -           '*postcopy-blocktime' : 'int64',
> -           '*postcopy-vcpu-blocktime': ['int64']} }
> +           '*postcopy-blocktime' : 'uint32',
> +           '*postcopy-vcpu-blocktime': ['uint32']} }

Questionable to change types if this had been released already - but
these fields are marked 'since 2.12' so you can change them at will up
until the release.

Acked-by: Eric Blake <eblake@redhat.com>
Alexey Perevalov Jan. 29, 2018, 7:47 a.m. UTC | #4
On 01/25/2018 11:02 PM, Dr. David Alan Gilbert wrote:
> * Alexey Perevalov (a.perevalov@samsung.com) wrote:
>> Initially int64_t was used, but on PowerPC architecture,
>> clang doesn't have atomic_*_8 function, so it produces
>> link time error.
>>
>> QEMU is working with time as with 64bit value, but by
>> fact 32 bit is enough with CLOCK_REALTIME. In this case
>> blocktime will keep only 1200 hours time interval.
>>
>> Signed-off-by: Alexey Perevalov <a.perevalov@samsung.com>
>> ---
>>   hmp.c                    |  4 ++--
>>   migration/postcopy-ram.c | 47 ++++++++++++++++++++++++++++++-----------------
>>   migration/trace-events   |  4 ++--
>>   qapi/migration.json      |  4 ++--
>>   4 files changed, 36 insertions(+), 23 deletions(-)
>>
>> diff --git a/hmp.c b/hmp.c
>> index c6bab53..3c376b3 100644
>> --- a/hmp.c
>> +++ b/hmp.c
>> @@ -265,7 +265,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
>>       }
>>   
>>       if (info->has_postcopy_blocktime) {
>> -        monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
>> +        monitor_printf(mon, "postcopy blocktime: %u\n",
>>                          info->postcopy_blocktime);
>>       }
>>   
>> @@ -273,7 +273,7 @@ void hmp_info_migrate(Monitor *mon, const QDict *qdict)
>>           Visitor *v;
>>           char *str;
>>           v = string_output_visitor_new(false, &str);
>> -        visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
>> +        visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
>>           visit_complete(v, &str);
>>           monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
>>           g_free(str);
>> diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
>> index 7814da5..ce91de8 100644
>> --- a/migration/postcopy-ram.c
>> +++ b/migration/postcopy-ram.c
>> @@ -63,14 +63,14 @@ struct PostcopyDiscardState {
>>   
>>   typedef struct PostcopyBlocktimeContext {
>>       /* time when page fault initiated per vCPU */
>> -    int64_t *page_fault_vcpu_time;
>> +    uint32_t *page_fault_vcpu_time;
>>       /* page address per vCPU */
>>       uintptr_t *vcpu_addr;
>> -    int64_t total_blocktime;
>> +    uint32_t total_blocktime;
>>       /* blocktime per vCPU */
>> -    int64_t *vcpu_blocktime;
>> +    uint32_t *vcpu_blocktime;
>>       /* point in time when last page fault was initiated */
>> -    int64_t last_begin;
>> +    uint32_t last_begin;
>>       /* number of vCPU are suspended */
>>       int smp_cpus_down;
>>   
>> @@ -99,22 +99,22 @@ static void migration_exit_cb(Notifier *n, void *data)
>>   static struct PostcopyBlocktimeContext *blocktime_context_new(void)
>>   {
>>       PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
>> -    ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
>> +    ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
>>       ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
>> -    ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
>> +    ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus);
>>   
>>       ctx->exit_notifier.notify = migration_exit_cb;
>>       qemu_add_exit_notifier(&ctx->exit_notifier);
>>       return ctx;
>>   }
>>   
>> -static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
>> +static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
>>   {
>> -    int64List *list = NULL, *entry = NULL;
>> +    uint32List *list = NULL, *entry = NULL;
>>       int i;
>>   
>>       for (i = smp_cpus - 1; i >= 0; i--) {
>> -        entry = g_new0(int64List, 1);
>> +        entry = g_new0(uint32List, 1);
>>           entry->value = ctx->vcpu_blocktime[i];
>>           entry->next = list;
>>           list = entry;
>> @@ -145,7 +145,7 @@ void fill_destination_postcopy_migration_info(MigrationInfo *info)
>>       info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc);
>>   }
>>   
>> -static uint64_t get_postcopy_total_blocktime(void)
>> +static uint32_t get_postcopy_total_blocktime(void)
>>   {
>>       MigrationIncomingState *mis = migration_incoming_get_current();
>>       PostcopyBlocktimeContext *bc = mis->blocktime_ctx;
>> @@ -619,6 +619,16 @@ static int get_mem_fault_cpu_index(uint32_t pid)
>>       return -1;
>>   }
>>   
>> +static uint32_t get_least_significant_part(int64_t value)
>> +{
>> +    unsigned char *t = (unsigned char *)&value;
>> +#if defined(HOST_WORDS_BIGENDIAN)
>> +    return t[4] << 24 | t[5] << 16 | t[6] << 8 | t[7] << 0;
>> +#else
>> +    return t[0] << 0 | t[1] << 8 | t[2] << 16 | t[3] << 24;
>> +#endif /* HOST_WORDS_BIGENDIAN */
>> +}
> This doesn't feel right.
> Firstly, we're doing a check for the magic value of read_vcpu_time==0 in
> mark_postcopy_blocktime_end - so we have to be careful not to hit it.
> Just masking the bottom 32bits of time means we've got a (rare) chance
> of hitting that; but we've got a much less rare change of hitting
> the case where one of the measurements happens after the roll-over
> of the bottom 32bits.
> If you stored a time at the start of the postcopy and just
> subtracted that from 'now' you're probably OK though.
Here not so clearly for me.
I though we get some "now" and it doesn't matter how,
anding or shifting or qemu's internal API returns already
32bit value, frankly saying I was wonder why qemu_clock_get_ms
have to return 64 bit value, when only one part is filling even on 64bit
machine, maybe it was done for the sake of API unification.
 From my point of view, code should work as before.


>
> Anding a 64bit value with 0xffffffff and casting is probably much
> easier than shifting etc.
It's right point, I just missed it in standard, I thought it compiler 
implementation-defined.

>
> Dave
>
>
>>   /*
>>    * This function is being called when pagefault occurs. It
>>    * tracks down vCPU blocking time.
>> @@ -634,6 +644,7 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
>>       MigrationIncomingState *mis = migration_incoming_get_current();
>>       PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
>>       int64_t now_ms;
>> +    uint32_t least_now;
>>   
>>       if (!dc || ptid == 0) {
>>           return;
>> @@ -644,13 +655,14 @@ static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
>>       }
>>   
>>       now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
>> +    least_now = get_least_significant_part(now_ms);
>>       if (dc->vcpu_addr[cpu] == 0) {
>>           atomic_inc(&dc->smp_cpus_down);
>>       }
>>   
>> -    atomic_xchg__nocheck(&dc->last_begin, now_ms);
>> -    atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);
>> -    atomic_xchg__nocheck(&dc->vcpu_addr[cpu], addr);
>> +    atomic_xchg(&dc->last_begin, least_now);
>> +    atomic_xchg(&dc->page_fault_vcpu_time[cpu], least_now);
>> +    atomic_xchg(&dc->vcpu_addr[cpu], addr);
>>   
>>       /* check it here, not at the begining of the function,
>>        * due to, check could accur early than bitmap_set in
>> @@ -699,20 +711,21 @@ static void mark_postcopy_blocktime_end(uintptr_t addr)
>>       int i, affected_cpu = 0;
>>       int64_t now_ms;
>>       bool vcpu_total_blocktime = false;
>> -    int64_t read_vcpu_time;
>> +    uint32_t read_vcpu_time, least_now;
>>   
>>       if (!dc) {
>>           return;
>>       }
>>   
>>       now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
>> +    least_now = get_least_significant_part(now_ms);
>>   
>>       /* lookup cpu, to clear it,
>>        * that algorithm looks straighforward, but it's not
>>        * optimal, more optimal algorithm is keeping tree or hash
>>        * where key is address value is a list of  */
>>       for (i = 0; i < smp_cpus; i++) {
>> -        uint64_t vcpu_blocktime = 0;
>> +        uint32_t vcpu_blocktime = 0;
>>   
>>           read_vcpu_time = atomic_fetch_add(&dc->page_fault_vcpu_time[i], 0);
>>           if (atomic_fetch_add(&dc->vcpu_addr[i], 0) != addr ||
>> @@ -720,7 +733,7 @@ static void mark_postcopy_blocktime_end(uintptr_t addr)
>>               continue;
>>           }
>>           atomic_xchg__nocheck(&dc->vcpu_addr[i], 0);
>> -        vcpu_blocktime = now_ms - read_vcpu_time;
>> +        vcpu_blocktime = least_now - read_vcpu_time;
>>           affected_cpu += 1;
>>           /* we need to know is that mark_postcopy_end was due to
>>            * faulted page, another possible case it's prefetched
>> @@ -735,7 +748,7 @@ static void mark_postcopy_blocktime_end(uintptr_t addr)
>>   
>>       atomic_sub(&dc->smp_cpus_down, affected_cpu);
>>       if (vcpu_total_blocktime) {
>> -        dc->total_blocktime += now_ms - atomic_fetch_add(&dc->last_begin, 0);
>> +        dc->total_blocktime += least_now - atomic_fetch_add(&dc->last_begin, 0);
>>       }
>>       trace_mark_postcopy_blocktime_end(addr, dc, dc->total_blocktime,
>>                                         affected_cpu);
>> diff --git a/migration/trace-events b/migration/trace-events
>> index 141e773..0defbc3 100644
>> --- a/migration/trace-events
>> +++ b/migration/trace-events
>> @@ -115,8 +115,8 @@ process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d"
>>   process_incoming_migration_co_postcopy_end_main(void) ""
>>   migration_set_incoming_channel(void *ioc, const char *ioctype) "ioc=%p ioctype=%s"
>>   migration_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostname)  "ioc=%p ioctype=%s hostname=%s"
>> -mark_postcopy_blocktime_begin(uint64_t addr, void *dd, int64_t time, int cpu, int received) "addr: 0x%" PRIx64 ", dd: %p, time: %" PRId64 ", cpu: %d, already_received: %d"
>> -mark_postcopy_blocktime_end(uint64_t addr, void *dd, int64_t time, int affected_cpu) "addr: 0x%" PRIx64 ", dd: %p, time: %" PRId64 ", affected_cpu: %d"
>> +mark_postcopy_blocktime_begin(uint64_t addr, void *dd, uint32_t time, int cpu, int received) "addr: 0x%" PRIx64 ", dd: %p, time: %u, cpu: %d, already_received: %d"
>> +mark_postcopy_blocktime_end(uint64_t addr, void *dd, uint32_t time, int affected_cpu) "addr: 0x%" PRIx64 ", dd: %p, time: %u, affected_cpu: %d"
>>   
>>   # migration/rdma.c
>>   qemu_rdma_accept_incoming_migration(void) ""
>> diff --git a/qapi/migration.json b/qapi/migration.json
>> index 70e7b67..ee55d7c 100644
>> --- a/qapi/migration.json
>> +++ b/qapi/migration.json
>> @@ -175,8 +175,8 @@
>>              '*setup-time': 'int',
>>              '*cpu-throttle-percentage': 'int',
>>              '*error-desc': 'str',
>> -           '*postcopy-blocktime' : 'int64',
>> -           '*postcopy-vcpu-blocktime': ['int64']} }
>> +           '*postcopy-blocktime' : 'uint32',
>> +           '*postcopy-vcpu-blocktime': ['uint32']} }
>>   
>>   ##
>>   # @query-migrate:
>> -- 
>> 2.7.4
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
>
>
diff mbox

Patch

diff --git a/hmp.c b/hmp.c
index c6bab53..3c376b3 100644
--- a/hmp.c
+++ b/hmp.c
@@ -265,7 +265,7 @@  void hmp_info_migrate(Monitor *mon, const QDict *qdict)
     }
 
     if (info->has_postcopy_blocktime) {
-        monitor_printf(mon, "postcopy blocktime: %" PRId64 "\n",
+        monitor_printf(mon, "postcopy blocktime: %u\n",
                        info->postcopy_blocktime);
     }
 
@@ -273,7 +273,7 @@  void hmp_info_migrate(Monitor *mon, const QDict *qdict)
         Visitor *v;
         char *str;
         v = string_output_visitor_new(false, &str);
-        visit_type_int64List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
+        visit_type_uint32List(v, NULL, &info->postcopy_vcpu_blocktime, NULL);
         visit_complete(v, &str);
         monitor_printf(mon, "postcopy vcpu blocktime: %s\n", str);
         g_free(str);
diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c
index 7814da5..ce91de8 100644
--- a/migration/postcopy-ram.c
+++ b/migration/postcopy-ram.c
@@ -63,14 +63,14 @@  struct PostcopyDiscardState {
 
 typedef struct PostcopyBlocktimeContext {
     /* time when page fault initiated per vCPU */
-    int64_t *page_fault_vcpu_time;
+    uint32_t *page_fault_vcpu_time;
     /* page address per vCPU */
     uintptr_t *vcpu_addr;
-    int64_t total_blocktime;
+    uint32_t total_blocktime;
     /* blocktime per vCPU */
-    int64_t *vcpu_blocktime;
+    uint32_t *vcpu_blocktime;
     /* point in time when last page fault was initiated */
-    int64_t last_begin;
+    uint32_t last_begin;
     /* number of vCPU are suspended */
     int smp_cpus_down;
 
@@ -99,22 +99,22 @@  static void migration_exit_cb(Notifier *n, void *data)
 static struct PostcopyBlocktimeContext *blocktime_context_new(void)
 {
     PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
-    ctx->page_fault_vcpu_time = g_new0(int64_t, smp_cpus);
+    ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
     ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
-    ctx->vcpu_blocktime = g_new0(int64_t, smp_cpus);
+    ctx->vcpu_blocktime = g_new0(uint32_t, smp_cpus);
 
     ctx->exit_notifier.notify = migration_exit_cb;
     qemu_add_exit_notifier(&ctx->exit_notifier);
     return ctx;
 }
 
-static int64List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
+static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
 {
-    int64List *list = NULL, *entry = NULL;
+    uint32List *list = NULL, *entry = NULL;
     int i;
 
     for (i = smp_cpus - 1; i >= 0; i--) {
-        entry = g_new0(int64List, 1);
+        entry = g_new0(uint32List, 1);
         entry->value = ctx->vcpu_blocktime[i];
         entry->next = list;
         list = entry;
@@ -145,7 +145,7 @@  void fill_destination_postcopy_migration_info(MigrationInfo *info)
     info->postcopy_vcpu_blocktime = get_vcpu_blocktime_list(bc);
 }
 
-static uint64_t get_postcopy_total_blocktime(void)
+static uint32_t get_postcopy_total_blocktime(void)
 {
     MigrationIncomingState *mis = migration_incoming_get_current();
     PostcopyBlocktimeContext *bc = mis->blocktime_ctx;
@@ -619,6 +619,16 @@  static int get_mem_fault_cpu_index(uint32_t pid)
     return -1;
 }
 
+static uint32_t get_least_significant_part(int64_t value)
+{
+    unsigned char *t = (unsigned char *)&value;
+#if defined(HOST_WORDS_BIGENDIAN)
+    return t[4] << 24 | t[5] << 16 | t[6] << 8 | t[7] << 0;
+#else
+    return t[0] << 0 | t[1] << 8 | t[2] << 16 | t[3] << 24;
+#endif /* HOST_WORDS_BIGENDIAN */
+}
+
 /*
  * This function is being called when pagefault occurs. It
  * tracks down vCPU blocking time.
@@ -634,6 +644,7 @@  static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
     MigrationIncomingState *mis = migration_incoming_get_current();
     PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
     int64_t now_ms;
+    uint32_t least_now;
 
     if (!dc || ptid == 0) {
         return;
@@ -644,13 +655,14 @@  static void mark_postcopy_blocktime_begin(uintptr_t addr, uint32_t ptid,
     }
 
     now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    least_now = get_least_significant_part(now_ms);
     if (dc->vcpu_addr[cpu] == 0) {
         atomic_inc(&dc->smp_cpus_down);
     }
 
-    atomic_xchg__nocheck(&dc->last_begin, now_ms);
-    atomic_xchg__nocheck(&dc->page_fault_vcpu_time[cpu], now_ms);
-    atomic_xchg__nocheck(&dc->vcpu_addr[cpu], addr);
+    atomic_xchg(&dc->last_begin, least_now);
+    atomic_xchg(&dc->page_fault_vcpu_time[cpu], least_now);
+    atomic_xchg(&dc->vcpu_addr[cpu], addr);
 
     /* check it here, not at the begining of the function,
      * due to, check could accur early than bitmap_set in
@@ -699,20 +711,21 @@  static void mark_postcopy_blocktime_end(uintptr_t addr)
     int i, affected_cpu = 0;
     int64_t now_ms;
     bool vcpu_total_blocktime = false;
-    int64_t read_vcpu_time;
+    uint32_t read_vcpu_time, least_now;
 
     if (!dc) {
         return;
     }
 
     now_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    least_now = get_least_significant_part(now_ms);
 
     /* lookup cpu, to clear it,
      * that algorithm looks straighforward, but it's not
      * optimal, more optimal algorithm is keeping tree or hash
      * where key is address value is a list of  */
     for (i = 0; i < smp_cpus; i++) {
-        uint64_t vcpu_blocktime = 0;
+        uint32_t vcpu_blocktime = 0;
 
         read_vcpu_time = atomic_fetch_add(&dc->page_fault_vcpu_time[i], 0);
         if (atomic_fetch_add(&dc->vcpu_addr[i], 0) != addr ||
@@ -720,7 +733,7 @@  static void mark_postcopy_blocktime_end(uintptr_t addr)
             continue;
         }
         atomic_xchg__nocheck(&dc->vcpu_addr[i], 0);
-        vcpu_blocktime = now_ms - read_vcpu_time;
+        vcpu_blocktime = least_now - read_vcpu_time;
         affected_cpu += 1;
         /* we need to know is that mark_postcopy_end was due to
          * faulted page, another possible case it's prefetched
@@ -735,7 +748,7 @@  static void mark_postcopy_blocktime_end(uintptr_t addr)
 
     atomic_sub(&dc->smp_cpus_down, affected_cpu);
     if (vcpu_total_blocktime) {
-        dc->total_blocktime += now_ms - atomic_fetch_add(&dc->last_begin, 0);
+        dc->total_blocktime += least_now - atomic_fetch_add(&dc->last_begin, 0);
     }
     trace_mark_postcopy_blocktime_end(addr, dc, dc->total_blocktime,
                                       affected_cpu);
diff --git a/migration/trace-events b/migration/trace-events
index 141e773..0defbc3 100644
--- a/migration/trace-events
+++ b/migration/trace-events
@@ -115,8 +115,8 @@  process_incoming_migration_co_end(int ret, int ps) "ret=%d postcopy-state=%d"
 process_incoming_migration_co_postcopy_end_main(void) ""
 migration_set_incoming_channel(void *ioc, const char *ioctype) "ioc=%p ioctype=%s"
 migration_set_outgoing_channel(void *ioc, const char *ioctype, const char *hostname)  "ioc=%p ioctype=%s hostname=%s"
-mark_postcopy_blocktime_begin(uint64_t addr, void *dd, int64_t time, int cpu, int received) "addr: 0x%" PRIx64 ", dd: %p, time: %" PRId64 ", cpu: %d, already_received: %d"
-mark_postcopy_blocktime_end(uint64_t addr, void *dd, int64_t time, int affected_cpu) "addr: 0x%" PRIx64 ", dd: %p, time: %" PRId64 ", affected_cpu: %d"
+mark_postcopy_blocktime_begin(uint64_t addr, void *dd, uint32_t time, int cpu, int received) "addr: 0x%" PRIx64 ", dd: %p, time: %u, cpu: %d, already_received: %d"
+mark_postcopy_blocktime_end(uint64_t addr, void *dd, uint32_t time, int affected_cpu) "addr: 0x%" PRIx64 ", dd: %p, time: %u, affected_cpu: %d"
 
 # migration/rdma.c
 qemu_rdma_accept_incoming_migration(void) ""
diff --git a/qapi/migration.json b/qapi/migration.json
index 70e7b67..ee55d7c 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -175,8 +175,8 @@ 
            '*setup-time': 'int',
            '*cpu-throttle-percentage': 'int',
            '*error-desc': 'str',
-           '*postcopy-blocktime' : 'int64',
-           '*postcopy-vcpu-blocktime': ['int64']} }
+           '*postcopy-blocktime' : 'uint32',
+           '*postcopy-vcpu-blocktime': ['uint32']} }
 
 ##
 # @query-migrate: